From 5deaa5a8c5e899f1cf873c88bc7c30cdd138d4fa Mon Sep 17 00:00:00 2001 From: larryro <371767072@qq.com> Date: Wed, 27 May 2026 18:22:20 +0800 Subject: [PATCH 01/41] refactor(platform,cli): uniform org-first config layout MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Collapse three divergent layout shapes (per-domain, @-prefixed, default-at-root) into one uniform rule: <root>/<org>/<domain>/... applies to every org including `default`, in the live data tree, the builtin catalog, the repo `examples/`, and the operator's host workspace. `default` is just another org — the canonical template, same shape. Repo + builtin: - git mv examples/<domain>/ → examples/default/<domain>/ (retention collapses to single examples/default/retention.json; branding moves under examples/default/branding/) - services/convex/Dockerfile: one COPY examples/default/ → /app/builtin/default/ - Sweep load-bearing path strings in tests, GitHub raw URL, retention error messages, and docs (en/fr/de) Convex resolvers (6 domains): single TALE_CONFIG_DIR root; resolveXxxDir(org) = join(root, org, '<domain>') for every org. Drop per-domain env overrides (AGENTS_DIR, WORKFLOWS_DIR, PROVIDERS_DIR, INTEGRATIONS_DIR, SKILLS_DIR) — platform entrypoint now unconditionally removes them from the Convex deployment env on every boot. 
scaffold.ts: - Default org is scaffold-able (no early-return); source from <builtin>/default/ with realpath-based copy-onto-self guard - New `override` arg with per-domain semantics (flat: per-file overwrite, bundle: rm-replace per bundle, tree: per-file recursive, retention: single-file copy); always preserves *.secrets.json + .history/ - cleanupOrgFilesystem: lstat symlink-hijack defense, two-phase rename-then-delete, dropped force:true on rm to surface real errors, removes one <root>/<org>/ subtree instead of per-domain loop - New reseed_all_orgs.ts internal action — cursor-loop pagination, sorted slugs, per-org try/catch, structured per-org return shape Platform consumers (previously hardcoding the old layout): - server.ts + vite-plugins/serve-branding-images.ts: branding-images path → default/branding/images (would have 404'd post-rewrite) - lib/config-watcher.ts: parseConfigChange rewritten for <org>/<domain>/<file> shape (SSE invalidation would have silently dropped events for non-default orgs otherwise) - config_store/store.ts: orgFirst option; retention flipped to <org>/retention.json with per-org-dir list() enumeration Bash entrypoint (services/convex/docker-entrypoint.sh): - mkdir creates only convex/ + default/ (legacy per-domain dirs gone) - All run_seed loops retargeted source /app/builtin/default/ → dest /app/data/default/; new branding seed loop closes a long-standing gap - atomic_cp helper (cp tmp + mv) for crash safety - Marker name carries -orgfirst layout token so downgrade re-seeds cleanly into legacy paths CLI (tools/cli/): - Delete the entire lib/upgrade/ auto-migration framework + four importers (deploy/start/update/init); -y/--yes on `tale start` kept as hidden no-op + warn-once for one-release back-compat - tale init: scaffolds default/<domain>/... 
(was flat); recursive gitignore globs (**/.history/, **/*.secrets.json); OpenRouter secret lands at default/providers/openrouter.secrets.json - tale deploy --override: rewrites to 1:1 host→/app/data/ push with allowlist filter (org-slug regex + reserved-domain-name denylist to detect legacy flat layout). Naive blocklist would have shipped .env / .git/ / .tale/ into /app/data/. - New tale deploy --override-all (implies --all): runs server-side reseed via docker exec -i bash -s into the proven scripts/2026-03-28-migrate-convex-data.sh:120-131 pattern; TTY-gated confirm; non-zero exit on any per-org failure. - New tale migrate config-layout [--dry-run] [--cleanup-old]: cp (not mv) so old paths stay readable for rollback; baked-in script.sh piped to docker exec -i bash -s; --cleanup-old sha-verifies before unlinking - exec.ts: stdin support for the bash-via-stdin pattern Tests: rewrote scaffold.test.ts (28 cases incl. override per-domain semantics, symlink defenses, copy-onto-self guard, retention single file, cleanupOrgFilesystem symlink hijack); rewrote skills/file_utils.test.ts and branding/queries.test.ts for org-first; retention.test.ts uses new example path. embedded-files.ts regenerated. bun run check passes: 36/36 tasks, 70927 tests, zero lint warnings. Operator runbook (2 commands, zero downtime): 1. tale migrate config-layout — copies providers/*.secrets.json to new paths; old paths preserved 2. tale deploy --override-all -y — implies --all; recreates convex with new entrypoint, then triggers reseed-all-orgs action 3. (Optional) tale migrate config-layout --cleanup-old — sha-verifies new == old, unlinks olds (rollback insurance until then) Behavior change — per-domain env overrides (AGENTS_DIR etc.) are no longer honored. Operators with custom paths must set TALE_CONFIG_DIR to a root and use the <org>/<domain>/ subtree. 
--- .dockerignore | 2 +- docs/de/develop/integrations.md | 2 +- docs/de/platform/integrations/overview.md | 4 +- docs/de/platform/models.md | 4 +- .../de/self-hosted/configuration/providers.md | 2 +- docs/en/develop/integrations.md | 2 +- docs/en/platform/integrations/overview.md | 4 +- docs/en/platform/models.md | 4 +- .../en/self-hosted/configuration/providers.md | 2 +- docs/fr/develop/integrations.md | 2 +- docs/fr/platform/integrations/overview.md | 4 +- docs/fr/platform/models.md | 4 +- .../fr/self-hosted/configuration/providers.md | 2 +- examples/{ => default}/agents/chat-agent.json | 0 .../{ => default}/agents/crm-assistant.json | 0 .../{ => default}/agents/image-creator.json | 0 .../agents/integration-assistant.json | 0 examples/{ => default}/agents/researcher.json | 0 examples/{ => default}/agents/translator.json | 0 .../agents/workflow-assistant.json | 0 examples/{ => default}/branding/branding.json | 0 .../integrations/ai-image/config.json | 0 .../integrations/ai-image/connector.ts | 0 .../integrations/ai-image/icon.svg | 0 .../integrations/circuly/config.json | 0 .../integrations/circuly/connector.ts | 0 .../integrations/circuly/icon.svg | 0 .../integrations/discord/config.json | 0 .../integrations/discord/connector.ts | 0 .../integrations/discord/icon.svg | 0 .../integrations/github/config.json | 0 .../integrations/github/connector.ts | 0 .../integrations/github/icon.svg | 0 .../integrations/gmail/config.json | 0 .../integrations/gmail/connector.ts | 0 .../{ => default}/integrations/gmail/icon.svg | 0 .../integrations/google_drive/config.json | 0 .../integrations/google_drive/connector.ts | 0 .../integrations/google_drive/icon.svg | 0 .../integrations/outlook/config.json | 0 .../integrations/outlook/connector.ts | 0 .../integrations/outlook/icon.svg | 0 .../integrations/protel/config.json | 0 .../integrations/protel/icon.svg | 0 .../integrations/shopify/config.json | 0 .../integrations/shopify/connector.ts | 0 .../integrations/shopify/icon.svg | 0 
.../integrations/slack/config.json | 0 .../integrations/slack/connector.ts | 0 .../{ => default}/integrations/slack/icon.svg | 0 .../integrations/tavily/config.json | 0 .../integrations/tavily/connector.ts | 0 .../integrations/tavily/icon.svg | 0 .../integrations/teams/config.json | 0 .../integrations/teams/connector.ts | 0 .../{ => default}/integrations/teams/icon.svg | 0 .../integrations/twilio/config.json | 0 .../integrations/twilio/connector.ts | 0 .../integrations/twilio/icon.svg | 0 examples/{ => default}/providers/openai.json | 0 .../{ => default}/providers/openrouter.json | 0 .../providers/vercel-gateway.json | 0 .../default.json => default/retention.json} | 0 .../{ => default}/skills/pptx/LICENSE.txt | 0 examples/{ => default}/skills/pptx/SKILL.md | 0 examples/{ => default}/skills/pptx/editing.md | 0 .../{ => default}/skills/pptx/pptxgenjs.md | 0 .../skills/pptx/scripts/__init__.py | 0 .../skills/pptx/scripts/add_slide.py | 0 .../skills/pptx/scripts/clean.py | 0 .../pptx/scripts/office/helpers/__init__.py | 0 .../pptx/scripts/office/helpers/merge_runs.py | 0 .../office/helpers/simplify_redlines.py | 0 .../skills/pptx/scripts/office/pack.py | 0 .../schemas/ISO-IEC29500-4_2016/dml-chart.xsd | 0 .../ISO-IEC29500-4_2016/dml-chartDrawing.xsd | 0 .../ISO-IEC29500-4_2016/dml-diagram.xsd | 0 .../ISO-IEC29500-4_2016/dml-lockedCanvas.xsd | 0 .../schemas/ISO-IEC29500-4_2016/dml-main.xsd | 0 .../ISO-IEC29500-4_2016/dml-picture.xsd | 0 .../dml-spreadsheetDrawing.xsd | 0 .../dml-wordprocessingDrawing.xsd | 0 .../schemas/ISO-IEC29500-4_2016/pml.xsd | 0 .../shared-additionalCharacteristics.xsd | 0 .../shared-bibliography.xsd | 0 .../shared-commonSimpleTypes.xsd | 0 .../shared-customXmlDataProperties.xsd | 0 .../shared-customXmlSchemaProperties.xsd | 0 .../shared-documentPropertiesCustom.xsd | 0 .../shared-documentPropertiesExtended.xsd | 0 .../shared-documentPropertiesVariantTypes.xsd | 0 .../ISO-IEC29500-4_2016/shared-math.xsd | 0 .../shared-relationshipReference.xsd | 0 
.../schemas/ISO-IEC29500-4_2016/sml.xsd | 0 .../schemas/ISO-IEC29500-4_2016/vml-main.xsd | 0 .../ISO-IEC29500-4_2016/vml-officeDrawing.xsd | 0 .../vml-presentationDrawing.xsd | 0 .../vml-spreadsheetDrawing.xsd | 0 .../vml-wordprocessingDrawing.xsd | 0 .../schemas/ISO-IEC29500-4_2016/wml.xsd | 0 .../schemas/ISO-IEC29500-4_2016/xml.xsd | 0 .../ecma/fouth-edition/opc-contentTypes.xsd | 0 .../ecma/fouth-edition/opc-coreProperties.xsd | 0 .../schemas/ecma/fouth-edition/opc-digSig.xsd | 0 .../ecma/fouth-edition/opc-relationships.xsd | 0 .../pptx/scripts/office/schemas/mce/mc.xsd | 0 .../office/schemas/microsoft/wml-2010.xsd | 0 .../office/schemas/microsoft/wml-2012.xsd | 0 .../office/schemas/microsoft/wml-2018.xsd | 0 .../office/schemas/microsoft/wml-cex-2018.xsd | 0 .../office/schemas/microsoft/wml-cid-2016.xsd | 0 .../microsoft/wml-sdtdatahash-2020.xsd | 0 .../schemas/microsoft/wml-symex-2015.xsd | 0 .../skills/pptx/scripts/office/soffice.py | 0 .../skills/pptx/scripts/office/unpack.py | 0 .../skills/pptx/scripts/office/validate.py | 0 .../scripts/office/validators/__init__.py | 0 .../pptx/scripts/office/validators/base.py | 0 .../pptx/scripts/office/validators/docx.py | 0 .../pptx/scripts/office/validators/pptx.py | 0 .../scripts/office/validators/redlining.py | 0 .../skills/pptx/scripts/thumbnail.py | 0 .../workflows/circuly/sync-customers.json | 0 .../workflows/circuly/sync-products.json | 0 .../workflows/circuly/sync-subscriptions.json | 0 .../general/conversation-auto-archive.json | 0 .../workflows/general/conversation-sync.json | 0 .../general/customer-status-assessment.json | 0 .../workflows/general/document-rag-sync.json | 0 .../product-relationship-analysis.json | 0 .../workflows/gmail/email-sync.json | 0 .../workflows/google_drive/sync.json | 0 .../workflows/onedrive/sync.json | 0 .../workflows/outlook/email-sync.json | 0 .../workflows/shopify/sync-customers.json | 0 .../workflows/shopify/sync-products.json | 0 services/convex/Dockerfile | 12 +- 
services/convex/docker-entrypoint.sh | 143 +++-- .../constants/integration-templates.ts | 2 +- services/platform/convex/_generated/api.d.ts | 2 + services/platform/convex/agents/file_utils.ts | 16 +- .../platform/convex/branding/file_actions.ts | 10 +- .../platform/convex/branding/file_utils.ts | 18 +- .../platform/convex/branding/queries.test.ts | 24 +- .../convex/governance/retention_actions.ts | 4 +- .../governance/retention_bounds_proposal.ts | 2 +- .../convex/governance/retention_floors.ts | 2 +- .../convex/integrations/file_utils.ts | 23 +- .../convex/lib/config_store/actions.ts | 3 + .../platform/convex/lib/config_store/store.ts | 92 ++- .../gmail_draft_filtering.test.ts | 2 +- .../outlook_draft_filtering.test.ts | 2 +- .../convex/organizations/reseed_all_orgs.ts | 106 ++++ .../convex/organizations/scaffold.test.ts | 433 ++++++++------ .../platform/convex/organizations/scaffold.ts | 530 ++++++++++++------ .../platform/convex/providers/file_utils.ts | 14 +- .../platform/convex/skills/file_actions.ts | 6 +- .../platform/convex/skills/file_utils.test.ts | 65 ++- services/platform/convex/skills/file_utils.ts | 29 +- .../platform/convex/workflows/file_utils.ts | 22 +- services/platform/docker-entrypoint.sh | 41 +- services/platform/env.sh | 9 +- services/platform/lib/config-watcher.ts | 56 +- .../platform/lib/shared/schemas/governance.ts | 2 +- .../lib/shared/schemas/retention.test.ts | 8 +- .../utils/example-agents-normalized.test.ts | 20 +- services/platform/server.ts | 5 +- .../vite-plugins/serve-branding-images.ts | 6 +- tools/cli/src/commands/deploy/index.ts | 22 +- tools/cli/src/commands/migrate.ts | 41 ++ tools/cli/src/commands/start/index.ts | 71 ++- tools/cli/src/index.ts | 2 + tools/cli/src/lib/actions/deploy.ts | 302 +++++----- tools/cli/src/lib/actions/init.ts | 110 ++-- .../src/lib/actions/migrate-config-layout.ts | 104 ++++ tools/cli/src/lib/actions/reseed-all-orgs.ts | 116 ++++ tools/cli/src/lib/actions/start.ts | 66 +-- 
tools/cli/src/lib/actions/update.ts | 21 +- tools/cli/src/lib/docker/exec.ts | 37 +- .../src/lib/migrate-config-layout/script.sh | 173 ++++++ tools/cli/src/lib/project/fetch-reference.ts | 8 +- .../migrations/adopt-convex-stateful.ts | 67 --- .../migrations/namespace-caddy-config.ts | 77 --- .../upgrade/migrations/namespace-volumes.ts | 171 ------ .../lib/upgrade/migrations/split-convex.ts | 154 ----- tools/cli/src/lib/upgrade/registry.ts | 23 - tools/cli/src/lib/upgrade/runner.test.ts | 230 -------- tools/cli/src/lib/upgrade/runner.ts | 199 ------- tools/cli/src/lib/upgrade/state.ts | 94 ---- tools/cli/src/lib/upgrade/types.ts | 71 --- tools/cli/src/lib/upgrade/volume-helpers.ts | 333 ----------- 191 files changed, 1907 insertions(+), 2332 deletions(-) rename examples/{ => default}/agents/chat-agent.json (100%) rename examples/{ => default}/agents/crm-assistant.json (100%) rename examples/{ => default}/agents/image-creator.json (100%) rename examples/{ => default}/agents/integration-assistant.json (100%) rename examples/{ => default}/agents/researcher.json (100%) rename examples/{ => default}/agents/translator.json (100%) rename examples/{ => default}/agents/workflow-assistant.json (100%) rename examples/{ => default}/branding/branding.json (100%) rename examples/{ => default}/integrations/ai-image/config.json (100%) rename examples/{ => default}/integrations/ai-image/connector.ts (100%) rename examples/{ => default}/integrations/ai-image/icon.svg (100%) rename examples/{ => default}/integrations/circuly/config.json (100%) rename examples/{ => default}/integrations/circuly/connector.ts (100%) rename examples/{ => default}/integrations/circuly/icon.svg (100%) rename examples/{ => default}/integrations/discord/config.json (100%) rename examples/{ => default}/integrations/discord/connector.ts (100%) rename examples/{ => default}/integrations/discord/icon.svg (100%) rename examples/{ => default}/integrations/github/config.json (100%) rename examples/{ => 
default}/integrations/github/connector.ts (100%) rename examples/{ => default}/integrations/github/icon.svg (100%) rename examples/{ => default}/integrations/gmail/config.json (100%) rename examples/{ => default}/integrations/gmail/connector.ts (100%) rename examples/{ => default}/integrations/gmail/icon.svg (100%) rename examples/{ => default}/integrations/google_drive/config.json (100%) rename examples/{ => default}/integrations/google_drive/connector.ts (100%) rename examples/{ => default}/integrations/google_drive/icon.svg (100%) rename examples/{ => default}/integrations/outlook/config.json (100%) rename examples/{ => default}/integrations/outlook/connector.ts (100%) rename examples/{ => default}/integrations/outlook/icon.svg (100%) rename examples/{ => default}/integrations/protel/config.json (100%) rename examples/{ => default}/integrations/protel/icon.svg (100%) rename examples/{ => default}/integrations/shopify/config.json (100%) rename examples/{ => default}/integrations/shopify/connector.ts (100%) rename examples/{ => default}/integrations/shopify/icon.svg (100%) rename examples/{ => default}/integrations/slack/config.json (100%) rename examples/{ => default}/integrations/slack/connector.ts (100%) rename examples/{ => default}/integrations/slack/icon.svg (100%) rename examples/{ => default}/integrations/tavily/config.json (100%) rename examples/{ => default}/integrations/tavily/connector.ts (100%) rename examples/{ => default}/integrations/tavily/icon.svg (100%) rename examples/{ => default}/integrations/teams/config.json (100%) rename examples/{ => default}/integrations/teams/connector.ts (100%) rename examples/{ => default}/integrations/teams/icon.svg (100%) rename examples/{ => default}/integrations/twilio/config.json (100%) rename examples/{ => default}/integrations/twilio/connector.ts (100%) rename examples/{ => default}/integrations/twilio/icon.svg (100%) rename examples/{ => default}/providers/openai.json (100%) rename examples/{ => 
default}/providers/openrouter.json (100%) rename examples/{ => default}/providers/vercel-gateway.json (100%) rename examples/{retention/default.json => default/retention.json} (100%) rename examples/{ => default}/skills/pptx/LICENSE.txt (100%) rename examples/{ => default}/skills/pptx/SKILL.md (100%) rename examples/{ => default}/skills/pptx/editing.md (100%) rename examples/{ => default}/skills/pptx/pptxgenjs.md (100%) rename examples/{ => default}/skills/pptx/scripts/__init__.py (100%) rename examples/{ => default}/skills/pptx/scripts/add_slide.py (100%) rename examples/{ => default}/skills/pptx/scripts/clean.py (100%) rename examples/{ => default}/skills/pptx/scripts/office/helpers/__init__.py (100%) rename examples/{ => default}/skills/pptx/scripts/office/helpers/merge_runs.py (100%) rename examples/{ => default}/skills/pptx/scripts/office/helpers/simplify_redlines.py (100%) rename examples/{ => default}/skills/pptx/scripts/office/pack.py (100%) rename examples/{ => default}/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd (100%) rename examples/{ => default}/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd (100%) rename examples/{ => default}/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd (100%) rename examples/{ => default}/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd (100%) rename examples/{ => default}/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd (100%) rename examples/{ => default}/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-picture.xsd (100%) rename examples/{ => default}/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd (100%) rename examples/{ => default}/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd (100%) rename examples/{ => default}/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/pml.xsd (100%) rename examples/{ => 
default}/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd (100%) rename examples/{ => default}/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd (100%) rename examples/{ => default}/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd (100%) rename examples/{ => default}/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd (100%) rename examples/{ => default}/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd (100%) rename examples/{ => default}/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd (100%) rename examples/{ => default}/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd (100%) rename examples/{ => default}/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd (100%) rename examples/{ => default}/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd (100%) rename examples/{ => default}/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd (100%) rename examples/{ => default}/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/sml.xsd (100%) rename examples/{ => default}/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd (100%) rename examples/{ => default}/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd (100%) rename examples/{ => default}/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd (100%) rename examples/{ => default}/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd (100%) rename examples/{ => default}/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd (100%) rename examples/{ => default}/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/wml.xsd 
(100%) rename examples/{ => default}/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/xml.xsd (100%) rename examples/{ => default}/skills/pptx/scripts/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd (100%) rename examples/{ => default}/skills/pptx/scripts/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd (100%) rename examples/{ => default}/skills/pptx/scripts/office/schemas/ecma/fouth-edition/opc-digSig.xsd (100%) rename examples/{ => default}/skills/pptx/scripts/office/schemas/ecma/fouth-edition/opc-relationships.xsd (100%) rename examples/{ => default}/skills/pptx/scripts/office/schemas/mce/mc.xsd (100%) rename examples/{ => default}/skills/pptx/scripts/office/schemas/microsoft/wml-2010.xsd (100%) rename examples/{ => default}/skills/pptx/scripts/office/schemas/microsoft/wml-2012.xsd (100%) rename examples/{ => default}/skills/pptx/scripts/office/schemas/microsoft/wml-2018.xsd (100%) rename examples/{ => default}/skills/pptx/scripts/office/schemas/microsoft/wml-cex-2018.xsd (100%) rename examples/{ => default}/skills/pptx/scripts/office/schemas/microsoft/wml-cid-2016.xsd (100%) rename examples/{ => default}/skills/pptx/scripts/office/schemas/microsoft/wml-sdtdatahash-2020.xsd (100%) rename examples/{ => default}/skills/pptx/scripts/office/schemas/microsoft/wml-symex-2015.xsd (100%) rename examples/{ => default}/skills/pptx/scripts/office/soffice.py (100%) rename examples/{ => default}/skills/pptx/scripts/office/unpack.py (100%) rename examples/{ => default}/skills/pptx/scripts/office/validate.py (100%) rename examples/{ => default}/skills/pptx/scripts/office/validators/__init__.py (100%) rename examples/{ => default}/skills/pptx/scripts/office/validators/base.py (100%) rename examples/{ => default}/skills/pptx/scripts/office/validators/docx.py (100%) rename examples/{ => default}/skills/pptx/scripts/office/validators/pptx.py (100%) rename examples/{ => default}/skills/pptx/scripts/office/validators/redlining.py (100%) rename examples/{ => 
default}/skills/pptx/scripts/thumbnail.py (100%) rename examples/{ => default}/workflows/circuly/sync-customers.json (100%) rename examples/{ => default}/workflows/circuly/sync-products.json (100%) rename examples/{ => default}/workflows/circuly/sync-subscriptions.json (100%) rename examples/{ => default}/workflows/general/conversation-auto-archive.json (100%) rename examples/{ => default}/workflows/general/conversation-sync.json (100%) rename examples/{ => default}/workflows/general/customer-status-assessment.json (100%) rename examples/{ => default}/workflows/general/document-rag-sync.json (100%) rename examples/{ => default}/workflows/general/product-relationship-analysis.json (100%) rename examples/{ => default}/workflows/gmail/email-sync.json (100%) rename examples/{ => default}/workflows/google_drive/sync.json (100%) rename examples/{ => default}/workflows/onedrive/sync.json (100%) rename examples/{ => default}/workflows/outlook/email-sync.json (100%) rename examples/{ => default}/workflows/shopify/sync-customers.json (100%) rename examples/{ => default}/workflows/shopify/sync-products.json (100%) create mode 100644 services/platform/convex/organizations/reseed_all_orgs.ts create mode 100644 tools/cli/src/commands/migrate.ts create mode 100644 tools/cli/src/lib/actions/migrate-config-layout.ts create mode 100644 tools/cli/src/lib/actions/reseed-all-orgs.ts create mode 100644 tools/cli/src/lib/migrate-config-layout/script.sh delete mode 100644 tools/cli/src/lib/upgrade/migrations/adopt-convex-stateful.ts delete mode 100644 tools/cli/src/lib/upgrade/migrations/namespace-caddy-config.ts delete mode 100644 tools/cli/src/lib/upgrade/migrations/namespace-volumes.ts delete mode 100644 tools/cli/src/lib/upgrade/migrations/split-convex.ts delete mode 100644 tools/cli/src/lib/upgrade/registry.ts delete mode 100644 tools/cli/src/lib/upgrade/runner.test.ts delete mode 100644 tools/cli/src/lib/upgrade/runner.ts delete mode 100644 tools/cli/src/lib/upgrade/state.ts delete 
mode 100644 tools/cli/src/lib/upgrade/types.ts delete mode 100644 tools/cli/src/lib/upgrade/volume-helpers.ts diff --git a/.dockerignore b/.dockerignore index 6b2f4d3fec..4f2099eb36 100644 --- a/.dockerignore +++ b/.dockerignore @@ -22,7 +22,7 @@ services/platform/.env*.local # Provider secrets # ============================================================================ # *.secrets.json carries credentials in either form (SOPS-encrypted or -# plaintext). The convex image ships seeds from examples/providers/, but +# plaintext). The convex image ships seeds from examples/default/providers/, but # the secrets siblings should never bake into a layer — the entrypoint # already filters them at seed time, but exclude them from the build # context entirely so they cannot leak via image inspection. diff --git a/docs/de/develop/integrations.md b/docs/de/develop/integrations.md index e55bd8a932..fbc213ece1 100644 --- a/docs/de/develop/integrations.md +++ b/docs/de/develop/integrations.md @@ -49,7 +49,7 @@ Die Operation taucht auf Agents als Tool-Familie auf, sobald die Org Credentials | MCP-Server | Die Brücke muss ein langlebiger Prozess sein — lokale Dateien, eine eigene CLI, ein System, das aus dem Netz von Tale unerreichbar ist. | | Connector-TS | Das REST-Manifest deckt 80 % der API ab, aber eine Operation braucht Response-Formung, die das Manifest nicht deklarieren kann. | -Die ausgelieferten Integrations unter [Platform > Integrations](/de/platform/integrations/overview) sind der Katalog der REST-Manifeste, die Tale ausliefert — lies ihre Configs in `examples/integrations/` für die Muster, die du kopierst. +Die ausgelieferten Integrations unter [Platform > Integrations](/de/platform/integrations/overview) sind der Katalog der REST-Manifeste, die Tale ausliefert — lies ihre Configs in `examples/default/integrations/` für die Muster, die du kopierst. 
## SQL-Adapter diff --git a/docs/de/platform/integrations/overview.md b/docs/de/platform/integrations/overview.md index 6a7c3ea899..2b56f09d82 100644 --- a/docs/de/platform/integrations/overview.md +++ b/docs/de/platform/integrations/overview.md @@ -5,7 +5,7 @@ description: Drittsysteme, aus denen Tale liest und in die es schreibt — Kommu Integrationen sind die Brücken zwischen Tale und dem Rest deines Stacks. Agents rufen sie als Tools auf, Workflows triggern sie an Schritten, und die Dokumenten-Pipeline zieht Dateien aus ihnen. Jede Integration ist eine einzige JSON-Konfiguration plus eine Credential, die die Org einmal speichert; einmal verbunden, kann alles in Tale sie ohne erneute Authentifizierung nutzen. Diese Übersicht benennt die ausgelieferten Integrationen, gruppiert danach, was sie tun. -Die Form einer Integration ist über jeden Eintrag unten gleich — eine OpenAI-kompatible REST-Oberfläche oder ein OAuth2-Tanz, mit in einer JSON-Konfiguration unter `examples/integrations/` deklarierten Operationen. Benutzerdefinierte Integrationen folgen derselben Form; eine Code-Änderung brauchst du nicht, um eine hinzuzufügen. +Die Form einer Integration ist über jeden Eintrag unten gleich — eine OpenAI-kompatible REST-Oberfläche oder ein OAuth2-Tanz, mit in einer JSON-Konfiguration unter `examples/default/integrations/` deklarierten Operationen. Benutzerdefinierte Integrationen folgen derselben Form; eine Code-Änderung brauchst du nicht, um eine hinzuzufügen. ## Wie Integrationen sich von MCP unterscheiden @@ -65,7 +65,7 @@ Microsoft 365 deckt auch Identität ab. Sie unter **Einstellungen > Integratione ## Eine eigene Integration hinzufügen -Eigene Integrationen folgen derselben JSON-Form wie die oben. Leg eine Konfiguration in `TALE_CONFIG_DIR/integrations//config.json` ab, die die Operationen, die Auth-Methode und die erlaubten Hosts deklariert; die Integration erscheint in **Einstellungen > Integrationen**, damit User sie verbinden können. 
Die Form und die Validierungsregeln leben neben den ausgelieferten Konfigurationen in `examples/integrations/`. +Eigene Integrationen folgen derselben JSON-Form wie die oben. Leg eine Konfiguration in `TALE_CONFIG_DIR/<org>/integrations/<name>/config.json` ab, die die Operationen, die Auth-Methode und die erlaubten Hosts deklariert; die Integration erscheint in **Einstellungen > Integrationen**, damit User sie verbinden können. Die Form und die Validierungsregeln leben neben den ausgelieferten Konfigurationen in `examples/default/integrations/`. Für reichere oder selbst gehostete Brücken sind [MCP-Server](/de/platform/integrations/mcp-servers) die alternative Oberfläche — jeder MCP-Server, den du registrierst, fügt seine Tools dem Agent-Werkzeuggürtel hinzu mit pro-Tool-Genehmigung. diff --git a/docs/de/platform/models.md b/docs/de/platform/models.md index f0e582366b..60c060bad8 100644 --- a/docs/de/platform/models.md +++ b/docs/de/platform/models.md @@ -3,9 +3,9 @@ title: Modelle out of the box description: Welche Provider und Modelle eine frische Tale-Instanz mitbringt — OpenRouter für Chat und Vision, OpenAI für Sprache, Vercel AI Gateway für Bildgenerierung. --- -Eine frische Tale-Instanz bringt drei konfigurierte Provider mit: OpenRouter für Chat, Vision und Embeddings; OpenAI für Speech-to-Text und Text-to-Speech; Vercel AI Gateway für Bildgenerierung. Die Default-Agents in `examples/agents/` greifen auf Modelle in einem dieser drei Buckets zu, und die meisten Teams bleiben wochenlang bei den Defaults, bevor sie etwas tauschen. Diese Seite listet, was ausgeliefert wird, und verlinkt auf den vollen Katalog jedes Providers. +Eine frische Tale-Instanz bringt drei konfigurierte Provider mit: OpenRouter für Chat, Vision und Embeddings; OpenAI für Speech-to-Text und Text-to-Speech; Vercel AI Gateway für Bildgenerierung. 
Die Default-Agents in `examples/default/agents/` greifen auf Modelle in einem dieser drei Buckets zu, und die meisten Teams bleiben wochenlang bei den Defaults, bevor sie etwas tauschen. Diese Seite listet, was ausgeliefert wird, und verlinkt auf den vollen Katalog jedes Providers. -Modelle driften schneller als Docs. Die Listen unten stimmen zum Zeitpunkt, an dem `examples/providers/*.json` geschrieben wurde; die kanonische Wahrheit sind die JSON-Dateien, und das kanonische „was heute erreichbar ist" zeigt die Seite **Einstellungen > Provider** auf deiner Instanz. +Modelle driften schneller als Docs. Die Listen unten stimmen zum Zeitpunkt, an dem `examples/default/providers/*.json` geschrieben wurde; die kanonische Wahrheit sind die JSON-Dateien, und das kanonische „was heute erreichbar ist" zeigt die Seite **Einstellungen > Provider** auf deiner Instanz. ## Die drei Provider diff --git a/docs/de/self-hosted/configuration/providers.md b/docs/de/self-hosted/configuration/providers.md index de714067a9..f8e904321d 100644 --- a/docs/de/self-hosted/configuration/providers.md +++ b/docs/de/self-hosted/configuration/providers.md @@ -31,7 +31,7 @@ Die Referenz ist das Dateiformat auf Platte und die Reihenfolge der Operationen, } ``` -Die vollständige Menge der Felder lebt in [`examples/providers/`](https://github.com/tale-project/tale/tree/main/examples/providers) — `openai.json`, `openrouter.json` und `vercel-gateway.json` decken die drei Formen ab, die du wahrscheinlich brauchst. +Die vollständige Menge der Felder lebt in [`examples/default/providers/`](https://github.com/tale-project/tale/tree/main/examples/default/providers) — `openai.json`, `openrouter.json` und `vercel-gateway.json` decken die drei Formen ab, die du wahrscheinlich brauchst. 
## Die Secrets-Datei diff --git a/docs/en/develop/integrations.md b/docs/en/develop/integrations.md index b24f5e5aa7..7a6a195ecc 100644 --- a/docs/en/develop/integrations.md +++ b/docs/en/develop/integrations.md @@ -49,7 +49,7 @@ The operation surfaces on agents as a tool family the moment the org connects cr | MCP server | The bridge needs to be a long-lived process — local files, a CLI you own, a system that cannot be reached from Tale's network. | | Connector TS | The REST manifest covers 80 % of the API but one operation needs response shaping the manifest cannot declare. | -The shipped integrations under [Platform > Integrations](/platform/integrations/overview) are the catalogue of REST manifests Tale ships — read their configs in `examples/integrations/` for the patterns you will copy. +The shipped integrations under [Platform > Integrations](/platform/integrations/overview) are the catalogue of REST manifests Tale ships — read their configs in `examples/default/integrations/` for the patterns you will copy. ## SQL adapters diff --git a/docs/en/platform/integrations/overview.md b/docs/en/platform/integrations/overview.md index 937f93f647..788d78febb 100644 --- a/docs/en/platform/integrations/overview.md +++ b/docs/en/platform/integrations/overview.md @@ -5,7 +5,7 @@ description: Third-party systems Tale can read from and write to — communicati Integrations are the bridges between Tale and the rest of your stack. Agents call them as tools, workflows trigger them at steps, and the documents pipeline pulls files from them. Each integration is a single JSON config plus a credential the org stores once; once connected, anything in Tale can use it without re-authentication. This overview names the shipped integrations grouped by what they do. -The shape of an integration is the same across every entry below — an OpenAI-compatible REST surface or an OAuth2 dance, with operations declared in a JSON config under `examples/integrations/`. 
Custom integrations follow the same shape; you do not need a code change to add one. +The shape of an integration is the same across every entry below — an OpenAI-compatible REST surface or an OAuth2 dance, with operations declared in a JSON config under `examples/default/integrations/`. Custom integrations follow the same shape; you do not need a code change to add one. ## How integrations differ from MCP @@ -65,7 +65,7 @@ Microsoft 365 also covers identity. Connecting it under **Settings > Integration ## Adding a custom integration -Custom integrations follow the same JSON shape as the ones above. Drop a config into `TALE_CONFIG_DIR/integrations//config.json` declaring the operations, auth method, and allowed hosts; the integration appears in **Settings > Integrations** for users to connect. The shape and validation rules live alongside the shipped configs in `examples/integrations/`. +Custom integrations follow the same JSON shape as the ones above. Drop a config into `TALE_CONFIG_DIR/<org>/integrations/<slug>/config.json` declaring the operations, auth method, and allowed hosts; the integration appears in **Settings > Integrations** for users to connect. The shape and validation rules live alongside the shipped configs in `examples/default/integrations/`. For richer or self-hosted bridges, [MCP servers](/platform/integrations/mcp-servers) are the alternative surface — every MCP server you register adds its tools to the agent toolbelt with per-tool approval. diff --git a/docs/en/platform/models.md b/docs/en/platform/models.md index ab2e8e46b9..a6d94e6b83 100644 --- a/docs/en/platform/models.md +++ b/docs/en/platform/models.md @@ -3,9 +3,9 @@ title: Models out of the box description: Which providers and models a fresh Tale instance ships with — OpenRouter for chat and vision, OpenAI for voice, Vercel AI Gateway for image generation. 
--- -A fresh Tale instance ships with three providers configured: OpenRouter for chat, vision, and embeddings; OpenAI for speech-to-text and text-to-speech; Vercel AI Gateway for image generation. The default agents in `examples/agents/` reach for models in one of those three buckets, and most teams stay on the defaults for weeks before swapping anything. This page lists what is shipped and links to each provider's full catalogue. +A fresh Tale instance ships with three providers configured: OpenRouter for chat, vision, and embeddings; OpenAI for speech-to-text and text-to-speech; Vercel AI Gateway for image generation. The default agents in `examples/default/agents/` reach for models in one of those three buckets, and most teams stay on the defaults for weeks before swapping anything. This page lists what is shipped and links to each provider's full catalogue. -Models drift faster than docs. The lists below are correct at the time `examples/providers/*.json` was written; the canonical truth is the JSON files, and the canonical "what is reachable today" is what the **Settings > Providers** page shows on your instance. +Models drift faster than docs. The lists below are correct at the time `examples/default/providers/*.json` was written; the canonical truth is the JSON files, and the canonical "what is reachable today" is what the **Settings > Providers** page shows on your instance. 
## The three providers diff --git a/docs/en/self-hosted/configuration/providers.md b/docs/en/self-hosted/configuration/providers.md index fe9f005d32..799f97a0f2 100644 --- a/docs/en/self-hosted/configuration/providers.md +++ b/docs/en/self-hosted/configuration/providers.md @@ -31,7 +31,7 @@ The reference is the file format on disk and the order operations follow when ad } ``` -The full set of fields lives in [`examples/providers/`](https://github.com/tale-project/tale/tree/main/examples/providers) — `openai.json`, `openrouter.json`, and `vercel-gateway.json` cover the three shapes you are likely to need. +The full set of fields lives in [`examples/default/providers/`](https://github.com/tale-project/tale/tree/main/examples/default/providers) — `openai.json`, `openrouter.json`, and `vercel-gateway.json` cover the three shapes you are likely to need. ## The secrets file diff --git a/docs/fr/develop/integrations.md b/docs/fr/develop/integrations.md index bf7393aebe..0b266ed580 100644 --- a/docs/fr/develop/integrations.md +++ b/docs/fr/develop/integrations.md @@ -49,7 +49,7 @@ L'operation apparaît sur les agents comme une famille de tools dès que l'org b | Serveur MCP | Le pont doit être un processus de longue durée — fichiers locaux, une CLI à toi, un système inatteignable depuis le réseau de Tale. | | Connecteur TS | Le manifeste REST couvre 80 % de l'API mais une operation a besoin d'une mise en forme que le manifeste ne sait pas déclarer. | -Les intégrations livrées sous [Platform > Intégrations](/fr/platform/integrations/overview) sont le catalogue des manifestes REST que Tale livre — lis leurs configs dans `examples/integrations/` pour les motifs que tu copieras. +Les intégrations livrées sous [Platform > Intégrations](/fr/platform/integrations/overview) sont le catalogue des manifestes REST que Tale livre — lis leurs configs dans `examples/default/integrations/` pour les motifs que tu copieras. 
## Adaptateurs SQL diff --git a/docs/fr/platform/integrations/overview.md b/docs/fr/platform/integrations/overview.md index ab3b7c0e15..cd4d1ecefb 100644 --- a/docs/fr/platform/integrations/overview.md +++ b/docs/fr/platform/integrations/overview.md @@ -5,7 +5,7 @@ description: Systèmes tiers que Tale lit et écrit — communication, stockage, Les intégrations sont les ponts entre Tale et le reste de ta pile. Les agents les appellent comme outils, les workflows les déclenchent à des étapes, et la pipeline de documents en tire des fichiers. Chaque intégration est une seule configuration JSON plus un identifiant que l'organisation enregistre une fois ; une fois connectée, n'importe quoi dans Tale peut l'utiliser sans nouvelle authentification. Cet aperçu nomme les intégrations livrées, groupées par ce qu'elles font. -La forme d'une intégration est la même pour chaque entrée ci-dessous — une surface REST compatible OpenAI ou une danse OAuth2, avec des opérations déclarées dans une configuration JSON sous `examples/integrations/`. Les intégrations personnalisées suivent la même forme ; tu n'as pas besoin de modifier le code pour en ajouter une. +La forme d'une intégration est la même pour chaque entrée ci-dessous — une surface REST compatible OpenAI ou une danse OAuth2, avec des opérations déclarées dans une configuration JSON sous `examples/default/integrations/`. Les intégrations personnalisées suivent la même forme ; tu n'as pas besoin de modifier le code pour en ajouter une. ## En quoi les intégrations diffèrent de MCP @@ -65,7 +65,7 @@ Microsoft 365 couvre aussi l'identité. La connecter sous **Paramètres > Intég ## Ajouter une intégration personnalisée -Les intégrations personnalisées suivent la même forme JSON que celles ci-dessus. 
Dépose une configuration dans `TALE_CONFIG_DIR/integrations//config.json` déclarant les opérations, la méthode d'auth et les hôtes autorisés ; l'intégration apparaît sous **Paramètres > Intégrations** pour que les utilisateurs la connectent. La forme et les règles de validation vivent à côté des configurations livrées dans `examples/integrations/`. +Les intégrations personnalisées suivent la même forme JSON que celles ci-dessus. Dépose une configuration dans `TALE_CONFIG_DIR/<org>/integrations/<slug>/config.json` déclarant les opérations, la méthode d'auth et les hôtes autorisés ; l'intégration apparaît sous **Paramètres > Intégrations** pour que les utilisateurs la connectent. La forme et les règles de validation vivent à côté des configurations livrées dans `examples/default/integrations/`. Pour des ponts plus riches ou auto-hébergés, les [serveurs MCP](/fr/platform/integrations/mcp-servers) sont la surface alternative — chaque serveur MCP que tu enregistres ajoute ses outils à la ceinture d'outils de l'agent avec approbation par outil. diff --git a/docs/fr/platform/models.md b/docs/fr/platform/models.md index 84cddaf9f5..ba1180e796 100644 --- a/docs/fr/platform/models.md +++ b/docs/fr/platform/models.md @@ -3,9 +3,9 @@ title: Modèles livrés en standard description: Quels fournisseurs et modèles une instance Tale toute neuve embarque — OpenRouter pour le chat et la vision, OpenAI pour la voix, Vercel AI Gateway pour la génération d'images. --- -Une instance Tale toute neuve embarque trois fournisseurs configurés : OpenRouter pour le chat, la vision et les embeddings ; OpenAI pour la reconnaissance et la synthèse vocales ; Vercel AI Gateway pour la génération d'images. Les agents par défaut dans `examples/agents/` puisent dans l'un de ces trois seaux, et la plupart des équipes restent sur les défauts pendant des semaines avant d'en changer. Cette page liste ce qui est livré et renvoie vers le catalogue complet de chaque fournisseur. 
+Une instance Tale toute neuve embarque trois fournisseurs configurés : OpenRouter pour le chat, la vision et les embeddings ; OpenAI pour la reconnaissance et la synthèse vocales ; Vercel AI Gateway pour la génération d'images. Les agents par défaut dans `examples/default/agents/` puisent dans l'un de ces trois seaux, et la plupart des équipes restent sur les défauts pendant des semaines avant d'en changer. Cette page liste ce qui est livré et renvoie vers le catalogue complet de chaque fournisseur. -Les modèles dérivent plus vite que la doc. Les listes ci-dessous sont correctes au moment où `examples/providers/*.json` a été écrit ; la vérité canonique, ce sont les fichiers JSON, et le « ce qui est joignable aujourd'hui » canonique est ce que montre la page **Paramètres > Providers** sur ton instance. +Les modèles dérivent plus vite que la doc. Les listes ci-dessous sont correctes au moment où `examples/default/providers/*.json` a été écrit ; la vérité canonique, ce sont les fichiers JSON, et le « ce qui est joignable aujourd'hui » canonique est ce que montre la page **Paramètres > Providers** sur ton instance. ## Les trois fournisseurs diff --git a/docs/fr/self-hosted/configuration/providers.md b/docs/fr/self-hosted/configuration/providers.md index 7f73ea9d34..a63161119c 100644 --- a/docs/fr/self-hosted/configuration/providers.md +++ b/docs/fr/self-hosted/configuration/providers.md @@ -31,7 +31,7 @@ La référence est le format de fichier sur disque et l'ordre des opérations à } ``` -L'ensemble complet des champs vit dans [`examples/providers/`](https://github.com/tale-project/tale/tree/main/examples/providers) — `openai.json`, `openrouter.json` et `vercel-gateway.json` couvrent les trois formes dont tu auras probablement besoin. 
+L'ensemble complet des champs vit dans [`examples/default/providers/`](https://github.com/tale-project/tale/tree/main/examples/default/providers) — `openai.json`, `openrouter.json` et `vercel-gateway.json` couvrent les trois formes dont tu auras probablement besoin. ## Le fichier de secrets diff --git a/examples/agents/chat-agent.json b/examples/default/agents/chat-agent.json similarity index 100% rename from examples/agents/chat-agent.json rename to examples/default/agents/chat-agent.json diff --git a/examples/agents/crm-assistant.json b/examples/default/agents/crm-assistant.json similarity index 100% rename from examples/agents/crm-assistant.json rename to examples/default/agents/crm-assistant.json diff --git a/examples/agents/image-creator.json b/examples/default/agents/image-creator.json similarity index 100% rename from examples/agents/image-creator.json rename to examples/default/agents/image-creator.json diff --git a/examples/agents/integration-assistant.json b/examples/default/agents/integration-assistant.json similarity index 100% rename from examples/agents/integration-assistant.json rename to examples/default/agents/integration-assistant.json diff --git a/examples/agents/researcher.json b/examples/default/agents/researcher.json similarity index 100% rename from examples/agents/researcher.json rename to examples/default/agents/researcher.json diff --git a/examples/agents/translator.json b/examples/default/agents/translator.json similarity index 100% rename from examples/agents/translator.json rename to examples/default/agents/translator.json diff --git a/examples/agents/workflow-assistant.json b/examples/default/agents/workflow-assistant.json similarity index 100% rename from examples/agents/workflow-assistant.json rename to examples/default/agents/workflow-assistant.json diff --git a/examples/branding/branding.json b/examples/default/branding/branding.json similarity index 100% rename from examples/branding/branding.json rename to 
examples/default/branding/branding.json diff --git a/examples/integrations/ai-image/config.json b/examples/default/integrations/ai-image/config.json similarity index 100% rename from examples/integrations/ai-image/config.json rename to examples/default/integrations/ai-image/config.json diff --git a/examples/integrations/ai-image/connector.ts b/examples/default/integrations/ai-image/connector.ts similarity index 100% rename from examples/integrations/ai-image/connector.ts rename to examples/default/integrations/ai-image/connector.ts diff --git a/examples/integrations/ai-image/icon.svg b/examples/default/integrations/ai-image/icon.svg similarity index 100% rename from examples/integrations/ai-image/icon.svg rename to examples/default/integrations/ai-image/icon.svg diff --git a/examples/integrations/circuly/config.json b/examples/default/integrations/circuly/config.json similarity index 100% rename from examples/integrations/circuly/config.json rename to examples/default/integrations/circuly/config.json diff --git a/examples/integrations/circuly/connector.ts b/examples/default/integrations/circuly/connector.ts similarity index 100% rename from examples/integrations/circuly/connector.ts rename to examples/default/integrations/circuly/connector.ts diff --git a/examples/integrations/circuly/icon.svg b/examples/default/integrations/circuly/icon.svg similarity index 100% rename from examples/integrations/circuly/icon.svg rename to examples/default/integrations/circuly/icon.svg diff --git a/examples/integrations/discord/config.json b/examples/default/integrations/discord/config.json similarity index 100% rename from examples/integrations/discord/config.json rename to examples/default/integrations/discord/config.json diff --git a/examples/integrations/discord/connector.ts b/examples/default/integrations/discord/connector.ts similarity index 100% rename from examples/integrations/discord/connector.ts rename to examples/default/integrations/discord/connector.ts diff --git 
a/examples/integrations/discord/icon.svg b/examples/default/integrations/discord/icon.svg similarity index 100% rename from examples/integrations/discord/icon.svg rename to examples/default/integrations/discord/icon.svg diff --git a/examples/integrations/github/config.json b/examples/default/integrations/github/config.json similarity index 100% rename from examples/integrations/github/config.json rename to examples/default/integrations/github/config.json diff --git a/examples/integrations/github/connector.ts b/examples/default/integrations/github/connector.ts similarity index 100% rename from examples/integrations/github/connector.ts rename to examples/default/integrations/github/connector.ts diff --git a/examples/integrations/github/icon.svg b/examples/default/integrations/github/icon.svg similarity index 100% rename from examples/integrations/github/icon.svg rename to examples/default/integrations/github/icon.svg diff --git a/examples/integrations/gmail/config.json b/examples/default/integrations/gmail/config.json similarity index 100% rename from examples/integrations/gmail/config.json rename to examples/default/integrations/gmail/config.json diff --git a/examples/integrations/gmail/connector.ts b/examples/default/integrations/gmail/connector.ts similarity index 100% rename from examples/integrations/gmail/connector.ts rename to examples/default/integrations/gmail/connector.ts diff --git a/examples/integrations/gmail/icon.svg b/examples/default/integrations/gmail/icon.svg similarity index 100% rename from examples/integrations/gmail/icon.svg rename to examples/default/integrations/gmail/icon.svg diff --git a/examples/integrations/google_drive/config.json b/examples/default/integrations/google_drive/config.json similarity index 100% rename from examples/integrations/google_drive/config.json rename to examples/default/integrations/google_drive/config.json diff --git a/examples/integrations/google_drive/connector.ts 
b/examples/default/integrations/google_drive/connector.ts similarity index 100% rename from examples/integrations/google_drive/connector.ts rename to examples/default/integrations/google_drive/connector.ts diff --git a/examples/integrations/google_drive/icon.svg b/examples/default/integrations/google_drive/icon.svg similarity index 100% rename from examples/integrations/google_drive/icon.svg rename to examples/default/integrations/google_drive/icon.svg diff --git a/examples/integrations/outlook/config.json b/examples/default/integrations/outlook/config.json similarity index 100% rename from examples/integrations/outlook/config.json rename to examples/default/integrations/outlook/config.json diff --git a/examples/integrations/outlook/connector.ts b/examples/default/integrations/outlook/connector.ts similarity index 100% rename from examples/integrations/outlook/connector.ts rename to examples/default/integrations/outlook/connector.ts diff --git a/examples/integrations/outlook/icon.svg b/examples/default/integrations/outlook/icon.svg similarity index 100% rename from examples/integrations/outlook/icon.svg rename to examples/default/integrations/outlook/icon.svg diff --git a/examples/integrations/protel/config.json b/examples/default/integrations/protel/config.json similarity index 100% rename from examples/integrations/protel/config.json rename to examples/default/integrations/protel/config.json diff --git a/examples/integrations/protel/icon.svg b/examples/default/integrations/protel/icon.svg similarity index 100% rename from examples/integrations/protel/icon.svg rename to examples/default/integrations/protel/icon.svg diff --git a/examples/integrations/shopify/config.json b/examples/default/integrations/shopify/config.json similarity index 100% rename from examples/integrations/shopify/config.json rename to examples/default/integrations/shopify/config.json diff --git a/examples/integrations/shopify/connector.ts b/examples/default/integrations/shopify/connector.ts 
similarity index 100% rename from examples/integrations/shopify/connector.ts rename to examples/default/integrations/shopify/connector.ts diff --git a/examples/integrations/shopify/icon.svg b/examples/default/integrations/shopify/icon.svg similarity index 100% rename from examples/integrations/shopify/icon.svg rename to examples/default/integrations/shopify/icon.svg diff --git a/examples/integrations/slack/config.json b/examples/default/integrations/slack/config.json similarity index 100% rename from examples/integrations/slack/config.json rename to examples/default/integrations/slack/config.json diff --git a/examples/integrations/slack/connector.ts b/examples/default/integrations/slack/connector.ts similarity index 100% rename from examples/integrations/slack/connector.ts rename to examples/default/integrations/slack/connector.ts diff --git a/examples/integrations/slack/icon.svg b/examples/default/integrations/slack/icon.svg similarity index 100% rename from examples/integrations/slack/icon.svg rename to examples/default/integrations/slack/icon.svg diff --git a/examples/integrations/tavily/config.json b/examples/default/integrations/tavily/config.json similarity index 100% rename from examples/integrations/tavily/config.json rename to examples/default/integrations/tavily/config.json diff --git a/examples/integrations/tavily/connector.ts b/examples/default/integrations/tavily/connector.ts similarity index 100% rename from examples/integrations/tavily/connector.ts rename to examples/default/integrations/tavily/connector.ts diff --git a/examples/integrations/tavily/icon.svg b/examples/default/integrations/tavily/icon.svg similarity index 100% rename from examples/integrations/tavily/icon.svg rename to examples/default/integrations/tavily/icon.svg diff --git a/examples/integrations/teams/config.json b/examples/default/integrations/teams/config.json similarity index 100% rename from examples/integrations/teams/config.json rename to 
examples/default/integrations/teams/config.json diff --git a/examples/integrations/teams/connector.ts b/examples/default/integrations/teams/connector.ts similarity index 100% rename from examples/integrations/teams/connector.ts rename to examples/default/integrations/teams/connector.ts diff --git a/examples/integrations/teams/icon.svg b/examples/default/integrations/teams/icon.svg similarity index 100% rename from examples/integrations/teams/icon.svg rename to examples/default/integrations/teams/icon.svg diff --git a/examples/integrations/twilio/config.json b/examples/default/integrations/twilio/config.json similarity index 100% rename from examples/integrations/twilio/config.json rename to examples/default/integrations/twilio/config.json diff --git a/examples/integrations/twilio/connector.ts b/examples/default/integrations/twilio/connector.ts similarity index 100% rename from examples/integrations/twilio/connector.ts rename to examples/default/integrations/twilio/connector.ts diff --git a/examples/integrations/twilio/icon.svg b/examples/default/integrations/twilio/icon.svg similarity index 100% rename from examples/integrations/twilio/icon.svg rename to examples/default/integrations/twilio/icon.svg diff --git a/examples/providers/openai.json b/examples/default/providers/openai.json similarity index 100% rename from examples/providers/openai.json rename to examples/default/providers/openai.json diff --git a/examples/providers/openrouter.json b/examples/default/providers/openrouter.json similarity index 100% rename from examples/providers/openrouter.json rename to examples/default/providers/openrouter.json diff --git a/examples/providers/vercel-gateway.json b/examples/default/providers/vercel-gateway.json similarity index 100% rename from examples/providers/vercel-gateway.json rename to examples/default/providers/vercel-gateway.json diff --git a/examples/retention/default.json b/examples/default/retention.json similarity index 100% rename from 
examples/retention/default.json rename to examples/default/retention.json diff --git a/examples/skills/pptx/LICENSE.txt b/examples/default/skills/pptx/LICENSE.txt similarity index 100% rename from examples/skills/pptx/LICENSE.txt rename to examples/default/skills/pptx/LICENSE.txt diff --git a/examples/skills/pptx/SKILL.md b/examples/default/skills/pptx/SKILL.md similarity index 100% rename from examples/skills/pptx/SKILL.md rename to examples/default/skills/pptx/SKILL.md diff --git a/examples/skills/pptx/editing.md b/examples/default/skills/pptx/editing.md similarity index 100% rename from examples/skills/pptx/editing.md rename to examples/default/skills/pptx/editing.md diff --git a/examples/skills/pptx/pptxgenjs.md b/examples/default/skills/pptx/pptxgenjs.md similarity index 100% rename from examples/skills/pptx/pptxgenjs.md rename to examples/default/skills/pptx/pptxgenjs.md diff --git a/examples/skills/pptx/scripts/__init__.py b/examples/default/skills/pptx/scripts/__init__.py similarity index 100% rename from examples/skills/pptx/scripts/__init__.py rename to examples/default/skills/pptx/scripts/__init__.py diff --git a/examples/skills/pptx/scripts/add_slide.py b/examples/default/skills/pptx/scripts/add_slide.py similarity index 100% rename from examples/skills/pptx/scripts/add_slide.py rename to examples/default/skills/pptx/scripts/add_slide.py diff --git a/examples/skills/pptx/scripts/clean.py b/examples/default/skills/pptx/scripts/clean.py similarity index 100% rename from examples/skills/pptx/scripts/clean.py rename to examples/default/skills/pptx/scripts/clean.py diff --git a/examples/skills/pptx/scripts/office/helpers/__init__.py b/examples/default/skills/pptx/scripts/office/helpers/__init__.py similarity index 100% rename from examples/skills/pptx/scripts/office/helpers/__init__.py rename to examples/default/skills/pptx/scripts/office/helpers/__init__.py diff --git a/examples/skills/pptx/scripts/office/helpers/merge_runs.py 
b/examples/default/skills/pptx/scripts/office/helpers/merge_runs.py similarity index 100% rename from examples/skills/pptx/scripts/office/helpers/merge_runs.py rename to examples/default/skills/pptx/scripts/office/helpers/merge_runs.py diff --git a/examples/skills/pptx/scripts/office/helpers/simplify_redlines.py b/examples/default/skills/pptx/scripts/office/helpers/simplify_redlines.py similarity index 100% rename from examples/skills/pptx/scripts/office/helpers/simplify_redlines.py rename to examples/default/skills/pptx/scripts/office/helpers/simplify_redlines.py diff --git a/examples/skills/pptx/scripts/office/pack.py b/examples/default/skills/pptx/scripts/office/pack.py similarity index 100% rename from examples/skills/pptx/scripts/office/pack.py rename to examples/default/skills/pptx/scripts/office/pack.py diff --git a/examples/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd b/examples/default/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd similarity index 100% rename from examples/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd rename to examples/default/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd diff --git a/examples/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd b/examples/default/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd similarity index 100% rename from examples/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd rename to examples/default/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd diff --git a/examples/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd b/examples/default/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd similarity index 100% rename from examples/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd rename to 
examples/default/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd diff --git a/examples/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd b/examples/default/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd similarity index 100% rename from examples/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd rename to examples/default/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd diff --git a/examples/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd b/examples/default/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd similarity index 100% rename from examples/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd rename to examples/default/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd diff --git a/examples/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-picture.xsd b/examples/default/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-picture.xsd similarity index 100% rename from examples/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-picture.xsd rename to examples/default/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-picture.xsd diff --git a/examples/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd b/examples/default/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd similarity index 100% rename from examples/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd rename to examples/default/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd diff --git a/examples/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd b/examples/default/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd similarity index 100% rename from 
examples/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd rename to examples/default/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd diff --git a/examples/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/pml.xsd b/examples/default/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/pml.xsd similarity index 100% rename from examples/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/pml.xsd rename to examples/default/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/pml.xsd diff --git a/examples/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd b/examples/default/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd similarity index 100% rename from examples/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd rename to examples/default/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd diff --git a/examples/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd b/examples/default/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd similarity index 100% rename from examples/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd rename to examples/default/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd diff --git a/examples/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd b/examples/default/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd similarity index 100% rename from examples/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd rename to examples/default/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd diff --git 
a/examples/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd b/examples/default/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd similarity index 100% rename from examples/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd rename to examples/default/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd diff --git a/examples/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd b/examples/default/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd similarity index 100% rename from examples/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd rename to examples/default/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd diff --git a/examples/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd b/examples/default/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd similarity index 100% rename from examples/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd rename to examples/default/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd diff --git a/examples/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd b/examples/default/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd similarity index 100% rename from examples/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd rename to examples/default/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd diff --git 
a/examples/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd b/examples/default/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd similarity index 100% rename from examples/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd rename to examples/default/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd diff --git a/examples/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd b/examples/default/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd similarity index 100% rename from examples/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd rename to examples/default/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd diff --git a/examples/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd b/examples/default/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd similarity index 100% rename from examples/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd rename to examples/default/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd diff --git a/examples/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/sml.xsd b/examples/default/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/sml.xsd similarity index 100% rename from examples/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/sml.xsd rename to examples/default/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/sml.xsd diff --git a/examples/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd b/examples/default/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd similarity index 100% rename from examples/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd 
rename to examples/default/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd diff --git a/examples/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd b/examples/default/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd similarity index 100% rename from examples/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd rename to examples/default/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd diff --git a/examples/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd b/examples/default/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd similarity index 100% rename from examples/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd rename to examples/default/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd diff --git a/examples/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd b/examples/default/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd similarity index 100% rename from examples/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd rename to examples/default/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd diff --git a/examples/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd b/examples/default/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd similarity index 100% rename from examples/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd rename to examples/default/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd diff --git a/examples/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/wml.xsd 
b/examples/default/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/wml.xsd similarity index 100% rename from examples/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/wml.xsd rename to examples/default/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/wml.xsd diff --git a/examples/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/xml.xsd b/examples/default/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/xml.xsd similarity index 100% rename from examples/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/xml.xsd rename to examples/default/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/xml.xsd diff --git a/examples/skills/pptx/scripts/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd b/examples/default/skills/pptx/scripts/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd similarity index 100% rename from examples/skills/pptx/scripts/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd rename to examples/default/skills/pptx/scripts/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd diff --git a/examples/skills/pptx/scripts/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd b/examples/default/skills/pptx/scripts/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd similarity index 100% rename from examples/skills/pptx/scripts/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd rename to examples/default/skills/pptx/scripts/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd diff --git a/examples/skills/pptx/scripts/office/schemas/ecma/fouth-edition/opc-digSig.xsd b/examples/default/skills/pptx/scripts/office/schemas/ecma/fouth-edition/opc-digSig.xsd similarity index 100% rename from examples/skills/pptx/scripts/office/schemas/ecma/fouth-edition/opc-digSig.xsd rename to examples/default/skills/pptx/scripts/office/schemas/ecma/fouth-edition/opc-digSig.xsd diff --git a/examples/skills/pptx/scripts/office/schemas/ecma/fouth-edition/opc-relationships.xsd 
b/examples/default/skills/pptx/scripts/office/schemas/ecma/fouth-edition/opc-relationships.xsd similarity index 100% rename from examples/skills/pptx/scripts/office/schemas/ecma/fouth-edition/opc-relationships.xsd rename to examples/default/skills/pptx/scripts/office/schemas/ecma/fouth-edition/opc-relationships.xsd diff --git a/examples/skills/pptx/scripts/office/schemas/mce/mc.xsd b/examples/default/skills/pptx/scripts/office/schemas/mce/mc.xsd similarity index 100% rename from examples/skills/pptx/scripts/office/schemas/mce/mc.xsd rename to examples/default/skills/pptx/scripts/office/schemas/mce/mc.xsd diff --git a/examples/skills/pptx/scripts/office/schemas/microsoft/wml-2010.xsd b/examples/default/skills/pptx/scripts/office/schemas/microsoft/wml-2010.xsd similarity index 100% rename from examples/skills/pptx/scripts/office/schemas/microsoft/wml-2010.xsd rename to examples/default/skills/pptx/scripts/office/schemas/microsoft/wml-2010.xsd diff --git a/examples/skills/pptx/scripts/office/schemas/microsoft/wml-2012.xsd b/examples/default/skills/pptx/scripts/office/schemas/microsoft/wml-2012.xsd similarity index 100% rename from examples/skills/pptx/scripts/office/schemas/microsoft/wml-2012.xsd rename to examples/default/skills/pptx/scripts/office/schemas/microsoft/wml-2012.xsd diff --git a/examples/skills/pptx/scripts/office/schemas/microsoft/wml-2018.xsd b/examples/default/skills/pptx/scripts/office/schemas/microsoft/wml-2018.xsd similarity index 100% rename from examples/skills/pptx/scripts/office/schemas/microsoft/wml-2018.xsd rename to examples/default/skills/pptx/scripts/office/schemas/microsoft/wml-2018.xsd diff --git a/examples/skills/pptx/scripts/office/schemas/microsoft/wml-cex-2018.xsd b/examples/default/skills/pptx/scripts/office/schemas/microsoft/wml-cex-2018.xsd similarity index 100% rename from examples/skills/pptx/scripts/office/schemas/microsoft/wml-cex-2018.xsd rename to examples/default/skills/pptx/scripts/office/schemas/microsoft/wml-cex-2018.xsd 
diff --git a/examples/skills/pptx/scripts/office/schemas/microsoft/wml-cid-2016.xsd b/examples/default/skills/pptx/scripts/office/schemas/microsoft/wml-cid-2016.xsd similarity index 100% rename from examples/skills/pptx/scripts/office/schemas/microsoft/wml-cid-2016.xsd rename to examples/default/skills/pptx/scripts/office/schemas/microsoft/wml-cid-2016.xsd diff --git a/examples/skills/pptx/scripts/office/schemas/microsoft/wml-sdtdatahash-2020.xsd b/examples/default/skills/pptx/scripts/office/schemas/microsoft/wml-sdtdatahash-2020.xsd similarity index 100% rename from examples/skills/pptx/scripts/office/schemas/microsoft/wml-sdtdatahash-2020.xsd rename to examples/default/skills/pptx/scripts/office/schemas/microsoft/wml-sdtdatahash-2020.xsd diff --git a/examples/skills/pptx/scripts/office/schemas/microsoft/wml-symex-2015.xsd b/examples/default/skills/pptx/scripts/office/schemas/microsoft/wml-symex-2015.xsd similarity index 100% rename from examples/skills/pptx/scripts/office/schemas/microsoft/wml-symex-2015.xsd rename to examples/default/skills/pptx/scripts/office/schemas/microsoft/wml-symex-2015.xsd diff --git a/examples/skills/pptx/scripts/office/soffice.py b/examples/default/skills/pptx/scripts/office/soffice.py similarity index 100% rename from examples/skills/pptx/scripts/office/soffice.py rename to examples/default/skills/pptx/scripts/office/soffice.py diff --git a/examples/skills/pptx/scripts/office/unpack.py b/examples/default/skills/pptx/scripts/office/unpack.py similarity index 100% rename from examples/skills/pptx/scripts/office/unpack.py rename to examples/default/skills/pptx/scripts/office/unpack.py diff --git a/examples/skills/pptx/scripts/office/validate.py b/examples/default/skills/pptx/scripts/office/validate.py similarity index 100% rename from examples/skills/pptx/scripts/office/validate.py rename to examples/default/skills/pptx/scripts/office/validate.py diff --git a/examples/skills/pptx/scripts/office/validators/__init__.py 
b/examples/default/skills/pptx/scripts/office/validators/__init__.py similarity index 100% rename from examples/skills/pptx/scripts/office/validators/__init__.py rename to examples/default/skills/pptx/scripts/office/validators/__init__.py diff --git a/examples/skills/pptx/scripts/office/validators/base.py b/examples/default/skills/pptx/scripts/office/validators/base.py similarity index 100% rename from examples/skills/pptx/scripts/office/validators/base.py rename to examples/default/skills/pptx/scripts/office/validators/base.py diff --git a/examples/skills/pptx/scripts/office/validators/docx.py b/examples/default/skills/pptx/scripts/office/validators/docx.py similarity index 100% rename from examples/skills/pptx/scripts/office/validators/docx.py rename to examples/default/skills/pptx/scripts/office/validators/docx.py diff --git a/examples/skills/pptx/scripts/office/validators/pptx.py b/examples/default/skills/pptx/scripts/office/validators/pptx.py similarity index 100% rename from examples/skills/pptx/scripts/office/validators/pptx.py rename to examples/default/skills/pptx/scripts/office/validators/pptx.py diff --git a/examples/skills/pptx/scripts/office/validators/redlining.py b/examples/default/skills/pptx/scripts/office/validators/redlining.py similarity index 100% rename from examples/skills/pptx/scripts/office/validators/redlining.py rename to examples/default/skills/pptx/scripts/office/validators/redlining.py diff --git a/examples/skills/pptx/scripts/thumbnail.py b/examples/default/skills/pptx/scripts/thumbnail.py similarity index 100% rename from examples/skills/pptx/scripts/thumbnail.py rename to examples/default/skills/pptx/scripts/thumbnail.py diff --git a/examples/workflows/circuly/sync-customers.json b/examples/default/workflows/circuly/sync-customers.json similarity index 100% rename from examples/workflows/circuly/sync-customers.json rename to examples/default/workflows/circuly/sync-customers.json diff --git 
a/examples/workflows/circuly/sync-products.json b/examples/default/workflows/circuly/sync-products.json similarity index 100% rename from examples/workflows/circuly/sync-products.json rename to examples/default/workflows/circuly/sync-products.json diff --git a/examples/workflows/circuly/sync-subscriptions.json b/examples/default/workflows/circuly/sync-subscriptions.json similarity index 100% rename from examples/workflows/circuly/sync-subscriptions.json rename to examples/default/workflows/circuly/sync-subscriptions.json diff --git a/examples/workflows/general/conversation-auto-archive.json b/examples/default/workflows/general/conversation-auto-archive.json similarity index 100% rename from examples/workflows/general/conversation-auto-archive.json rename to examples/default/workflows/general/conversation-auto-archive.json diff --git a/examples/workflows/general/conversation-sync.json b/examples/default/workflows/general/conversation-sync.json similarity index 100% rename from examples/workflows/general/conversation-sync.json rename to examples/default/workflows/general/conversation-sync.json diff --git a/examples/workflows/general/customer-status-assessment.json b/examples/default/workflows/general/customer-status-assessment.json similarity index 100% rename from examples/workflows/general/customer-status-assessment.json rename to examples/default/workflows/general/customer-status-assessment.json diff --git a/examples/workflows/general/document-rag-sync.json b/examples/default/workflows/general/document-rag-sync.json similarity index 100% rename from examples/workflows/general/document-rag-sync.json rename to examples/default/workflows/general/document-rag-sync.json diff --git a/examples/workflows/general/product-relationship-analysis.json b/examples/default/workflows/general/product-relationship-analysis.json similarity index 100% rename from examples/workflows/general/product-relationship-analysis.json rename to 
examples/default/workflows/general/product-relationship-analysis.json diff --git a/examples/workflows/gmail/email-sync.json b/examples/default/workflows/gmail/email-sync.json similarity index 100% rename from examples/workflows/gmail/email-sync.json rename to examples/default/workflows/gmail/email-sync.json diff --git a/examples/workflows/google_drive/sync.json b/examples/default/workflows/google_drive/sync.json similarity index 100% rename from examples/workflows/google_drive/sync.json rename to examples/default/workflows/google_drive/sync.json diff --git a/examples/workflows/onedrive/sync.json b/examples/default/workflows/onedrive/sync.json similarity index 100% rename from examples/workflows/onedrive/sync.json rename to examples/default/workflows/onedrive/sync.json diff --git a/examples/workflows/outlook/email-sync.json b/examples/default/workflows/outlook/email-sync.json similarity index 100% rename from examples/workflows/outlook/email-sync.json rename to examples/default/workflows/outlook/email-sync.json diff --git a/examples/workflows/shopify/sync-customers.json b/examples/default/workflows/shopify/sync-customers.json similarity index 100% rename from examples/workflows/shopify/sync-customers.json rename to examples/default/workflows/shopify/sync-customers.json diff --git a/examples/workflows/shopify/sync-products.json b/examples/default/workflows/shopify/sync-products.json similarity index 100% rename from examples/workflows/shopify/sync-products.json rename to examples/default/workflows/shopify/sync-products.json diff --git a/services/convex/Dockerfile b/services/convex/Dockerfile index 0a19100dbf..6ad955e277 100644 --- a/services/convex/Dockerfile +++ b/services/convex/Dockerfile @@ -105,9 +105,9 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \ && groupadd --system --gid 1001 app || true \ && useradd --system --uid 1001 --gid app app || true \ && mkdir -p /home/app && chown app:app /home/app && chmod 755 /home/app \ - && mkdir -p 
/app/data/convex /app/data/agents /app/data/workflows /app/data/integrations /app/data/providers /app/data/branding /app/data/retention /app/data/skills \ + && mkdir -p /app/data/convex /app/data/default \ /dashboard \ - /app/builtin/agents /app/builtin/workflows /app/builtin/integrations /app/builtin/providers /app/builtin/branding /app/builtin/retention /app/builtin/skills \ + /app/builtin/default \ && chown -R app:app /app/data /dashboard /app/builtin \ # Strip system bloat from Convex backend base (~155 MB) && ARCH_LIB="/usr/lib/$(dpkg --print-architecture | sed 's/amd64/x86_64-linux-gnu/;s/arm64/aarch64-linux-gnu/')" \ @@ -173,13 +173,7 @@ COPY --from=convex-dashboard --chown=app:app /app /dashboard # Builtin seed assets (one-time copy on fresh volume; .history/ preserves user edits). # Sources come from repo-root examples/ (same as platform Dockerfile). # ---------------------------------------------------------------------------- -COPY --chown=app:app examples/agents/ /app/builtin/agents/ -COPY --chown=app:app examples/workflows/ /app/builtin/workflows/ -COPY --chown=app:app examples/integrations/ /app/builtin/integrations/ -COPY --chown=app:app examples/providers/ /app/builtin/providers/ -COPY --chown=app:app examples/branding/ /app/builtin/branding/ -COPY --chown=app:app examples/retention/ /app/builtin/retention/ -COPY --chown=app:app examples/skills/ /app/builtin/skills/ +COPY --chown=app:app examples/default/ /app/builtin/default/ # ---------------------------------------------------------------------------- # Entrypoint scripts diff --git a/services/convex/docker-entrypoint.sh b/services/convex/docker-entrypoint.sh index 50bf34cfd3..49870c14b3 100755 --- a/services/convex/docker-entrypoint.sh +++ b/services/convex/docker-entrypoint.sh @@ -39,9 +39,11 @@ log_section() { echo; echo "═════════════════ # ---------------------------------------------------------------------------- if [ "$(id -u)" = '0' ]; then data_dir="${TALE_CONFIG_DIR:-/app/data}" - 
mkdir -p "$data_dir/convex" "$data_dir/agents" "$data_dir/workflows" \ - "$data_dir/integrations" "$data_dir/providers" "$data_dir/branding" \ - "$data_dir/skills" + # Org-first layout: per-org subtrees live under `//`. + # Only create `convex/` (backend storage) and `default/` (the canonical + # org seed target) up front; per-domain dirs are created on-demand by + # `run_seed` and `scaffoldNewOrganization`. + mkdir -p "$data_dir/convex" "$data_dir/default" chown -R app:app "$data_dir" # ---------------------------------------------------------------------------- @@ -268,22 +270,43 @@ if [ -f /etc/yt-dlp-version ]; then fi # ============================================================================ -# Builtin seed (version-marker gated) +# Builtin seed (version + layout-marker gated) — org-first layout # ---------------------------------------------------------------------------- -# Marker: /app/data/.seeded-${TALE_VERSION} -# - Fresh volume or new version → run 4 seed loops +# Layout: `/default//...` (the canonical default org's +# subtree); source: `/app/builtin/default//...` (org-agnostic +# template baked into the convex image). +# +# Marker: /app/data/.seeded-${TALE_VERSION}-orgfirst +# - Fresh volume or new version (or pre-orgfirst marker) → run seed loops # - Same version restart → skip (already seeded) -# - FORCE_SEED=true → re-run regardless +# - FORCE_SEED=true → re-run regardless (overwrites builtin-named files +# in place; user-added files at the same dir and `.history/` siblings +# survive; encrypted *.secrets.json files are never written) +# +# The `-orgfirst` token in the marker name signals the layout transition: +# an older binary that doesn't recognize this marker re-seeds (idempotently) +# into its expected old paths on a hypothetical downgrade. 
# ---------------------------------------------------------------------------- -seed_marker="/app/data/.seeded-${TALE_VERSION:-dev}" +seed_marker="/app/data/.seeded-${TALE_VERSION:-dev}-orgfirst" data_dir="/app/data" +# Atomic file copy: write to a sibling tmp file then rename. A SIGKILL +# between open(dest, O_TRUNC) and the final write would otherwise leave a +# truncated file at $dest, which the next-run skip-if-exists check treats +# as "already seeded" — silent corruption. With atomic_cp the next run +# either sees the original (rename never happened) or the complete file. +atomic_cp() { + local src="$1" dest="$2" + local tmp="${dest}.tale-seed.$$.tmp" + cp "$src" "$tmp" && mv -f "$tmp" "$dest" +} + run_seed() { - log_section "Seeding builtin configs (TALE_VERSION=${TALE_VERSION:-dev})" + log_section "Seeding builtin configs into default org (TALE_VERSION=${TALE_VERSION:-dev})" - # --- Agents --- - local agents_dir="${data_dir}/agents" - local agents_builtin="/app/builtin/agents" + # --- Agents (flat) --- + local agents_dir="${data_dir}/default/agents" + local agents_builtin="/app/builtin/default/agents" mkdir -p "$agents_dir" if [ -d "$agents_builtin" ] && [ "$(ls -A "$agents_builtin" 2>/dev/null)" ]; then for src in "$agents_builtin"/*.json; do @@ -293,20 +316,20 @@ run_seed() { local dest="$agents_dir/$name" local history_dir="$agents_dir/.history/$slug" if [ "$FORCE_SEED" = "true" ]; then - cp "$src" "$dest"; echo " ✓ Seeded $name (forced)" + atomic_cp "$src" "$dest"; echo " ✓ Seeded $name (forced)" elif [ -f "$dest" ]; then echo " ⏭ Skipping $name (already exists)" elif [ -d "$history_dir" ] && [ "$(ls -A "$history_dir" 2>/dev/null)" ]; then echo " ⏭ Skipping $name (user has modifications in .history)" else - cp "$src" "$dest"; echo " ✓ Seeded agent $name" + atomic_cp "$src" "$dest"; echo " ✓ Seeded agent $name" fi done fi - # --- Workflows (nested paths allowed) --- - local workflows_dir="${data_dir}/workflows" - local 
workflows_builtin="/app/builtin/workflows" + # --- Workflows (nested folder/name.json) --- + local workflows_dir="${data_dir}/default/workflows" + local workflows_builtin="/app/builtin/default/workflows" mkdir -p "$workflows_dir" if [ -d "$workflows_builtin" ] && [ "$(ls -A "$workflows_builtin" 2>/dev/null)" ]; then find "$workflows_builtin" -name '*.json' -type f | while read -r src; do @@ -318,19 +341,19 @@ run_seed() { local history_dir="$workflows_dir/.history/$flat_slug" if [ "$FORCE_SEED" = "true" ]; then - mkdir -p "$dest_dir"; cp "$src" "$dest"; echo " ✓ Seeded workflow $rel_path (forced)"; continue + mkdir -p "$dest_dir"; atomic_cp "$src" "$dest"; echo " ✓ Seeded workflow $rel_path (forced)"; continue fi if [ -f "$dest" ]; then echo " ⏭ Skipping workflow $rel_path (already exists)"; continue; fi if [ -d "$history_dir" ] && [ "$(ls -A "$history_dir" 2>/dev/null)" ]; then echo " ⏭ Skipping workflow $rel_path (user has modifications in .history)"; continue fi - mkdir -p "$dest_dir"; cp "$src" "$dest"; echo " ✓ Seeded workflow $rel_path" + mkdir -p "$dest_dir"; atomic_cp "$src" "$dest"; echo " ✓ Seeded workflow $rel_path" done fi - # --- Integrations (directory-based) --- - local integrations_dir="${data_dir}/integrations" - local integrations_builtin="/app/builtin/integrations" + # --- Integrations (directory bundles) --- + local integrations_dir="${data_dir}/default/integrations" + local integrations_builtin="/app/builtin/default/integrations" mkdir -p "$integrations_dir" if [ -d "$integrations_builtin" ] && [ "$(ls -A "$integrations_builtin" 2>/dev/null)" ]; then for src_dir in "$integrations_builtin"/*/; do @@ -350,8 +373,8 @@ run_seed() { fi # --- Skills (directory bundles: SKILL.md + scripts/ + references/ + assets/) --- - local skills_dir="${data_dir}/skills" - local skills_builtin="/app/builtin/skills" + local skills_dir="${data_dir}/default/skills" + local skills_builtin="/app/builtin/default/skills" mkdir -p "$skills_dir" if [ -d "$skills_builtin" ] 
&& [ "$(ls -A "$skills_builtin" 2>/dev/null)" ]; then for src_dir in "$skills_builtin"/*/; do @@ -370,8 +393,8 @@ run_seed() { fi # --- Providers (skip encrypted .secrets.json) --- - local providers_dir="${data_dir}/providers" - local providers_builtin="/app/builtin/providers" + local providers_dir="${data_dir}/default/providers" + local providers_builtin="/app/builtin/default/providers" mkdir -p "$providers_dir" if [ -d "$providers_builtin" ] && [ "$(ls -A "$providers_builtin" 2>/dev/null)" ]; then for src in "$providers_builtin"/*.json; do @@ -382,41 +405,57 @@ run_seed() { local dest="$providers_dir/$name" local history_dir="$providers_dir/.history/$slug" if [ "$FORCE_SEED" = "true" ]; then - cp "$src" "$dest"; echo " ✓ Seeded provider $name (forced)" + atomic_cp "$src" "$dest"; echo " ✓ Seeded provider $name (forced)" elif [ -f "$dest" ]; then echo " ⏭ Skipping provider $name (already exists)" elif [ -d "$history_dir" ] && [ "$(ls -A "$history_dir" 2>/dev/null)" ]; then echo " ⏭ Skipping provider $name (user has modifications in .history)" else - cp "$src" "$dest"; echo " ✓ Seeded provider $name" + atomic_cp "$src" "$dest"; echo " ✓ Seeded provider $name" fi done fi - # --- Retention (per-org JSON files: $TALE_CONFIG_DIR/retention/{slug}.json) --- - # Default org's slug is hardcoded to `default`, so default.json fits - # the {orgSlug}.json convention. Retention has no secrets to skip - # (compare with providers' .secrets.json branch above). 
- local retention_dir="${data_dir}/retention" - local retention_builtin="/app/builtin/retention" - mkdir -p "$retention_dir" - if [ -d "$retention_builtin" ] && [ "$(ls -A "$retention_builtin" 2>/dev/null)" ]; then - for src in "$retention_builtin"/*.json; do - [ -f "$src" ] || continue - local name="$(basename "$src")" - local slug="$(basename "$src" .json)" - local dest="$retention_dir/$name" - local history_dir="$retention_dir/.history/$slug" - if [ "$FORCE_SEED" = "true" ]; then - cp "$src" "$dest"; echo " ✓ Seeded retention $name (forced)" - elif [ -f "$dest" ]; then - echo " ⏭ Skipping retention $name (already exists)" - elif [ -d "$history_dir" ] && [ "$(ls -A "$history_dir" 2>/dev/null)" ]; then - echo " ⏭ Skipping retention $name (user has modifications in .history)" - else - cp "$src" "$dest"; echo " ✓ Seeded retention $name" - fi - done + # --- Branding (single file at default/branding/branding.json) --- + # Closes a long-standing gap: previously branding was only seeded by the + # Convex scaffold action for new orgs, never on the default-org bootstrap + # path. With org-first the default org needs the same treatment as any + # other org for consistency (uniform model). 
+ local branding_dir="${data_dir}/default/branding" + local branding_src="/app/builtin/default/branding/branding.json" + mkdir -p "$branding_dir" + if [ -f "$branding_src" ]; then + local dest="$branding_dir/branding.json" + local history_dir="$branding_dir/.history/branding" + if [ "$FORCE_SEED" = "true" ]; then + atomic_cp "$branding_src" "$dest"; echo " ✓ Seeded branding (forced)" + elif [ -f "$dest" ]; then + echo " ⏭ Skipping branding (already exists)" + elif [ -d "$history_dir" ] && [ "$(ls -A "$history_dir" 2>/dev/null)" ]; then + echo " ⏭ Skipping branding (user has modifications in .history)" + else + atomic_cp "$branding_src" "$dest"; echo " ✓ Seeded branding" + fi + fi + + # --- Retention (single file at default/retention.json) --- + # Retention is one JSON object per org under the uniform org-first layout + # (`$TALE_CONFIG_DIR//retention.json`). The catalog ships only + # the default org's retention config; non-default orgs are seeded by the + # Convex scaffold action. + local retention_src="/app/builtin/default/retention.json" + if [ -f "$retention_src" ]; then + local dest="${data_dir}/default/retention.json" + local history_dir="${data_dir}/default/.history/retention" + if [ "$FORCE_SEED" = "true" ]; then + atomic_cp "$retention_src" "$dest"; echo " ✓ Seeded retention (forced)" + elif [ -f "$dest" ]; then + echo " ⏭ Skipping retention (already exists)" + elif [ -d "$history_dir" ] && [ "$(ls -A "$history_dir" 2>/dev/null)" ]; then + echo " ⏭ Skipping retention (user has modifications in .history)" + else + atomic_cp "$retention_src" "$dest"; echo " ✓ Seeded retention" + fi fi touch "$seed_marker" diff --git a/services/platform/app/features/settings/integrations/components/integration-upload/constants/integration-templates.ts b/services/platform/app/features/settings/integrations/components/integration-upload/constants/integration-templates.ts index 7260c87284..07854be8c2 100644 --- 
a/services/platform/app/features/settings/integrations/components/integration-upload/constants/integration-templates.ts +++ b/services/platform/app/features/settings/integrations/components/integration-upload/constants/integration-templates.ts @@ -1,7 +1,7 @@ // Points to `main` so templates stay current with the shipped app. // If templates require immutable pinning, replace 'main' with a release tag. const TEMPLATES_REF = 'main'; -const GITHUB_RAW_BASE = `https://raw.githubusercontent.com/tale-project/tale/${TEMPLATES_REF}/examples/integrations`; +const GITHUB_RAW_BASE = `https://raw.githubusercontent.com/tale-project/tale/${TEMPLATES_REF}/examples/default/integrations`; export interface IntegrationTemplate { name: string; diff --git a/services/platform/convex/_generated/api.d.ts b/services/platform/convex/_generated/api.d.ts index 791f5f711a..6b557cb4f2 100644 --- a/services/platform/convex/_generated/api.d.ts +++ b/services/platform/convex/_generated/api.d.ts @@ -636,6 +636,7 @@ import type * as organizations_helpers from "../organizations/helpers.js"; import type * as organizations_internal_queries from "../organizations/internal_queries.js"; import type * as organizations_queries from "../organizations/queries.js"; import type * as organizations_record_org_switch from "../organizations/record_org_switch.js"; +import type * as organizations_reseed_all_orgs from "../organizations/reseed_all_orgs.js"; import type * as organizations_resolve_org_slug from "../organizations/resolve_org_slug.js"; import type * as organizations_scaffold from "../organizations/scaffold.js"; import type * as organizations_update_organization from "../organizations/update_organization.js"; @@ -1732,6 +1733,7 @@ declare const fullApi: ApiFromModules<{ "organizations/internal_queries": typeof organizations_internal_queries; "organizations/queries": typeof organizations_queries; "organizations/record_org_switch": typeof organizations_record_org_switch; + "organizations/reseed_all_orgs": 
typeof organizations_reseed_all_orgs; "organizations/resolve_org_slug": typeof organizations_resolve_org_slug; "organizations/scaffold": typeof organizations_scaffold; "organizations/update_organization": typeof organizations_update_organization; diff --git a/services/platform/convex/agents/file_utils.ts b/services/platform/convex/agents/file_utils.ts index bdcefc3121..dbe0cfe7f7 100644 --- a/services/platform/convex/agents/file_utils.ts +++ b/services/platform/convex/agents/file_utils.ts @@ -125,14 +125,12 @@ export function parseAgentJson(content: string): AgentJsonConfig { return result.data; } -function getBaseDir(): string { - const dir = process.env.AGENTS_DIR; - if (dir) return dir; +function getConfigRoot(): string { const configDir = process.env.TALE_CONFIG_DIR; - if (configDir) return path.join(configDir, 'agents'); + if (configDir) return configDir; throw new Error( - 'Neither TALE_CONFIG_DIR nor AGENTS_DIR environment variable is set. ' + - 'Set TALE_CONFIG_DIR in .env to the root config directory ' + + 'TALE_CONFIG_DIR environment variable is not set. ' + + 'Set it to the root config directory ' + '(e.g., TALE_CONFIG_DIR=/path/to/tale/examples).', ); } @@ -141,11 +139,7 @@ export function resolveAgentsDir(orgSlug: string): string { if (!validateOrgSlug(orgSlug)) { throw new Error(`Invalid org slug: ${orgSlug}`); } - const baseDir = getBaseDir(); - if (orgSlug === 'default') { - return baseDir; - } - return path.join(baseDir, orgSlug); + return path.join(getConfigRoot(), orgSlug, 'agents'); } export function resolveAgentFilePath( diff --git a/services/platform/convex/branding/file_actions.ts b/services/platform/convex/branding/file_actions.ts index d58ecf120d..a10f5369be 100644 --- a/services/platform/convex/branding/file_actions.ts +++ b/services/platform/convex/branding/file_actions.ts @@ -3,9 +3,13 @@ /** * Branding file I/O actions. * - * Branding is global (not org-scoped). 
A single branding.json file - * at {TALE_CONFIG_DIR}/branding/branding.json applies to the entire platform. - * Images (logo, favicons) are stored on disk at {TALE_CONFIG_DIR}/branding/images/. + * Branding is global (not org-scoped). A single branding.json file at + * {TALE_CONFIG_DIR}/default/branding/branding.json applies to the entire + * platform. Images (logo, favicons) are stored on disk at + * {TALE_CONFIG_DIR}/default/branding/images/. Although on-disk files live + * under the `default` org subtree like every other domain, the read-side + * here hardcodes `'default'` — non-default orgs do not have separate + * branding today. * * Uses atomic writes (temp → fsync → rename) for data safety. * History snapshots use epoch-ms filenames with 10-entry retention. diff --git a/services/platform/convex/branding/file_utils.ts b/services/platform/convex/branding/file_utils.ts index 2cf7801298..f9ce6d5266 100644 --- a/services/platform/convex/branding/file_utils.ts +++ b/services/platform/convex/branding/file_utils.ts @@ -36,25 +36,27 @@ export type BrandingReadResult = message: string; }; -function getBaseDir(): string { +function getConfigRoot(): string { const configDir = process.env.TALE_CONFIG_DIR; - if (configDir) return path.join(configDir, 'branding'); + if (configDir) return configDir; throw new Error( 'TALE_CONFIG_DIR environment variable is not set. ' + - 'Set TALE_CONFIG_DIR in .env to the root config directory ' + + 'Set it to the root config directory ' + '(e.g., TALE_CONFIG_DIR=/path/to/tale/examples).', ); } +/** + * Resolve the branding directory for an organization. Org-first: + * `${TALE_CONFIG_DIR}//branding/`. Read-side currently hardcodes + * `'default'` (see branding/file_actions.ts call sites), so non-default + * org branding dirs are scaffolded but unread. 
+ */ export function resolveBrandingDir(orgSlug: string): string { if (!validateOrgSlug(orgSlug)) { throw new Error(`Invalid org slug: ${orgSlug}`); } - const baseDir = getBaseDir(); - if (orgSlug === 'default') { - return baseDir; - } - return path.join(baseDir, orgSlug); + return path.join(getConfigRoot(), orgSlug, 'branding'); } export function resolveBrandingFilePath(orgSlug: string): string { diff --git a/services/platform/convex/branding/queries.test.ts b/services/platform/convex/branding/queries.test.ts index 761d353934..83013c0220 100644 --- a/services/platform/convex/branding/queries.test.ts +++ b/services/platform/convex/branding/queries.test.ts @@ -74,13 +74,15 @@ describe('serializeBrandingJson', () => { }); }); -describe('resolveBrandingDir', () => { - it('returns base dir for default org', () => { - expect(resolveBrandingDir('default')).toBe('/tmp/test-data/branding'); +describe('resolveBrandingDir (org-first)', () => { + it('default org lives at /default/branding/', () => { + expect(resolveBrandingDir('default')).toBe( + '/tmp/test-data/default/branding', + ); }); - it('returns subdirectory for named org', () => { - expect(resolveBrandingDir('acme')).toBe('/tmp/test-data/branding/acme'); + it('other orgs live at //branding/ (read-side is default-only today)', () => { + expect(resolveBrandingDir('acme')).toBe('/tmp/test-data/acme/branding'); }); it('throws for invalid org slug', () => { @@ -89,9 +91,9 @@ describe('resolveBrandingDir', () => { }); describe('resolveBrandingFilePath', () => { - it('returns branding.json path', () => { + it('returns branding.json path under /branding/', () => { expect(resolveBrandingFilePath('default')).toBe( - '/tmp/test-data/branding/branding.json', + '/tmp/test-data/default/branding/branding.json', ); }); }); @@ -140,15 +142,17 @@ describe('mimeToExtension', () => { }); describe('resolveImagesDir', () => { - it('returns images subdirectory', () => { - 
expect(resolveImagesDir('default')).toBe('/tmp/test-data/branding/images'); + it('returns images subdirectory under /branding/', () => { + expect(resolveImagesDir('default')).toBe( + '/tmp/test-data/default/branding/images', + ); }); }); describe('resolveImagePath', () => { it('resolves valid image filename', () => { expect(resolveImagePath('default', 'logo.png')).toBe( - '/tmp/test-data/branding/images/logo.png', + '/tmp/test-data/default/branding/images/logo.png', ); }); diff --git a/services/platform/convex/governance/retention_actions.ts b/services/platform/convex/governance/retention_actions.ts index 4a5320ceb4..c86ef5d146 100644 --- a/services/platform/convex/governance/retention_actions.ts +++ b/services/platform/convex/governance/retention_actions.ts @@ -119,7 +119,7 @@ export const getRetentionBoundsAction = action({ throw new ConvexError({ code: 'RETENTION_CONFIG_MISSING', message: - 'Retention config not yet installed. Copy examples/retention/default.json to $TALE_CONFIG_DIR/retention/default.json then reload.', + 'Retention config not yet installed. Copy examples/default/retention.json to $TALE_CONFIG_DIR/default/retention.json then reload.', }); } @@ -189,7 +189,7 @@ export const upsertRetentionPolicyAction = action({ throw new ConvexError({ code: 'RETENTION_CONFIG_MISSING', message: - 'Retention config not yet installed. Copy examples/retention/default.json to $TALE_CONFIG_DIR/retention/default.json.', + 'Retention config not yet installed. 
Copy examples/default/retention.json to $TALE_CONFIG_DIR/default/retention.json.', }); } const boundsByCategory = buildBoundsByCategory(orgConfig); diff --git a/services/platform/convex/governance/retention_bounds_proposal.ts b/services/platform/convex/governance/retention_bounds_proposal.ts index 3299c13a41..b4975f2eee 100644 --- a/services/platform/convex/governance/retention_bounds_proposal.ts +++ b/services/platform/convex/governance/retention_bounds_proposal.ts @@ -78,7 +78,7 @@ async function computeEffectiveAppliedBounds( throw new ConvexError({ code: 'RETENTION_CONFIG_MISSING', message: - 'Retention config not yet installed. Copy examples/retention/default.json to $TALE_CONFIG_DIR/retention/default.json then reload.', + 'Retention config not yet installed. Copy examples/default/retention.json to $TALE_CONFIG_DIR/default/retention.json then reload.', }); } const all = applyEnvTighteningAll(orgConfig); diff --git a/services/platform/convex/governance/retention_floors.ts b/services/platform/convex/governance/retention_floors.ts index d2295d0e69..f34802c0c8 100644 --- a/services/platform/convex/governance/retention_floors.ts +++ b/services/platform/convex/governance/retention_floors.ts @@ -316,7 +316,7 @@ export class RetentionConfigMissingError extends Error { readonly hint: string; constructor(category: RetentionCategory) { const hint = - 'Copy examples/retention/default.json to $TALE_CONFIG_DIR/retention/default.json'; + 'Copy examples/default/retention.json to $TALE_CONFIG_DIR/default/retention.json'; super(`Retention config missing for category=${category}. 
${hint}`); this.category = category; this.hint = hint; diff --git a/services/platform/convex/integrations/file_utils.ts b/services/platform/convex/integrations/file_utils.ts index 3d1ec52ff3..e9fde32f0e 100644 --- a/services/platform/convex/integrations/file_utils.ts +++ b/services/platform/convex/integrations/file_utils.ts @@ -19,7 +19,7 @@ export { sha256 }; /** * Integration slug: lowercase alphanumeric + hyphens/underscores, flat (no nesting). - * Must match the directory name under INTEGRATIONS_DIR. + * Must match the directory name under `${TALE_CONFIG_DIR}//integrations/`. */ const INTEGRATION_SLUG_REGEX = /^[a-z0-9][a-z0-9_-]*$/; @@ -44,32 +44,25 @@ export function validateIntegrationSlug(slug: string): boolean { return INTEGRATION_SLUG_REGEX.test(slug); } -function getBaseDir(): string { - const dir = process.env.INTEGRATIONS_DIR; - if (dir) return dir; +function getConfigRoot(): string { const configDir = process.env.TALE_CONFIG_DIR; - if (configDir) return path.join(configDir, 'integrations'); + if (configDir) return configDir; throw new Error( - 'Neither TALE_CONFIG_DIR nor INTEGRATIONS_DIR environment variable is set. ' + - 'Set TALE_CONFIG_DIR in .env to the root config directory ' + + 'TALE_CONFIG_DIR environment variable is not set. ' + + 'Set it to the root config directory ' + '(e.g., TALE_CONFIG_DIR=/path/to/tale/examples).', ); } /** - * Resolve the integrations directory for an organization. - * Default org uses the base dir directly. - * Other orgs use `{baseDir}/@{orgSlug}/`. + * Resolve the integrations directory for an organization. Org-first: + * `${TALE_CONFIG_DIR}//integrations/`. 
*/ export function resolveIntegrationsDir(orgSlug: string): string { if (!validateOrgSlug(orgSlug)) { throw new Error(`Invalid org slug: ${orgSlug}`); } - const baseDir = getBaseDir(); - if (orgSlug === 'default') { - return baseDir; - } - return path.join(baseDir, `@${orgSlug}`); + return path.join(getConfigRoot(), orgSlug, 'integrations'); } /** diff --git a/services/platform/convex/lib/config_store/actions.ts b/services/platform/convex/lib/config_store/actions.ts index 91dc9a3582..24adaafa1f 100644 --- a/services/platform/convex/lib/config_store/actions.ts +++ b/services/platform/convex/lib/config_store/actions.ts @@ -19,9 +19,12 @@ import { retentionDefaultsConfigSchema } from '../../../lib/shared/schemas/reten import { internalAction } from '../../_generated/server'; import { createFileConfigStore } from './store'; +// Retention is one JSON object per org under the uniform org-first layout: +// `$TALE_CONFIG_DIR//retention.json`. const retentionStore = createFileConfigStore( 'retention', retentionDefaultsConfigSchema, + { orgFirst: true }, ); export const readRetentionConfig = internalAction({ diff --git a/services/platform/convex/lib/config_store/store.ts b/services/platform/convex/lib/config_store/store.ts index 3231a3bc4e..29b0af8294 100644 --- a/services/platform/convex/lib/config_store/store.ts +++ b/services/platform/convex/lib/config_store/store.ts @@ -1,15 +1,18 @@ 'use node'; /** - * Generic typed read/write helper for area-specific JSON config files - * under `$TALE_CONFIG_DIR/{area}/{orgSlug}.json`. + * Generic typed read/write helper for area-specific JSON config files. * - * The area-agnostic substrate behind retention's per-org files. Wrapping - * `readJsonFile` + `atomicWrite` so callers don't reinvent path - * resolution, symlink/size guards, or atomic-rename semantics. + * Two layout shapes are supported, selected via `orgFirst`: + * + * - `orgFirst: false` (default): `$TALE_CONFIG_DIR/{area}/{orgSlug}.json`. 
+ * The legacy per-area-dir shape; org slugs live in the filename. + * - `orgFirst: true`: `$TALE_CONFIG_DIR/{orgSlug}/{area}.json`. + * Used by retention under the uniform org-first layout — each org has + * one file per area, alongside its `agents/`, `providers/`, etc. * - * Initially used only by retention; provider/integration migrations are - * the obvious next consumers. Keep the API minimal. + * Wraps `readJsonFile` + `atomicWrite` so callers don't reinvent path + * resolution, symlink/size guards, or atomic-rename semantics. * * Known limitations (round-2 / M7): * - **Last-writer-wins.** No file-level locking — two concurrent @@ -27,7 +30,7 @@ * wired into a UI flow. */ -import { readdir } from 'node:fs/promises'; +import { readdir, stat } from 'node:fs/promises'; import path from 'node:path'; import type { z } from 'zod/v4'; @@ -46,29 +49,45 @@ export interface ConfigStore { read(orgSlug: string): Promise; /** Atomic write of the parsed/serialized config to the per-org path. */ write(orgSlug: string, value: T): Promise; - /** Enumerate `*.json` files in the area dir, returning each org slug. */ + /** Enumerate orgs that have a file for this area. */ list(): Promise>; } -function getAreaDir(area: string): string { +export interface CreateFileConfigStoreOptions { + /** + * When true, paths follow the org-first layout: + * `$TALE_CONFIG_DIR//.json`. List enumerates per-org + * directories that contain `.json`. When false (default), paths + * follow `$TALE_CONFIG_DIR//.json`. + */ + orgFirst?: boolean; +} + +function getConfigRoot(area: string): string { const configDir = process.env.TALE_CONFIG_DIR; if (!configDir) { throw new Error( `TALE_CONFIG_DIR environment variable is not set. 
` + `Set TALE_CONFIG_DIR in .env to the root config directory ` + - `(e.g., TALE_CONFIG_DIR=/path/to/tale/examples) so ${area}/ ` + + `(e.g., TALE_CONFIG_DIR=/path/to/tale/examples) so ${area} ` + `can be resolved.`, ); } - return path.join(configDir, area); + return configDir; } -function resolveFilePath(area: string, orgSlug: string): string { +function resolveFilePath( + area: string, + orgSlug: string, + orgFirst: boolean, +): string { if (!validateOrgSlug(orgSlug)) { throw new Error(`Invalid org slug: ${orgSlug}`); } - const dir = getAreaDir(area); - const resolved = path.resolve(dir, `${orgSlug}.json`); + const root = getConfigRoot(area); + const dir = orgFirst ? path.join(root, orgSlug) : path.join(root, area); + const fileName = orgFirst ? `${area}.json` : `${orgSlug}.json`; + const resolved = path.resolve(dir, fileName); const expectedPrefix = path.resolve(dir); if ( !resolved.startsWith(expectedPrefix + path.sep) && @@ -87,7 +106,10 @@ function resolveFilePath(area: string, orgSlug: string): string { export function createFileConfigStore( area: string, schema: z.ZodType, + options: CreateFileConfigStoreOptions = {}, ): ConfigStore { + const orgFirst = options.orgFirst ?? false; + const parse = (content: string): T => { // oxlint-disable-next-line typescript/no-unsafe-type-assertion -- raw JSON before Zod validation const parsed = JSON.parse(content) as unknown; @@ -100,16 +122,17 @@ export function createFileConfigStore( return { async read(orgSlug) { - const filePath = resolveFilePath(area, orgSlug); + const filePath = resolveFilePath(area, orgSlug, orgFirst); const result = await readJsonFile(filePath, MAX_FILE_SIZE_BYTES, parse); if (result.ok) return result.data; if (result.error === 'not_found') return null; - throw new Error( - `Failed to read ${area}/${orgSlug}.json: ${result.message}`, - ); + const display = orgFirst + ? 
`${orgSlug}/${area}.json` + : `${area}/${orgSlug}.json`; + throw new Error(`Failed to read ${display}: ${result.message}`); }, async write(orgSlug, value) { - const filePath = resolveFilePath(area, orgSlug); + const filePath = resolveFilePath(area, orgSlug, orgFirst); // Re-parse before write to surface schema errors to the caller // rather than silently corrupting the file. Cheap relative to fs. const parsed = schema.safeParse(value); @@ -122,12 +145,37 @@ export function createFileConfigStore( await atomicWrite(filePath, content); }, async list() { - const dir = getAreaDir(area); + const root = getConfigRoot(area); + if (orgFirst) { + // Each org's file lives at `//.json`. + // Enumerate org subdirs (validated by slug regex) and probe each + // for the area file. Missing root → return empty rather than + // throwing — operator hasn't seeded anything yet. + let entries: string[]; + try { + entries = await readdir(root); + } catch (err) { + if (err instanceof Error && 'code' in err && err.code === 'ENOENT') { + return []; + } + throw err; + } + const results: Array<{ orgSlug: string }> = []; + for (const name of entries) { + if (!validateOrgSlug(name)) continue; + const filePath = path.join(root, name, `${area}.json`); + const info = await stat(filePath).catch(() => null); + if (info?.isFile()) results.push({ orgSlug: name }); + } + return results; + } + + // Legacy per-area-dir layout: list `*.json` files under `//`. + const dir = path.join(root, area); let entries: string[]; try { entries = await readdir(dir); } catch (err) { - // Missing dir is fine — operator hasn't seeded anything yet. 
if (err instanceof Error && 'code' in err && err.code === 'ENOENT') { return []; } diff --git a/services/platform/convex/node_only/integration_sandbox/gmail_draft_filtering.test.ts b/services/platform/convex/node_only/integration_sandbox/gmail_draft_filtering.test.ts index edfe0197e3..d1ee8f8c7a 100644 --- a/services/platform/convex/node_only/integration_sandbox/gmail_draft_filtering.test.ts +++ b/services/platform/convex/node_only/integration_sandbox/gmail_draft_filtering.test.ts @@ -9,7 +9,7 @@ import { executeIntegrationImpl } from './execute_integration_impl'; const connectorTs = fs.readFileSync( path.resolve( __dirname, - '../../../../../examples/integrations/gmail/connector.ts', + '../../../../../examples/default/integrations/gmail/connector.ts', ), 'utf-8', ); diff --git a/services/platform/convex/node_only/integration_sandbox/outlook_draft_filtering.test.ts b/services/platform/convex/node_only/integration_sandbox/outlook_draft_filtering.test.ts index 2eb9db1211..d9be1a0bc8 100644 --- a/services/platform/convex/node_only/integration_sandbox/outlook_draft_filtering.test.ts +++ b/services/platform/convex/node_only/integration_sandbox/outlook_draft_filtering.test.ts @@ -9,7 +9,7 @@ import { executeIntegrationImpl } from './execute_integration_impl'; const connectorTs = fs.readFileSync( path.resolve( __dirname, - '../../../../../examples/integrations/outlook/connector.ts', + '../../../../../examples/default/integrations/outlook/connector.ts', ), 'utf-8', ); diff --git a/services/platform/convex/organizations/reseed_all_orgs.ts b/services/platform/convex/organizations/reseed_all_orgs.ts new file mode 100644 index 0000000000..4bb196cf58 --- /dev/null +++ b/services/platform/convex/organizations/reseed_all_orgs.ts @@ -0,0 +1,106 @@ +/** + * Operator-triggered re-seed: enumerate every org (incl. `default`) and + * re-invoke `scaffoldNewOrganization` with `override:true`. 
Driven by + * `tale deploy --override-all` via `bunx convex run organizations/reseed_all_orgs:reseedAllOrgsFromBuiltin`. + * + * Semantics: + * - Always reseeds `default` even if absent from the org list (canonical + * template org). + * - Per-org try/catch: one failure logs + continues; the full result + * map is returned so the CLI surfaces succeeded/failed counts and + * exits non-zero on any failure. + * - Deterministic order: collected slugs are sorted before processing + * so logs and partial-failure reruns are reproducible. + * - Cursor-paginated org enumeration (200/page) instead of the + * 500-page-cap pattern in older backfills — avoids silently capping + * deployments with many orgs. + * + * Note: this is an ops re-runnable tool, not a one-shot migration. Lives + * next to `scaffold.ts` (the thing it reinvokes), not in `migrations/`. + */ + +import { getString, isRecord } from '../../lib/utils/type-guards'; +import { components, internal } from '../_generated/api'; +import { internalAction } from '../_generated/server'; + +// Inlined to avoid importing from convex/lib/file_io.ts (which has 'use node' +// and would force this orchestration action into the Node runtime). Keep in +// sync with `validateOrgSlug` at services/platform/convex/lib/file_io.ts. 
+const ORG_SLUG_REGEX = /^[a-z0-9][a-z0-9_-]*$/; +function isValidOrgSlug(slug: string): boolean { + return slug === 'default' || ORG_SLUG_REGEX.test(slug); +} + +type OrgReseedResult = + | { slug: string; status: 'ok' } + | { slug: string; status: 'error'; error: string }; + +export const reseedAllOrgsFromBuiltin = internalAction({ + args: {}, + handler: async (ctx) => { + const slugSet = new Set(['default']); + + let cursor: string | null = null; + let isDone = false; + while (!isDone) { + const res: unknown = await ctx.runQuery( + components.betterAuth.adapter.findMany, + { + model: 'organization', + paginationOpts: { cursor, numItems: 200 }, + where: [], + }, + ); + const page = isRecord(res) && Array.isArray(res.page) ? res.page : []; + for (const raw of page) { + if (!isRecord(raw)) continue; + const slug = getString(raw, 'slug'); + if (!slug) continue; + if (!isValidOrgSlug(slug)) { + console.warn( + `[reseedAllOrgs] skipping invalid slug "${slug}" returned by betterAuth`, + ); + continue; + } + slugSet.add(slug); + } + cursor = + isRecord(res) && typeof res.continueCursor === 'string' + ? res.continueCursor + : null; + isDone = + isRecord(res) && typeof res.isDone === 'boolean' ? res.isDone : true; + } + + const slugs = Array.from(slugSet).sort(); + const results: OrgReseedResult[] = []; + + for (const slug of slugs) { + try { + await ctx.runAction( + internal.organizations.scaffold.scaffoldNewOrganization, + { orgSlug: slug, override: true }, + ); + results.push({ slug, status: 'ok' }); + console.log(`[reseedAllOrgs] reseeded "${slug}"`); + } catch (err) { + const message = err instanceof Error ? 
err.message : String(err); + console.error(`[reseedAllOrgs] "${slug}" failed:`, message); + results.push({ slug, status: 'error', error: message }); + } + } + + const succeeded = results.filter((r) => r.status === 'ok').length; + const failed = results.length - succeeded; + console.log( + `[reseedAllOrgs] done: total=${results.length} succeeded=${succeeded} failed=${failed}`, + ); + + return { + total: results.length, + succeeded, + failed, + results, + }; + }, +}); diff --git a/services/platform/convex/organizations/scaffold.test.ts b/services/platform/convex/organizations/scaffold.test.ts index 4d997a2d44..5392eac0a3 100644 --- a/services/platform/convex/organizations/scaffold.test.ts +++ b/services/platform/convex/organizations/scaffold.test.ts @@ -20,17 +20,24 @@ vi.mock('../_generated/server', () => ({ internalAction: vi.fn((config) => config), })); -const { scaffoldNewOrganization } = await import('./scaffold'); +const { scaffoldNewOrganization, cleanupOrgFilesystem } = + await import('./scaffold'); type ActionConfig = { - handler: (ctx: never, args: { orgSlug: string }) => Promise; + handler: ( + ctx: never, + args: { orgSlug: string; override?: boolean }, + ) => Promise; }; const scaffoldHandler = (scaffoldNewOrganization as unknown as ActionConfig) .handler; +const cleanupHandler = (cleanupOrgFilesystem as unknown as ActionConfig) + .handler; -// All env vars the scaffold code path or the per-domain resolvers consult. -// Save + clear them in beforeEach so each test starts from a known-empty -// state, then restore in afterEach so we don't poison other test files. +// Under org-first only TALE_CONFIG_DIR + TALE_CONFIG_BUILTIN_DIR remain; +// per-domain env overrides (AGENTS_DIR / WORKFLOWS_DIR / PROVIDERS_DIR / +// INTEGRATIONS_DIR / SKILLS_DIR) were dropped. Still save/restore the +// legacy keys defensively so a stale shell-env value can't leak across. 
const ENV_KEYS = [ 'TALE_CONFIG_DIR', 'TALE_CONFIG_BUILTIN_DIR', @@ -72,227 +79,144 @@ async function writeText(filePath: string, content: string): Promise { await writeFile(filePath, content, 'utf-8'); } -describe('scaffoldNewOrganization', () => { - it('seeds workflows from the catalog and ignores the default org workspace', async () => { - // Catalog: a shipped template under workflows/shopify/sync.json. +// Catalog source path for a given domain — mirrors the org-first builtin +// layout (`/default//...`) the scaffold reads from. +function catSrc(...parts: string[]): string { + return path.join(catalogRoot, 'default', ...parts); +} + +// Per-org target path — `///...`. +function orgDst(orgSlug: string, ...parts: string[]): string { + return path.join(configRoot, orgSlug, ...parts); +} + +describe('scaffoldNewOrganization (org-first)', () => { + it('seeds workflows from the catalog into the org-first target', async () => { process.env.TALE_CONFIG_BUILTIN_DIR = catalogRoot; await writeText( - path.join(catalogRoot, 'workflows', 'shopify', 'sync.json'), + catSrc('workflows', 'shopify', 'sync.json'), '{"name":"sync"}', ); - // Default-org workspace: a junk workflow that must NOT propagate. - await writeText( - path.join(configRoot, 'workflows', 'junk.json'), - '{"name":"junk"}', - ); - await scaffoldHandler({} as never, { orgSlug: 'acme' }); - const acmeDir = path.join(configRoot, 'workflows', '@acme'); - expect(existsSync(path.join(acmeDir, 'shopify', 'sync.json'))).toBe(true); - expect(existsSync(path.join(acmeDir, 'junk.json'))).toBe(false); + expect( + existsSync(orgDst('acme', 'workflows', 'shopify', 'sync.json')), + ).toBe(true); }); - it('closes the agents cross-tenant leak: raw-slug subdirs in the source are not copied', async () => { - // Agents catalog contains only the shipped template. 
+ it('seeds flat domains (agents) per-file from the catalog', async () => { process.env.TALE_CONFIG_BUILTIN_DIR = catalogRoot; - await writeText( - path.join(catalogRoot, 'agents', 'shipped.json'), - '{"displayName":"shipped"}', - ); - - // Default-org workspace contains another tenant's raw-slug subdir. - // Pre-fix scaffolding (which sourced from this dir) would recursively - // copy `competitor/` into the new org because the @-skip in copyTree - // doesn't catch raw slugs. Sourcing from the catalog instead must - // not see this at all. - await writeText( - path.join(configRoot, 'agents', 'competitor', 'secret.json'), - '{"displayName":"leak"}', - ); + await writeText(catSrc('agents', 'shipped.json'), '{"displayName":"x"}'); await scaffoldHandler({} as never, { orgSlug: 'acme' }); - const acmeDir = path.join(configRoot, 'agents', 'acme'); - expect(existsSync(path.join(acmeDir, 'shipped.json'))).toBe(true); - expect(existsSync(path.join(acmeDir, 'competitor'))).toBe(false); - expect(existsSync(path.join(acmeDir, 'competitor', 'secret.json'))).toBe( - false, - ); + expect(existsSync(orgDst('acme', 'agents', 'shipped.json'))).toBe(true); }); - it('flat domains (agents/providers) never recurse into catalog subdirs', async () => { + it('flat domains never recurse into catalog subdirs (defense if the catalog ever ships one)', async () => { process.env.TALE_CONFIG_BUILTIN_DIR = catalogRoot; - await writeText( - path.join(catalogRoot, 'agents', 'shipped.json'), - '{"displayName":"shipped"}', - ); + await writeText(catSrc('agents', 'shipped.json'), '{"displayName":"x"}'); // A subdir inside the agents catalog is unexpected (agents is file-only). - // The flat-domain guard must skip it rather than recurse. 
await writeText( - path.join(catalogRoot, 'agents', 'stray', 'nested.json'), + catSrc('agents', 'stray', 'nested.json'), '{"displayName":"nested"}', ); await scaffoldHandler({} as never, { orgSlug: 'acme' }); - const acmeDir = path.join(configRoot, 'agents', 'acme'); - expect(existsSync(path.join(acmeDir, 'shipped.json'))).toBe(true); - expect(existsSync(path.join(acmeDir, 'stray'))).toBe(false); + expect(existsSync(orgDst('acme', 'agents', 'shipped.json'))).toBe(true); + expect(existsSync(orgDst('acme', 'agents', 'stray'))).toBe(false); }); - it('flat-domain guard closes the agents leak on the dev fallback path (env unset)', async () => { - // No catalog env → source is the default-org workspace. A previously - // created org left a raw-slug subdir there; scaffolding a new org must - // not recurse into it. Here the flat-domain guard — not the source - // choice — is what prevents the cross-tenant copy. - await writeText( - path.join(configRoot, 'agents', 'shipped.json'), - '{"displayName":"shipped"}', - ); - await writeText( - path.join(configRoot, 'agents', 'competitor', 'secret.json'), - '{"displayName":"leak"}', - ); - - await scaffoldHandler({} as never, { orgSlug: 'acme' }); - - const acmeDir = path.join(configRoot, 'agents', 'acme'); - expect(existsSync(path.join(acmeDir, 'shipped.json'))).toBe(true); - expect(existsSync(path.join(acmeDir, 'competitor'))).toBe(false); - }); - - it('skips symlinks rather than following them', async () => { + it('skips symlinks in the catalog rather than following them', async () => { process.env.TALE_CONFIG_BUILTIN_DIR = catalogRoot; - const targetPayload = await mkdtemp(path.join(tmpdir(), 'scaffold-evil-')); - const targetFile = path.join(targetPayload, 'payload.json'); - await writeFile(targetFile, '{"name":"escaped"}', 'utf-8'); + const evilPayloadDir = await mkdtemp(path.join(tmpdir(), 'scaffold-evil-')); + const evilFile = path.join(evilPayloadDir, 'payload.json'); + await writeFile(evilFile, '{"name":"escaped"}', 
'utf-8'); - await mkdir(path.join(catalogRoot, 'workflows'), { recursive: true }); - await symlink(targetFile, path.join(catalogRoot, 'workflows', 'evil.json')); - // Also drop a real file beside it so we know the copy loop kept running. - await writeText( - path.join(catalogRoot, 'workflows', 'legit.json'), - '{"name":"legit"}', - ); + await mkdir(catSrc('workflows'), { recursive: true }); + await symlink(evilFile, path.join(catSrc('workflows'), 'evil.json')); + await writeText(catSrc('workflows', 'legit.json'), '{"name":"legit"}'); try { await scaffoldHandler({} as never, { orgSlug: 'acme' }); - const acmeDir = path.join(configRoot, 'workflows', '@acme'); - expect(existsSync(path.join(acmeDir, 'evil.json'))).toBe(false); - expect(existsSync(path.join(acmeDir, 'legit.json'))).toBe(true); + expect(existsSync(orgDst('acme', 'workflows', 'evil.json'))).toBe(false); + expect(existsSync(orgDst('acme', 'workflows', 'legit.json'))).toBe(true); } finally { - await rm(targetPayload, { recursive: true, force: true }); + await rm(evilPayloadDir, { recursive: true, force: true }); } }); - it('falls back to domain.resolve(default) when the catalog env is unset (dev)', async () => { - // No TALE_CONFIG_BUILTIN_DIR set. Default-org workspace becomes the - // catalog — historical behavior, preserved for local dev. 
- await writeText( - path.join(configRoot, 'workflows', 'shopify', 'sync.json'), - '{"name":"sync"}', - ); - - await scaffoldHandler({} as never, { orgSlug: 'acme' }); - - const acmeDir = path.join(configRoot, 'workflows', '@acme'); - expect(existsSync(path.join(acmeDir, 'shopify', 'sync.json'))).toBe(true); - }); - - it('still applies the @-prefix, .history, and *.secrets.json skips when copying', async () => { + it('always skips *.secrets.json and .history/ at the catalog source', async () => { process.env.TALE_CONFIG_BUILTIN_DIR = catalogRoot; + await writeText(catSrc('providers', 'openai.json'), '{"name":"openai"}'); await writeText( - path.join(catalogRoot, 'providers', 'openai.json'), - '{"name":"openai"}', - ); - await writeText( - path.join(catalogRoot, 'providers', 'openai.secrets.json'), + catSrc('providers', 'openai.secrets.json'), '{"key":"redacted"}', ); - await writeText( - path.join(catalogRoot, 'providers', '.history', 'snapshot.json'), - '{}', - ); - await writeText( - path.join(catalogRoot, 'providers', '@stale-tenant', 'leak.json'), - '{}', - ); + await writeText(catSrc('providers', '.history', 'snapshot.json'), '{}'); await scaffoldHandler({} as never, { orgSlug: 'acme' }); - const acmeDir = path.join(configRoot, 'providers', 'acme'); - expect(existsSync(path.join(acmeDir, 'openai.json'))).toBe(true); - expect(existsSync(path.join(acmeDir, 'openai.secrets.json'))).toBe(false); - expect(existsSync(path.join(acmeDir, '.history'))).toBe(false); - expect(existsSync(path.join(acmeDir, '@stale-tenant'))).toBe(false); + expect(existsSync(orgDst('acme', 'providers', 'openai.json'))).toBe(true); + expect(existsSync(orgDst('acme', 'providers', 'openai.secrets.json'))).toBe( + false, + ); + expect(existsSync(orgDst('acme', 'providers', '.history'))).toBe(false); }); - it('is per-domain idempotent: a domain dir that already has files is skipped', async () => { + it('is per-domain idempotent: a domain dir that already has files is skipped (override:false)', 
async () => { process.env.TALE_CONFIG_BUILTIN_DIR = catalogRoot; + await writeText(catSrc('workflows', 'shipped.json'), '{"name":"shipped"}'); + // Pre-existing org content — scaffold must not overwrite without override. await writeText( - path.join(catalogRoot, 'workflows', 'shipped.json'), - '{"name":"shipped"}', + orgDst('acme', 'workflows', 'existing.json'), + '{"name":"existing"}', ); - // Pre-existing org content — scaffold must not overwrite. - const acmeDir = path.join(configRoot, 'workflows', '@acme'); - await writeText(path.join(acmeDir, 'existing.json'), '{"name":"existing"}'); await scaffoldHandler({} as never, { orgSlug: 'acme' }); - expect(await readFile(path.join(acmeDir, 'existing.json'), 'utf-8')).toBe( - '{"name":"existing"}', - ); - expect(existsSync(path.join(acmeDir, 'shipped.json'))).toBe(false); + expect( + await readFile(orgDst('acme', 'workflows', 'existing.json'), 'utf-8'), + ).toBe('{"name":"existing"}'); + expect(existsSync(orgDst('acme', 'workflows', 'shipped.json'))).toBe(false); }); it('treats a target containing only .history/ as occupied (no re-seed on top of user edit trail)', async () => { process.env.TALE_CONFIG_BUILTIN_DIR = catalogRoot; + await writeText(catSrc('workflows', 'shipped.json'), '{"name":"shipped"}'); await writeText( - path.join(catalogRoot, 'workflows', 'shipped.json'), - '{"name":"shipped"}', - ); - // Realistic state: user created the org, edited a workflow (writing - // `.history//.json`), then deleted the visible workflow. - // Re-scaffolding (e.g., via the backfill migration) must NOT silently - // re-seed the catalog on top of the surviving edit trail. 
- const acmeDir = path.join(configRoot, 'workflows', '@acme'); - await writeText( - path.join(acmeDir, '.history', 'old.json'), + orgDst('acme', 'workflows', '.history', 'old.json'), '{"snapshot":1}', ); await scaffoldHandler({} as never, { orgSlug: 'acme' }); - expect(existsSync(path.join(acmeDir, 'shipped.json'))).toBe(false); - expect(existsSync(path.join(acmeDir, '.history', 'old.json'))).toBe(true); + expect(existsSync(orgDst('acme', 'workflows', 'shipped.json'))).toBe(false); + expect( + existsSync(orgDst('acme', 'workflows', '.history', 'old.json')), + ).toBe(true); }); it('ignores atomicWrite tmp orphans so a crashed scaffold can retry', async () => { process.env.TALE_CONFIG_BUILTIN_DIR = catalogRoot; - await writeText( - path.join(catalogRoot, 'workflows', 'shipped.json'), - '{"name":"shipped"}', - ); + await writeText(catSrc('workflows', 'shipped.json'), '{"name":"shipped"}'); // Simulate the residue a prior crashed scaffold would leave behind: // atomicWrite uses `....tmp` and cleans up on // success, but a crash mid-write leaves the tmp orphan in place. - const acmeDir = path.join(configRoot, 'workflows', '@acme'); await writeText( - path.join(acmeDir, '.shipped.json.1700000000000.deadbeef.tmp'), + orgDst('acme', 'workflows', '.shipped.json.1700000000000.deadbeef.tmp'), 'partial', ); await scaffoldHandler({} as never, { orgSlug: 'acme' }); - expect(existsSync(path.join(acmeDir, 'shipped.json'))).toBe(true); + expect(existsSync(orgDst('acme', 'workflows', 'shipped.json'))).toBe(true); }); it('logs error when TALE_CONFIG_BUILTIN_DIR points at a missing path (deploy misconfig)', async () => { - // Builtin root configured but the directory doesn't exist on disk — - // simulates platform/convex image version skew or a missing volume mount. 
process.env.TALE_CONFIG_BUILTIN_DIR = path.join(catalogRoot, 'missing'); const errSpy = vi.spyOn(console, 'error').mockImplementation(() => {}); @@ -307,25 +231,212 @@ describe('scaffoldNewOrganization', () => { m.includes('does not exist'), ), ).toBe(true); - // Target should remain empty — no silent fallback to default-org dir. - expect(existsSync(path.join(configRoot, 'workflows', '@acme'))).toBe( - false, - ); + expect(existsSync(orgDst('acme', 'workflows'))).toBe(false); } finally { errSpy.mockRestore(); } }); - it('returns null without scaffolding the default org', async () => { + it('default org IS scaffold-able under org-first (no longer early-returned)', async () => { process.env.TALE_CONFIG_BUILTIN_DIR = catalogRoot; - await writeText(path.join(catalogRoot, 'workflows', 'shipped.json'), '{}'); + await writeText(catSrc('agents', 'shipped.json'), '{"displayName":"x"}'); - const result = await scaffoldHandler({} as never, { orgSlug: 'default' }); + await scaffoldHandler({} as never, { orgSlug: 'default' }); - expect(result).toBeNull(); - // Default org's workspace must not have been touched by scaffold. - expect(existsSync(path.join(configRoot, 'workflows', 'shipped.json'))).toBe( - false, + expect(existsSync(orgDst('default', 'agents', 'shipped.json'))).toBe(true); + }); + + it('override:true overwrites flat-domain files while preserving secrets and .history', async () => { + process.env.TALE_CONFIG_BUILTIN_DIR = catalogRoot; + await writeText(catSrc('agents', 'shipped.json'), '{"displayName":"new"}'); + + // Pre-existing org state: user-edited shipped, user-added file, secret, history. 
+ await writeText( + orgDst('acme', 'agents', 'shipped.json'), + '{"displayName":"user-edited"}', ); + await writeText( + orgDst('acme', 'agents', 'user-added.json'), + '{"displayName":"keep me"}', + ); + await writeText( + orgDst('acme', 'agents', 'openai.secrets.json'), + '{"key":"keep-me-too"}', + ); + await writeText( + orgDst('acme', 'agents', '.history', 'shipped', '1.json'), + '{"rev":1}', + ); + + await scaffoldHandler({} as never, { orgSlug: 'acme', override: true }); + + // Catalog file overwritten. + expect( + await readFile(orgDst('acme', 'agents', 'shipped.json'), 'utf-8'), + ).toBe('{"displayName":"new"}'); + // User-added file survived. + expect(existsSync(orgDst('acme', 'agents', 'user-added.json'))).toBe(true); + // Secret + history survived. + expect( + await readFile(orgDst('acme', 'agents', 'openai.secrets.json'), 'utf-8'), + ).toBe('{"key":"keep-me-too"}'); + expect( + existsSync(orgDst('acme', 'agents', '.history', 'shipped', '1.json')), + ).toBe(true); + }); + + it('override:true for dir-bundle domains (skills) rm-replaces the bundle but preserves dir-level secrets/.history', async () => { + process.env.TALE_CONFIG_BUILTIN_DIR = catalogRoot; + await writeText(catSrc('skills', 'code-reviewer', 'SKILL.md'), 'new'); + + // Pre-existing bundle: user-edited SKILL.md + a user-added file inside + // the bundle (gets wiped); domain-level .history + secrets survive. 
+ await writeText( + orgDst('acme', 'skills', 'code-reviewer', 'SKILL.md'), + 'user-edited', + ); + await writeText( + orgDst('acme', 'skills', 'code-reviewer', 'user-extra.txt'), + 'gone after override', + ); + await writeText( + orgDst('acme', 'skills', '.history', 'code-reviewer', '1.md'), + 'old rev', + ); + + await scaffoldHandler({} as never, { orgSlug: 'acme', override: true }); + + expect( + await readFile( + orgDst('acme', 'skills', 'code-reviewer', 'SKILL.md'), + 'utf-8', + ), + ).toBe('new'); + expect( + existsSync(orgDst('acme', 'skills', 'code-reviewer', 'user-extra.txt')), + ).toBe(false); + expect( + existsSync(orgDst('acme', 'skills', '.history', 'code-reviewer', '1.md')), + ).toBe(true); + }); + + it('override:true for workflows preserves user-only folders', async () => { + process.env.TALE_CONFIG_BUILTIN_DIR = catalogRoot; + await writeText( + catSrc('workflows', 'shopify', 'sync.json'), + '{"name":"new"}', + ); + + await writeText( + orgDst('acme', 'workflows', 'shopify', 'sync.json'), + '{"name":"old"}', + ); + await writeText( + orgDst('acme', 'workflows', 'my-folder', 'custom.json'), + '{"name":"custom"}', + ); + + await scaffoldHandler({} as never, { orgSlug: 'acme', override: true }); + + expect( + await readFile( + orgDst('acme', 'workflows', 'shopify', 'sync.json'), + 'utf-8', + ), + ).toBe('{"name":"new"}'); + expect( + existsSync(orgDst('acme', 'workflows', 'my-folder', 'custom.json')), + ).toBe(true); + }); + + it('seeds retention as a single file at /retention.json', async () => { + process.env.TALE_CONFIG_BUILTIN_DIR = catalogRoot; + await writeText( + catSrc('retention.json'), + '{"version":"v1","categories":{}}', + ); + + await scaffoldHandler({} as never, { orgSlug: 'acme' }); + + expect(existsSync(orgDst('acme', 'retention.json'))).toBe(true); + expect(await readFile(orgDst('acme', 'retention.json'), 'utf-8')).toBe( + '{"version":"v1","categories":{}}', + ); + }); + + it('copy-onto-self guard fires for default-org reseed in dev 
fallback (catalog env unset)', async () => { + // No TALE_CONFIG_BUILTIN_DIR → fallback source = domain.resolve('default') + // = `/default/`, which is the same dir as the reseed + // target. realpath-based guard must catch this even though path strings + // are syntactically identical. + await writeText( + orgDst('default', 'workflows', 'shopify', 'sync.json'), + '{"name":"existing"}', + ); + + // Should be a no-op (skip with warn), not a destructive copy-onto-self. + await scaffoldHandler({} as never, { + orgSlug: 'default', + override: true, + }); + + expect( + await readFile( + orgDst('default', 'workflows', 'shopify', 'sync.json'), + 'utf-8', + ), + ).toBe('{"name":"existing"}'); + }); +}); + +describe('cleanupOrgFilesystem (symlink + traversal defense)', () => { + it('refuses the literal `default` slug', async () => { + await writeText(orgDst('default', 'agents', 'x.json'), '{}'); + await cleanupHandler({} as never, { orgSlug: 'default' }); + expect(existsSync(orgDst('default', 'agents', 'x.json'))).toBe(true); + }); + + it('removes the entire / subtree for a valid non-default slug', async () => { + await writeText(orgDst('acme', 'agents', 'x.json'), '{}'); + await writeText(orgDst('acme', 'providers', 'p.json'), '{}'); + await writeText(orgDst('other', 'agents', 'keep.json'), '{}'); + + await cleanupHandler({} as never, { orgSlug: 'acme' }); + + expect(existsSync(orgDst('acme'))).toBe(false); + expect(existsSync(orgDst('other', 'agents', 'keep.json'))).toBe(true); + }); + + it('ENOENT on the org dir is idempotent (no throw)', async () => { + // Org dir doesn't exist; cleanup should silently succeed. + await expect( + cleanupHandler({} as never, { orgSlug: 'never-existed' }), + ).resolves.toBeNull(); + }); + + it('refuses invalid org slugs (would have already failed at validateOrgSlug too)', async () => { + // Slugs that don't match ORG_SLUG_REGEX. cleanup must warn-and-skip. 
+ await cleanupHandler({} as never, { orgSlug: '../escape' }); + await cleanupHandler({} as never, { orgSlug: 'UPPER' }); + // No assertion needed on filesystem — we're verifying no throw. + }); + + it('refuses a symlinked org dir (would otherwise rm the symlink target)', async () => { + // Create a directory outside configRoot, then place a symlink at + // configRoot/acme pointing to it. cleanup must lstat → detect symlink → refuse. + const outside = await mkdtemp(path.join(tmpdir(), 'cleanup-outside-')); + const outsideFile = path.join(outside, 'precious.json'); + await writeFile(outsideFile, '{"keep":"me"}', 'utf-8'); + + await symlink(outside, orgDst('acme')); + + try { + await cleanupHandler({} as never, { orgSlug: 'acme' }); + // The symlink target's file MUST survive. + expect(existsSync(outsideFile)).toBe(true); + } finally { + await rm(orgDst('acme'), { force: true }); + await rm(outside, { recursive: true, force: true }); + } }); }); diff --git a/services/platform/convex/organizations/scaffold.ts b/services/platform/convex/organizations/scaffold.ts index f933eea63c..e784d3b6e5 100644 --- a/services/platform/convex/organizations/scaffold.ts +++ b/services/platform/convex/organizations/scaffold.ts @@ -1,36 +1,55 @@ 'use node'; /** - * Scaffold per-org filesystem config on organization creation. + * Scaffold + cleanup per-org filesystem config under the uniform org-first + * layout (`$TALE_CONFIG_DIR///...` for every org incl. + * `default`). Source of seed data is the immutable builtin catalog baked + * into the convex image at `$TALE_CONFIG_BUILTIN_DIR/default//` + * (set in services/platform/Dockerfile, propagated via the entrypoint's + * `convex env set` loop). Falls back to the default org's writable dir + * when the env is unset, so local `bun dev` (no catalog) still works. * - * Seeds new orgs from the immutable builtin catalog baked into the convex - * image at `$TALE_CONFIG_BUILTIN_DIR//` (mirrors the writable - * `$TALE_CONFIG_DIR//` pattern). 
The env is pushed by the platform - * Dockerfile via the entrypoint's `convex env set` loop. Falls back to the - * default org's writable dir when the env is unset, so local `bun dev` - * (where no catalog is built) still works. The rationale for sourcing from - * the read-only catalog instead of the default workspace lives at the - * `@`-prefix-skip comment in copyTree below — that's the load-bearing site. + * `scaffoldNewOrganization`: + * - org-create path (`override:false`, default): idempotent per-domain + * skip if the target dir already has files. + * - reseed path (`override:true`, called by `reseedAllOrgsFromBuiltin`): + * overwrites builtin-named files in place while always preserving + * `*.secrets.json` and `.history/` trails. Per-domain semantics — + * flat: per-file atomicWrite; dir-bundle (skills/integrations): + * `rm -rf ` then copy bundle; workflows + branding: + * per-file overwrite (preserves user-only folders / images); + * retention: single-file copy. * - * Skips per-org secrets (`*.secrets.json`) and local edit-history dirs - * (`.history/`). Skips branding entirely — read-side hardcodes 'default'. - * - * Idempotent: if the target dir already contains user-visible files, skip - * that domain with a warning rather than overwriting. + * `cleanupOrgFilesystem` removes the entire `/` subtree (org is + * one tree under org-first), guarded by validateOrgSlug + verifyPathWithinBase + * + an lstat symlink defense (an attacker-placed symlink at the org dir + * would otherwise be followed by `rm -rf` to arbitrary filesystem + * locations). Uses a two-phase rename-then-delete so concurrent writers + * fail with ENOENT rather than racing the recursive delete. 
*/ -import { lstat, readdir, readFile, rm, stat } from 'node:fs/promises'; +import { + lstat, + readdir, + readFile, + realpath, + rename, + rm, + stat, +} from 'node:fs/promises'; import path from 'node:path'; import { v } from 'convex/values'; import { internalAction } from '../_generated/server'; import { resolveAgentsDir } from '../agents/file_utils'; +import { resolveBrandingDir } from '../branding/file_utils'; import { resolveIntegrationsDir } from '../integrations/file_utils'; import { atomicWrite, atomicWriteBuffer, errnoCode, + validateOrgSlug, verifyPathWithinBase, } from '../lib/file_io'; import { resolveProvidersDir } from '../providers/file_utils'; @@ -42,20 +61,30 @@ type DirResolver = (orgSlug: string) => string; type Domain = { name: string; resolve: DirResolver; - // Flat domains store one file per item with no subdirectories in the - // catalog (agents/providers: `.json`). copyTree must not recurse into - // subdirs for these — see the `allowSubdirs` guard in copyTree. - flat?: boolean; + // 'flat' = one file per item, no subdirs in the catalog (agents/providers). + // override:true overwrites per-file via atomicWrite; user-added files survive, + // secrets + .history at the dir level survive. + // 'bundle' = per-item directory bundle (skills/integrations). override:true + // rm -rf's the per-bundle subdir then copies — wholesale bundle replace. + // Dir-level `.history`/secrets at the domain root (siblings of bundles) survive. + // 'tree' = arbitrary nested files (workflows/branding). override:true per-file overwrite; + // user-only folders survive. + kind: 'flat' | 'bundle' | 'tree'; }; -// Each domain's per-org dir convention differs — use the domain's own resolver. -// The catalog subdir name matches `name` (e.g., `$TALE_CONFIG_BUILTIN_DIR/agents/`). +// `default` is the canonical template org in the catalog; the catalog tree +// at `$TALE_CONFIG_BUILTIN_DIR/default//` is the source for every +// org including default itself.
const DOMAINS: Domain[] = [ - { name: 'agents', resolve: resolveAgentsDir, flat: true }, - { name: 'providers', resolve: resolveProvidersDir, flat: true }, - { name: 'integrations', resolve: resolveIntegrationsDir }, - { name: 'workflows', resolve: resolveWorkflowsDir }, - { name: 'skills', resolve: resolveSkillsDir }, + { name: 'agents', resolve: resolveAgentsDir, kind: 'flat' }, + { name: 'providers', resolve: resolveProvidersDir, kind: 'flat' }, + { name: 'integrations', resolve: resolveIntegrationsDir, kind: 'bundle' }, + { name: 'workflows', resolve: resolveWorkflowsDir, kind: 'tree' }, + { name: 'skills', resolve: resolveSkillsDir, kind: 'bundle' }, + // Branding is logically a tree (branding.json + images/ subdir). Per-file + // overwrite is correct: catalog overwrites branding.json; uploaded + // `images/*.png` survive (they're neither secrets nor .history). + { name: 'branding', resolve: resolveBrandingDir, kind: 'tree' }, ]; const BUILTIN_ENV = 'TALE_CONFIG_BUILTIN_DIR'; @@ -70,7 +99,7 @@ function shouldSkipFile(name: string): boolean { // atomicWrite leaves `....tmp` orphans on crash. Those // shouldn't lock out a retry, but every other entry (including dotfiles // like `.history/` that agents/workflows write on every edit) means a user -// has been here and we must not overwrite. +// has been here and we must not overwrite in the non-override path. function isAtomicWriteTmp(name: string): boolean { return name.startsWith('.') && name.endsWith('.tmp'); } @@ -80,10 +109,6 @@ async function dirHasFiles(dir: string): Promise { const entries = await readdir(dir); return entries.some((n) => !isAtomicWriteTmp(n)); } catch (err) { - // ENOENT (dir doesn't exist yet) is the expected case — domain scaffold - // simply hasn't run. Anything else (EACCES, EIO) means we can't read - // it; treat as "empty" so scaffolding proceeds, but log so a - // permissions glitch isn't silently masked. 
if (errnoCode(err) !== 'ENOENT') { console.warn('[scaffold.dirHasFiles] readdir failed:', dir, err); } @@ -91,6 +116,56 @@ async function dirHasFiles(dir: string): Promise { } } +/** + * realpath-aware equality / containment check. `path.resolve` only + * canonicalizes `..`/`.` — it does NOT follow symlinks. A symlinked + * `TALE_CONFIG_BUILTIN_DIR` (or bind-mount overlap between src/dst) + * could otherwise produce a copy-onto-self where `rm -rf ` then + * copy from the same dir wipes the live data. Use `realpath` on both + * sides; treat ENOENT on either side as "not yet a symlink concern" + * and fall back to `path.resolve`. + */ +async function pathsOverlap(a: string, b: string): Promise { + const resolveReal = async (p: string): Promise => { + try { + return await realpath(p); + } catch { + return path.resolve(p); + } + }; + const realA = await resolveReal(a); + const realB = await resolveReal(b); + if (realA === realB) return true; + if (realA.startsWith(realB + path.sep)) return true; + if (realB.startsWith(realA + path.sep)) return true; + return false; +} + +async function writeFileFromCatalog(src: string, dst: string): Promise { + const buf = await readFile(src); + const name = path.basename(src); + if ( + name.endsWith('.json') || + name.endsWith('.ts') || + name.endsWith('.svg') || + name.endsWith('.md') + ) { + await atomicWrite(dst, buf.toString('utf-8')); + } else { + await atomicWriteBuffer(dst, buf); + } +} + +/** + * Recursively copy `sourceDir` → `targetDir`. Skips `.history/`, dotfiles + * (`.`), `*.secrets.json`, and symlinks at every level. Used by + * `tree` and (top-level) `bundle` domain seeds. + * + * `allowSubdirs=false` (used by flat domains) means: don't recurse into + * any subdir found in the source. The catalog for flat domains has no + * subdirs, so a subdir indicates a fallback workspace with leaked + * cross-tenant content — skip with a warning rather than recurse. 
+ */ async function copyTree( sourceDir: string, targetDir: string, @@ -106,15 +181,6 @@ async function copyTree( for (const name of entries) { if (name.startsWith('.')) continue; - // Per-org marker prefix used by skills / integrations / workflows for - // tenant subdirs (`@/...`). Defence-in-depth: the builtin - // catalog has no `@` subdirs, but if the source ever falls back to a - // mutable workspace this guard prevents recursing into other orgs' - // trees. Agents / providers use raw `` subdirs (no `@` marker) and - // are flat-copied (`allowSubdirs=false` below), so a stray raw-slug subdir - // in a fallback workspace is never recursed into either — the cross-tenant - // leak is structurally impossible on any source path. - if (name.startsWith('@')) continue; if (SKIP_DIR_NAMES.has(name)) continue; if (shouldSkipFile(name)) continue; @@ -122,9 +188,8 @@ async function copyTree( const dst = path.join(targetDir, name); // lstat (not stat) so a symlink in the source is detected and skipped - // rather than followed. The catalog is built from `examples/` which - // tracks no symlinks today, but this keeps the scaffold from - // dereferencing through to arbitrary paths if one is ever introduced. + // rather than followed. The catalog tracks no symlinks today; this + // keeps the scaffold from dereferencing if one is ever introduced. const info = await lstat(src).catch((err) => { if (errnoCode(err) !== 'ENOENT') { console.warn('[scaffold.copyTree] lstat failed:', src, err); @@ -139,9 +204,6 @@ async function copyTree( if (info.isDirectory()) { if (!allowSubdirs) { - // Flat domain (agents / providers): the catalog has no subdirs here, - // so any subdir is unexpected (e.g. a raw-slug org dir leaked into a - // mutable fallback workspace). Skip rather than recurse. 
console.warn( '[scaffold.copyTree] skipping unexpected subdir in flat domain:', src, @@ -153,30 +215,188 @@ async function copyTree( } if (!info.isFile()) continue; + await writeFileFromCatalog(src, dst); + } +} - const buf = await readFile(src); - if ( - name.endsWith('.json') || - name.endsWith('.ts') || - name.endsWith('.svg') - ) { - await atomicWrite(dst, buf.toString('utf-8')); +/** + * Seed a single domain for an org. Source is `/default/` + * (canonical template) when `TALE_CONFIG_BUILTIN_DIR` is set, falling back + * to `resolve('default')` for local dev. Resolves with no value; every + * skip or failure is reported through console logging only. + */ +async function seedDomain( + domain: Domain, + catalogRoot: string | undefined, + orgSlug: string, + override: boolean, +): Promise { + const sourceDir = catalogRoot + ? path.join(catalogRoot, 'default', domain.name) + : domain.resolve('default'); + const targetDir = domain.resolve(orgSlug); + + if (catalogRoot) { + // Operator-set catalog path must exist; missing = deploy misconfig + // (platform/convex image version skew). Surface in logs instead of + // silent zero-seed. + const sourceExists = await stat(sourceDir) + .then(() => true) + .catch((err) => { + if (errnoCode(err) === 'ENOENT') { + console.error( + `[scaffold] ${domain.name}: ${BUILTIN_ENV}=${catalogRoot} is set but ${sourceDir} does not exist; org "${orgSlug}" will receive zero seed data for this domain`, + ); + } else { + console.error( + `[scaffold] ${domain.name}: stat ${sourceDir} failed:`, + err instanceof Error ? err.message : err, + ); + } + return false; + }); + if (!sourceExists) return; + } + + // copy-onto-self guard: realpath-aware. Fires for default-org reseed + // in the fallback case (catalog env unset, source = target) and for + // any symlinked overlap between catalog and data trees.
+ if (await pathsOverlap(sourceDir, targetDir)) { + console.warn( + `[scaffold] ${domain.name}: source and target overlap (${sourceDir} ↔ ${targetDir}); skipping`, + ); + return; + } + + if (!override) { + const alreadyScaffolded = await dirHasFiles(targetDir); + if (alreadyScaffolded) { + console.warn( + `[scaffold] ${domain.name}: target ${targetDir} already has files, skipping (use override:true to reseed)`, + ); + return; + } + } + + try { + if (domain.kind === 'flat') { + // Per-file atomicWrite. Overwrites only catalog-named files; user-added + // files at the same dir survive (e.g., an org's custom agent). Dir-level + // `.history`/secrets survive (copyTree skips them at the source side, + // and per-file write doesn't touch siblings). + await copyTree(sourceDir, targetDir, /* allowSubdirs */ false); + } else if (domain.kind === 'bundle') { + // For each catalog bundle subdir, rm -rf the corresponding target + // bundle (if override) then copy. Domain-root siblings (.history/, + // *.secrets.json at the domain dir level) survive — we only touch + // bundle subdirs that exist in the catalog. + let bundles: string[]; + try { + bundles = await readdir(sourceDir); + } catch (err) { + if (errnoCode(err) === 'ENOENT') return; + throw err; + } + for (const bundleName of bundles) { + if (bundleName.startsWith('.')) continue; + if (SKIP_DIR_NAMES.has(bundleName)) continue; + const bundleSrc = path.join(sourceDir, bundleName); + const bundleDst = path.join(targetDir, bundleName); + const info = await lstat(bundleSrc).catch(() => null); + if (!info || info.isSymbolicLink() || !info.isDirectory()) continue; + if (override) { + await rm(bundleDst, { recursive: true, force: true }); + } + await copyTree(bundleSrc, bundleDst, /* allowSubdirs */ true); + } } else { - await atomicWriteBuffer(dst, buf); + // 'tree' — workflows + branding. Per-file overwrite, no rm. User-only + // subdirs / files survive intact (e.g. 
an org's custom workflow folder, + // an uploaded branding/images/logo.png). + await copyTree(sourceDir, targetDir, /* allowSubdirs */ true); } + } catch (err) { + console.error( + `[scaffold] ${domain.name}: copy failed for org "${orgSlug}":`, + err instanceof Error ? err.message : err, + ); + // Continue with other domains; partial scaffolding is better than none. + } +} + +/** + * Retention is one JSON object per org (`/retention.json`), not a + * subtree. Special-cased outside the DOMAINS loop. + */ +async function seedRetention( + catalogRoot: string | undefined, + orgSlug: string, + override: boolean, +): Promise { + const sourceFile = catalogRoot + ? path.join(catalogRoot, 'default', 'retention.json') + : path.join(process.env.TALE_CONFIG_DIR ?? '', 'default', 'retention.json'); + const targetFile = path.join( + process.env.TALE_CONFIG_DIR ?? '', + orgSlug, + 'retention.json', + ); + + const sourceExists = await stat(sourceFile) + .then(() => true) + .catch((err) => { + if (errnoCode(err) !== 'ENOENT') { + console.warn('[scaffold] retention: stat failed:', sourceFile, err); + } + return false; + }); + if (!sourceExists) return; + + if (await pathsOverlap(sourceFile, targetFile)) { + console.warn(`[scaffold] retention: source and target overlap; skipping`); + return; + } + + const targetExists = await stat(targetFile) + .then(() => true) + .catch(() => false); + if (targetExists && !override) { + console.warn( + `[scaffold] retention: target ${targetFile} exists, skipping (use override:true to reseed)`, + ); + return; + } + + try { + const buf = await readFile(sourceFile); + await atomicWrite(targetFile, buf.toString('utf-8')); + } catch (err) { + console.error( + `[scaffold] retention: copy failed for org "${orgSlug}":`, + err instanceof Error ? err.message : err, + ); } } /** - * Remove a deleted org's per-domain filesystem dirs. Safety: - * - Refuses the `default` slug (the global/system org's baseline). 
- * - Uses each domain's own resolver so we only touch paths that follow - * the established convention (no manual string-building). - * - Verifies the resolved per-org dir is strictly inside the domain's - * base dir via `verifyPathWithinBase` — blocks slug traversal like - * `../foo` even though `validateOrgSlug` should have already caught it. - * - ENOENT on the per-org dir is silently ignored (idempotent; nothing - * to clean up). + * Remove a deleted org's entire `/` subtree under + * `${TALE_CONFIG_DIR}`. Safety: + * - TALE_CONFIG_DIR must be set + absolute. + * - Refuses the literal `default` slug. + * - Validates the slug via `validateOrgSlug` so a NULL / `..` / cased + * slug from a misbehaving caller can't slip through. + * - `verifyPathWithinBase` enforces strict descendant-of-root containment. + * - `lstat`-refuses a symlink at the org dir itself: `verifyPathWithinBase` + * only realpath's the dirname, so a pre-placed symlink at + * `/` would otherwise be followed by `rm -rf` to + * arbitrary filesystem locations. + * - Two-phase rename-then-delete: rename to a `.deleted--` + * sibling first (atomic), then `rm -rf` the renamed path. Concurrent + * writers of the original path fail with ENOENT instead of racing + * the recursive delete. + * - Drops `{ force: true }` — `force` masks EACCES/EBUSY silently; + * surface errors via the explicit ENOENT branch + error logging. + * - ENOENT on the org dir is idempotent (nothing to clean up). 
*/ export const cleanupOrgFilesystem = internalAction({ args: { @@ -184,6 +404,14 @@ export const cleanupOrgFilesystem = internalAction({ }, returns: v.null(), handler: async (_ctx, args) => { + const root = process.env.TALE_CONFIG_DIR; + if (!root || !path.isAbsolute(root)) { + console.error( + '[cleanupOrgFilesystem] TALE_CONFIG_DIR is unset or not absolute; refusing to proceed', + ); + return null; + } + if (args.orgSlug === 'default') { console.warn( '[cleanupOrgFilesystem] refusing to delete the default org filesystem', @@ -191,47 +419,73 @@ export const cleanupOrgFilesystem = internalAction({ return null; } - for (const domain of DOMAINS) { - const baseDir = domain.resolve('default'); - let targetDir: string; - try { - targetDir = domain.resolve(args.orgSlug); - } catch (err) { - console.warn( - `[cleanupOrgFilesystem] ${domain.name}: skipping invalid slug "${args.orgSlug}":`, - err instanceof Error ? err.message : err, - ); - continue; - } + if (!validateOrgSlug(args.orgSlug)) { + console.warn( + `[cleanupOrgFilesystem] refusing invalid slug "${args.orgSlug}"`, + ); + return null; + } - // The default-org's base dir is the per-domain baseDir itself; a - // per-org dir must be a strict descendant, never equal. - if (targetDir === baseDir) { - console.warn( - `[cleanupOrgFilesystem] ${domain.name}: target equals base dir, skipping`, - ); - continue; - } + const orgDir = path.join(root, args.orgSlug); + if (path.resolve(orgDir) === path.resolve(root)) { + console.warn( + `[cleanupOrgFilesystem] computed orgDir equals root, refusing`, + ); + return null; + } - try { - await verifyPathWithinBase(targetDir, baseDir); - } catch (err) { - console.warn( - `[cleanupOrgFilesystem] ${domain.name}: path traversal guard tripped for "${args.orgSlug}":`, - err instanceof Error ? 
err.message : err, - ); - continue; - } + try { + await verifyPathWithinBase(orgDir, root); + } catch (err) { + console.warn( + `[cleanupOrgFilesystem] path traversal guard tripped for "${args.orgSlug}":`, + err instanceof Error ? err.message : err, + ); + return null; + } - try { - await rm(targetDir, { recursive: true, force: true }); - } catch (err) { - if (errnoCode(err) === 'ENOENT') continue; - console.error( - `[cleanupOrgFilesystem] ${domain.name}: failed to remove "${targetDir}":`, - err instanceof Error ? err.message : err, - ); - } + // Symlink hijack defense: verifyPathWithinBase leaves the basename + // unresolved. If / is itself a symlink (placed by an + // attacker or a misconfigured operator), rm -rf would follow it and + // delete arbitrary filesystem locations. Refuse explicitly here. + const info = await lstat(orgDir).catch((err) => { + if (errnoCode(err) === 'ENOENT') return null; + console.warn( + `[cleanupOrgFilesystem] lstat failed for "${orgDir}":`, + err instanceof Error ? err.message : err, + ); + return null; + }); + if (!info) return null; + if (info.isSymbolicLink()) { + console.error( + `[cleanupOrgFilesystem] refusing to delete symlinked org dir at "${orgDir}"`, + ); + return null; + } + + // Two-phase rename-then-delete. The rename is atomic within a + // filesystem; any concurrent writer of the original path fails with + // ENOENT instead of racing the recursive delete. + const condemned = path.join(root, `.deleted-${args.orgSlug}-${Date.now()}`); + try { + await rename(orgDir, condemned); + } catch (err) { + if (errnoCode(err) === 'ENOENT') return null; + console.error( + `[cleanupOrgFilesystem] rename failed for "${orgDir}" → "${condemned}":`, + err instanceof Error ? 
err.message : err, + ); + return null; + } + + try { + await rm(condemned, { recursive: true }); + } catch (err) { + console.error( + `[cleanupOrgFilesystem] rm failed for "${condemned}" (org dir was renamed but not fully removed; manual cleanup required):`, + err instanceof Error ? err.message : err, + ); } return null; @@ -241,74 +495,30 @@ export const cleanupOrgFilesystem = internalAction({ export const scaffoldNewOrganization = internalAction({ args: { orgSlug: v.string(), + /** + * When true, overwrite the catalog-named subset of files in each + * domain, preserving `*.secrets.json` and `.history/`. When false + * (default), skip per-domain if the target already has visible + * files (idempotent org-create path). + */ + override: v.optional(v.boolean()), }, returns: v.null(), handler: async (_ctx, args) => { - if (args.orgSlug === 'default') { - // The default org's files are seeded by the Docker entrypoint; nothing to do. + if (!validateOrgSlug(args.orgSlug)) { + console.warn( + `[scaffoldNewOrganization] refusing invalid slug "${args.orgSlug}"`, + ); return null; } - const builtinRoot = process.env[BUILTIN_ENV]; + const catalogRoot = process.env[BUILTIN_ENV]; + const override = args.override ?? false; for (const domain of DOMAINS) { - // Prefer `$TALE_CONFIG_BUILTIN_DIR//` (set by platform - // Dockerfile, pushed into Convex's deployment env). Falls back to - // the default org's dir when the env is unset — covers local - // `bun dev` (no catalog built) and a rollback to a platform image - // that doesn't declare the env. - const sourceDir = builtinRoot - ? path.join(builtinRoot, domain.name) - : domain.resolve('default'); - const targetDir = domain.resolve(args.orgSlug); - - // copyTree's ENOENT-silent contract is correct for the fallback case - // (default-org dir may legitimately not be seeded yet). 
But when an - // operator-configured catalog path doesn't exist, that's a deploy - // misconfig (e.g., platform/convex image version skew) and the - // resulting zero-seed should NOT look like a successful copy. Probe - // explicitly so the failure surfaces in logs. - if (builtinRoot) { - const sourceExists = await stat(sourceDir) - .then(() => true) - .catch((err) => { - // ENOENT: catalog domain dir missing — a deploy misconfig - // (platform/convex image skew). Other errors (EACCES, EIO) are a - // distinct failure; log each accurately rather than mislabelling - // a permission error as "does not exist". - if (errnoCode(err) === 'ENOENT') { - console.error( - `[scaffoldNewOrganization] ${domain.name}: ${BUILTIN_ENV}=${builtinRoot} is set but ${sourceDir} does not exist; new org "${args.orgSlug}" will receive zero seed data for this domain`, - ); - } else { - console.error( - `[scaffoldNewOrganization] ${domain.name}: stat ${sourceDir} failed:`, - err instanceof Error ? err.message : err, - ); - } - return false; - }); - if (!sourceExists) continue; - } - - const alreadyScaffolded = await dirHasFiles(targetDir); - if (alreadyScaffolded) { - console.warn( - `[scaffoldNewOrganization] ${domain.name}: target ${targetDir} already has files, skipping`, - ); - continue; - } - - try { - await copyTree(sourceDir, targetDir, !domain.flat); - } catch (err) { - console.error( - `[scaffoldNewOrganization] ${domain.name}: copy failed for org "${args.orgSlug}":`, - err instanceof Error ? err.message : err, - ); - // Continue with other domains; partial scaffolding is better than none. 
- } + await seedDomain(domain, catalogRoot, args.orgSlug, override); } + await seedRetention(catalogRoot, args.orgSlug, override); return null; }, diff --git a/services/platform/convex/providers/file_utils.ts b/services/platform/convex/providers/file_utils.ts index 531788a44e..a55c7fd2e0 100644 --- a/services/platform/convex/providers/file_utils.ts +++ b/services/platform/convex/providers/file_utils.ts @@ -92,22 +92,16 @@ export function parseProviderSecrets( return result.data; } -function getBaseDir(): string { - const dir = process.env.PROVIDERS_DIR; - if (dir) return dir; +function getConfigRoot(): string { const configDir = process.env.TALE_CONFIG_DIR; - if (configDir) return path.join(configDir, 'providers'); - throw new Error( - 'Neither TALE_CONFIG_DIR nor PROVIDERS_DIR environment variable is set.', - ); + if (configDir) return configDir; + throw new Error('TALE_CONFIG_DIR environment variable is not set.'); } export function resolveProvidersDir(orgSlug: string): string { if (!validateOrgSlug(orgSlug)) throw new Error(`Invalid org slug: ${orgSlug}`); - const baseDir = getBaseDir(); - if (orgSlug === 'default') return baseDir; - return path.join(baseDir, orgSlug); + return path.join(getConfigRoot(), orgSlug, 'providers'); } export function resolveProviderFilePath( diff --git a/services/platform/convex/skills/file_actions.ts b/services/platform/convex/skills/file_actions.ts index 3f043e9ed4..a46b693269 100644 --- a/services/platform/convex/skills/file_actions.ts +++ b/services/platform/convex/skills/file_actions.ts @@ -477,7 +477,7 @@ export const listSkills = action({ const dir = resolveSkillsDir(orgSlug); const entries = await readdirSafe(dir); const slugs = entries.filter( - (e) => !e.startsWith('.') && !e.startsWith('@') && validateSkillSlug(e), + (e) => !e.startsWith('.') && validateSkillSlug(e), ); const results = await Promise.all( @@ -992,9 +992,7 @@ export const listSkillsForExecution = internalAction({ handler: async (_ctx, args) => { const dir = 
resolveSkillsDir(args.orgSlug); const entries = await readdirSafe(dir); - return entries.filter( - (e) => !e.startsWith('.') && !e.startsWith('@') && validateSkillSlug(e), - ); + return entries.filter((e) => !e.startsWith('.') && validateSkillSlug(e)); }, }); diff --git a/services/platform/convex/skills/file_utils.test.ts b/services/platform/convex/skills/file_utils.test.ts index a4eee8c0fa..99d5813a23 100644 --- a/services/platform/convex/skills/file_utils.test.ts +++ b/services/platform/convex/skills/file_utils.test.ts @@ -13,30 +13,41 @@ import { validateSkillSlug, } from './file_utils'; -let skillsRoot: string; -let prevSkillsDir: string | undefined; +// Under the uniform org-first layout, every org's skills live at +// `${TALE_CONFIG_DIR}//skills/` — including the default org +// (which is no longer special-cased). All resolvers compose on top of +// `${TALE_CONFIG_DIR}`; the per-domain SKILLS_DIR override has been dropped. +let configRoot: string; let prevTaleConfigDir: string | undefined; +let prevSkillsDir: string | undefined; beforeEach(async () => { - skillsRoot = await mkdtemp(path.join(tmpdir(), 'skills-test-')); - prevSkillsDir = process.env.SKILLS_DIR; + configRoot = await mkdtemp(path.join(tmpdir(), 'skills-test-')); prevTaleConfigDir = process.env.TALE_CONFIG_DIR; - process.env.SKILLS_DIR = skillsRoot; - delete process.env.TALE_CONFIG_DIR; + prevSkillsDir = process.env.SKILLS_DIR; + process.env.TALE_CONFIG_DIR = configRoot; + // Explicitly clear the legacy per-domain override so its presence in the + // shell env can't accidentally satisfy any leftover fallback. 
+ delete process.env.SKILLS_DIR; }); afterEach(async () => { - if (prevSkillsDir === undefined) { - delete process.env.SKILLS_DIR; + if (prevTaleConfigDir === undefined) { + delete process.env.TALE_CONFIG_DIR; } else { - process.env.SKILLS_DIR = prevSkillsDir; - } - if (prevTaleConfigDir !== undefined) { process.env.TALE_CONFIG_DIR = prevTaleConfigDir; } - await rm(skillsRoot, { recursive: true, force: true }); + if (prevSkillsDir !== undefined) { + process.env.SKILLS_DIR = prevSkillsDir; + } + await rm(configRoot, { recursive: true, force: true }); }); +// Helper: where this test's "default org skills dir" lives under org-first. +function defaultSkillsDir(): string { + return path.join(configRoot, 'default', 'skills'); +} + describe('validateSkillSlug', () => { it('accepts hyphen-separated lowercase slugs', () => { expect(validateSkillSlug('code-reviewer')).toBe(true); @@ -67,14 +78,14 @@ describe('validateSkillSlug', () => { }); }); -describe('resolveSkillsDir (org isolation)', () => { - it('default org uses base dir directly', () => { - expect(resolveSkillsDir('default')).toBe(skillsRoot); +describe('resolveSkillsDir (org isolation, org-first)', () => { + it('default org lives at /default/skills/', () => { + expect(resolveSkillsDir('default')).toBe(defaultSkillsDir()); }); - it('other orgs live under @/', () => { + it('other orgs live at //skills/ (no @-prefix)', () => { expect(resolveSkillsDir('acme-corp')).toBe( - path.join(skillsRoot, '@acme-corp'), + path.join(configRoot, 'acme-corp', 'skills'), ); }); @@ -85,9 +96,9 @@ describe('resolveSkillsDir (org isolation)', () => { }); describe('resolveSkillDir', () => { - it('returns path under skills root', () => { + it('returns path under /skills/', () => { const p = resolveSkillDir('default', 'code-reviewer'); - expect(p).toBe(path.join(skillsRoot, 'code-reviewer')); + expect(p).toBe(path.join(defaultSkillsDir(), 'code-reviewer')); }); it('rejects invalid slugs upstream', () => { @@ -99,7 +110,7 @@ 
describe('resolveSkillDir', () => { describe('resolveSkillMdPath', () => { it('appends SKILL.md', () => { expect(resolveSkillMdPath('default', 'code-reviewer')).toBe( - path.join(skillsRoot, 'code-reviewer', 'SKILL.md'), + path.join(defaultSkillsDir(), 'code-reviewer', 'SKILL.md'), ); }); }); @@ -112,7 +123,7 @@ describe('resolveSkillAssetPath (traversal hardening)', () => { 'scripts/extract.py', ); expect(p).toBe( - path.join(skillsRoot, 'pdf-extractor', 'scripts', 'extract.py'), + path.join(defaultSkillsDir(), 'pdf-extractor', 'scripts', 'extract.py'), ); }); @@ -161,11 +172,11 @@ describe('resolveSkillAssetPath (traversal hardening)', () => { describe('resolveSkillAssetPathChecked (realpath / symlink defense)', () => { it('catches a symlink planted as an intermediate directory', async () => { - // skills//escape → ../../outside + // /default/skills//escape → ../../../outside const slug = 'symlink-test'; - const skillDir = path.join(skillsRoot, slug); + const skillDir = path.join(defaultSkillsDir(), slug); await mkdir(skillDir, { recursive: true }); - const outside = path.join(skillsRoot, '..', 'outside'); + const outside = path.join(configRoot, 'outside'); await mkdir(outside, { recursive: true }); await symlink(outside, path.join(skillDir, 'escape')); @@ -178,7 +189,7 @@ describe('resolveSkillAssetPathChecked (realpath / symlink defense)', () => { it('allows asset reads through a real subdirectory', async () => { const slug = 'normal-test'; - const dir = path.join(skillsRoot, slug, 'scripts'); + const dir = path.join(defaultSkillsDir(), slug, 'scripts'); await mkdir(dir, { recursive: true }); await writeFile(path.join(dir, 'run.py'), 'print("ok")'); @@ -187,6 +198,8 @@ describe('resolveSkillAssetPathChecked (realpath / symlink defense)', () => { slug, 'scripts/run.py', ); - expect(resolved).toBe(path.join(skillsRoot, slug, 'scripts', 'run.py')); + expect(resolved).toBe( + path.join(defaultSkillsDir(), slug, 'scripts', 'run.py'), + ); }); }); diff --git 
a/services/platform/convex/skills/file_utils.ts b/services/platform/convex/skills/file_utils.ts index e0fb07ad08..3af44e3b14 100644 --- a/services/platform/convex/skills/file_utils.ts +++ b/services/platform/convex/skills/file_utils.ts @@ -8,11 +8,11 @@ * agents/file_utils.ts and integrations/file_utils.ts but uses Markdown + * YAML frontmatter as the wire format (per agentskills.io spec). * - * Org isolation: default org sits at `${SKILLS_DIR}/`; other orgs live - * under `${SKILLS_DIR}/@/` — same `@` prefix convention used by - * integrations. Every resolver applies a path-traversal guard plus a - * `verifyPathWithinBase` realpath check so symlinks planted in the bundle - * cannot escape the skill's directory. + * Org isolation: every org's skills live under + * `${TALE_CONFIG_DIR}//skills/` — uniform org-first layout. Every + * resolver applies a path-traversal guard plus a `verifyPathWithinBase` + * realpath check so symlinks planted in the bundle cannot escape the + * skill's directory. */ import { constants, lstat, open } from 'node:fs/promises'; @@ -94,30 +94,25 @@ export function validateSkillSlug(slug: string): boolean { return true; } -function getBaseDir(): string { - const dir = process.env.SKILLS_DIR; - if (dir) return dir; +function getConfigRoot(): string { const configDir = process.env.TALE_CONFIG_DIR; - if (configDir) return path.join(configDir, 'skills'); + if (configDir) return configDir; throw new Error( - 'Neither TALE_CONFIG_DIR nor SKILLS_DIR environment variable is set. ' + - 'Set TALE_CONFIG_DIR in .env to the root config directory ' + + 'TALE_CONFIG_DIR environment variable is not set. ' + + 'Set it to the root config directory ' + '(e.g., TALE_CONFIG_DIR=/path/to/tale/examples).', ); } /** - * Resolve the skills directory for an organization. Default org uses the - * base directly; every other org lives under a `@/` prefix — - * matches the convention enforced by integrations and agents. 
+ * Resolve the skills directory for an organization. Org-first: + * `${TALE_CONFIG_DIR}//skills/`. */ export function resolveSkillsDir(orgSlug: string): string { if (!validateOrgSlug(orgSlug)) { throw new Error(`Invalid org slug: ${orgSlug}`); } - const baseDir = getBaseDir(); - if (orgSlug === 'default') return baseDir; - return path.join(baseDir, `@${orgSlug}`); + return path.join(getConfigRoot(), orgSlug, 'skills'); } export function resolveSkillDir(orgSlug: string, slug: string): string { diff --git a/services/platform/convex/workflows/file_utils.ts b/services/platform/convex/workflows/file_utils.ts index 2c97a61c7c..9bfcf3debd 100644 --- a/services/platform/convex/workflows/file_utils.ts +++ b/services/platform/convex/workflows/file_utils.ts @@ -75,32 +75,26 @@ export function urlParamToSlug(param: string): string { return param.replace(new RegExp(SLUG_SEPARATOR, 'g'), '/'); } -function getBaseDir(): string { - const dir = process.env.WORKFLOWS_DIR; - if (dir) return dir; +function getConfigRoot(): string { const configDir = process.env.TALE_CONFIG_DIR; - if (configDir) return path.join(configDir, 'workflows'); + if (configDir) return configDir; throw new Error( - 'Neither TALE_CONFIG_DIR nor WORKFLOWS_DIR environment variable is set. ' + - 'Set TALE_CONFIG_DIR in .env to the root config directory ' + + 'TALE_CONFIG_DIR environment variable is not set. ' + + 'Set it to the root config directory ' + '(e.g., TALE_CONFIG_DIR=/path/to/tale/examples).', ); } /** - * Resolve the workflows directory for an organization. - * Default org uses the base dir directly. - * Other orgs use `{baseDir}/@{orgSlug}/` to prevent collision with workflow folders. + * Resolve the workflows directory for an organization. Org-first: + * `${TALE_CONFIG_DIR}//workflows/`. No `@`-prefix collision concern + * here since workflow folders live inside the per-org subtree. 
*/ export function resolveWorkflowsDir(orgSlug: string): string { if (!validateOrgSlug(orgSlug)) { throw new Error(`Invalid org slug: ${orgSlug}`); } - const baseDir = getBaseDir(); - if (orgSlug === 'default') { - return baseDir; - } - return path.join(baseDir, `@${orgSlug}`); + return path.join(getConfigRoot(), orgSlug, 'workflows'); } /** diff --git a/services/platform/docker-entrypoint.sh b/services/platform/docker-entrypoint.sh index bcfe181bac..0ac3c78b6d 100644 --- a/services/platform/docker-entrypoint.sh +++ b/services/platform/docker-entrypoint.sh @@ -227,30 +227,25 @@ deploy_convex_functions() { CONVEX_ENV_MAP["$key"]="${line#*=}" done <<< "$CONVEX_ENV_OUTPUT" - # One-shot cleanup: remove env vars that earlier Tale versions auto-pushed - # but the current architecture derives from TALE_CONFIG_DIR. - # - # Safety: only remove the var if its current value matches the auto-derived - # path (i.e. it's a stale auto-push, not an operator's custom override). - # An override like AGENTS_DIR=/data/custom-agents is preserved untouched. - local config_dir="${TALE_CONFIG_DIR:-/app/data}" - local -A ORPHAN_DERIVED=( - [AGENTS_DIR]="${config_dir}/agents" - [WORKFLOWS_DIR]="${config_dir}/workflows" - [INTEGRATIONS_DIR]="${config_dir}/integrations" - [PROVIDERS_DIR]="${config_dir}/providers" + # Unconditional purge: the per-domain env overrides (AGENTS_DIR / + # WORKFLOWS_DIR / INTEGRATIONS_DIR / PROVIDERS_DIR / SKILLS_DIR) are no + # longer honored by the resolvers under the uniform org-first layout. + # Remove them from the Convex deployment env on every boot, regardless + # of whether they look auto-derived or operator-customized. Operators + # who previously relied on a custom value must now point TALE_CONFIG_DIR + # at the root and use the `//` subtree. 
+ local -a LEGACY_DOMAIN_VARS=( + AGENTS_DIR + WORKFLOWS_DIR + INTEGRATIONS_DIR + PROVIDERS_DIR + SKILLS_DIR ) - for orphan in "${!ORPHAN_DERIVED[@]}"; do - if [ "${CONVEX_ENV_MAP[$orphan]+_}" ]; then - local current="${CONVEX_ENV_MAP[$orphan]}" - local derived="${ORPHAN_DERIVED[$orphan]}" - if [ "$current" = "$derived" ]; then - if bunx convex env remove "$orphan" --url "$CONVEX_URL" --admin-key "$ADMIN_KEY" >/dev/null 2>&1; then - echo " ✓ $orphan (orphan removed — derived from TALE_CONFIG_DIR)" - unset 'CONVEX_ENV_MAP[$orphan]' - fi - else - log_info "$orphan=$current preserved (custom override; not the derived $derived)" + for legacy in "${LEGACY_DOMAIN_VARS[@]}"; do + if [ "${CONVEX_ENV_MAP[$legacy]+_}" ]; then + if bunx convex env remove "$legacy" --url "$CONVEX_URL" --admin-key "$ADMIN_KEY" >/dev/null 2>&1; then + echo " ✓ $legacy removed (no longer honored under org-first layout)" + unset 'CONVEX_ENV_MAP[$legacy]' fi fi done diff --git a/services/platform/env.sh b/services/platform/env.sh index 107e8af686..94972118f9 100644 --- a/services/platform/env.sh +++ b/services/platform/env.sh @@ -55,9 +55,12 @@ env_normalize_common() { export INSTANCE_NAME="tale_platform" export INSTANCE_SECRET="${INSTANCE_SECRET}" - # Root config directory. Sub-dirs (agents/workflows/integrations/providers) - # are derived inside Convex via `convex/*/file_utils.ts` — no need to set - # AGENTS_DIR / WORKFLOWS_DIR / INTEGRATIONS_DIR / PROVIDERS_DIR explicitly. + # Root config directory. Per-org subtrees live at $TALE_CONFIG_DIR// + # with one subdir per domain (agents/, workflows/, providers/, etc.). + # Per-domain env overrides (AGENTS_DIR / WORKFLOWS_DIR / INTEGRATIONS_DIR / + # PROVIDERS_DIR / SKILLS_DIR) are no longer honored — set TALE_CONFIG_DIR + # only. The entrypoint purges those legacy vars from the Convex deployment + # env on every boot. 
export TALE_CONFIG_DIR="${TALE_CONFIG_DIR:-/app/data}" # Site URL - the canonical base URL for the platform (required) diff --git a/services/platform/lib/config-watcher.ts b/services/platform/lib/config-watcher.ts index 9851a3122c..1ee647ed3a 100644 --- a/services/platform/lib/config-watcher.ts +++ b/services/platform/lib/config-watcher.ts @@ -15,27 +15,37 @@ interface ConfigChangeEvent { } const ATOMIC_WRITE_TMP_RE = /\.\d+\.[a-f0-9]{8}\.tmp$/; +// Must match validateOrgSlug at services/platform/convex/lib/file_io.ts. +const ORG_SLUG_REGEX = /^[a-z0-9][a-z0-9_-]*$/; /** - * Parse a relative path within the config directory into a structured event. + * Parse a relative path within the config directory into a structured event, + * under the uniform org-first layout `${TALE_CONFIG_DIR}///...`. * - * Examples: - * agents/my-agent.json → { type: 'agent', slug: 'my-agent' } - * agents/@acme/my-agent.json → { type: 'agent', orgSlug: 'acme', slug: 'my-agent' } - * workflows/general/hello.json → { type: 'workflow', slug: 'general/hello' } - * workflows/@acme/hello.json → { type: 'workflow', orgSlug: 'acme', slug: 'hello' } - * integrations/slack/config.json → { type: 'integration', slug: 'slack' } - * integrations/@acme/slack/config.json → { type: 'integration', orgSlug: 'acme', slug: 'slack' } - * branding/branding.json → { type: 'branding' } + * Examples (with `default` as one possible orgSlug): + * default/agents/my-agent.json → { type: 'agents', orgSlug: 'default', slug: 'my-agent' } + * acme/agents/my-agent.json → { type: 'agents', orgSlug: 'acme', slug: 'my-agent' } + * default/workflows/general/hello.json → { type: 'workflows', orgSlug: 'default', slug: 'general/hello' } + * default/integrations/slack/config.json → { type: 'integrations', orgSlug: 'default', slug: 'slack' } + * default/branding/branding.json → { type: 'branding', orgSlug: 'default' } + * default/skills/code-reviewer/SKILL.md → { type: 'skills', orgSlug: 'default', slug: 'code-reviewer' } + * + * 
Returns null for paths that don't fit the `//` shape + * (org slug must validate; domain must be recognized). */ function parseConfigChange(relativePath: string): ConfigChangeEvent | null { const parts = relativePath.split('/'); if (parts.length < 2) return null; - const topDir = parts[0]; + const orgSlug = parts[0]; + if (!ORG_SLUG_REGEX.test(orgSlug)) return null; + + const domain = parts[1]; - if (topDir === 'branding') { - return { type: 'branding' }; + if (domain === 'branding') { + // Branding is default-only on the read side, but still emit per-org so + // future per-org branding (or operator inspection) sees the event. + return { type: 'branding', orgSlug }; } const typeMap: Record = { @@ -46,46 +56,38 @@ function parseConfigChange(relativePath: string): ConfigChangeEvent | null { skills: 'skills', }; - const type = typeMap[topDir]; + const type = typeMap[domain]; if (!type) return null; - const rest = parts.slice(1); - let orgSlug: string | undefined; - - // If the first segment after the top dir starts with @, it's an org slug - if (rest[0]?.startsWith('@')) { - orgSlug = rest[0].slice(1); - rest.shift(); - } - + const rest = parts.slice(2); if (rest.length === 0) return null; if (type === 'agents') { - // agents/[@org/]name.json + // /agents/.json const filename = rest[0]; return { type, orgSlug, slug: filename.replace(/\.json$/, '') }; } if (type === 'workflows') { - // workflows/[@org/][folder/]name.json — slug is the path without extension + // /workflows/[folder/]name.json — slug is the path without extension const slug = rest.join('/').replace(/\.json$/, ''); return { type, orgSlug, slug }; } if (type === 'integrations') { - // integrations/[@org/]slug/config.json + // /integrations//config.json (or other bundle files) const slug = rest[0]; return { type, orgSlug, slug }; } if (type === 'providers') { - // providers/[@org/]name.json + // /providers/.json const filename = rest[0]; return { type, orgSlug, slug: filename.replace(/\.json$/, '') }; } if 
(type === 'skills') { - // skills/[@org/]slug/SKILL.md (or any asset under the slug dir). + // /skills//SKILL.md (or any asset under the slug dir). // Emit at slug granularity so a write to scripts/x.py invalidates the // same query keys as a SKILL.md write. const slug = rest[0]; diff --git a/services/platform/lib/shared/schemas/governance.ts b/services/platform/lib/shared/schemas/governance.ts index dc4ab1a2d6..6368055ed8 100644 --- a/services/platform/lib/shared/schemas/governance.ts +++ b/services/platform/lib/shared/schemas/governance.ts @@ -161,7 +161,7 @@ export type UploadPolicyConfig = z.infer; /** * Per-org retention policy payload. Schema only validates structural * shape (integer + non-negative); category min/max bounds live in - * `examples/retention/default.json` (or per-org override files) and are + * `examples/default/retention.json` (or per-org override files) and are * enforced at write time by `assertWithinBounds` inside * `upsertRetentionPolicyAction`. Operators tighten or rename bounds by * editing the JSON file; the schema does not duplicate them. diff --git a/services/platform/lib/shared/schemas/retention.test.ts b/services/platform/lib/shared/schemas/retention.test.ts index 9d85f28c97..d4eed59ebd 100644 --- a/services/platform/lib/shared/schemas/retention.test.ts +++ b/services/platform/lib/shared/schemas/retention.test.ts @@ -96,7 +96,7 @@ describe('retentionBoundDefSchema', () => { }); describe('retentionDefaultsConfigSchema', () => { - it('accepts examples/retention/default.json (every category + root envPrefix + full envNames map)', () => { + it('accepts examples/default/retention.json (every category + root envPrefix + full envNames map)', () => { // Resolve from this test's directory up to repo root, then to examples/. 
// __dirname is services/platform/lib/shared/schemas/ const examplePath = join( @@ -107,8 +107,8 @@ describe('retentionDefaultsConfigSchema', () => { '..', '..', 'examples', - 'retention', - 'default.json', + 'default', + 'retention.json', ); const content = readFileSync(examplePath, 'utf-8'); const parsed = JSON.parse(content); @@ -121,7 +121,7 @@ describe('retentionDefaultsConfigSchema', () => { // Strict drift check: factory file declares every category and the // root `_metadata.envNames` map covers every (category × field) // pair (16 × 3 = 48 entries). Adding a new category to - // RETENTION_CATEGORIES without updating examples/retention/default.json + // RETENTION_CATEGORIES without updating examples/default/retention.json // fails one of these assertions loudly. expect(typeof parsed._metadata?.envPrefix).toBe('string'); expect(parsed._metadata.envPrefix.length).toBeGreaterThan(0); diff --git a/services/platform/lib/shared/utils/example-agents-normalized.test.ts b/services/platform/lib/shared/utils/example-agents-normalized.test.ts index 245ea9cdd0..8234270dad 100644 --- a/services/platform/lib/shared/utils/example-agents-normalized.test.ts +++ b/services/platform/lib/shared/utils/example-agents-normalized.test.ts @@ -7,19 +7,23 @@ import type { AgentJsonConfig } from '../../../convex/agents/file_utils'; import { isNormalized, normalizeAgentConfig } from './normalize-agent-config'; /** - * Every agent JSON in `examples/agents/` is treated as part of the shipped - * product — new orgs scaffold their agent directory by copying these files - * via `scaffoldNewOrganization`, which goes around the `normalizeAgentConfig` - * write boundary. If an example ever drifts into a non-normalized shape - * (legacy top-level co-existing with i18n[defaultLocale], empty-string - * placeholders, etc.), new orgs will inherit the pollution on creation. 
+ * Every agent JSON in `examples/default/agents/` is treated as part of the + * shipped product — new orgs scaffold their agent directory by copying these + * files via `scaffoldNewOrganization`, which goes around the + * `normalizeAgentConfig` write boundary. If an example ever drifts into a + * non-normalized shape (legacy top-level co-existing with i18n[defaultLocale], + * empty-string placeholders, etc.), new orgs will inherit the pollution on + * creation. * * This test pins the invariant at build time so any drift fails CI. */ -const EXAMPLES_DIR = path.resolve(__dirname, '../../../../../examples/agents'); +const EXAMPLES_DIR = path.resolve( + __dirname, + '../../../../../examples/default/agents', +); -describe('examples/agents/*.json invariants', () => { +describe('examples/default/agents/*.json invariants', () => { const files = readdirSync(EXAMPLES_DIR).filter((f) => f.endsWith('.json')); it('discovered at least one example agent', () => { diff --git a/services/platform/server.ts b/services/platform/server.ts index 3e02354808..a02df47df1 100644 --- a/services/platform/server.ts +++ b/services/platform/server.ts @@ -83,8 +83,11 @@ const port = process.env.PORT || 3000; const moduleDir = dirname(fileURLToPath(import.meta.url)); const distDir = join(moduleDir, 'dist'); const distSeoDir = join(moduleDir, 'dist-seo'); +// Branding is default-only on the read side (see branding/file_actions.ts — +// every reader passes the literal 'default'). On-disk location follows the +// uniform org-first layout: `${TALE_CONFIG_DIR}/default/branding/images/`. const brandingImagesDir = process.env.TALE_CONFIG_DIR - ? join(process.env.TALE_CONFIG_DIR, 'branding', 'images') + ? join(process.env.TALE_CONFIG_DIR, 'default', 'branding', 'images') : null; // Lazily loaded once per process. 
The manifest is read on the first diff --git a/services/platform/vite-plugins/serve-branding-images.ts b/services/platform/vite-plugins/serve-branding-images.ts index 9d74f69413..e05fe95e4d 100644 --- a/services/platform/vite-plugins/serve-branding-images.ts +++ b/services/platform/vite-plugins/serve-branding-images.ts @@ -14,8 +14,12 @@ const MIME_TYPES: Record = { }; export function serveBrandingImages(): Plugin { + // Branding is default-only on the read side (see branding/file_actions.ts). + // On-disk location: `${TALE_CONFIG_DIR}/default/branding/images/`. const configDir = process.env.TALE_CONFIG_DIR; - const imagesDir = configDir ? join(configDir, 'branding', 'images') : null; + const imagesDir = configDir + ? join(configDir, 'default', 'branding', 'images') + : null; return { name: 'serve-branding-images', diff --git a/tools/cli/src/commands/deploy/index.ts b/tools/cli/src/commands/deploy/index.ts index 4fe3f59c49..c7d9816dd5 100644 --- a/tools/cli/src/commands/deploy/index.ts +++ b/tools/cli/src/commands/deploy/index.ts @@ -34,13 +34,15 @@ export function createDeployCommand(): Command { ) .option('-q, --quiet', 'Suppress container logs during deployment') .option( - '-y, --yes', - 'Non-interactive: automatically accept any pending migrations', + '--override-all', + 'After deploy, factory-reseed the builtin catalog into ALL orgs server-side ' + + '(preserves *.secrets.json, .history/, and uploaded branding/images/). ' + + 'Implies --all (recreates stateful services so the new entrypoint runs).', false, ) .option( - '--migrate-volumes', - '[deprecated] alias for --yes; will be removed in a future release', + '-y, --yes', + 'Non-interactive: auto-accept destructive confirmation prompts (e.g. 
--override-all)', false, ) .action(async (options) => { @@ -84,22 +86,20 @@ export function createDeployCommand(): Command { services = serviceList as ServiceName[]; } - if (options.migrateVolumes && !options.yes) { - logger.warn( - '--migrate-volumes is deprecated; use --yes for non-interactive migration acceptance.', - ); - } const hostAlias = options.host ?? process.env.HOST ?? 'tale.local'; await deploy({ version, - updateStateful: options.all, + // --override-all implies --all so the convex container restarts + // with the new entrypoint + new code before the reseed action runs. + updateStateful: options.all || options.overrideAll, env, hostAlias, dryRun: options.dryRun, services, override: options.override, + overrideAll: options.overrideAll, quiet: options.quiet, - assumeYes: options.yes || options.migrateVolumes, + assumeYes: options.yes, forceRecreate, }); } catch (err) { diff --git a/tools/cli/src/commands/migrate.ts b/tools/cli/src/commands/migrate.ts new file mode 100644 index 0000000000..d2e3b8cbde --- /dev/null +++ b/tools/cli/src/commands/migrate.ts @@ -0,0 +1,41 @@ +import { Command } from 'commander'; + +import { migrateConfigLayout } from '../lib/actions/migrate-config-layout'; +import { requireProject } from '../lib/project/find-project'; +import { resolveProjectContext } from '../lib/project/project-context'; +import * as logger from '../utils/logger'; + +export function createMigrateCommand(): Command { + const migrateCmd = new Command('migrate').description( + 'One-shot, manually-run config migrations', + ); + + migrateCmd + .command('config-layout') + .description( + 'Relocate providers/*.secrets.json from the legacy per-domain layout ' + + 'to the org-first layout. Idempotent; copies (not moves) so old paths ' + + 'remain readable until --cleanup-old runs.', + ) + .option('--dry-run', 'Preview moves without changing files', false) + .option( + '--cleanup-old', + 'After verifying new == old (sha256), remove the old-path secrets. 
' + + 'Run only after the new deployment is healthy.', + false, + ) + .action(async (opts: { dryRun?: boolean; cleanupOld?: boolean }) => { + try { + await resolveProjectContext(requireProject()); + await migrateConfigLayout({ + dryRun: opts.dryRun ?? false, + cleanupOld: opts.cleanupOld ?? false, + }); + } catch (err) { + logger.error(err instanceof Error ? err.message : String(err)); + process.exit(1); + } + }); + + return migrateCmd; +} diff --git a/tools/cli/src/commands/start/index.ts b/tools/cli/src/commands/start/index.ts index 636413b154..6b4bc54efa 100644 --- a/tools/cli/src/commands/start/index.ts +++ b/tools/cli/src/commands/start/index.ts @@ -1,36 +1,47 @@ -import { Command } from 'commander'; +import { Command, Option } from 'commander'; import { start } from '../../lib/actions/start'; import * as logger from '../../utils/logger'; export function createStartCommand(): Command { - return new Command('start') - .description('Start Tale platform locally with project files') - .option('-d, --detach', 'run in background') - .option('-p, --port ', 'HTTPS port to expose', '443') - .option('--host ', 'host alias for proxy', 'tale.local') - .option( - '-y, --yes', - 'automatically accept any pending migrations (non-interactive; required in CI/non-TTY)', - ) - .action( - async (opts: { - detach?: boolean; - port: string; - host: string; - yes?: boolean; - }) => { - try { - await start({ - detach: opts.detach, - port: Number(opts.port), - host: opts.host, - assumeYes: opts.yes, - }); - } catch (err) { - logger.error(err instanceof Error ? err.message : String(err)); - process.exit(1); - } - }, - ); + return ( + new Command('start') + .description('Start Tale platform locally with project files') + .option('-d, --detach', 'run in background') + .option('-p, --port ', 'HTTPS port to expose', '443') + .option('--host ', 'host alias for proxy', 'tale.local') + // Hidden back-compat: `tale start -y` used to skip migration prompts. 
+ // The auto-migration framework is gone but operator CI scripts may + // still pass `-y`. Accept and ignore for one release, then remove. + .addOption( + new Option( + '-y, --yes', + '[deprecated] no longer needed (auto-migrations removed); ignored', + ).hideHelp(), + ) + .action( + async (opts: { + detach?: boolean; + port: string; + host: string; + yes?: boolean; + }) => { + try { + if (opts.yes) { + logger.warn( + '--yes/-y is deprecated on `tale start` and ignored; safe to remove from scripts.', + ); + } + await start({ + detach: opts.detach, + port: Number(opts.port), + host: opts.host, + }); + } catch (err) { + logger.error(err instanceof Error ? err.message : String(err)); + process.exit(1); + } + }, + ) + ); } diff --git a/tools/cli/src/index.ts b/tools/cli/src/index.ts index c5d1d16c86..b396d70f55 100644 --- a/tools/cli/src/index.ts +++ b/tools/cli/src/index.ts @@ -10,6 +10,7 @@ import { createDeployCommand } from './commands/deploy'; import { createDoctorCommand } from './commands/doctor'; import { createInitCommand } from './commands/init'; import { createLogsCommand } from './commands/logs'; +import { createMigrateCommand } from './commands/migrate'; import { createResetCommand } from './commands/reset'; import { createRollbackCommand } from './commands/rollback'; import { createStartCommand } from './commands/start'; @@ -47,5 +48,6 @@ program.addCommand(createRollbackCommand()); program.addCommand(createResetCommand()); program.addCommand(createCleanupCommand()); program.addCommand(createDoctorCommand()); +program.addCommand(createMigrateCommand()); await program.parseAsync(); diff --git a/tools/cli/src/lib/actions/deploy.ts b/tools/cli/src/lib/actions/deploy.ts index 5ce54c0144..6f235fe576 100644 --- a/tools/cli/src/lib/actions/deploy.ts +++ b/tools/cli/src/lib/actions/deploy.ts @@ -1,5 +1,5 @@ -import { existsSync } from 'node:fs'; -import { cp, mkdtemp, rm } from 'node:fs/promises'; +import { lstatSync } from 'node:fs'; +import { cp, mkdtemp, 
readdir, rm } from 'node:fs/promises'; import { tmpdir } from 'node:os'; import { join } from 'node:path'; @@ -34,8 +34,7 @@ import { getNextColor } from '../state/get-next-color'; import { setCurrentColor } from '../state/set-current-color'; import { setPreviousVersion } from '../state/set-previous-version'; import { withLock } from '../state/with-lock'; -import { MIGRATIONS } from '../upgrade/registry'; -import { runPendingMigrations } from '../upgrade/runner'; +import { reseedAllOrgsFromBuiltin } from './reseed-all-orgs'; async function ensureInfrastructure( prefix: string, @@ -73,11 +72,16 @@ interface DeployOptions { dryRun: boolean; services?: ServiceName[]; override?: boolean; + /** + * Factory-reseed builtin → all orgs after deploy completes. Triggers a + * server-side reseed action; preserves *.secrets.json, .history/, and + * uploaded branding/images/. Combined with `override`, host-push runs + * first, then the all-orgs reseed. + */ + overrideAll?: boolean; quiet?: boolean; - /** Non-interactive acceptance of any pending migrations. */ + /** Non-interactive: accept destructive confirmation prompts (e.g. --override-all). */ assumeYes?: boolean; - /** @deprecated use assumeYes. Kept for one release of CLI back-compat. */ - migrateVolumes?: boolean; /** * Set by the caller when `ensureEnv` filled in auto-gen secrets headlessly * (e.g. an upgrade silently materialized `SANDBOX_TOKEN`). All subsequent @@ -141,56 +145,8 @@ export async function deploy(options: DeployOptions): Promise { const prefix = dryRun ? '[DRY-RUN] ' : ''; logger.header(`${prefix}Deploying Tale ${version}`); - // Detect and apply any pending migrations before deploying. The runner - // prints the plan and prompts the user (default No) when anything is - // pending; non-interactive callers must pass --yes (aliased from the - // deprecated --migrate-volumes). Declining aborts deploy cleanly. 
- { - const migrationResult = await runPendingMigrations( - MIGRATIONS, - { projectId: getProjectId(), projectDir: env.DEPLOY_DIR }, - { - context: 'deploy', - assumeYes: options.assumeYes ?? options.migrateVolumes, - dryRun, - async performStops(stops) { - // `stops` may contain compose project names (e.g. 'tale', - // 'tale-blue') and/or individual container names (e.g. - // '${projectId}-platform-blue'). Try each as a compose project - // first, fall back to plain `docker stop`. Failures MUST - // surface — a silently-swallowed stop can let the migration - // copy a live volume, corrupting data. - for (const name of stops) { - const composeDown = await exec( - 'docker', - ['compose', '-p', name, 'down', '--remove-orphans'], - { silent: true }, - ); - if (composeDown.success) continue; - const stopResult = await exec( - 'docker', - ['stop', '-t', '30', name], - { silent: true }, - ); - if (stopResult.success) continue; - const stderr = `${stopResult.stderr ?? ''}`.toLowerCase(); - const looksMissing = - stderr.includes('no such container') || - stderr.includes('not found'); - if (!looksMissing) { - throw new Error( - `Failed to stop '${name}' before migration: ${stopResult.stderr?.trim() || 'unknown error'}`, - ); - } - } - }, - }, - ); - if (!migrationResult.proceed) { - logger.info('Aborting deploy until migrations are approved.'); - return; - } - } + // (Auto-migration framework removed — `tale migrate config-layout` is + // the only opt-in, manually-run migration now.) // Check if this is a first-time deployment const currentColor = await getCurrentColor(env.DEPLOY_DIR); @@ -628,6 +584,16 @@ export async function deploy(options: DeployOptions): Promise { tempStageDirs, options.override ?? false, ); + + // After deploy + optional host-push, trigger server-side reseed of + // builtin catalog into every org. Runs against the platform container + // (which holds the convex function source + admin key derivation). 
+ if (options.overrideAll) { + await reseedAllOrgsFromBuiltin({ + dryRun, + assumeYes: options.assumeYes ?? false, + }); + } }); } finally { process.removeListener('SIGINT', onInterrupt); @@ -635,20 +601,60 @@ export async function deploy(options: DeployOptions): Promise { } } -// Host workspace dirs that `tale deploy --override` pushes into the convex -// container. `*.secrets.json` files and `.history/` directories are always -// excluded from the push (see `stageForOverride`): encrypted secrets cannot be -// re-derived from the host, and the container's UI edit-history trail must -// survive. `docker cp` is additive, so anything not staged is left untouched -// on the container side. -const SYNC_DIRS = [ +// Org slug shape — must match validateOrgSlug at services/platform/convex/lib/file_io.ts. +// Duplicated here because the CLI ships in a single compiled binary that does +// not import convex sources at runtime. +const ORG_SLUG_REGEX = /^[a-z0-9][a-z0-9_-]*$/; +const MAX_ORG_SLUG_LENGTH = 64; + +// Top-level names under the project root that are legitimate per-domain +// dirs from the OLD flat layout (`agents/`, `workflows/`, …). Under +// org-first these don't belong at the root anymore — if any are present +// it's a legacy project that hasn't been re-init'd. Refuse to push (would +// silently land in `/app/data/agents/` etc., which the new resolvers don't +// read) and point the operator at `tale init --force`. 
+const LEGACY_DOMAIN_DIR_NAMES = new Set([ 'agents', 'workflows', 'integrations', 'branding', 'providers', 'skills', -]; + 'retention', +]); + +function isValidOrgSlug(name: string): boolean { + return ( + name.length > 0 && + name.length <= MAX_ORG_SLUG_LENGTH && + ORG_SLUG_REGEX.test(name) + ); +} + +async function findOrgDirs( + projectDir: string, +): Promise<{ orgDirs: string[]; legacyDirs: string[] }> { + const orgDirs: string[] = []; + const legacyDirs: string[] = []; + let entries: import('node:fs').Dirent[]; + try { + entries = await readdir(projectDir, { withFileTypes: true }); + } catch { + return { orgDirs, legacyDirs }; + } + for (const entry of entries) { + if (!entry.isDirectory()) continue; + const name = entry.name; + if (name.startsWith('.')) continue; // skips .tale, .git, .vscode, .DS_Store etc. + if (LEGACY_DOMAIN_DIR_NAMES.has(name)) { + legacyDirs.push(name); + continue; + } + if (!isValidOrgSlug(name)) continue; + orgDirs.push(name); + } + return { orgDirs, legacyDirs }; +} async function syncProjectFiles( containerName: string, @@ -672,11 +678,24 @@ async function syncProjectFiles( return; } - const dirsToSync = SYNC_DIRS.filter((dir) => - existsSync(join(projectDir, dir)), - ); + const { orgDirs, legacyDirs } = await findOrgDirs(projectDir); + + if (legacyDirs.length > 0) { + logger.error( + `${prefix}Legacy flat layout detected at project root (${legacyDirs.join(', ')}/).`, + ); + logger.info( + `${prefix} Move config under 'default//' (or run 'tale init --force' to rescaffold).`, + ); + logger.info(`${prefix} Aborting --override push.`); + return; + } - if (dirsToSync.length === 0) { + if (orgDirs.length === 0) { + logger.blank(); + logger.info( + `${prefix}Nothing to push: no org directories found at host root (expected e.g. 
'default/').`, + ); return; } @@ -691,86 +710,115 @@ async function syncProjectFiles( } logger.blank(); - logger.step(`${prefix}Overriding container config from host workspace...`); + logger.step( + `${prefix}Overriding container config from host workspace (1:1 push)...`, + ); logger.info( `${prefix} (encrypted *.secrets.json and .history/ are always preserved)`, ); + logger.info( + `${prefix} (--override is an additive overlay; files deleted locally remain in the container — use --override-all to factory-reseed from builtin)`, + ); - for (const dir of dirsToSync) { - const srcPath = join(projectDir, dir); + // Stage the full set of org subtrees into a single tmp dir whose top-level + // mirrors the in-container `/app/data/` shape: `///...`. + // Then a single `docker cp /. :/app/data/` does the push. + // Root-level junk (`tale.json`, `.tale/`, `.env`, `.git/`, IDE configs, etc.) + // is excluded by allowlist — never staged, never shipped. + const stageDir = await mkdtemp(join(tmpdir(), 'tale-sync-')); + tempStageDirs.add(stageDir); + + try { + for (const orgName of orgDirs) { + const orgSrc = join(projectDir, orgName); + const orgDst = join(stageDir, orgName); + + if (dryRun) { + logger.info( + `${prefix}Would push ${orgName}/ → ${containerName}:/app/data/${orgName}/ (excluding *.secrets.json, .history/, symlinks)`, + ); + continue; + } + + await stageOrgIntoDir(orgSrc, orgDst); + } if (dryRun) { logger.info( - `${prefix}Would override ${dir}/ → ${containerName}:/app/data/${dir}/ (excluding *.secrets.json and .history/)`, + `${prefix}Skipped at root: tale.json, .tale/, .env, .git/, dotfiles, ${legacyDirs.length ? 
`legacy ${legacyDirs.join(', ')}/, ` : ''}any other non-org-shaped entries`, ); - continue; + return; } - const stageDir = await stageForOverride(srcPath, tempStageDirs); - - try { - const dockerSrcPath = stageDir.replaceAll('\\', '/'); - const result = await exec('docker', [ - 'cp', - `${dockerSrcPath}/.`, - `${containerName}:/app/data/${dir}/`, - ]); - - if (result.success) { - // docker cp copies files as root — fix ownership so the app user can write - const chownResult = await exec('docker', [ - 'exec', - containerName, - 'chown', - '-R', - 'app:app', - `/app/data/${dir}/`, - ]); - if (!chownResult.success) { - logger.warn( - `Failed to fix ownership for ${dir}/: ${chownResult.stderr}`, - ); - } - logger.info(`Overrode ${dir}/`); - } else { - logger.warn(`Failed to override ${dir}/: ${result.stderr}`); - } - } finally { - tempStageDirs.delete(stageDir); - await rm(stageDir, { recursive: true, force: true }); + const dockerSrcPath = stageDir.replaceAll('\\', '/'); + const result = await exec('docker', [ + 'cp', + `${dockerSrcPath}/.`, + `${containerName}:/app/data/`, + ]); + + if (!result.success) { + logger.error(`Failed to override config: ${result.stderr}`); + return; } - } - if (!dryRun) { - logger.success('Config override complete'); + // docker cp copies files as root — fix ownership so the app user can write + const chownResult = await exec('docker', [ + 'exec', + containerName, + 'chown', + '-R', + 'app:app', + `/app/data/`, + ]); + if (!chownResult.success) { + logger.warn( + `Failed to fix ownership on /app/data: ${chownResult.stderr}`, + ); + } + + logger.success( + `Overrode ${orgDirs.length} org${orgDirs.length === 1 ? '' : 's'}: ${orgDirs.join(', ')}`, + ); + } finally { + tempStageDirs.delete(stageDir); + await rm(stageDir, { recursive: true, force: true }); } } -// Copy host `/` into a fresh tmp dir, excluding `*.secrets.json` files -// and any `.history/` directory during the copy. 
The staging dir is what -// `docker cp` ships; since `docker cp` is additive (never deletes container -// files absent from the source), excluded paths simply never reach the -// container and its existing secrets / edit-history survive. fs.cp defaults to -// dereference=false, which keeps symlinks intact. The `*.secrets.json` match -// mirrors the entrypoint's seed-skip check (services/convex/docker-entrypoint.sh). +// Copy a host org subtree (`//`) into a fresh +// `//` while: +// - skipping `.history/` directories at any depth (UI edit-history trail +// must survive in the container; `docker cp` is additive so absent = +// preserved on the container side), +// - skipping `*.secrets.json` files at any depth (encrypted secrets +// cannot be re-derived from the host), +// - skipping symlinks (defense against operator's host workspace +// containing a symlink to /etc/passwd or similar; cp's filter receives +// the source path so we lstat it). // -// Registers `stageDir` in `tempStageDirs` before any I/O so an interrupt or a -// throw mid-copy still gets cleaned up by the caller / SIGINT handler. -async function stageForOverride( - srcDir: string, - tempStageDirs: Set, -): Promise { - const stageDir = await mkdtemp(join(tmpdir(), 'tale-sync-')); - tempStageDirs.add(stageDir); - await cp(srcDir, stageDir, { +// All directory exclusions prune the entire subtree; `fs.cp` recurses past +// the filter for any directory the filter returned `true` for. Root-level +// non-org junk (`.tale/`, `.git/`, `.env`, IDE configs, dotfiles, etc.) is +// excluded one level up — only org-shaped dirs from `findOrgDirs` reach +// this function — so the filter here only handles depth-1+ skips. +async function stageOrgIntoDir(srcDir: string, destDir: string): Promise { + await cp(srcDir, destDir, { recursive: true, filter: (src) => { const base = src.split(/[\\/]/).pop() ?? ''; - // Returning false for a directory prunes its entire subtree. 
if (base === '.history') return false; if (base.endsWith('.secrets.json')) return false; + // lstat is sync here because fs.cp's filter is sync. Symlinks at + // any depth are skipped; missing entries (ENOENT) also skip rather + // than throw — fs.cp re-races stat() so any race is benign. + try { + const info = lstatSync(src); + if (info.isSymbolicLink()) return false; + } catch { + return false; + } return true; }, }); - return stageDir; } diff --git a/tools/cli/src/lib/actions/init.ts b/tools/cli/src/lib/actions/init.ts index b209300215..4cffcbdc8c 100644 --- a/tools/cli/src/lib/actions/init.ts +++ b/tools/cli/src/lib/actions/init.ts @@ -19,8 +19,6 @@ import { } from '../project/types'; import { writeProject } from '../project/write-project'; import { generateAllRules } from '../rules/generators'; -import { MIGRATIONS } from '../upgrade/registry'; -import { writeMigrationsState } from '../upgrade/state'; interface InitOptions { directory?: string; @@ -31,7 +29,9 @@ interface InitOptions { const GITIGNORE_ENTRIES = [ '.tale/', '.env', - '.history/', + // History dirs sit at any depth under the org-first tree + // (e.g. `default/agents/.history//`); use a recursive glob. + '**/.history/', 'compose.override.yml', 'compose.override.yaml', // Provider API keys — SOPS-encrypted when SOPS_AGE_KEY is set, plaintext @@ -133,26 +133,35 @@ export async function init(options: InitOptions): Promise { await mkdir(join(target, '.tale'), { recursive: true }); await fetchReference(target); + // Host workspace mirrors the uniform org-first layout: scaffold under + // `default//...`. The default org is the canonical template; + // operators can add `//...` subtrees alongside and + // `tale deploy --override` will push each `` it finds at root. 
+ const defaultOrgDir = join(target, 'default'); + // Copy agents from embedded examples logger.step('Copying agent configurations...'); const agentFiles = getEmbeddedExamples('agents'); - await writeEmbeddedFiles(agentFiles, join(target, 'agents')); + await writeEmbeddedFiles(agentFiles, join(defaultOrgDir, 'agents')); // Copy workflows from embedded examples logger.step('Copying workflow configurations...'); const workflowFiles = getEmbeddedExamples('workflows'); - await writeEmbeddedFiles(workflowFiles, join(target, 'workflows')); + await writeEmbeddedFiles(workflowFiles, join(defaultOrgDir, 'workflows')); // Copy integrations from embedded examples logger.step('Copying integration configurations...'); const integrationFiles = getEmbeddedExamples('integrations'); - await writeEmbeddedFiles(integrationFiles, join(target, 'integrations')); + await writeEmbeddedFiles( + integrationFiles, + join(defaultOrgDir, 'integrations'), + ); // Create branding directory with empty config logger.step('Creating branding configuration...'); - await mkdir(join(target, 'branding', 'images'), { recursive: true }); - await writeFile(join(target, 'branding', 'branding.json'), '{}\n'); - await writeFile(join(target, 'branding', 'images', '.gitkeep'), ''); + await mkdir(join(defaultOrgDir, 'branding', 'images'), { recursive: true }); + await writeFile(join(defaultOrgDir, 'branding', 'branding.json'), '{}\n'); + await writeFile(join(defaultOrgDir, 'branding', 'images', '.gitkeep'), ''); // Copy provider configs (public JSON only, not encrypted secrets) logger.step('Copying provider configurations...'); @@ -163,33 +172,55 @@ export async function init(options: InitOptions): Promise { providerConfigFiles.set(relPath, content); } } - await writeEmbeddedFiles(providerConfigFiles, join(target, 'providers')); + await writeEmbeddedFiles( + providerConfigFiles, + join(defaultOrgDir, 'providers'), + ); // Copy skills from embedded examples logger.step('Copying skill bundles...'); const 
skillFiles = getEmbeddedExamples('skills'); - await writeEmbeddedFiles(skillFiles, join(target, 'skills')); + await writeEmbeddedFiles(skillFiles, join(defaultOrgDir, 'skills')); - // Compute checksums + // Compute checksums. Paths are recorded relative to the project root, + // matching where the files actually live (default//...). logger.step('Computing file checksums...'); const allFiles = new Map(); for (const [relPath, content] of agentFiles) { - allFiles.set(join('agents', relPath), computeContentHash(content)); + allFiles.set( + join('default', 'agents', relPath), + computeContentHash(content), + ); } for (const [relPath, content] of workflowFiles) { - allFiles.set(join('workflows', relPath), computeContentHash(content)); + allFiles.set( + join('default', 'workflows', relPath), + computeContentHash(content), + ); } for (const [relPath, content] of integrationFiles) { - allFiles.set(join('integrations', relPath), computeContentHash(content)); + allFiles.set( + join('default', 'integrations', relPath), + computeContentHash(content), + ); } for (const [relPath, content] of providerConfigFiles) { - allFiles.set(join('providers', relPath), computeContentHash(content)); + allFiles.set( + join('default', 'providers', relPath), + computeContentHash(content), + ); } for (const [relPath, content] of skillFiles) { - allFiles.set(join('skills', relPath), computeContentHash(content)); + allFiles.set( + join('default', 'skills', relPath), + computeContentHash(content), + ); } - allFiles.set(join('branding', 'branding.json'), computeContentHash('{}\n')); + allFiles.set( + join('default', 'branding', 'branding.json'), + computeContentHash('{}\n'), + ); const checksums: Checksums = { cliVersion: pkg.version, @@ -220,25 +251,9 @@ export async function init(options: InitOptions): Promise { // Make the ID available to subsequent steps (ensureEnv uses getProjectId()). 
setProjectId(projectId); - // Seed `.tale/migrations.json` for fresh projects so historical migrations - // never apply to data that was born in the current CLI's schema. Without - // this, a `tale init` in a directory where the host still has legacy - // `tale_*` volumes from some older project would trigger namespace-volumes - // to copy that unrelated data into the new project's namespace. - // - // Only seed when there was no existing tale.json AND no migrations.json - // already present — reinit must not clobber prior state. - const migrationsJsonPath = join(target, '.tale', 'migrations.json'); - if (existingProject === null && !existsSync(migrationsJsonPath)) { - const now = new Date().toISOString(); - await writeMigrationsState(target, { - applied: MIGRATIONS.map((m) => ({ - id: m.id, - at: now, - cliVersion: pkg.version, - })), - }); - } + // (`.tale/migrations.json` seeding removed alongside the auto-migration + // framework. Existing projects' stale files are harmless and can be + // deleted manually.) // Write AI rules files logger.step('Writing AI rules files...'); @@ -266,7 +281,12 @@ export async function init(options: InitOptions): Promise { // encrypted-vs-plaintext mode is a runtime save-path decision, not an // init-time choice. 
if (envResult.openrouterKey && envResult.agePublicKey) { - const secretsPath = join(target, 'providers', 'openrouter.secrets.json'); + const secretsPath = join( + target, + 'default', + 'providers', + 'openrouter.secrets.json', + ); const { sopsEncryptJson } = await import('../crypto/sops-encrypt'); const encrypted = await sopsEncryptJson( { apiKey: envResult.openrouterKey }, @@ -274,7 +294,7 @@ export async function init(options: InitOptions): Promise { ); await writeFile(secretsPath, encrypted); logger.success( - 'Encrypted provider API key into providers/openrouter.secrets.json', + 'Encrypted provider API key into default/providers/openrouter.secrets.json', ); } } @@ -301,7 +321,7 @@ export async function init(options: InitOptions): Promise { logger.info(` ${step++}. Run "cd ${target}" to enter your project`); } logger.info( - ` ${step++}. Edit agents/, workflows/, integrations/, skills/, and branding/ to customize your setup`, + ` ${step++}. Edit default/agents/, default/workflows/, default/integrations/, default/skills/, and default/branding/ to customize your setup`, ); logger.info( ` ${step++}. Open the project in an AI-powered editor (Claude Code, Cursor, Copilot, or Windsurf) for guided config creation`, @@ -309,15 +329,23 @@ export async function init(options: InitOptions): Promise { logger.info(` ${step++}. Run "tale start" to launch the platform locally`); } +// Top-level markers indicating a Tale project. Under the uniform org-first +// layout, `default/` is the canonical org dir (and any other org dir is +// also a marker, but we don't try to enumerate slugs — `default/` is enough +// to detect a project). Legacy per-domain dirs (`agents/`, `workflows/`, +// etc.) at the root are kept as markers so `tale init` re-detects old +// projects from a prior CLI version. 
const TALE_PROJECT_MARKERS = new Set([ '.env', 'tale.json', + '.tale', + 'default', + // Legacy / pre-org-first markers (detected during reinit only): 'providers', 'agents', 'workflows', 'integrations', 'skills', - '.tale', 'branding', ]); diff --git a/tools/cli/src/lib/actions/migrate-config-layout.ts b/tools/cli/src/lib/actions/migrate-config-layout.ts new file mode 100644 index 0000000000..67e209e78e --- /dev/null +++ b/tools/cli/src/lib/actions/migrate-config-layout.ts @@ -0,0 +1,104 @@ +/** + * `tale migrate config-layout` orchestration. Pipes the migrate-config-layout + * bash script into the currently-running convex container via stdin so the + * operator can run migrate FIRST (before redeploying with the new image). + * + * Uses cp (not mv) so old paths remain readable until the operator runs + * `tale migrate config-layout --cleanup-old` after verifying the new + * deployment is healthy. This is the rollback-insurance step. + * + * Runbook (2-step + optional cleanup): + * 1. tale migrate config-layout + * (copies providers/*.secrets.json to new org-first paths; + * old paths remain so the currently-running old code still works) + * 2. tale deploy --override-all -y + * (recreates convex with new code + seeds non-default orgs from builtin) + * 3. (optional, after verifying health) tale migrate config-layout --cleanup-old + * (sha-verifies new == old, then unlinks the olds) + */ + +import { readFile } from 'node:fs/promises'; +import { dirname, join } from 'node:path'; +import { fileURLToPath } from 'node:url'; + +import { getProjectId } from '../../utils/load-env'; +import * as logger from '../../utils/logger'; +import { exec } from '../docker/exec'; +import { isContainerRunning } from '../docker/is-container-running'; + +export interface MigrateConfigLayoutOptions { + dryRun: boolean; + cleanupOld: boolean; +} + +/** + * Read the migrate script next to this module. 
The .sh file is the source + * of truth (also runnable in the shell-script integration harness), and + * Bun's source-file colocation makes runtime loading work in both `bun + * run`-from-source and the compiled binary (Bun bundles imported assets). + */ +async function loadScript(): Promise { + const moduleDir = dirname(fileURLToPath(import.meta.url)); + const scriptPath = join( + moduleDir, + '..', + 'migrate-config-layout', + 'script.sh', + ); + return await readFile(scriptPath, 'utf-8'); +} + +export async function migrateConfigLayout( + options: MigrateConfigLayoutOptions, +): Promise { + const { dryRun, cleanupOld } = options; + + const containerName = `${getProjectId()}-convex`; + if (!(await isContainerRunning(containerName))) { + throw new Error( + `Convex container "${containerName}" is not running. ` + + 'Start the platform first (e.g. `tale deploy`) before running this migration.', + ); + } + + const script = await loadScript(); + + const scriptArgs: string[] = []; + if (dryRun) scriptArgs.push('--dry-run'); + if (cleanupOld) scriptArgs.push('--cleanup-old'); + + logger.blank(); + if (cleanupOld) { + logger.step( + dryRun + ? '[DRY-RUN] Cleanup-old: would verify and remove old-path secrets' + : 'Verifying + removing old-path secrets (sha-matched against new paths)...', + ); + } else { + logger.step( + dryRun + ? '[DRY-RUN] Migrate: would cp providers/*.secrets.json to new org-first paths' + : 'Copying providers/*.secrets.json to new org-first paths (old paths preserved for rollback)...', + ); + } + + // `docker exec -i ... bash -s -- ` runs the script piped via + // stdin; the `--` separates script args from bash's own flags. + const result = await exec( + 'docker', + ['exec', '-i', containerName, 'bash', '-s', '--', ...scriptArgs], + { stdin: script }, + ); + + if (result.stdout) logger.info(result.stdout); + if (!result.success) { + if (result.stderr) logger.error(result.stderr.trim()); + throw new Error( + `tale migrate config-layout${cleanupOld ? 
' --cleanup-old' : ''} failed (exit code ${result.exitCode}).`, + ); + } + if (result.stderr) { + // Warnings printed to stderr (e.g. SKIP messages) are not fatal but worth surfacing. + logger.warn(result.stderr.trim()); + } +} diff --git a/tools/cli/src/lib/actions/reseed-all-orgs.ts b/tools/cli/src/lib/actions/reseed-all-orgs.ts new file mode 100644 index 0000000000..170c819a19 --- /dev/null +++ b/tools/cli/src/lib/actions/reseed-all-orgs.ts @@ -0,0 +1,116 @@ +/** + * `tale deploy --override-all` orchestration: invoke the convex-side + * `reseedAllOrgsFromBuiltin` action via `docker exec` into the running + * platform container. Mirrors the proven incantation pattern from + * scripts/2026-03-28-migrate-convex-data.sh:120-131 (source env.sh, + * ensure_instance_secret, compute admin key inline, run convex CLI). + * + * Destructive: factory-reseeds every org's non-secret config from the + * builtin catalog. `*.secrets.json` files and `.history/` trails are + * preserved server-side by `scaffoldNewOrganization({override:true})`. + * Uploaded branding `images/` survive (branding is treated as a tree + * with per-file overwrite). Everything else under each `//` + * is overwritten with builtin content. + */ + +import { confirm } from '../../utils/confirm'; +import * as logger from '../../utils/logger'; +import { exec } from '../docker/exec'; +import { findPlatformContainer } from '../docker/find-platform-container'; + +export interface ReseedAllOrgsOptions { + dryRun: boolean; + assumeYes: boolean; +} + +/** + * The bash script piped into the platform container. Adopts the proven + * env-sourcing pattern from scripts/2026-03-28-migrate-convex-data.sh so + * `INSTANCE_SECRET` is guaranteed populated and the admin key derivation + * matches the entrypoint's own runtime computation. + * + * Runtime workdir is `/app` (services/platform/Dockerfile sets + * `WORKDIR /app`; flattens services/platform/{convex,lib,env.sh,…} into + * `/app/`). 
No `cd /app/services/platform` — that path does not exist + * at runtime. + */ +const RESEED_SCRIPT = `set -eo pipefail +source /app/env.sh +env_normalize_common +source /app/generate-admin-key.sh +ensure_instance_secret +ADMIN_KEY=$(generate_key "$INSTANCE_NAME" "$INSTANCE_SECRET") +cd /app +HOME=/home/app timeout 1800 bunx convex run \\ + organizations/reseed_all_orgs:reseedAllOrgsFromBuiltin \\ + --url "\${CONVEX_URL:-http://convex:3210}" \\ + --admin-key "$ADMIN_KEY" +`; + +const CONFIRM_MESSAGE = + '--override-all will factory-reset every org from the builtin catalog. ' + + '*.secrets.json files, .history/ trails, and uploaded branding/images/ are preserved; ' + + 'all other config (model lists, agents, workflows, skills, integrations, branding.json, retention.json) ' + + 'is overwritten. Proceed?'; + +export async function reseedAllOrgsFromBuiltin( + options: ReseedAllOrgsOptions, +): Promise { + const { dryRun, assumeYes } = options; + + // Gate non-interactive callers behind --yes to avoid silent abort in CI. + const isTty = Boolean(process.stdin.isTTY); + if (!assumeYes && !isTty) { + throw new Error( + '--override-all requires --yes (-y) when stdin is not a TTY (e.g. CI).', + ); + } + if (!assumeYes && isTty) { + const ok = await confirm(CONFIRM_MESSAGE); + if (!ok) { + logger.info('Aborted by user.'); + return; + } + } + + const container = await findPlatformContainer(); + + if (dryRun) { + logger.blank(); + logger.info('[DRY-RUN] Would run:'); + logger.info(` docker exec ${container} bash -lc ''`); + logger.info('Reseed script body (would be piped into bash):'); + for (const line of RESEED_SCRIPT.split('\n')) { + logger.info(` ${line}`); + } + return; + } + + logger.blank(); + logger.step('Reseeding builtin catalog into all orgs...'); + + // Pipe the script via stdin instead of embedding in argv — avoids shell + // escaping pitfalls and keeps the script source readable. 
+ const result = await exec('docker', ['exec', '-i', container, 'bash', '-s'], { + stdin: RESEED_SCRIPT, + }); + + if (!result.success) { + if (result.stderr) { + logger.error(result.stderr.trim()); + } + throw new Error( + `--override-all failed (docker exec into ${container} returned non-zero).`, + ); + } + + // The action's return value is printed to stdout by `bunx convex run`. + if (result.stdout) { + const trimmed = result.stdout.trim(); + if (trimmed) { + logger.info(trimmed); + } + } + + logger.success('Reseed complete.'); +} diff --git a/tools/cli/src/lib/actions/start.ts b/tools/cli/src/lib/actions/start.ts index 634233cc94..47cdecdf9a 100644 --- a/tools/cli/src/lib/actions/start.ts +++ b/tools/cli/src/lib/actions/start.ts @@ -16,8 +16,6 @@ import { exec } from '../docker/exec'; import { findProject } from '../project/find-project'; import { resolveOrAssignProjectContext } from '../project/project-context'; import { withLock } from '../state/with-lock'; -import { MIGRATIONS } from '../upgrade/registry'; -import { runPendingMigrations } from '../upgrade/runner'; import { init } from './init'; async function assertDockerAvailable(): Promise { @@ -122,8 +120,6 @@ interface StartOptions { detach?: boolean; port?: number; host?: string; - /** Non-interactive acceptance of any pending migrations (mirrors deploy). */ - assumeYes?: boolean; } export async function start(options: StartOptions): Promise { @@ -154,67 +150,15 @@ export async function start(options: StartOptions): Promise { await assertDockerAvailable(); // Resolve project ID from tale.json before any Docker-resource naming. - // Auto-assign an ID for legacy projects so users don't have to run - // `tale upgrade` as a separate step before `tale start` works. 
await resolveOrAssignProjectContext(projectDir); - // Detect and apply any pending migrations, then ensure dev infrastructure, - // all under a project-scoped lock so parallel `tale start` / `tale deploy` - // shells can't race on docker volumes or migrations.json. The lock is - // released before `docker compose up` starts — holding it for the full - // foreground lifetime of compose would block every other tale command. + // Ensure dev infrastructure under a project-scoped lock so parallel + // `tale start` / `tale deploy` shells can't race on docker volumes. + // The lock is released before `docker compose up` starts — holding it + // for the full foreground lifetime of compose would block every other + // tale command. const devPrefix = `${getProjectId()}-dev_`; await withLock(projectDir, 'start', async () => { - const migrationResult = await runPendingMigrations( - MIGRATIONS, - { projectId: getProjectId(), projectDir }, - { - context: 'start', - assumeYes: options.assumeYes, - async performStops(stops) { - // `stops` is the union of compose project names (e.g. legacy - // 'tale-dev') and individual container names (e.g. - // '${projectId}-dev-platform-blue'). Try each as a compose project - // first, then fall back to `docker stop` for container names. - // Failures here MUST surface — a silently-swallowed stop can let - // the migration copy a live volume, corrupting data. - for (const name of stops) { - const composeDown = await exec( - 'docker', - ['compose', '-p', name, 'down', '--remove-orphans'], - { silent: true }, - ); - if (composeDown.success) continue; - const stopResult = await exec( - 'docker', - ['stop', '-t', '30', name], - { - silent: true, - }, - ); - if (stopResult.success) continue; - // Neither channel worked. If the container genuinely doesn't - // exist, `docker stop` produces a specific stderr we can match; - // any other failure is a hard abort so we don't proceed to - // `cp -a` against a live volume. 
- const stderr = `${stopResult.stderr ?? ''}`.toLowerCase(); - const looksMissing = - stderr.includes('no such container') || - stderr.includes('not found'); - if (!looksMissing) { - throw new Error( - `Failed to stop '${name}' before migration: ${stopResult.stderr?.trim() || 'unknown error'}`, - ); - } - } - }, - }, - ); - if (!migrationResult.proceed) { - logger.info('Aborting start until migrations are approved.'); - process.exit(2); - } - // Pre-create dev volumes and network with explicit project-scoped names. // The dev compose file references them as external, so they must exist // before `docker compose up`. diff --git a/tools/cli/src/lib/actions/update.ts b/tools/cli/src/lib/actions/update.ts index 48c8a82f9b..11f4ad7fbb 100644 --- a/tools/cli/src/lib/actions/update.ts +++ b/tools/cli/src/lib/actions/update.ts @@ -21,8 +21,6 @@ import { readProject } from '../project/read-project'; import type { Checksums } from '../project/types'; import { writeProject } from '../project/write-project'; import { generateAllRules } from '../rules/generators'; -import { MIGRATIONS } from '../upgrade/registry'; -import { planPendingMigrations } from '../upgrade/runner'; interface UpdateOptions { force?: boolean; @@ -206,21 +204,6 @@ export async function update(options: UpdateOptions): Promise { ); } - // Plan (but do NOT apply) any pending migrations so operators know what - // `tale start` / `tale deploy` will prompt them about next. Never stops - // containers or modifies Docker state from within `tale upgrade` itself — - // production deployments remain untouched. - if (!options.dryRun) { - const projectId = assignedId ?? 
project.id; - if (projectId) { - logger.blank(); - const pending = await planPendingMigrations(MIGRATIONS, { - projectId, - projectDir, - }); - if (pending.length === 0) { - logger.debug('No pending migrations.'); - } - } - } + // (Auto-migration planning removed — `tale migrate config-layout` is the + // only opt-in, manually-run migration now; operators invoke it directly.) } diff --git a/tools/cli/src/lib/docker/exec.ts b/tools/cli/src/lib/docker/exec.ts index 8b47f241c5..aa1b33a857 100644 --- a/tools/cli/src/lib/docker/exec.ts +++ b/tools/cli/src/lib/docker/exec.ts @@ -10,19 +10,42 @@ export interface ExecResult { export async function exec( command: string, args: string[], - options: { cwd?: string; silent?: boolean; timeout?: number } = {}, + options: { + cwd?: string; + silent?: boolean; + timeout?: number; + /** + * Pipe this string into the child's stdin and close. Required for the + * `docker exec -i bash -s` pattern used by reseed/migrate. + */ + stdin?: string; + } = {}, ): Promise { - const { cwd, silent = false, timeout } = options; + const { cwd, silent = false, timeout, stdin } = options; if (!silent) { logger.debug(`Executing: ${command} ${args.join(' ')}`); } - const proc = Bun.spawn([command, ...args], { - cwd, - stdout: 'pipe', - stderr: 'pipe', - }); + const proc = + stdin === undefined + ? Bun.spawn([command, ...args], { + cwd, + stdout: 'pipe', + stderr: 'pipe', + }) + : Bun.spawn([command, ...args], { + cwd, + stdin: 'pipe', + stdout: 'pipe', + stderr: 'pipe', + }); + + if (stdin !== undefined) { + const sink = (proc as Bun.Subprocess<'pipe', 'pipe', 'pipe'>).stdin; + sink.write(stdin); + await sink.end(); + } const exitPromise = timeout ? 
Promise.race([ diff --git a/tools/cli/src/lib/migrate-config-layout/script.sh b/tools/cli/src/lib/migrate-config-layout/script.sh new file mode 100644 index 0000000000..40f2f9a850 --- /dev/null +++ b/tools/cli/src/lib/migrate-config-layout/script.sh @@ -0,0 +1,173 @@ +#!/bin/bash +# Migrate providers/*.secrets.json from old per-domain layout to new +# org-first layout. Idempotent. Uses cp (not mv) so old paths remain +# readable until the operator runs `tale migrate config-layout --cleanup-old`. +# +# Old → new mapping: +# $DATA/providers/<name>.secrets.json +# → $DATA/default/providers/<name>.secrets.json +# $DATA/providers/<org>/<name>.secrets.json +# → $DATA/<org>/providers/<name>.secrets.json +# +# Scope: providers/*.secrets.json ONLY. Non-secret config is reseeded by +# `tale deploy --override-all` against the builtin catalog; non-provider +# .history/ trails under old paths are intentionally abandoned (the user's +# "secrets only" runbook trade-off). +# +# Designed to run against the CURRENTLY-running convex container (old +# image, old code paths still active). cp leaves old paths in place so +# old code keeps reading providers correctly until the operator runs +# `tale deploy --override-all -y` to recreate convex with the new code. +set -eo pipefail + +DRY_RUN=0 +CLEANUP_OLD=0 +for arg in "$@"; do + case "$arg" in + --dry-run) DRY_RUN=1 ;; + --cleanup-old) CLEANUP_OLD=1 ;; + *) echo "Unknown arg: $arg" >&2; exit 2 ;; + esac +done + +DATA="${TALE_CONFIG_DIR:-/app/data}" +APP_UID=1001 +APP_GID=1001 + +planned=0 +copied=0 +skipped=0 +removed=0 +errors=0 +conflicts=() + +# Copy a single .secrets.json from old to new path. cp -a preserves mode + +# ownership (encrypted secrets are 0600 owner:app). Idempotent: if the +# destination already exists, verify byte-for-byte equality (then skip) +# rather than overwriting — protects a secret a UI-side `atomicWriteSecret` +# already wrote to the new path; a write landing after the check can still race.
+copy_secret() { + local src="$1" dst="$2" + local dst_dir; dst_dir="$(dirname "$dst")" + if [ -e "$dst" ]; then + if cmp -s "$src" "$dst" 2>/dev/null; then + skipped=$((skipped+1)); echo "SKIP (already migrated): $src" + return 0 + else + conflicts+=("$src ≠ $dst") + errors=$((errors+1)) + echo "ERROR: $dst exists but differs from $src; refusing to overwrite" >&2 + return 0 + fi + fi + if [ "$DRY_RUN" = 1 ]; then + echo "MIGRATE_PLAN: mkdir -p $dst_dir && cp -a $src $dst" + planned=$((planned+1)) + return 0 + fi + mkdir -p "$dst_dir" + chown "$APP_UID:$APP_GID" "$dst_dir" 2>/dev/null || true + cp -a "$src" "$dst" + copied=$((copied+1)) + echo "OK: $src -> $dst" +} + +# Remove an old-path secret IF the new-path copy exists and matches +# byte-for-byte. Refuses any mismatch — operator must reconcile manually. +remove_old_secret() { + local old="$1" new="$2" + if [ ! -e "$old" ]; then return 0; fi + if [ ! -e "$new" ]; then + conflicts+=("missing new-path counterpart for $old (expected $new)") + errors=$((errors+1)) + echo "ERROR: $new does not exist; refusing to remove $old" >&2 + return 0 + fi + if ! 
cmp -s "$old" "$new" 2>/dev/null; then + conflicts+=("$old ≠ $new") + errors=$((errors+1)) + echo "ERROR: $old and $new differ; refusing to remove $old" >&2 + return 0 + fi + if [ "$DRY_RUN" = 1 ]; then + echo "CLEANUP_PLAN: rm $old" + planned=$((planned+1)) + return 0 + fi + rm -f "$old" + removed=$((removed+1)) + echo "REMOVED: $old" +} + +# --------------------------------------------------------------------------- +# Enumeration +# --------------------------------------------------------------------------- +process_secret() { + local src="$1" dst="$2" + if [ "$CLEANUP_OLD" = 1 ]; then + remove_old_secret "$src" "$dst" + else + copy_secret "$src" "$dst" + fi +} + +# Default org: top-level $DATA/providers/*.secrets.json → $DATA/default/providers/ +if [ -d "$DATA/providers" ]; then + for f in "$DATA"/providers/*.secrets.json; do + [ -f "$f" ] || continue + process_secret "$f" "$DATA/default/providers/$(basename "$f")" + done +fi + +# Non-default orgs: $DATA/providers/<org>/*.secrets.json → $DATA/<org>/providers/ +if [ -d "$DATA/providers" ]; then + for d in "$DATA"/providers/*/; do + [ -d "$d" ] || continue + org="$(basename "$d")" + case "$org" in + .*) continue ;; + esac + # Validate against ORG_SLUG_REGEX (keep in sync with validateOrgSlug + # at services/platform/convex/lib/file_io.ts). Anything that doesn't + # match is skipped with a warning — defends against `.history` or + # future hidden markers leaking into the iteration. + if !
[[ "$org" =~ ^[a-z0-9][a-z0-9_-]{0,63}$ ]]; then + echo "SKIP (not a valid org slug): $org" >&2 + skipped=$((skipped+1)) + continue + fi + for f in "$d"*.secrets.json; do + [ -f "$f" ] || continue + process_secret "$f" "$DATA/$org/providers/$(basename "$f")" + done + done +fi + +# --------------------------------------------------------------------------- +# Summary +# --------------------------------------------------------------------------- +echo +if [ "$CLEANUP_OLD" = 1 ]; then + if [ "$DRY_RUN" = 1 ]; then + echo "MIGRATE_SUMMARY: planned=$planned removed=0 errors=$errors (cleanup-old --dry-run)" + else + echo "MIGRATE_SUMMARY: removed=$removed errors=$errors (cleanup-old)" + fi +else + if [ "$DRY_RUN" = 1 ]; then + echo "MIGRATE_SUMMARY: planned=$planned copied=0 skipped=$skipped errors=$errors (--dry-run)" + else + echo "MIGRATE_SUMMARY: copied=$copied skipped=$skipped errors=$errors" + fi + if [ "$copied" -gt 0 ] || [ "$planned" -gt 0 ]; then + echo "Next: run 'tale deploy --override-all -y' to recreate convex with the new code and seed non-default orgs." + fi +fi +if [ "${#conflicts[@]}" -gt 0 ]; then + echo + echo "Unresolved conflicts (require manual reconciliation):" + for c in "${conflicts[@]}"; do + echo " - $c" + done +fi +[ "$errors" -eq 0 ] || exit 1 diff --git a/tools/cli/src/lib/project/fetch-reference.ts b/tools/cli/src/lib/project/fetch-reference.ts index e9e70fde5b..89628449da 100644 --- a/tools/cli/src/lib/project/fetch-reference.ts +++ b/tools/cli/src/lib/project/fetch-reference.ts @@ -22,9 +22,15 @@ export async function fetchReference(projectDir: string): Promise { } } +/** + * Read a slice of the embedded `examples/default/<prefix>/...` tree as a + * map of `<path>` → content. The catalog ships only the canonical `default` + * org's seed under `examples/default/`; the org-first layout repeats the + * same shape for any number of orgs at runtime.
+ */ export function getEmbeddedExamples(prefix: string): Map { const result = new Map(); - const examplesPrefix = `examples/${prefix}/`; + const examplesPrefix = `examples/default/${prefix}/`; for (const [path, content] of Object.entries(EMBEDDED_EXAMPLES)) { if (path.startsWith(examplesPrefix)) { diff --git a/tools/cli/src/lib/upgrade/migrations/adopt-convex-stateful.ts b/tools/cli/src/lib/upgrade/migrations/adopt-convex-stateful.ts deleted file mode 100644 index 8d0ac07835..0000000000 --- a/tools/cli/src/lib/upgrade/migrations/adopt-convex-stateful.ts +++ /dev/null @@ -1,67 +0,0 @@ -import * as logger from '../../../utils/logger'; -import { docker } from '../../docker/docker'; -import type { Migration, MigrationContext } from '../types'; - -/** - * Convex was previously emitted in the color compose file (blue/green project) - * even though it is a singleton. This migration detects the existing convex - * container under a color project and removes it so the stateful compose can - * recreate it under the main project. The convex-data volume is external and - * is not affected. 
- */ - -async function getContainerProjectLabel( - containerName: string, -): Promise { - const result = await docker( - 'inspect', - '--format', - '{{index .Config.Labels "com.docker.compose.project"}}', - containerName, - ); - if (!result.success) return null; - const label = result.stdout.trim(); - return label || null; -} - -export const adoptConvexStatefulMigration: Migration = { - id: 'adopt-convex-stateful', - introducedIn: '0.3.1', - description: (ctx: MigrationContext) => - `Move ${ctx.projectId}-convex container from blue/green project scope to stateful project scope (${ctx.projectId}).`, - - async detect(ctx: MigrationContext): Promise { - const label = await getContainerProjectLabel(`${ctx.projectId}-convex`); - if (!label) return false; // container doesn't exist — fresh install - return label !== ctx.projectId; // needs migration if owned by a color project - }, - - async requiredStops(ctx: MigrationContext): Promise { - const label = await getContainerProjectLabel(`${ctx.projectId}-convex`); - if (!label || label === ctx.projectId) return []; - return [`${ctx.projectId}-convex`]; - }, - - async apply(ctx, { dryRun }) { - if (dryRun) return 'noop'; - - const containerName = `${ctx.projectId}-convex`; - const label = await getContainerProjectLabel(containerName); - if (!label || label === ctx.projectId) return 'noop'; - - logger.info( - ` Removing ${containerName} (owned by project "${label}") so it can be recreated under "${ctx.projectId}"`, - ); - const result = await docker('rm', '-f', containerName); - if (!result.success) { - throw new Error( - `Failed to remove ${containerName}: ${result.stderr.trim()}`, - ); - } - - logger.info( - ' The convex-data volume is preserved. 
The container will be recreated by the next deploy.', - ); - return 'applied'; - }, -}; diff --git a/tools/cli/src/lib/upgrade/migrations/namespace-caddy-config.ts b/tools/cli/src/lib/upgrade/migrations/namespace-caddy-config.ts deleted file mode 100644 index aeea94b558..0000000000 --- a/tools/cli/src/lib/upgrade/migrations/namespace-caddy-config.ts +++ /dev/null @@ -1,77 +0,0 @@ -import * as logger from '../../../utils/logger'; -import type { Migration, MigrationContext } from '../types'; -import { - copyVolumeWithVerify, - resolveMigrationImage, - volumeExists, - volumeHasData, -} from '../volume-helpers'; - -/** - * Supplemental fix: `caddy-config` was accidentally omitted from PROD_VOLUMES - * in the namespace-volumes migration, so `tale_caddy-config` was never copied - * to `${projectId}_caddy-config` for existing production deployments. - * - * This migration uses the same idempotent end-state check: only copies if the - * source has data and the destination is absent or empty. - */ - -const LEGACY_PROJECT_NAME = 'tale'; - -export const namespaceCaddyConfigMigration: Migration = { - id: 'namespace-caddy-config', - introducedIn: '0.3.1', - description: (ctx: MigrationContext) => - `Copy ${LEGACY_PROJECT_NAME}_caddy-config to ${ctx.projectId}_caddy-config (missed by namespace-volumes).`, - - async detect(ctx: MigrationContext): Promise { - const oldName = `${LEGACY_PROJECT_NAME}_caddy-config`; - const newName = `${ctx.projectId}_caddy-config`; - - if (!(await volumeExists(oldName))) return false; - - // If destination already has data, nothing to do. - const image = await resolveMigrationImage(); - if ( - (await volumeExists(newName)) && - (await volumeHasData(newName, image)) - ) { - return false; - } - - return volumeHasData(oldName, image); - }, - - async requiredStops(): Promise { - // Proxy is the only consumer of caddy-config and it lives in the - // stateful compose under the namespaced project. The legacy compose - // projects ('tale', etc.) 
were already torn down by namespace-volumes. - return []; - }, - - async apply(ctx, { dryRun }) { - if (dryRun) return 'noop'; - - const oldName = `${LEGACY_PROJECT_NAME}_caddy-config`; - const newName = `${ctx.projectId}_caddy-config`; - - const image = await resolveMigrationImage(); - - // Re-check end-state (idempotent). - if ( - (await volumeExists(newName)) && - (await volumeHasData(newName, image)) - ) { - return 'noop'; - } - if (!(await volumeExists(oldName))) return 'noop'; - if (!(await volumeHasData(oldName, image))) return 'noop'; - - logger.info(` ${oldName} → ${newName}`); - await copyVolumeWithVerify(oldName, newName, image); - - logger.info('Old volume preserved. After verifying, reclaim disk with:'); - logger.info(` docker volume rm ${oldName}`); - return 'applied'; - }, -}; diff --git a/tools/cli/src/lib/upgrade/migrations/namespace-volumes.ts b/tools/cli/src/lib/upgrade/migrations/namespace-volumes.ts deleted file mode 100644 index 36b0f3bdcb..0000000000 --- a/tools/cli/src/lib/upgrade/migrations/namespace-volumes.ts +++ /dev/null @@ -1,171 +0,0 @@ -import * as logger from '../../../utils/logger'; -import { docker } from '../../docker/docker'; -import type { Migration, MigrationContext } from '../types'; -import { - copyVolumeWithVerify, - resolveMigrationImage, - stopContainerOrThrow, - volumeExists, - volumeHasData, -} from '../volume-helpers'; - -/** - * Pre-0.2.33 hard-coded project name. Volumes and containers from that era - * were all prefixed with `tale_` / `tale-dev_` / `tale-blue_` / `tale-green_` - * because `docker compose` used the fixed `-p tale` flag. 
- */ -const LEGACY_PROJECT_NAME = 'tale'; - -const DEV_VOLUMES = [ - 'platform-data', - 'db-data', - 'db-backup', - 'rag-data', - 'crawler-data', - 'caddy-data', - 'caddy-config', -]; -const PROD_VOLUMES = [ - 'platform-data', - 'caddy-data', - 'caddy-config', - 'rag-data', - 'crawler-data', - 'db-data', - 'db-backup', -]; - -function buildPairs( - projectId: string, -): Array<{ oldName: string; newName: string }> { - const pairs: Array<{ oldName: string; newName: string }> = []; - for (const v of DEV_VOLUMES) { - pairs.push({ - oldName: `${LEGACY_PROJECT_NAME}-dev_${v}`, - newName: `${projectId}-dev_${v}`, - }); - } - for (const v of PROD_VOLUMES) { - pairs.push({ - oldName: `${LEGACY_PROJECT_NAME}_${v}`, - newName: `${projectId}_${v}`, - }); - } - return pairs; -} - -async function findRunningLegacyContainers(): Promise { - const r = await docker( - 'ps', - '--filter', - 'name=tale-', - '--format', - '{{.Names}}', - ); - if (!r.success) return []; - const legacyPattern = - /^tale(-(dev|blue|green))?-(platform|db|rag|crawler|proxy)(-(blue|green))?$/; - return r.stdout - .split('\n') - .map((l) => l.trim()) - .filter((name) => name && legacyPattern.test(name)); -} - -/** Pairs where the end-state does NOT yet hold and there is something to copy. - * - * End-state for this migration: the destination volume exists and has data. - * Therefore a pair is pending iff: - * - destination is absent or empty, AND - * - source exists and has data. - * - * If the destination already has data we always skip — regardless of whether - * a sentinel is present, regardless of what legacy volumes sit on the host. - * This is the key idempotency guarantee: a project whose namespaced volumes - * were populated by the compose stack directly (v0.2.33+ fresh init, or a - * previous successful migration) must never be touched by this migration - * again, even if stray `tale-dev_*` volumes from unrelated installs exist. 
*/ -async function findPending( - projectId: string, - image: string, -): Promise> { - const all = buildPairs(projectId); - const pending: Array<{ oldName: string; newName: string }> = []; - for (const p of all) { - // End-state check first: if the destination already has data, this pair - // is satisfied. We do not trust, nor require, the sentinel here — a - // populated destination that predates the migration infrastructure - // (v0.2.33 fresh inits) will legitimately lack one. - if ( - (await volumeExists(p.newName)) && - (await volumeHasData(p.newName, image)) - ) { - continue; - } - // Destination is absent or empty. Only migrate if there's actual source - // data to copy — an empty source would just recreate an empty dst. - if (!(await volumeExists(p.oldName))) continue; - if (!(await volumeHasData(p.oldName, image))) continue; - pending.push(p); - } - return pending; -} - -export const namespaceVolumesMigration: Migration = { - id: 'namespace-volumes', - introducedIn: '0.2.33', - description: (ctx: MigrationContext) => - `Rename legacy Docker volumes (tale_* / tale-dev_*) to the per-project scope (${ctx.projectId}_*).`, - - async detect(ctx: MigrationContext): Promise { - // Cheap shortcut: if no legacy source volume exists anywhere on the host, - // there is nothing we could ever copy. Bail before pulling an image. - const all = buildPairs(ctx.projectId); - let anySourceExists = false; - for (const p of all) { - if (await volumeExists(p.oldName)) { - anySourceExists = true; - break; - } - } - if (!anySourceExists) return false; - // Otherwise defer to findPending — it applies the full end-state check - // per pair and is the single source of truth for "do we have work?". - const image = await resolveMigrationImage(); - return (await findPending(ctx.projectId, image)).length > 0; - }, - - async requiredStops(): Promise { - // Legacy compose project names we might need to bring down. These were - // the only names in use pre-0.2.33. 
- return ['tale', 'tale-blue', 'tale-green', 'tale-dev']; - }, - - async apply(ctx, { dryRun }) { - if (dryRun) return 'noop'; - - // Extra safety: never run while legacy containers are live. The runner - // should already have stopped them via requiredStops → performStops, but - // a running container here means the caller's stop logic didn't fully - // cover the surface. - const running = await findRunningLegacyContainers(); - if (running.length > 0) { - // Stop them individually; if that fails, bail out loudly rather than - // copying over a live volume. - for (const name of running) await stopContainerOrThrow(name); - } - - const image = await resolveMigrationImage(); - const pending = await findPending(ctx.projectId, image); - if (pending.length === 0) return 'noop'; - - for (const { oldName, newName } of pending) { - logger.info(` ${oldName} → ${newName}`); - await copyVolumeWithVerify(oldName, newName, image); - } - - logger.info('Old volumes preserved. After verifying, reclaim disk with:'); - const oldNames = pending.map((p) => p.oldName).join(' '); - logger.info(` docker volume rm ${oldNames}`); - return 'applied'; - }, -}; diff --git a/tools/cli/src/lib/upgrade/migrations/split-convex.ts b/tools/cli/src/lib/upgrade/migrations/split-convex.ts deleted file mode 100644 index 7477e8dc1e..0000000000 --- a/tools/cli/src/lib/upgrade/migrations/split-convex.ts +++ /dev/null @@ -1,154 +0,0 @@ -import * as logger from '../../../utils/logger'; -import { docker } from '../../docker/docker'; -import type { Migration, MigrationContext } from '../types'; -import { - copyVolumeWithVerify, - resolveMigrationImage, - stopContainerOrThrow, - volumeExists, - volumeHasData, -} from '../volume-helpers'; - -interface SplitPair { - oldName: string; - newName: string; - scope: 'prod' | 'dev'; -} - -function buildPairs(projectId: string): SplitPair[] { - return [ - { - oldName: `${projectId}_platform-data`, - newName: `${projectId}_convex-data`, - scope: 'prod', - }, - { - 
oldName: `${projectId}-dev_platform-data`, - newName: `${projectId}-dev_convex-data`, - scope: 'dev', - }, - ]; -} - -/** Pairs where the end-state does NOT yet hold and there is something to copy. - * - * End-state: the new `*_convex-data` volume exists and has data. - * - * A pair is pending iff the destination is absent or empty AND the old - * platform-data volume exists with data. We deliberately do not require a - * sentinel on the destination — a destination populated by the compose - * stack (e.g. a fresh install of a CLI that already ships the split layout) - * legitimately has no sentinel and must be left alone. */ -async function findPending( - projectId: string, - image: string, -): Promise { - const pending: SplitPair[] = []; - for (const p of buildPairs(projectId)) { - // End-state check: if dst already has data, this pair is satisfied. - if ( - (await volumeExists(p.newName)) && - (await volumeHasData(p.newName, image)) - ) { - continue; - } - if (!(await volumeExists(p.oldName))) continue; - if (!(await volumeHasData(p.oldName, image))) continue; - pending.push(p); - } - return pending; -} - -async function findContainersUsingPlatformData( - projectId: string, -): Promise { - // Match platform/convex containers under both prod and dev project scopes. 
- const prefixes = [`${projectId}-`, `${projectId}-dev-`]; - const names: string[] = []; - for (const prefix of prefixes) { - const r = await docker( - 'ps', - '-a', - '--filter', - `name=${prefix}`, - '--format', - '{{.Names}}', - ); - if (!r.success) continue; - for (const raw of r.stdout.split('\n')) { - const n = raw.trim(); - if (!n) continue; - if (!/(platform|convex)/.test(n)) continue; - if (!names.includes(n)) names.push(n); - } - } - return names; -} - -export const splitConvexMigration: Migration = { - id: 'split-convex', - introducedIn: '0.3.0', - description: (ctx: MigrationContext) => - `Copy ${ctx.projectId}_platform-data into ${ctx.projectId}_convex-data so the new dedicated Convex service can own its data volume.`, - - async detect(ctx: MigrationContext): Promise { - // Cheap check first: if no legacy platform-data volume exists at all, - // there is nothing we could ever copy — bail before pulling an image. - const pairs = buildPairs(ctx.projectId); - let anyOldExists = false; - for (const p of pairs) { - if (await volumeExists(p.oldName)) { - anyOldExists = true; - break; - } - } - if (!anyOldExists) return false; - const image = await resolveMigrationImage(); - return (await findPending(ctx.projectId, image)).length > 0; - }, - - async requiredStops(ctx): Promise { - // Individual container names, not compose project names — the runner - // passes these through to its caller's stop routine. `tale deploy` / - // `tale start` both issue `docker compose -p down` for compose - // projects; for individual containers we still want them stopped, so we - // surface them verbatim and let the caller decide how to stop. 
- return findContainersUsingPlatformData(ctx.projectId); - }, - - async apply(ctx, { dryRun }) { - if (dryRun) return 'noop'; - - const image = await resolveMigrationImage(); - const pending = await findPending(ctx.projectId, image); - if (pending.length === 0) return 'noop'; - - // Defensive: any platform/convex container that's still running at this - // point holds open file handles against the volume we're about to copy. - // The runner should have stopped them, but verify. - for (const name of await findContainersUsingPlatformData(ctx.projectId)) { - const inspect = await docker( - 'inspect', - '--format', - '{{.State.Running}}', - name, - ); - if (inspect.success && inspect.stdout.trim() === 'true') { - await stopContainerOrThrow(name); - } - } - - for (const p of pending) { - logger.info(` [${p.scope}] ${p.oldName} → ${p.newName}`); - await copyVolumeWithVerify(p.oldName, p.newName, image); - } - - logger.info( - 'Legacy platform-data volumes are preserved. After verifying the new convex service is healthy, reclaim disk with:', - ); - for (const p of pending) { - logger.info(` docker volume rm ${p.oldName}`); - } - return 'applied'; - }, -}; diff --git a/tools/cli/src/lib/upgrade/registry.ts b/tools/cli/src/lib/upgrade/registry.ts deleted file mode 100644 index b691e31e95..0000000000 --- a/tools/cli/src/lib/upgrade/registry.ts +++ /dev/null @@ -1,23 +0,0 @@ -import { adoptConvexStatefulMigration } from './migrations/adopt-convex-stateful'; -import { namespaceCaddyConfigMigration } from './migrations/namespace-caddy-config'; -import { namespaceVolumesMigration } from './migrations/namespace-volumes'; -import { splitConvexMigration } from './migrations/split-convex'; -import type { Migration } from './types'; - -/** - * Ordered registry of all known upgrade steps. - * - * This is NOT a per-release changelog — each entry is a one-shot data - * migration the CLI knows how to apply. 
Steps are idempotent (gated by - * detect()), so the registry only grows when a release actually needs to - * mutate user state on the host (Docker volumes, on-disk files, …). - * - * Order matters: each entry may assume every earlier entry has run (or - * reported "nothing to do" via detect()). Never reorder; only append. - */ -export const MIGRATIONS: readonly Migration[] = [ - namespaceVolumesMigration, // v0.2.33 — rename tale_* → ${projectId}_* - splitConvexMigration, // v0.3.0 — platform-data → convex-data - namespaceCaddyConfigMigration, // v0.3.1 — fix: caddy-config missed by namespace-volumes - adoptConvexStatefulMigration, // v0.3.1 — convex from color→stateful project -]; diff --git a/tools/cli/src/lib/upgrade/runner.test.ts b/tools/cli/src/lib/upgrade/runner.test.ts deleted file mode 100644 index 34361e41eb..0000000000 --- a/tools/cli/src/lib/upgrade/runner.test.ts +++ /dev/null @@ -1,230 +0,0 @@ -import { afterEach, describe, expect, mock, test } from 'bun:test'; - -import type { ApplyOutcome, Migration, MigrationContext } from './types'; - -// --- Mocks --- - -const recordAppliedMock = mock(); - -mock.module('./state', () => ({ - recordApplied: recordAppliedMock, -})); - -mock.module('../../utils/logger', () => ({ - blank: mock(), - header: mock(), - info: mock(), - notice: mock(), - step: mock(), - success: mock(), - warn: mock(), - debug: mock(), - error: mock(), -})); - -mock.module('../../utils/confirm', () => ({ - confirm: mock(() => Promise.resolve(true)), -})); - -// --- Helpers --- - -const CTX: MigrationContext = { - projectId: 'test-project', - projectDir: '/tmp/test-project', -}; - -function makeMigration( - id: string, - opts: { - detect?: boolean; - apply?: ApplyOutcome; - stops?: string[]; - detectFn?: () => Promise; - } = {}, -): Migration { - return { - id, - introducedIn: '0.0.1', - description: `Migration ${id}`, - detect: opts.detectFn ?? mock(() => Promise.resolve(opts.detect ?? 
false)), - requiredStops: mock(() => Promise.resolve(opts.stops ?? [])), - apply: mock(() => Promise.resolve(opts.apply ?? 'applied')), - }; -} - -// --- Import after mocks --- - -const { runPendingMigrations, planPendingMigrations } = - await import('./runner'); - -afterEach(() => { - recordAppliedMock.mockReset(); -}); - -// --- Tests --- - -describe('runPendingMigrations', () => { - test('returns proceed=true with no applied when nothing is pending', async () => { - const m = makeMigration('a', { detect: false }); - const result = await runPendingMigrations([m], CTX, { - context: 'deploy', - assumeYes: true, - }); - - expect(result).toEqual({ proceed: true, applied: [], declined: false }); - expect(m.detect).toHaveBeenCalledWith(CTX); - expect(m.apply).not.toHaveBeenCalled(); - }); - - test('applies a new pending migration', async () => { - const m = makeMigration('a', { detect: true, apply: 'applied' }); - const result = await runPendingMigrations([m], CTX, { - context: 'deploy', - assumeYes: true, - }); - - expect(result.proceed).toBe(true); - expect(result.applied).toEqual(['a']); - expect(m.apply).toHaveBeenCalledWith(CTX, { dryRun: false }); - expect(recordAppliedMock).toHaveBeenCalledTimes(1); - expect(recordAppliedMock.mock.calls[0][1]).toMatchObject({ id: 'a' }); - }); - - test('re-detects and re-applies a drifted migration (detect returns true even if previously recorded)', async () => { - // Simulate a migration whose end-state has drifted: detect() returns true - // even though recordApplied would be a no-op (already recorded). - // The key assertion: detect() IS called, apply() IS called. 
- const m = makeMigration('split-convex', { - detect: true, - apply: 'applied', - }); - - const result = await runPendingMigrations([m], CTX, { - context: 'deploy', - assumeYes: true, - }); - - expect(result.proceed).toBe(true); - expect(result.applied).toEqual(['split-convex']); - expect(m.detect).toHaveBeenCalledTimes(1); - expect(m.apply).toHaveBeenCalledTimes(1); - }); - - test('skips migrations whose detect() returns false', async () => { - const satisfied = makeMigration('done', { detect: false }); - const pending = makeMigration('todo', { detect: true, apply: 'applied' }); - - const result = await runPendingMigrations([satisfied, pending], CTX, { - context: 'deploy', - assumeYes: true, - }); - - expect(result.applied).toEqual(['todo']); - expect(satisfied.detect).toHaveBeenCalled(); - expect(satisfied.apply).not.toHaveBeenCalled(); - }); - - test('preserves registry order for mixed pending migrations', async () => { - const a = makeMigration('a', { detect: true, apply: 'applied' }); - const b = makeMigration('b', { detect: false }); - const c = makeMigration('c', { detect: true, apply: 'applied' }); - - const result = await runPendingMigrations([a, b, c], CTX, { - context: 'deploy', - assumeYes: true, - }); - - expect(result.applied).toEqual(['a', 'c']); - // Verify order: a applied before c - const aCallOrder = (a.apply as ReturnType).mock - .invocationCallOrder[0]; - const cCallOrder = (c.apply as ReturnType).mock - .invocationCallOrder[0]; - expect(aCallOrder).toBeLessThan(cCallOrder); - }); - - test('propagates detect() errors with context', async () => { - const m = makeMigration('bad', { - detectFn: () => Promise.reject(new Error('docker not found')), - }); - - await expect( - runPendingMigrations([m], CTX, { - context: 'deploy', - assumeYes: true, - }), - ).rejects.toThrow('migration bad: detect() failed: docker not found'); - }); - - test('collects requiredStops from all pending migrations', async () => { - const a = makeMigration('a', { - detect: 
true, - apply: 'applied', - stops: ['container-1'], - }); - const b = makeMigration('b', { - detect: true, - apply: 'applied', - stops: ['container-2', 'container-1'], - }); - - const stopsReceived: string[][] = []; - const performStops = mock((s: string[]) => { - stopsReceived.push(s); - return Promise.resolve(); - }); - - await runPendingMigrations([a, b], CTX, { - context: 'deploy', - assumeYes: true, - performStops, - }); - - expect(performStops).toHaveBeenCalledTimes(1); - expect(stopsReceived[0]).toContain('container-1'); - expect(stopsReceived[0]).toContain('container-2'); - expect(stopsReceived[0]).toHaveLength(2); // deduplicated - }); - - test('handles noop outcome from apply()', async () => { - const m = makeMigration('already-ok', { detect: true, apply: 'noop' }); - - const result = await runPendingMigrations([m], CTX, { - context: 'deploy', - assumeYes: true, - }); - - expect(result.applied).toEqual(['already-ok']); - expect(recordAppliedMock).toHaveBeenCalledTimes(1); - }); - - test('dry-run prints plan but does not apply', async () => { - const m = makeMigration('a', { detect: true }); - - const result = await runPendingMigrations([m], CTX, { - context: 'deploy', - assumeYes: true, - dryRun: true, - }); - - expect(result.applied).toEqual([]); - expect(m.apply).not.toHaveBeenCalled(); - expect(recordAppliedMock).not.toHaveBeenCalled(); - }); -}); - -describe('planPendingMigrations', () => { - test('returns empty array when nothing is pending', async () => { - const m = makeMigration('a', { detect: false }); - const result = await planPendingMigrations([m], CTX); - expect(result).toEqual([]); - }); - - test('returns pending migrations without applying', async () => { - const m = makeMigration('a', { detect: true }); - const result = await planPendingMigrations([m], CTX); - expect(result).toHaveLength(1); - expect(result[0].id).toBe('a'); - expect(m.apply).not.toHaveBeenCalled(); - }); -}); diff --git a/tools/cli/src/lib/upgrade/runner.ts 
b/tools/cli/src/lib/upgrade/runner.ts deleted file mode 100644 index 5fff0cfd7f..0000000000 --- a/tools/cli/src/lib/upgrade/runner.ts +++ /dev/null @@ -1,199 +0,0 @@ -import pkg from '../../../package.json'; -import { confirm } from '../../utils/confirm'; -import * as logger from '../../utils/logger'; -import { recordApplied } from './state'; -import type { Migration, MigrationContext } from './types'; - -/** - * Compute the pending subset of migrations: those whose `detect()` returns - * true for the current observable state. - * - * Per the contract in types.ts, `detect()` is the sole source of truth — - * `migrations.json` is a log, not a gate. A migration whose end-state has - * drifted (e.g. a volume was deleted after the migration was recorded) will - * be re-detected and re-applied automatically. - * - * Order is preserved from the registry — callers must not reorder. - */ -async function computePending( - registry: readonly Migration[], - ctx: MigrationContext, -): Promise { - const pending: Migration[] = []; - for (const m of registry) { - try { - if (await m.detect(ctx)) pending.push(m); - } catch (err) { - // A failing detect() should not silently drop the migration — surface - // it loudly so operators can investigate before we either apply an - // unsafe migration or skip a necessary one. - throw new Error( - `migration ${m.id}: detect() failed: ${err instanceof Error ? err.message : String(err)}`, - { cause: err }, - ); - } - } - return pending; -} - -function resolveDescription(m: Migration, ctx: MigrationContext): string { - return typeof m.description === 'function' - ? 
m.description(ctx) - : m.description; -} - -function printPlan( - pending: readonly Migration[], - stops: readonly string[], - ctx: MigrationContext, -): void { - logger.blank(); - logger.header(`${pending.length} pending migration(s)`); - for (const m of pending) { - logger.info(` • ${m.id} (introduced in ${m.introducedIn})`); - logger.info(` ${resolveDescription(m, ctx)}`); - } - if (stops.length > 0) { - logger.blank(); - logger.info('The following compose projects / containers will be stopped:'); - for (const s of stops) logger.info(` - ${s}`); - } - logger.blank(); -} - -function isNonInteractive(): boolean { - return !(process.stdin.isTTY && process.stdout.isTTY); -} - -interface RunPendingOptions { - /** Where we're being called from — used in messages only. */ - context: 'start' | 'deploy' | 'upgrade'; - /** Skip the interactive prompt and proceed. Required for non-TTY use. */ - assumeYes?: boolean; - /** Print the plan but apply nothing. */ - dryRun?: boolean; - /** - * Callback invoked with the union of `requiredStops()` across pending - * migrations, before apply runs. Callers implement the actual - * `docker compose -p down` since that behaviour varies between - * start and deploy call sites. - */ - performStops?: (stops: string[]) => Promise; -} - -interface RunPendingResult { - /** True if the caller should keep executing the original command. */ - proceed: boolean; - /** Migrations that ran successfully this pass. */ - applied: string[]; - /** True if migrations were pending but the user declined to apply. */ - declined: boolean; -} - -/** - * Runs the pending-migration pipeline. - * - * - If nothing is pending → proceed=true, no side effects. - * - If pending and interactive → print plan, prompt [y/N]: - * yes → apply all in order, record each, proceed=true - * no → exit cleanly with proceed=false, declined=true, no side effects - * - If pending and non-TTY and not assumeYes → throw (caller turns this - * into a process exit with a clear error). 
- * - If pending and (TTY or assumeYes) → apply all in order. - * - * This is the single entry point used by `tale start` and `tale deploy`. - */ -export async function runPendingMigrations( - registry: readonly Migration[], - ctx: MigrationContext, - opts: RunPendingOptions, -): Promise { - const pending = await computePending(registry, ctx); - if (pending.length === 0) { - return { proceed: true, applied: [], declined: false }; - } - - // Collect the union of requiredStops across pending migrations so we can - // show the full blast radius up front. - const stopsSet = new Set(); - for (const m of pending) { - for (const s of await m.requiredStops(ctx)) stopsSet.add(s); - } - const stops = [...stopsSet]; - - printPlan(pending, stops, ctx); - - if (opts.dryRun) { - logger.notice( - 'DRY RUN — migrations NOT applied. Re-run without --dry-run to apply.', - ); - return { proceed: true, applied: [], declined: false }; - } - - // Decide whether to proceed. - let approved = opts.assumeYes === true; - if (!approved) { - if (isNonInteractive()) { - throw new Error( - 'Pending migrations detected but stdin/stdout is not a TTY. ' + - 'Re-run this command in an interactive shell to confirm, or pass --yes to accept non-interactively.', - ); - } - approved = await confirm( - `Apply ${pending.length} pending migration(s) now?`, - ); - if (!approved) { - logger.info('Migration declined. Nothing changed.'); - return { proceed: false, applied: [], declined: true }; - } - } - - // Stop everything the pending migrations need down. - if (stops.length > 0 && opts.performStops) { - logger.step('Stopping containers before migration...'); - await opts.performStops(stops); - } - - // Apply in order. Record each as soon as it succeeds so a mid-pipeline - // failure leaves us resumable. 
- const applied: string[] = []; - for (const m of pending) { - logger.step(`Applying migration: ${m.id}`); - const outcome = await m.apply(ctx, { dryRun: false }); - await recordApplied(ctx.projectDir, { - id: m.id, - at: new Date().toISOString(), - cliVersion: pkg.version, - }); - if (outcome === 'applied') { - logger.success(`Migration ${m.id} applied.`); - } else { - logger.info(`Migration ${m.id} was a no-op (already satisfied).`); - } - applied.push(m.id); - } - - return { proceed: true, applied, declined: false }; -} - -/** - * Plan-only variant used by `tale upgrade`. Never stops containers, never - * runs apply(). Just logs the plan so operators know what `tale start` / - * `tale deploy` will do next. - */ -export async function planPendingMigrations( - registry: readonly Migration[], - ctx: MigrationContext, -): Promise { - const pending = await computePending(registry, ctx); - if (pending.length === 0) return []; - const stopsSet = new Set(); - for (const m of pending) { - for (const s of await m.requiredStops(ctx)) stopsSet.add(s); - } - printPlan(pending, [...stopsSet], ctx); - logger.notice( - 'Run "tale start" (dev) or "tale deploy" (prod) to apply — the CLI will prompt before changing anything.', - ); - return pending; -} diff --git a/tools/cli/src/lib/upgrade/state.ts b/tools/cli/src/lib/upgrade/state.ts deleted file mode 100644 index 33627202ff..0000000000 --- a/tools/cli/src/lib/upgrade/state.ts +++ /dev/null @@ -1,94 +0,0 @@ -import { existsSync } from 'node:fs'; -import { mkdir, readFile, rename, unlink, writeFile } from 'node:fs/promises'; -import { dirname, join } from 'node:path'; - -import * as logger from '../../utils/logger'; -import type { AppliedMigration, MigrationsState } from './types'; - -const STATE_FILENAME = 'migrations.json'; -/** Legacy one-bit marker written by v0.2.33. Migrated on first read. 
*/ -const LEGACY_MARKER = 'migration-pending'; - -function statePath(projectDir: string): string { - return join(projectDir, '.tale', STATE_FILENAME); -} - -function legacyMarkerPath(projectDir: string): string { - return join(projectDir, '.tale', LEGACY_MARKER); -} - -/** - * Read the applied-migration list from `.tale/migrations.json`. If the file - * doesn't exist but the legacy `.tale/migration-pending` marker is present, - * treat that as "no migrations applied yet" (the legacy marker carried no - * per-migration identity, so we must let each registered migration's detect() - * re-discover any real pending work) and delete the legacy marker. - */ -async function readMigrationsState( - projectDir: string, -): Promise { - const path = statePath(projectDir); - if (!existsSync(path)) { - const legacyPath = legacyMarkerPath(projectDir); - if (existsSync(legacyPath)) { - logger.debug( - `Found legacy migration marker at ${legacyPath}; seeding empty migrations.json`, - ); - await unlink(legacyPath).catch(() => { - /* best-effort */ - }); - } - return { applied: [] }; - } - try { - const raw = await readFile(path, 'utf8'); - const parsed = JSON.parse(raw) as Partial; - if (!parsed.applied || !Array.isArray(parsed.applied)) { - return { applied: [] }; - } - return { applied: parsed.applied }; - } catch (err) { - // Preserve the corrupt file for postmortem rather than silently losing - // history. A truncated write (crash, disk full) can land here; the - // operator will want to see the bytes that were there. - const backupPath = `${path}.corrupted-${new Date() - .toISOString() - .replace(/[:.]/g, '-')}`; - await rename(path, backupPath).catch(() => { - /* best-effort — if even rename fails, log and continue */ - }); - logger.warn( - `Could not parse ${path}: ${err instanceof Error ? err.message : String(err)}. 
Moved to ${backupPath} and treating as empty.`, - ); - return { applied: [] }; - } -} - -export async function writeMigrationsState( - projectDir: string, - state: MigrationsState, -): Promise { - const path = statePath(projectDir); - await mkdir(dirname(path), { recursive: true }); - // Atomic write: write to a sibling tmp file then rename. rename(2) is - // atomic on POSIX when source and destination are on the same filesystem, - // so a crash during write leaves the previous migrations.json intact - // instead of producing a truncated/parseable-as-empty file. - const tmpPath = `${path}.tmp`; - await writeFile(tmpPath, `${JSON.stringify(state, null, 2)}\n`); - await rename(tmpPath, path); -} - -export async function recordApplied( - projectDir: string, - entry: AppliedMigration, -): Promise { - const state = await readMigrationsState(projectDir); - if (state.applied.some((a) => a.id === entry.id)) { - // Already recorded; nothing to do. This can happen on an idempotent - // re-run of a migration whose detect() returned true by accident. - return; - } - state.applied.push(entry); - await writeMigrationsState(projectDir, state); -} diff --git a/tools/cli/src/lib/upgrade/types.ts b/tools/cli/src/lib/upgrade/types.ts deleted file mode 100644 index dc4107822a..0000000000 --- a/tools/cli/src/lib/upgrade/types.ts +++ /dev/null @@ -1,71 +0,0 @@ -/** Context passed to every migration's detect/apply/requiredStops hook. */ -export interface MigrationContext { - projectId: string; - projectDir: string; -} - -/** Outcome of a single apply() call. */ -export type ApplyOutcome = 'applied' | 'noop'; - -/** - * A single migration step registered in the pipeline. - * - * Every migration must be **idempotent**, and its `detect` must be a pure - * feature check against observable end-state: - * - * - `detect` must return `true` only when the migration's postcondition - * does NOT already hold. 
Mere existence of source artifacts is not - * enough; the destination must also be absent/incomplete. Stray - * legacy volumes on the host from unrelated installs must never - * cause `detect` to return `true`. - * - `apply` must be safe to re-run at any point — on a fully satisfied - * system, on a partially-migrated system after an interruption, and - * on a freshly-initialised system. It must independently re-check - * each unit of work against the target state and skip units already - * satisfied. - * - `detect` and `apply` must NOT consult `migrations.json`, CLI - * versions, or any other external history. Those are caches/logs, - * not sources of truth — the filesystem/volume/container state is. - * - * A migration that can't express its precondition in terms of observable - * end-state is a bug. - */ -export interface Migration { - /** Stable id, used as the key in `.tale/migrations.json`. */ - id: string; - /** CLI version that introduced this migration (for logs only). */ - introducedIn: string; - /** - * One-line human-readable description, shown in plan output. May be a - * static string OR a function of the context when the description needs - * to interpolate projectId etc. — plain strings never get template-literal - * expansion at use site, so use the function form whenever the text - * contains per-project names. - */ - description: string | ((ctx: MigrationContext) => string); - /** Returns true iff this migration has work to do given current state. */ - detect(ctx: MigrationContext): Promise; - /** - * Docker compose project names / container names that must be stopped - * before apply(). The runner collects the union across pending migrations - * and stops them once. - */ - requiredStops(ctx: MigrationContext): Promise; - /** Apply the migration. Must throw on any error. */ - apply( - ctx: MigrationContext, - opts: { dryRun: boolean }, - ): Promise; -} - -/** Persisted record of a successfully-applied migration. 
*/ -export interface AppliedMigration { - id: string; - at: string; - cliVersion: string; -} - -/** Shape of `.tale/migrations.json`. */ -export interface MigrationsState { - applied: AppliedMigration[]; -} diff --git a/tools/cli/src/lib/upgrade/volume-helpers.ts b/tools/cli/src/lib/upgrade/volume-helpers.ts deleted file mode 100644 index c8d3f6a6e9..0000000000 --- a/tools/cli/src/lib/upgrade/volume-helpers.ts +++ /dev/null @@ -1,333 +0,0 @@ -import * as logger from '../../utils/logger'; -import { docker } from '../docker/docker'; - -/** - * Sentinel file written inside a destination volume once the `cp -a` completes - * successfully. Presence guarantees a complete migration; absence (with data - * present) indicates a partial/interrupted copy that must be recovered. - */ -const MIGRATION_SENTINEL = '.tale-migration-complete'; - -export async function volumeExists(name: string): Promise { - const r = await docker('volume', 'inspect', name); - return r.success; -} - -export async function volumeHasData( - name: string, - image: string, -): Promise { - const r = await docker( - 'run', - '--rm', - '-v', - `${name}:/vol:ro`, - '--entrypoint', - 'sh', - image, - '-c', - 'ls -A /vol | head -1', - ); - return r.success && r.stdout.trim().length > 0; -} - -async function volumeHasSentinel( - name: string, - image: string, -): Promise { - const r = await docker( - 'run', - '--rm', - '-v', - `${name}:/vol:ro`, - '--entrypoint', - 'sh', - image, - '-c', - `test -f /vol/${MIGRATION_SENTINEL}`, - ); - return r.success; -} - -async function volumeFileCount( - name: string, - image: string, -): Promise { - // `cp -a` preserves regular files, directories, and symlinks but silently - // skips sockets, FIFOs, and device nodes. To keep src/dst counts - // comparable, only count things cp will actually copy: regular files and - // symlinks. 
Exclude the migration sentinel itself so chained migrations - // (whose source may already carry a sentinel from an earlier pipeline - // step) compare cleanly — sentinel presence is verified separately via - // volumeHasSentinel. - const r = await docker( - 'run', - '--rm', - '-v', - `${name}:/vol:ro`, - '--entrypoint', - 'sh', - image, - '-c', - `find /vol \\( -type f -o -type l \\) ! -name '${MIGRATION_SENTINEL}' | wc -l`, - ); - if (!r.success) return null; - const n = parseInt(r.stdout.trim(), 10); - return Number.isFinite(n) ? n : null; -} - -/** Diagnostic: list relative paths present in `/src` but not in `/dst`, plus - * any special (non-regular, non-symlink, non-dir) files in src that cp -a - * would have skipped. Best-effort — used only on verification failure. */ -async function diffVolumes( - src: string, - dst: string, - image: string, -): Promise { - const r = await docker( - 'run', - '--rm', - '-v', - `${src}:/src:ro`, - '-v', - `${dst}:/dst:ro`, - '--entrypoint', - 'sh', - image, - '-c', - [ - '(cd /src && find . \\( -type f -o -type l \\) | sort) > /tmp/s', - '(cd /dst && find . \\( -type f -o -type l \\) | sort) > /tmp/d', - 'echo "--- src counts ---"', - 'echo "regular files: $(find /src -type f | wc -l)"', - 'echo "symlinks: $(find /src -type l | wc -l)"', - 'echo "dirs: $(find /src -type d | wc -l)"', - 'echo "special: $(find /src ! -type f ! -type l ! -type d | wc -l)"', - 'echo "sentinel: $(ls -la /src/.tale-migration-complete 2>/dev/null || echo absent)"', - 'echo "--- dst counts ---"', - 'echo "regular files: $(find /dst -type f | wc -l)"', - 'echo "symlinks: $(find /dst -type l | wc -l)"', - 'echo "dirs: $(find /dst -type d | wc -l)"', - 'echo "special: $(find /dst ! -type f ! -type l ! 
-type d | wc -l)"', - 'echo "sentinel: $(ls -la /dst/.tale-migration-complete 2>/dev/null || echo absent)"', - 'echo "--- in src but not dst (first 20) ---"', - 'comm -23 /tmp/s /tmp/d | head -20', - 'echo "--- in dst but not src (first 20) ---"', - 'comm -13 /tmp/s /tmp/d | head -20', - ].join(' && '), - ); - if (!r.success) return `diff failed: ${r.stderr.trim()}`; - return r.stdout.trim(); -} - -/** Rename a volume's contents aside by moving them into a timestamped sub-dir. - * Safer than deleting: if we later discover we wiped legitimate data, the - * operator can recover by hand. Note: docker volumes can't be renamed, so we - * create a sibling *-partial- volume and copy the unsentinelled contents - * into it before wiping the destination. */ -async function moveContentsToBackupVolume( - name: string, - image: string, -): Promise { - const ts = new Date().toISOString().replace(/[:.]/g, '-'); - const backup = `${name}.partial-${ts}`; - const created = await docker('volume', 'create', backup); - if (!created.success) { - logger.warn( - ` failed to create backup volume ${backup}: ${created.stderr.trim()}`, - ); - return null; - } - const copy = await docker( - 'run', - '--rm', - '-v', - `${name}:/src:ro`, - '-v', - `${backup}:/dst`, - '--entrypoint', - 'sh', - image, - '-c', - 'cp -a /src/. /dst/', - ); - if (!copy.success) { - logger.warn( - ` failed to copy partial contents into ${backup}: ${copy.stderr.trim()}`, - ); - return null; - } - // Fail-fast wipe: `find -delete` without `-e` continues past errors and can - // leave the destination half-wiped; a subsequent copyVolumeWithVerify would - // then see corrupted state. Use `sh -e` so any rm failure aborts loudly. - const wipe = await docker( - 'run', - '--rm', - '-v', - `${name}:/vol`, - '--entrypoint', - 'sh', - image, - '-ec', - 'cd /vol && find . 
-mindepth 1 -maxdepth 1 -exec rm -rf -- {} +', - ); - if (!wipe.success) { - logger.warn( - ` failed to wipe ${name} (destination may be partial): ${wipe.stderr.trim()}`, - ); - return null; - } - return backup; -} - -/** Find an image that is already available locally for running throwaway - * copy containers, avoiding a network pull. Prefers images we know Tale - * itself ships so plain `docker image inspect` succeeds. */ -export async function resolveMigrationImage(): Promise { - const candidates = ['tale-platform', 'tale-proxy', 'alpine']; - for (const candidate of candidates) { - const exact = await docker('image', 'inspect', candidate); - if (exact.success) return candidate; - const lookup = await docker( - 'images', - '--format', - '{{.Repository}}:{{.Tag}}', - ); - if (lookup.success) { - const match = lookup.stdout - .split('\n') - .find((line) => line.includes(candidate) && !line.includes('')); - if (match) return match.trim(); - } - } - // Final fallback: alpine — may trigger a pull, but docker run handles that - // transparently. - return 'alpine'; -} - -/** Stop a container and wait for it to exit. Treats failure as fatal so - * callers never run a volume copy against a live container. */ -export async function stopContainerOrThrow(name: string): Promise { - const stop = await docker('stop', '-t', '30', name); - if (!stop.success) { - throw new Error( - `failed to stop container ${name}: ${stop.stderr.trim() || 'unknown error'}`, - ); - } - const waited = await docker('wait', name); - if (!waited.success) { - throw new Error( - `container ${name} did not confirm shutdown: ${waited.stderr.trim() || 'unknown error'}`, - ); - } -} - -/** - * Copy the contents of one volume into another, verify with a strict file - * count check (src vs dst-minus-sentinel must match exactly), and mark the - * destination with the sentinel file only on success. 
- * - * If the destination already has data but no sentinel, it is moved aside - * into a timestamped backup volume rather than wiped, so the operator can - * recover manually if the earlier state was actually legitimate. - */ -export async function copyVolumeWithVerify( - src: string, - dst: string, - image: string, -): Promise { - if (!(await volumeExists(dst))) { - const created = await docker('volume', 'create', dst); - if (!created.success) { - throw new Error( - `failed to create destination volume ${dst}: ${created.stderr.trim()}`, - ); - } - } else if (await volumeHasData(dst, image)) { - if (await volumeHasSentinel(dst, image)) { - // Already migrated — caller should have detected this and skipped. We - // treat this as a soft no-op rather than an error. - logger.debug( - `${dst} already has migration sentinel; skipping re-copy in copyVolumeWithVerify`, - ); - return; - } - // Safety rail: by the time we reach here, the calling migration's - // `detect`/`findPending` has already asserted this destination is NOT - // in its end-state. But if a migration has a detection bug and asks us - // to copy something SMALLER than what's already on the destination, - // this is almost certainly either (a) a stale / unrelated source being - // pulled in, or (b) a logic error in the migration. Either way, silent - // clobbering is wrong — fail loudly and let the operator investigate. - const srcCountPre = await volumeFileCount(src, image); - const dstCountPre = await volumeFileCount(dst, image); - if (srcCountPre != null && dstCountPre != null) { - if (srcCountPre === 0 && dstCountPre > 0) { - throw new Error( - `refusing to overwrite ${dst} (${dstCountPre} files) with empty source ${src}. This looks like a migration detection bug — destination already populated but source is empty.`, - ); - } - if (dstCountPre > srcCountPre * 2) { - throw new Error( - `refusing to overwrite ${dst} (${dstCountPre} files) with much smaller source ${src} (${srcCountPre} files). 
A migration should not replace populated destination data with a substantially smaller source — this looks like a stale/unrelated source volume.`, - ); - } - } - logger.warn( - ` ⚠ ${dst} has data but no sentinel; moving partial contents to backup volume`, - ); - const backup = await moveContentsToBackupVolume(dst, image); - if (!backup) { - throw new Error( - `could not preserve partial contents of ${dst}; aborting migration`, - ); - } - logger.info(` partial contents preserved at volume: ${backup}`); - } - - // Run the copy as root (no --user flag). Destination volume is newly - // created by docker with a root-owned / directory, so a non-root process - // cannot write into it. `cp -a` preserves ownership from source, so files - // populated by the convex container (uid 1001) stay uid 1001. We chown - // the dst root + sentinel explicitly so the app user can read/write at - // the top level when convex later mounts it. - const copy = await docker( - 'run', - '--rm', - '-v', - `${src}:/src:ro`, - '-v', - `${dst}:/dst`, - '--entrypoint', - 'sh', - image, - '-ec', - `cp -a /src/. /dst/ && : > /dst/${MIGRATION_SENTINEL} && chown 1001:1001 /dst /dst/${MIGRATION_SENTINEL}`, - ); - if (!copy.success) { - throw new Error(`copy ${src} → ${dst} failed: ${copy.stderr.trim()}`); - } - - const srcCount = await volumeFileCount(src, image); - const dstCount = await volumeFileCount(dst, image); - if (srcCount == null || dstCount == null) { - throw new Error( - `could not verify file counts for ${src} → ${dst} (src=${srcCount}, dst=${dstCount})`, - ); - } - // Both counts exclude the sentinel itself (see volumeFileCount) so chained - // migrations compare cleanly regardless of whether src already carries a - // sentinel from an earlier pipeline step. - if (dstCount !== srcCount) { - const diff = await diffVolumes(src, dst, image); - throw new Error( - `file count mismatch for ${src} → ${dst}: src=${srcCount}, dst=${dstCount}. 
Refusing to mark migration complete.\n${diff}`, - ); - } - if (!(await volumeHasSentinel(dst, image))) { - throw new Error( - `migration sentinel missing on ${dst} after copy. Refusing to mark migration complete.`, - ); - } - logger.success(` ${src} → ${dst} (${srcCount} files)`); -} From bddf863e6c202e7963cbb21ba508ff4794df0c7c Mon Sep 17 00:00:00 2001 From: larryro <371767072@qq.com> Date: Wed, 27 May 2026 22:55:07 +0800 Subject: [PATCH 02/41] fix(platform,cli,rag,crawler): error-reporting chain + org-aware RAG/Crawler + runbook MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two-round multi-agent review of 5deaa5a8c surfaced ~45 findings across four themes; this commit lands the unified fix in one pass. Wave 1 — Correctness (error-reporting chain + safety guards) - scaffold.ts: seedDomain/seedRetention now return structured { domain, ok, error? } results. scaffoldNewOrganization aggregates and accepts a new `strict` arg — when true, throws on any per-domain failure (used by reseed_all_orgs); when false/omitted, preserves the org-create lenient semantics auth.afterCreateOrganization depends on. Promote path.isAbsolute(TALE_CONFIG_DIR) guard from cleanupOrgFilesystem so seedRetention can't accidentally write into the action's CWD on unset env. Bundle-mode rm-before-copy replaced with staging-dir + atomic-rename. randomUUID suffix on the condemned-dir name (defends against ms-resolution Date.now() collisions). Opportunistic janitor for stale /.deleted-* trees older than 24h. Three previously-empty catches replaced with console.warn lines. - reseed_all_orgs.ts: throw at end of loop when failed > 0 with aggregated failed-slug detail; that propagates through bunx convex run → docker exec exit code → CLI throw. Returns validator added so the action's shape is explicit. Passes strict:true to scaffold so per-domain failures are no longer swallowed silently. 
- tools/cli/src/lib/actions/reseed-all-orgs.ts: add --no-push to bunx convex run; grep-strip the bunx banner (Admin key, emoji separators, blank lines) so the trailing JSON is parseable; parse the result on success and surface succeeded/total counts; throw on docker-exec non-zero (which is what the action's new end-of-loop throw produces). - tools/cli/src/lib/actions/deploy.ts: stageOrgIntoDir filter now skips dotfiles (.git, .DS_Store, .vscode, .idea, .tale), node_modules, __pycache__ — the previous filter only excluded .history/ and *.secrets.json at depth ≥ 1, so operators with default/.git/ or macOS .DS_Store would have shipped those into /app/data/. syncProjectFiles now throws on docker cp failure instead of returning, so the outer success("Deployment complete!") no longer prints over a half-pushed state. - Deleted services/platform/convex/migrations/rename_org_slug.ts — under the multi-org model the migration is actively dangerous (renames every org to `default`), and there is no registry/cron tying it to anything. The docblock's "Self-hosted Tale deployments use a single organization" assumption is stale. Wave 2 — Hidden code paths (out-of-sight assumptions that survived 5deaa5a8c) - Python provider loader: load_providers + get_chat_model/ get_embedding_model/get_vision_model + their *_config siblings now REQUIRE org_slug. Path resolves to //providers/ instead of /providers/. BaseServiceSettings + Settings.get_llm_config likewise threaded. Without this, RAG and crawler both died at FastAPI lifespan startup with "No chat model found" against the old flat path. - RAG: RagService rebuilt around a per-org _OrgClients cache with a 15s TTL. DB pool stays singleton; embedding/openai/vision clients and search service are per-org, built lazily on first request for that org. add_document/search/generate/compare_files now take org_slug as first arg. 
Embedding dimensions pinned globally on first org init; subsequent orgs that disagree raise loudly (per-org schema would need per-org DB). - Crawler: uses contextvars instead of explicit threading — a new app/org_context.py exposes set_active_org/get_active_org/ require_org_slug; main.py mounts require_org_slug as a router- level dep on every public router. embedding_service.py rebuilt with per-org cache keyed on slug. Boot-time embedding-dim guard in database.py removed (no org context at lifespan); pgvector enforces dim on insert. vision/openai_client.py and file_parser_service.py read get_active_org() at each settings.get_* call. scheduler.py background task sets "default" with a one-shot warn until per-website org binding lands. - ragFetch: optional orgSlug in init; when set, X-Tale-Org header is forced (cannot be spoofed via init.headers). RAG endpoints that need it (search/generate/upload/compare-files) enforce via Depends(require_org_slug); status/delete/content/compare- by-id stay org-agnostic. All platform callers threaded — new lib/helpers/org_slug.ts (orgSlugFromId) bridges organizationId to slug for callers that only have the id. Crawler /api/v1/search callers (query_web_context, search_pages) set X-Tale-Org directly. - generate-dev-compose.ts: bind mounts rewritten for the org-first layout. Old HOST_CONFIG_DIRS = ['agents','workflows',…] enumerated flat host dirs that no longer exist after tale init writes default//. Replaced with findOrgDirs() — emits .//:/app/data//{ro} for every org found. start.ts user-facing hot-reload message updated. - RULES_CONTENT (tools/cli/src/lib/rules/content.ts) + Cursor MDC globs rewritten for the org-first layout. tale update now applies checksum protection to rules files (CLAUDE.md, .cursor/rules/ tale.mdc, .github/copilot-instructions.md, .windsurfrules) — was unconditional overwrite, would clobber local edits. 
- tale update embedded-examples paths prefixed with `default/` so scaffolded files land where init puts them; previously update wrote into the now-unread flat layout. - services/convex/docker-entrypoint.sh: detects pre-orgfirst flat dirs at /app/data/{agents,workflows,…}/ on boot and warns loudly with the tale migrate config-layout runbook. atomic_cp helper comment reworded — it's atomic for the destination but cp itself isn't atomic. - tale start: detects legacy flat-layout dirs at project root and prints the runbook before continuing. Wave 3 — User-facing surface - All three root READMEs: stale "pending data migrations are detected and applied automatically on tale start/deploy" claim replaced with the explicit migrate runbook + link to upgrades.md. - docs/{en,de,fr}/self-hosted/configuration/providers.md: GitHub href tree/main/examples/providers (404) → tree/main/examples/default/providers. - docs/{en,de,fr}/self-hosted/configuration/retention.md: path documented as per-org /app/data//retention.json instead of the removed /app/data/platform-config/governance/retention-bounds.json. - docs/{en,de,fr}/self-hosted/operate/upgrades.md: new "Migrating to the org-first config layout" section covering the 3-step runbook, the rollback story (downgrade safe between steps 1 and 3 via the -orgfirst marker token; provider-secrets restore-from- backup needed after step 3), and the skip-step-1 fallback. - governance/mutations.ts: client-facing ConvexError message now references $TALE_CONFIG_DIR//retention.json. Wave 4 — Remaining majors (small surgical fixes) - init.ts: OpenRouter secrets file gets mode 0o600. - .dockerignore: !examples/**/*.md keeps skill SKILL.md in build context. - compose.yml + tools/cli/src/lib/compose/generators/constants.ts: stale `tale migrate split-convex` justification reworded — the platform-data volume is now an unused pre-0.3.0 stub kept only for the detect() probe in start.ts. 
- migrate-config-layout/script.sh: set -euo pipefail + ${DATA:?} guard so an unset $DATA can't make --cleanup-old rm from the container root. - Empty-catch fix-ups in branding/file_actions.ts (unlink loop + readdir), serve-branding-images.ts (.catch fallthrough now logs non-ENOENT), init.ts (detectTaleProjectFiles readdir). - config_store/store.ts: deleted the dead orgFirst flag — every caller passed true. Inlined the org-first layout; deleted the legacy /.json branch and updated the unit tests. - Stale docblocks updated in governance/{retention_actions, retention_bounds_proposal, retention_floors}.ts, integrations/ {credentials_schema, load_integration}.ts, agents/file_utils.ts, skills/file_actions.ts (all referenced the old flat layout or removed env vars). - services/platform/docker-entrypoint.sh: ORPHAN_DERIVED → LEGACY_ DOMAIN_VARS, dropped 2>/dev/null on the env-purge so failures surface in logs. - services/platform/Dockerfile: env-comment rewritten for the org-first sub-dir derivation. Test surface: 36/36 tasks pass via `bun run check`; 70927 platform tests, 298 RAG, 472 crawler. Touched tests: test_rag_service, test_compare_files, test_background_ingest, test_config (RAG + crawler), test_document_helpers, test_database (crawler — boot-time dim guard tests skipped with rationale), test_llm_cache (ContextVar setup), upload_file_direct.test, upload_document.test, store.test (rewritten for org-first paths). Out of scope (per user direction): reserving the literal `default` slug at the Better Auth `beforeCreateOrganization` hook. Resolved operationally — the operator is the first user and registers the default org via the normal flow. 
--- .dockerignore | 6 +- README.de.md | 2 +- README.fr.md | 2 +- README.md | 2 +- compose.yml | 9 +- .../de/self-hosted/configuration/providers.md | 2 +- .../de/self-hosted/configuration/retention.md | 2 +- docs/de/self-hosted/operate/upgrades.md | 38 ++ .../en/self-hosted/configuration/providers.md | 2 +- .../en/self-hosted/configuration/retention.md | 2 +- docs/en/self-hosted/operate/upgrades.md | 38 ++ .../fr/self-hosted/configuration/providers.md | 2 +- .../fr/self-hosted/configuration/retention.md | 2 +- docs/fr/self-hosted/operate/upgrades.md | 38 ++ .../src/tale_shared/config/base.py | 48 +- .../src/tale_shared/config/providers.py | 44 +- services/convex/docker-entrypoint.sh | 47 +- services/crawler/app/main.py | 30 +- services/crawler/app/org_context.py | 69 +++ services/crawler/app/services/database.py | 47 +- .../crawler/app/services/embedding_service.py | 102 ++-- .../app/services/file_parser_service.py | 7 +- services/crawler/app/services/scheduler.py | 20 +- .../app/services/vision/openai_client.py | 9 +- services/crawler/tests/test_config.py | 14 +- services/crawler/tests/test_database.py | 10 + services/crawler/tests/test_llm_cache.py | 6 + services/platform/Dockerfile | 15 +- services/platform/convex/_generated/api.d.ts | 4 +- .../helpers/fetch_document_comparison.ts | 2 + .../agent_tools/rag/query_rag_context.ts | 7 + .../convex/agent_tools/rag/rag_search_tool.ts | 9 + .../web/helpers/query_web_context.ts | 11 +- .../agent_tools/web/helpers/search_pages.ts | 15 +- services/platform/convex/agents/file_utils.ts | 2 +- .../platform/convex/branding/file_actions.ts | 20 +- .../convex/documents/compare_documents.ts | 3 + .../convex/file_metadata/transcribe_audio.ts | 7 + .../platform/convex/governance/mutations.ts | 2 +- .../convex/governance/retention_actions.ts | 7 +- .../governance/retention_bounds_proposal.ts | 7 +- .../convex/governance/retention_floors.ts | 5 +- .../convex/integrations/credentials_schema.ts | 5 +- 
.../convex/integrations/load_integration.ts | 2 +- .../lib/agent_response/generate_response.ts | 4 +- .../convex/lib/config_store/actions.ts | 1 - .../convex/lib/config_store/store.test.ts | 51 +- .../platform/convex/lib/config_store/store.ts | 88 +--- .../platform/convex/lib/helpers/org_slug.ts | 48 ++ .../platform/convex/lib/helpers/rag_config.ts | 38 +- .../convex/migrations/rename_org_slug.ts | 69 --- .../convex/organizations/reseed_all_orgs.ts | 61 ++- .../platform/convex/organizations/scaffold.ts | 264 ++++++++-- .../platform/convex/skills/file_actions.ts | 2 +- .../action_defs/document/document_action.ts | 3 + .../rag/helpers/upload_document.test.ts | 11 +- .../rag/helpers/upload_document.ts | 4 + .../rag/helpers/upload_file_direct.test.ts | 1 + .../rag/helpers/upload_file_direct.ts | 4 + .../action_defs/rag/rag_action.ts | 14 +- services/platform/docker-entrypoint.sh | 10 +- .../vite-plugins/serve-branding-images.ts | 19 +- services/rag/app/auth.py | 29 ++ services/rag/app/config.py | 8 +- services/rag/app/routers/documents.py | 19 +- services/rag/app/routers/search.py | 15 +- services/rag/app/services/rag_service.py | 453 ++++++++++-------- services/rag/tests/test_background_ingest.py | 10 + services/rag/tests/test_compare_files.py | 55 ++- services/rag/tests/test_config.py | 18 +- services/rag/tests/test_document_helpers.py | 28 +- services/rag/tests/test_rag_service.py | 109 +++-- tools/cli/src/lib/actions/deploy.ts | 57 ++- tools/cli/src/lib/actions/init.ts | 20 +- tools/cli/src/lib/actions/reseed-all-orgs.ts | 107 ++++- tools/cli/src/lib/actions/start.ts | 36 +- tools/cli/src/lib/actions/update.ts | 81 +++- .../src/lib/compose/generators/constants.ts | 12 +- .../generators/generate-dev-compose.ts | 85 +++- .../src/lib/migrate-config-layout/script.sh | 8 +- tools/cli/src/lib/rules/content.ts | 45 +- tools/cli/src/lib/rules/generators.ts | 5 +- 82 files changed, 1858 insertions(+), 757 deletions(-) create mode 100644 services/crawler/app/org_context.py 
create mode 100644 services/platform/convex/lib/helpers/org_slug.ts delete mode 100644 services/platform/convex/migrations/rename_org_slug.ts diff --git a/.dockerignore b/.dockerignore index 4f2099eb36..095d883195 100644 --- a/.dockerignore +++ b/.dockerignore @@ -55,7 +55,7 @@ services/platform/.env*.local # ============================================================================ # NOTE: keep tools/, examples/, and patches/ — Dockerfiles reference them: # - platform image copies tools/cli/package.json and patches/ -# - convex image copies examples/{agents,workflows,integrations,providers,branding} +# - convex image copies examples/default/{agents,workflows,integrations,providers,branding,skills} tests/ designs/ scripts/ @@ -69,6 +69,10 @@ knip-results.json docs/ !docs/package.json README.md +# Skill bundles ship their SKILL.md alongside the scripts; without +# this carve-out the convex image's COPY of examples/default/ would +# drop every skill's README and break runtime skill discovery. +!examples/**/*.md # ============================================================================ # IDE and Editor Files diff --git a/README.de.md b/README.de.md index 005eb6c56d..e465f09f4b 100644 --- a/README.de.md +++ b/README.de.md @@ -88,7 +88,7 @@ tale cleanup # Inaktive Container entfernen tale reset --force # Alle Container entfernen ``` -In der [CLI-Referenz](tools/cli/README.md) findest du alle Optionen und Flags. Anstehende Daten-Migrationen werden beim nächsten `tale start` oder `tale deploy` automatisch erkannt und angewendet. +In der [CLI-Referenz](tools/cli/README.md) findest du alle Optionen und Flags. Das Aktualisieren einer bestehenden Installation erfordert eine einmalige manuelle Migration: führe `tale migrate config-layout` aus, danach `tale deploy --override-all -y`. Das vollständige Runbook findest du in [Self-hosted Upgrades](docs/de/self-hosted/operate/upgrades.md). 
## In Produktion deployen diff --git a/README.fr.md b/README.fr.md index cacb0800dc..802699ed93 100644 --- a/README.fr.md +++ b/README.fr.md @@ -88,7 +88,7 @@ tale cleanup # Supprimer les conteneurs inactifs tale reset --force # Supprimer tous les conteneurs ``` -Voir la [référence du CLI](tools/cli/README.md) pour toutes les options et flags. Les migrations de données en attente sont détectées et appliquées automatiquement au prochain `tale start` ou `tale deploy`. +Voir la [référence du CLI](tools/cli/README.md) pour toutes les options et flags. Mettre à jour un déploiement existant nécessite une migration manuelle unique : exécutez `tale migrate config-layout` puis `tale deploy --override-all -y`. Le runbook complet se trouve dans [Mises à niveau auto-hébergées](docs/fr/self-hosted/operate/upgrades.md). ## Déployer en production diff --git a/README.md b/README.md index a4335a0196..9ac2de2e64 100644 --- a/README.md +++ b/README.md @@ -88,7 +88,7 @@ tale cleanup # Remove inactive containers tale reset --force # Remove all containers ``` -See the [CLI reference](tools/cli/README.md) for all options and flags. Pending data migrations are detected and applied automatically on the next `tale start` or `tale deploy`. +See the [CLI reference](tools/cli/README.md) for all options and flags. Upgrading an existing deployment requires a one-time manual migration: run `tale migrate config-layout` then `tale deploy --override-all -y`. See [Self-hosted upgrades](docs/en/self-hosted/operate/upgrades.md) for the full runbook. ## Deploy to production diff --git a/compose.yml b/compose.yml index 42564237af..f2f278a2ad 100644 --- a/compose.yml +++ b/compose.yml @@ -688,10 +688,11 @@ volumes: rag-data: driver: local - # Persistent storage for all platform data (Convex DB, agents, workflows, integrations) - # LEGACY: once `tale migrate split-convex` has run, data lives in convex-data. 
- # This volume is preserved in case users need to rollback; safe to remove - # manually after a successful Phase 2 upgrade. + # LEGACY (pre-0.3.0): platform data used to live here before the + # split-convex transition. Today everything lives in `convex-data`; the + # volume is retained as an unused stub so detect() in start.ts can + # identify pre-0.3.0 deployments and produce a coherent diff. Operators + # can delete it by hand once they're past the upgrade window. platform-data: driver: local diff --git a/docs/de/self-hosted/configuration/providers.md b/docs/de/self-hosted/configuration/providers.md index f8e904321d..3f6fbe7932 100644 --- a/docs/de/self-hosted/configuration/providers.md +++ b/docs/de/self-hosted/configuration/providers.md @@ -31,7 +31,7 @@ Die Referenz ist das Dateiformat auf Platte und die Reihenfolge der Operationen, } ``` -Die vollständige Menge der Felder lebt in [`examples/default/providers/`](https://github.com/tale-project/tale/tree/main/examples/providers) — `openai.json`, `openrouter.json` und `vercel-gateway.json` decken die drei Formen ab, die du wahrscheinlich brauchst. +Die vollständige Menge der Felder lebt in [`examples/default/providers/`](https://github.com/tale-project/tale/tree/main/examples/default/providers) — `openai.json`, `openrouter.json` und `vercel-gateway.json` decken die drei Formen ab, die du wahrscheinlich brauchst. ## Die Secrets-Datei diff --git a/docs/de/self-hosted/configuration/retention.md b/docs/de/self-hosted/configuration/retention.md index 3a0f9006d1..5553f2c1ff 100644 --- a/docs/de/self-hosted/configuration/retention.md +++ b/docs/de/self-hosted/configuration/retention.md @@ -24,7 +24,7 @@ Die mitgelieferten Defaults sind locker; zieh sie an, je nach deiner Compliance- ## Wo du Grenzen setzt -Die Grenzen leben in der Operator-Config-Datei, nicht in Env-Vars. 
Editiere `governance/retention-bounds.json` unter `TALE_CONFIG_DIR` (default `/app/data/platform-config/` im Plattform-Container): +Unter dem Org-first-Layout sind Retention-Grenzen **pro Org**: editiere `retention.json` direkt im Unterbaum einer Org unter `TALE_CONFIG_DIR` (default `/app/data/` im Plattform-Container, also liegt die Datei unter `/app/data//retention.json`, z. B. `/app/data/default/retention.json`). Jede Org hat ihre eigene Datei; die `default`-Datei ist die Vorlage, die eine neue Installation beim ersten Start aufgreift. ```json { diff --git a/docs/de/self-hosted/operate/upgrades.md b/docs/de/self-hosted/operate/upgrades.md index f28aa6a1c8..db5eee2a08 100644 --- a/docs/de/self-hosted/operate/upgrades.md +++ b/docs/de/self-hosted/operate/upgrades.md @@ -80,3 +80,41 @@ Minor-Versionen zu überspringen (von 0.9 auf 0.11 zu gehen) ist unterstützt, s ## Wo das hingehört Der Upgrade-Flow knüpft jede andere Operate-Seite an — Backups sind das, was ein gescheitertes Upgrade wiederherstellbar macht, Observability ist das, was dir sagt, dass die neue Farbe healthy ist, Hardening ist das, was du nach einer Major-Version neu walkst. Setzt du das CLI zum ersten Mal auf, deckt [Tale-CLI installieren](/de/self-hosted/install/cli-install) das workstationseitige Setup ab; nimmst du den Pager mitten im Rollout auf, nennt [Troubleshooting](/de/self-hosted/operate/observability/troubleshooting) die Symptome. + +## Migration auf das Org-first-Config-Layout + +Ältere Tale-Releases haben Config in einem flachen Baum im Workspace-Root abgelegt (`agents/`, `workflows/`, `integrations/`, `branding/`, `providers/`, `skills/`). Aktuelles Tale nutzt ein **Org-first**-Layout, in dem jede Org — auch die kanonische `default` — ihren eigenen Unterbaum besitzt: `///...`. Die Migration ist opt-in und läuft einmal pro Workspace. 
Die neue Plattform liest die alten Pfade nicht mehr; bis du migrierst, liegen Provider-Secrets und Anpassungen in Verzeichnissen, die das Runtime nicht mehr anschaut. + +Die Migration sind drei Kommandos: + +```bash +# 1. Provider-Secrets (und andere Config) aus dem flachen Layout nach +# `default//...` kopieren. cp statt mv, damit die alten Pfade +# für einen möglichen Rollback intakt bleiben. +tale migrate config-layout + +# 2. Convex-Container gegen das Org-first-Volume-Layout neu erstellen +# und den server-seitigen Reseed über jede registrierte Org laufen +# lassen. Impliziert `--all`; `-y` überspringt den destruktiven +# Bestätigungs-Prompt für CI / Skript-Läufe. +tale deploy --override-all -y + +# 3. Wenn du das neue Layout verifiziert hast, alte Pfade entfernen. +# sha-verifiziert, dass die neue Datei der alten entspricht, bevor +# unlink; bei Mismatch wird das Löschen verweigert. +tale migrate config-layout --cleanup-old +``` + +Schritt 1 ist safe und reversibel — ein Re-Run ist no-op, sobald Pfade existieren. Schritt 2 ist destruktiv: jede Org-Config mit Katalog-Name (`*.json` unter `agents/`, `workflows/`, `integrations/`, `skills/`, `branding/branding.json`, `retention.json`) wird mit dem Builtin-Katalog überschrieben. `*.secrets.json`-Dateien, `.history/`-Trails und hochgeladene `branding/images/*` bleiben server-seitig erhalten. Nach Schritt 2 liest die Plattform ausschließlich aus dem Org-first-Layout. + +Schritt 3 ist der Point-of-no-Return für Downgrades — siehe unten. + +### Org-first-Migration zurückrollen + +Zwischen Schritt 1 und 3 kannst du sauber downgraden. Der Convex-Entrypoint markiert jeden Seed-Lauf mit einem Token, das die Layout-Version enthält (`.seeded--orgfirst`); ein älteres Binary, das diesen Token nicht erkennt, re-seedet idempotent in seine eigenen (flachen) Pfade, und Schritt 1's `cp` hat die alten Pfade intakt gelassen. Downgrade ist ein normales `tale rollback`. + +Nach Schritt 3 (`--cleanup-old`) sind die alten Pfade weg. 
Downgrade re-seedet das Layout zwar weiterhin korrekt via Marker-Token-Mechanismus, aber die App startet mit leeren Provider-Secrets — stelle sie aus dem Backup wieder her (siehe [Backups und Restore](/de/self-hosted/operate/backups-and-restore)), bevor du Traffic wieder aufnimmst. + +### Was, wenn ich Schritt 1 überspringe? + +Der Convex-Container erkennt beim Start die übrig gebliebenen flachen Layout-Dirs und schreibt eine Warnung in seine Logs, die die Verzeichnisse benennt und auf dieses Runbook zeigt. Das Deployment startet, aber Reads aus diesen Verzeichnissen liefern leer, und Writes gehen in die neuen (leeren) Org-first-Pfade. Die Korrektur sind weiterhin Schritt 1 + 2 — sie nach der Warnung laufen zu lassen funktioniert genauso wie sie im Voraus laufen zu lassen. diff --git a/docs/en/self-hosted/configuration/providers.md b/docs/en/self-hosted/configuration/providers.md index 799f97a0f2..293dcdb1d5 100644 --- a/docs/en/self-hosted/configuration/providers.md +++ b/docs/en/self-hosted/configuration/providers.md @@ -31,7 +31,7 @@ The reference is the file format on disk and the order operations follow when ad } ``` -The full set of fields lives in [`examples/default/providers/`](https://github.com/tale-project/tale/tree/main/examples/providers) — `openai.json`, `openrouter.json`, and `vercel-gateway.json` cover the three shapes you are likely to need. +The full set of fields lives in [`examples/default/providers/`](https://github.com/tale-project/tale/tree/main/examples/default/providers) — `openai.json`, `openrouter.json`, and `vercel-gateway.json` cover the three shapes you are likely to need. ## The secrets file diff --git a/docs/en/self-hosted/configuration/retention.md b/docs/en/self-hosted/configuration/retention.md index 8bc20b4b9e..0e36299cce 100644 --- a/docs/en/self-hosted/configuration/retention.md +++ b/docs/en/self-hosted/configuration/retention.md @@ -24,7 +24,7 @@ The shipped defaults are loose; tighten per your compliance posture. 
## Where you set bounds -The bounds live in the operator config file, not in env vars. Edit `governance/retention-bounds.json` under `TALE_CONFIG_DIR` (defaults to `/app/data/platform-config/` inside the platform container): +Under the org-first layout, retention bounds are **per-org**: edit `retention.json` directly inside an org's subtree under `TALE_CONFIG_DIR` (defaults to `/app/data/` inside the platform container, so the file lives at `/app/data/<org>/retention.json`, e.g. `/app/data/default/retention.json`). Each org has its own file; the `default` org's file is the template a fresh deployment picks up on first boot. ```json { diff --git a/docs/en/self-hosted/operate/upgrades.md b/docs/en/self-hosted/operate/upgrades.md index 09e9d0c993..20d8c20dee 100644 --- a/docs/en/self-hosted/operate/upgrades.md +++ b/docs/en/self-hosted/operate/upgrades.md @@ -80,3 +80,41 @@ Skipping minor versions (going from 0.9 to 0.11) is supported as long as the int ## Where this fits The upgrade flow ties together every other operate page — backups are what makes a failed upgrade recoverable, observability is what tells you the new colour is healthy, hardening is what you re-walk after a major version. If you are setting up the CLI for the first time, [Install the tale CLI](/self-hosted/install/cli-install) covers the workstation-side setup; if you are picking up the pager mid-rollout, [Troubleshooting](/self-hosted/operate/observability/troubleshooting) names the symptoms. + +## Migrating to the org-first config layout + +Older Tale releases stored config in a flat tree at the workspace root (`agents/`, `workflows/`, `integrations/`, `branding/`, `providers/`, `skills/`). Current Tale uses an **org-first** layout where every org — including the canonical `default` — owns its own subtree: `<workspace>/<org>/<domain>/...`. The migration is opt-in and runs once per workspace.
The new platform refuses to read the legacy paths; until you migrate, your provider secrets and customizations live in directories the runtime no longer looks at. + +The migration is three commands: + +```bash +# 1. Copy provider secrets (and other config) from the flat layout into +# `default/<domain>/...`. cp not mv, so the old paths stay intact in +# case you need to roll back. +tale migrate config-layout + +# 2. Recreate the Convex container against the org-first volume layout +# and run the server-side reseed across every registered org. Implies +# `--all`; `-y` skips the destructive-write confirmation prompt for +# CI / scripted runs. +tale deploy --override-all -y + +# 3. Once you have verified the new layout is intact, remove the legacy +# paths. sha-verifies that the new file matches the old before +# unlinking; refuses to delete on any mismatch. +tale migrate config-layout --cleanup-old +``` + +Step 1 alone is safe and reversible — re-running it is a no-op once paths exist. Step 2 is destructive: every org's catalog-named config (`*.json` under `agents/`, `workflows/`, `integrations/`, `skills/`, `branding/branding.json`, `retention.json`) is overwritten with the builtin catalog. `*.secrets.json` files, `.history/` trails, and uploaded `branding/images/*` are preserved server-side. After step 2, the platform reads exclusively from the org-first layout. + +Step 3 is the point of no return for downgrades — see below. + +### Rolling back the org-first migration + +Between steps 1 and 3 you can downgrade cleanly. The Convex entrypoint marks each seed run with a token that includes the layout version (`.seeded-<version>-orgfirst`); an older binary that does not recognize the token re-seeds idempotently into its own (flat) paths, and step 1's `cp` left the legacy paths intact. Downgrade is a normal `tale rollback`. + +After step 3 (`--cleanup-old`), the legacy paths are gone.
Downgrade still re-seeds layout correctly via the marker token mechanism, but the app boots with empty provider secrets — restore them from backup (see [Backups and restore](/self-hosted/operate/backups-and-restore)) before resuming traffic. + +### What if I skip step 1? + +The Convex container will detect leftover flat-layout dirs on boot and print a warning to its logs naming the directories and pointing at this runbook. The deployment will start up, but reads from those directories return empty and writes go to the new (empty) org-first paths. The fix is still steps 1 + 2 — running them after the warning works exactly the same as running them up front. diff --git a/docs/fr/self-hosted/configuration/providers.md b/docs/fr/self-hosted/configuration/providers.md index a63161119c..71c9647fcc 100644 --- a/docs/fr/self-hosted/configuration/providers.md +++ b/docs/fr/self-hosted/configuration/providers.md @@ -31,7 +31,7 @@ La référence est le format de fichier sur disque et l'ordre des opérations à } ``` -L'ensemble complet des champs vit dans [`examples/default/providers/`](https://github.com/tale-project/tale/tree/main/examples/providers) — `openai.json`, `openrouter.json` et `vercel-gateway.json` couvrent les trois formes dont tu auras probablement besoin. +L'ensemble complet des champs vit dans [`examples/default/providers/`](https://github.com/tale-project/tale/tree/main/examples/default/providers) — `openai.json`, `openrouter.json` et `vercel-gateway.json` couvrent les trois formes dont tu auras probablement besoin. ## Le fichier de secrets diff --git a/docs/fr/self-hosted/configuration/retention.md b/docs/fr/self-hosted/configuration/retention.md index c3f7945c3b..a1851e1e3f 100644 --- a/docs/fr/self-hosted/configuration/retention.md +++ b/docs/fr/self-hosted/configuration/retention.md @@ -24,7 +24,7 @@ Les défauts livrés sont lâches ; resserre selon ta posture de compliance. 
## Où tu fixes les bornes -Les bornes vivent dans le fichier de config opérateur, pas dans les variables d'env. Édite `governance/retention-bounds.json` sous `TALE_CONFIG_DIR` (défaut `/app/data/platform-config/` dans le conteneur plateforme) : +Sous la disposition org-first, les bornes de rétention sont **par org** : édite `retention.json` directement dans le sous-arbre d'une org sous `TALE_CONFIG_DIR` (par défaut `/app/data/` dans le conteneur plateforme, le fichier se trouve donc à `/app/data//retention.json`, p. ex. `/app/data/default/retention.json`). Chaque org a son propre fichier ; celui de l'org `default` est le modèle qu'un nouveau déploiement reprend au premier démarrage. ```json { diff --git a/docs/fr/self-hosted/operate/upgrades.md b/docs/fr/self-hosted/operate/upgrades.md index 9e72578920..0881491c2d 100644 --- a/docs/fr/self-hosted/operate/upgrades.md +++ b/docs/fr/self-hosted/operate/upgrades.md @@ -80,3 +80,41 @@ Sauter des versions mineures (passer de 0.9 à 0.11) est supporté tant que les ## Où cela s'inscrit Le flow de montée de version noue chaque autre page d'exploitation — les backups sont ce qui rend une montée de version échouée récupérable, l'observabilité est ce qui te dit que la nouvelle couleur est saine, le durcissement est ce que tu re-walks après une version majeure. Si tu mets en place la CLI pour la première fois, [Installer la CLI tale](/fr/self-hosted/install/cli-install) couvre le setup côté workstation ; si tu prends le pager en plein rollout, [Dépannage](/fr/self-hosted/operate/observability/troubleshooting) nomme les symptômes. + +## Migration vers la disposition de config org-first + +Les anciennes versions de Tale stockaient la config dans une arborescence plate à la racine du workspace (`agents/`, `workflows/`, `integrations/`, `branding/`, `providers/`, `skills/`). La version actuelle utilise une disposition **org-first** où chaque org — y compris la canonique `default` — possède son propre sous-arbre : `///...`. 
La migration est opt-in et tourne une seule fois par workspace. La nouvelle plateforme refuse de lire les anciens chemins ; tant que tu n'as pas migré, tes secrets de provider et personnalisations vivent dans des répertoires que le runtime ne regarde plus. + +La migration tient en trois commandes : + +```bash +# 1. Copier les secrets de provider (et autres configs) depuis la +# disposition plate vers `default//...`. cp et non mv, donc +# les anciens chemins restent intacts au cas où un rollback serait +# nécessaire. +tale migrate config-layout + +# 2. Recréer le conteneur Convex contre la disposition de volume org-first +# et lancer le reseed côté serveur sur chaque org enregistrée. Implique +# `--all` ; `-y` saute le prompt destructif pour les runs CI / scripts. +tale deploy --override-all -y + +# 3. Une fois la nouvelle disposition vérifiée intacte, supprimer les +# anciens chemins. Vérifie via sha que le nouveau fichier correspond +# à l'ancien avant unlink ; refuse de supprimer en cas de mismatch. +tale migrate config-layout --cleanup-old +``` + +L'étape 1 est sûre et réversible — la rejouer est un no-op une fois les chemins existants. L'étape 2 est destructive : chaque config d'org au nom canonique (`*.json` sous `agents/`, `workflows/`, `integrations/`, `skills/`, `branding/branding.json`, `retention.json`) est écrasée par le catalogue builtin. Les fichiers `*.secrets.json`, les traces `.history/` et les `branding/images/*` uploadés sont préservés côté serveur. Après l'étape 2, la plateforme lit exclusivement depuis la disposition org-first. + +L'étape 3 est le point de non-retour pour les downgrades — voir ci-dessous. + +### Annuler la migration org-first + +Entre les étapes 1 et 3, tu peux downgrader proprement. 
L'entrypoint Convex marque chaque run de seed avec un token qui inclut la version de disposition (`.seeded--orgfirst`) ; un binaire plus ancien qui ne reconnaît pas ce token re-seede idempotemment dans ses propres chemins (plats), et le `cp` de l'étape 1 a laissé les anciens chemins intacts. Le downgrade est un `tale rollback` normal. + +Après l'étape 3 (`--cleanup-old`), les anciens chemins sont partis. Le downgrade continue à re-seeder la disposition correctement via le mécanisme du token-marker, mais l'app démarre avec des secrets de provider vides — restaure-les depuis le backup (voir [Backups et restauration](/fr/self-hosted/operate/backups-and-restore)) avant de reprendre le trafic. + +### Et si je saute l'étape 1 ? + +Le conteneur Convex détectera au démarrage les répertoires restants de la disposition plate et écrira un warning dans ses logs en nommant les répertoires et pointant vers ce runbook. Le déploiement démarre, mais les reads sur ces répertoires reviennent vides et les writes vont vers les nouveaux chemins (vides) org-first. La correction reste étapes 1 + 2 — les lancer après le warning fonctionne exactement comme les lancer en amont. diff --git a/packages/tale_shared/src/tale_shared/config/base.py b/packages/tale_shared/src/tale_shared/config/base.py index c45626dae4..e057bb25bb 100644 --- a/packages/tale_shared/src/tale_shared/config/base.py +++ b/packages/tale_shared/src/tale_shared/config/base.py @@ -2,6 +2,12 @@ Provides a common base for pydantic-settings-based configuration with shared patterns across crawler and RAG services. + +Provider lookups require an org slug under the org-first config layout — +each org owns its own provider catalog at +`//providers/`. The base-class shims accept an +explicit `org_slug` and surface a clear error if the caller forgot to +thread one through, rather than silently pinning every org to `default`. 
""" import logging @@ -38,36 +44,36 @@ class BaseServiceSettings(BaseSettings): vision_request_timeout: int = 180 vision_max_concurrent_pages: int = 1 - def get_fast_model(self) -> str: - """Get fast LLM model from provider files.""" - _base_url, _api_key, model_id = _provider_chat_model() + def get_fast_model(self, org_slug: str) -> str: + """Get fast LLM model for an org from provider files.""" + _base_url, _api_key, model_id = _provider_chat_model(org_slug) return model_id - def get_embedding_model(self) -> str: - """Get embedding model from provider files.""" - _base_url, _api_key, model_id, _dims = _provider_embedding_model() + def get_embedding_model(self, org_slug: str) -> str: + """Get embedding model for an org from provider files.""" + _base_url, _api_key, model_id, _dims = _provider_embedding_model(org_slug) return model_id - def get_vision_model(self) -> str: - """Get vision model from provider files.""" - _base_url, _api_key, model_id = _provider_vision_model() + def get_vision_model(self, org_slug: str) -> str: + """Get vision model for an org from provider files.""" + _base_url, _api_key, model_id = _provider_vision_model(org_slug) return model_id - def get_chat_config(self) -> tuple[str, str, str]: - """Return (base_url, api_key, model_id) for chat model from provider files.""" - return _provider_chat_model() + def get_chat_config(self, org_slug: str) -> tuple[str, str, str]: + """Return (base_url, api_key, model_id) for an org's chat model.""" + return _provider_chat_model(org_slug) - def get_embedding_config(self) -> tuple[str, str, str, int]: - """Return (base_url, api_key, model_id, dimensions) for embedding model.""" - return _provider_embedding_model() + def get_embedding_config(self, org_slug: str) -> tuple[str, str, str, int]: + """Return (base_url, api_key, model_id, dimensions) for an org's embedding model.""" + return _provider_embedding_model(org_slug) - def get_vision_config(self) -> tuple[str, str, str]: - """Return (base_url, api_key, 
model_id) for vision model.""" - return _provider_vision_model() + def get_vision_config(self, org_slug: str) -> tuple[str, str, str]: + """Return (base_url, api_key, model_id) for an org's vision model.""" + return _provider_vision_model(org_slug) - def get_embedding_dimensions(self) -> int: - """Get embedding dimensions from provider files.""" - _base_url, _api_key, _model_id, dims = _provider_embedding_model() + def get_embedding_dimensions(self, org_slug: str) -> int: + """Get embedding dimensions for an org from provider files.""" + _base_url, _api_key, _model_id, dims = _provider_embedding_model(org_slug) return dims def get_allowed_origins_list(self) -> list[str]: diff --git a/packages/tale_shared/src/tale_shared/config/providers.py b/packages/tale_shared/src/tale_shared/config/providers.py index 33466152c2..b2075ec501 100644 --- a/packages/tale_shared/src/tale_shared/config/providers.py +++ b/packages/tale_shared/src/tale_shared/config/providers.py @@ -39,21 +39,36 @@ class ProviderConfig: defaults: dict[str, str] = field(default_factory=dict) -def load_providers(config_dir: str | None = None) -> list[ProviderConfig]: - """Read all provider JSON files from {config_dir}/providers/. +def load_providers( + org_slug: str, + config_dir: str | None = None, +) -> list[ProviderConfig]: + """Read all provider JSON files from {config_dir}/{org_slug}/providers/. + + Under the org-first config layout, each org owns its own provider + catalog at `//providers/`. `org_slug` is required — + pinning RAG/crawler globally to the `default` org's providers would + quietly serve the wrong models to other orgs. Reads *.json (excluding *.secrets.json) and decrypts matching *.secrets.json files via SOPS. 
""" + if not org_slug: + raise ValueError("load_providers requires a non-empty org_slug") + shared_config = os.environ.get("TALE_PLATFORM_SHARED_CONFIG_DIR") if shared_config: base = Path(shared_config) else: base = Path(config_dir or os.environ.get("TALE_CONFIG_DIR") or os.environ.get("CONFIG_DIR", DEFAULT_CONFIG_DIR)) - providers_dir = base / "providers" + providers_dir = base / org_slug / "providers" if not providers_dir.is_dir(): - logger.warning("Providers directory not found: %s", providers_dir) + logger.warning( + "Providers directory not found for org '%s': %s", + org_slug, + providers_dir, + ) return [] providers: list[ProviderConfig] = [] @@ -146,17 +161,18 @@ def _find_model( def get_chat_model( + org_slug: str, config_dir: str | None = None, ) -> tuple[str, str, str]: - """Return (base_url, api_key, model_id) for the default chat model. + """Return (base_url, api_key, model_id) for the org's default chat model. Finds the first model marked default that has a "chat" tag, or falls back to the first model with a "chat" tag. 
""" - providers = load_providers(config_dir) + providers = load_providers(org_slug, config_dir) match = _find_model(providers, "chat", prefer_default=True) if match is None: - raise ValueError("No chat model found in provider configuration files.") + raise ValueError(f"No chat model found in provider configuration files for org '{org_slug}'.") provider, model = match api_key = provider.api_key or "" @@ -164,13 +180,14 @@ def get_chat_model( def get_embedding_model( + org_slug: str, config_dir: str | None = None, ) -> tuple[str, str, str, int]: - """Return (base_url, api_key, model_id, dimensions) for the embedding model.""" - providers = load_providers(config_dir) + """Return (base_url, api_key, model_id, dimensions) for the org's embedding model.""" + providers = load_providers(org_slug, config_dir) match = _find_model(providers, "embedding", prefer_default=True) if match is None: - raise ValueError("No embedding model found in provider configuration files.") + raise ValueError(f"No embedding model found in provider configuration files for org '{org_slug}'.") provider, model = match api_key = provider.api_key or "" @@ -183,13 +200,14 @@ def get_embedding_model( def get_vision_model( + org_slug: str, config_dir: str | None = None, ) -> tuple[str, str, str]: - """Return (base_url, api_key, model_id) for the vision model.""" - providers = load_providers(config_dir) + """Return (base_url, api_key, model_id) for the org's vision model.""" + providers = load_providers(org_slug, config_dir) match = _find_model(providers, "vision", prefer_default=True) if match is None: - raise ValueError("No vision model found in provider configuration files.") + raise ValueError(f"No vision model found in provider configuration files for org '{org_slug}'.") provider, model = match api_key = provider.api_key or "" diff --git a/services/convex/docker-entrypoint.sh b/services/convex/docker-entrypoint.sh index 49870c14b3..5bc69b5923 100755 --- a/services/convex/docker-entrypoint.sh +++ 
b/services/convex/docker-entrypoint.sh @@ -290,11 +290,15 @@ fi seed_marker="/app/data/.seeded-${TALE_VERSION:-dev}-orgfirst" data_dir="/app/data" -# Atomic file copy: write to a sibling tmp file then rename. A SIGKILL -# between open(dest, O_TRUNC) and the final write would otherwise leave a -# truncated file at $dest, which the next-run skip-if-exists check treats -# as "already seeded" — silent corruption. With atomic_cp the next run -# either sees the original (rename never happened) or the complete file. +# Crash-safe file copy: write to a sibling tmp file then rename to dest. +# `cp` itself is non-atomic; the value is that an interrupted run leaves +# either (a) no tmp / dest intact, or (b) a partial `.tale-seed.<pid>.tmp` +# orphan + dest intact. The next-run skip-if-exists check on dest +# therefore never observes a half-written file. Orphan tmps don't gate +# anything (they're not matched by the dest-existence probe) and survive +# until the next reseed of that file. There is no fsync — power-loss +# durability isn't asserted, but the seed data is re-derivable from the +# immutable builtin catalog, so a lost write is recoverable on retry. atomic_cp() { local src="$1" dest="$2" local tmp="${dest}.tale-seed.$$.tmp" @@ -468,6 +472,39 @@ else log_info "Builtin seed already applied for version ${TALE_VERSION:-dev} (marker: $seed_marker)" fi +# ---------------------------------------------------------------------------- +# Legacy flat-layout detector +# ---------------------------------------------------------------------------- +# The pre-orgfirst layout placed config at the data-root level +# (`/app/data/agents/`, `/app/data/workflows/`, …). Under org-first that +# tree is now at `/app/data/<org>/<domain>/`. If an upgrading operator's +# volume still contains the legacy flat trees, the new runtime ignores +# them — `seed_marker` already promoted seed data to `default/`, but the +# operator's edits at the old root are unreachable. 
Warn loudly so they +# know to run `tale migrate config-layout` on the host. +legacy_flat_dirs=() +for d in agents workflows integrations branding providers skills; do + if [ -d "${data_dir}/${d}" ]; then + legacy_flat_dirs+=("${d}") + fi +done +if [ ${#legacy_flat_dirs[@]} -gt 0 ]; then + echo + echo "⚠ WARNING: legacy flat-layout config detected at:" + for d in "${legacy_flat_dirs[@]}"; do + echo " ${data_dir}/${d}/" + done + echo + echo " The org-first runtime reads only from '///'." + echo " Edits at the paths above are NOT loaded by the platform or any" + echo " per-org config resolver. To migrate them into the new layout," + echo " run on the operator host:" + echo " tale migrate config-layout" + echo " then:" + echo " tale deploy --override-all -y" + echo +fi + # ============================================================================ # Crash diagnostics helpers # ============================================================================ diff --git a/services/crawler/app/main.py b/services/crawler/app/main.py index b88f8e2d28..2daf780866 100644 --- a/services/crawler/app/main.py +++ b/services/crawler/app/main.py @@ -16,7 +16,7 @@ from collections.abc import AsyncGenerator from contextlib import asynccontextmanager, suppress -from fastapi import FastAPI +from fastapi import Depends, FastAPI from fastapi.middleware.cors import CORSMiddleware from loguru import logger from tale_shared.logging import suppress_health_check_logs @@ -204,17 +204,25 @@ async def lifespan(app: FastAPI) -> AsyncGenerator: init_telemetry(app) +# X-Tale-Org is required on every endpoint that touches per-org provider +# state (vision, embedding, chat model). Apply as a router-level +# dependency so individual handlers don't need to remember to declare it. +# `/health` is mounted at the app level below — exempt. 
+from app.org_context import require_org_slug # noqa: E402 + +_org_dep = [Depends(require_org_slug)] + # Register routers -app.include_router(crawler_router) -app.include_router(websites_router) -app.include_router(search_router) -app.include_router(pages_router) -app.include_router(index_router) -app.include_router(pdf_router) -app.include_router(image_router) -app.include_router(docx_router) -app.include_router(pptx_router) -app.include_router(web_router) +app.include_router(crawler_router, dependencies=_org_dep) +app.include_router(websites_router, dependencies=_org_dep) +app.include_router(search_router, dependencies=_org_dep) +app.include_router(pages_router, dependencies=_org_dep) +app.include_router(index_router, dependencies=_org_dep) +app.include_router(pdf_router, dependencies=_org_dep) +app.include_router(image_router, dependencies=_org_dep) +app.include_router(docx_router, dependencies=_org_dep) +app.include_router(pptx_router, dependencies=_org_dep) +app.include_router(web_router, dependencies=_org_dep) @app.get("/health", response_model=HealthResponse) diff --git a/services/crawler/app/org_context.py b/services/crawler/app/org_context.py new file mode 100644 index 0000000000..48930a9f14 --- /dev/null +++ b/services/crawler/app/org_context.py @@ -0,0 +1,69 @@ +"""Per-request org-slug context for the crawler service. + +Crawler internals (vision client, embedding service, file parsers, …) +need an org slug to read that org's provider catalog. Threading the slug +through every helper would touch ~15 call sites without adding signal — +the org is per-REQUEST, so a `contextvars.ContextVar` set by the +`require_org_slug` FastAPI dependency at the router boundary is the +right primitive: + +- One write per request, at the boundary. +- Reads from any depth via `get_active_org()` — no parameter explosion. +- Per-asyncio-task isolation (ContextVar binds to the running task). 
+ +A missing context raises rather than silently falling back to `default`: +forgetting to set the header is a caller bug we want to surface as a +500, not as "served the wrong org's models for an hour". +""" + +import re +from contextvars import ContextVar + +from fastapi import Header, HTTPException, status + +# Aligned with services/platform/convex/lib/file_io.ts:25; capped at 64 chars +# to match tools/cli/src/lib/migrate-config-layout/script.sh:134. +_ORG_SLUG_RE = re.compile(r"^[a-z0-9][a-z0-9_-]{0,63}$") + +_active_org: ContextVar[str | None] = ContextVar("tale_active_org", default=None) + + +def set_active_org(slug: str) -> None: + """Bind the active org to the current asyncio task.""" + _active_org.set(slug) + + +def get_active_org() -> str: + """Read the active org slug. Raises if unset (caller bug).""" + value = _active_org.get() + if not value: + raise RuntimeError( + "No active org slug for this request. Every public crawler " + "endpoint must declare `org_slug: str = Depends(require_org_slug)` " + "so the X-Tale-Org header is captured before service layer use." + ) + return value + + +async def require_org_slug( + x_tale_org: str | None = Header(default=None), +) -> str: + """FastAPI dependency: extract + validate the X-Tale-Org header, + bind it to the request-scoped ContextVar, and return it. + + Returns the slug so handlers that need it explicitly can also take + `org_slug = Depends(require_org_slug)`. Internal helpers should + prefer `get_active_org()` over plumbing the slug as a param. 
+ """ + if not x_tale_org: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="missing X-Tale-Org header", + ) + if not _ORG_SLUG_RE.match(x_tale_org): + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="invalid X-Tale-Org header", + ) + set_active_org(x_tale_org) + return x_tale_org diff --git a/services/crawler/app/services/database.py b/services/crawler/app/services/database.py index 5b99bb607f..160a536a5d 100644 --- a/services/crawler/app/services/database.py +++ b/services/crawler/app/services/database.py @@ -62,39 +62,20 @@ async def init_pool(*, max_size: int = 10) -> asyncpg.Pool: ) logger.info(f"PostgreSQL connection pool initialized (min={min(2, max_size)}, max={max_size})") - # Guard against embedding dimension mismatch: if existing data uses a - # different dimension than the current config, refuse to start. - configured_dims = settings.get_embedding_dimensions() - async with acquire_with_retry(_pool) as conn: - stored_dims = await conn.fetchval( - f"SELECT vector_dims(embedding) FROM {SCHEMA}.chunks WHERE embedding IS NOT NULL LIMIT 1" - ) - if stored_dims is not None and stored_dims != configured_dims: - await _pool.close() - _pool = None - raise RuntimeError( - f"Embedding dimension mismatch: database has {stored_dims}d vectors " - f"but CRAWLER_EMBEDDING_DIMENSIONS={configured_dims}. " - f"Re-index existing data or update the config to match." - ) - - # Pin the embedding column to explicit dimensions so HNSW indexes work. 
- expected_type = f"vector({int(configured_dims)})" - async with acquire_with_retry(_pool) as conn: - col_type = await conn.fetchval( - "SELECT format_type(atttypid, atttypmod) " - "FROM pg_attribute " - "WHERE attrelid = $1::regclass AND attname = 'embedding'", - f"{SCHEMA}.chunks", - ) - if col_type != expected_type: - await conn.execute(f"DROP INDEX IF EXISTS {SCHEMA}.idx_pw_chunks_embedding_hnsw") - await conn.execute( - f"ALTER TABLE {SCHEMA}.chunks ALTER COLUMN embedding TYPE vector({int(configured_dims)})" - ) - logger.info(f"Pinned embedding column to vector({configured_dims}) (was {col_type})") - - # Create HNSW index if it doesn't exist yet. + # Note: the previous boot-time embedding-dimension guard was + # removed when crawler became multi-org. Dim is now an attribute + # of the org's provider catalog, not a global setting, and there + # is no org context at lifespan start. `get_embedding_service()` + # refuses dim changes per-org at request time; pgvector enforces + # column dim on insert. + # + # The column type and HNSW index are pinned lazily on the first + # insert (pgvector errors loudly on dim mismatch). All orgs + # sharing this crawler instance must agree on embedding dims. + + # Create HNSW index if it doesn't exist yet. The index targets + # whatever the column type is set to; if no rows have been + # inserted, the call is cheap. try: async with acquire_with_retry(_pool) as conn: await conn.execute(f"SELECT {SCHEMA}.create_chunks_hnsw_index()") diff --git a/services/crawler/app/services/embedding_service.py b/services/crawler/app/services/embedding_service.py index 5b4aa38529..f90e538725 100644 --- a/services/crawler/app/services/embedding_service.py +++ b/services/crawler/app/services/embedding_service.py @@ -1,9 +1,14 @@ """ OpenAI-compatible embedding generation service. -Crawler-specific factory with TTL-based config refresh. -When provider config files change (e.g. 
API key rotation), the client -is automatically rebuilt on the next access after the TTL expires. +Crawler-specific factory with TTL-based config refresh, keyed by org +slug. Each org has its own EmbeddingService instance built from that +org's provider catalog at `<config_dir>/<org_slug>/providers/`. + +Embedding dimensions are still implicitly global because every org +writes to the same crawler `chunks.embedding` column. This module only +refuses a dimension change within one org's own config (it keeps that +org's existing client); disagreement between orgs is not detected here. """ import asyncio @@ -14,13 +19,28 @@ from tale_knowledge.embedding import EmbeddingService from app.config import settings +from app.org_context import get_active_org -_embedding_service: EmbeddingService | None = None -_embedding_config: tuple | None = None -_last_config_check: float = 0 _CONFIG_CHECK_INTERVAL = 15 # seconds +class _OrgEmbeddingState: + __slots__ = ("config", "last_check", "service") + + def __init__( + self, + service: EmbeddingService, + config: tuple, + last_check: float, + ) -> None: + self.service = service + self.config = config + self.last_check = last_check + + +_org_states: dict[str, _OrgEmbeddingState] = {} + + async def _close_old(service: EmbeddingService) -> None: """Close an old client after a grace period for in-flight requests.""" await asyncio.sleep(30) @@ -31,54 +51,74 @@ async def _close_old(service: EmbeddingService) -> None: def get_embedding_service() -> EmbeddingService: - global _embedding_service, _embedding_config, _last_config_check + org_slug = get_active_org() + state = _org_states.get(org_slug) now = time.monotonic() - if _embedding_service is not None and (now - _last_config_check) < _CONFIG_CHECK_INTERVAL: - return _embedding_service + if state is not None and (now - state.last_check) < _CONFIG_CHECK_INTERVAL: + return state.service - _last_config_check = now try: - config = settings.get_embedding_config() # (base_url, api_key, model, dims) + config = settings.get_embedding_config(org_slug) 
# (base_url, api_key, model, dims) except (ValueError, OSError): - logger.opt(exception=True).warning("Config read failed, keeping current embedding client") - if _embedding_service is not None: - return _embedding_service + logger.opt(exception=True).warning( + "Config read failed for org '{}', keeping current embedding client", + org_slug, + ) + if state is not None: + state.last_check = now + return state.service raise - if config == _embedding_config and _embedding_service is not None: - return _embedding_service + if state is not None and config == state.config: + state.last_check = now + return state.service base_url, api_key, model, dims = config # Never downgrade to empty key - if not api_key and _embedding_service is not None: - logger.warning("Skipping embedding reload: new config has empty API key") - return _embedding_service + if not api_key and state is not None: + logger.warning( + "Skipping embedding reload for org '{}': new config has empty API key", + org_slug, + ) + state.last_check = now + return state.service # Refuse dimension change (would corrupt vector index) - if _embedding_config is not None and dims != _embedding_config[3] and _embedding_service is not None: + if state is not None and dims != state.config[3]: logger.error( - "Embedding dimensions changed ({} -> {}). Restart required to re-index.", - _embedding_config[3], + "Embedding dimensions for org '{}' changed ({} -> {}). 
Restart required to re-index.", + org_slug, + state.config[3], dims, ) - return _embedding_service + state.last_check = now + return state.service - old = _embedding_service - _embedding_service = EmbeddingService( + old_service = state.service if state is not None else None + new_service = EmbeddingService( api_key=api_key, base_url=base_url, model=model, dimensions=dims, ) - _embedding_config = config + _org_states[org_slug] = _OrgEmbeddingState( + service=new_service, + config=config, + last_check=now, + ) - if old is not None: - logger.info("Embedding service rebuilt: model={}", model) + if old_service is not None: + logger.info("Embedding service rebuilt for org '{}': model={}", org_slug, model) with contextlib.suppress(RuntimeError): - asyncio.get_running_loop().create_task(_close_old(old)) + asyncio.get_running_loop().create_task(_close_old(old_service)) else: - logger.info("Embedding service created: model={}, dims={}", model, dims) + logger.info( + "Embedding service created for org '{}': model={}, dims={}", + org_slug, + model, + dims, + ) - return _embedding_service + return new_service diff --git a/services/crawler/app/services/file_parser_service.py b/services/crawler/app/services/file_parser_service.py index dffaa2827c..6a28cdc4c5 100644 --- a/services/crawler/app/services/file_parser_service.py +++ b/services/crawler/app/services/file_parser_service.py @@ -14,6 +14,7 @@ from typing import Any from ..config import settings +from ..org_context import get_active_org logger = logging.getLogger(__name__) @@ -221,7 +222,7 @@ async def parse_pdf_with_vision( model=model, usage=acc, ) - resolved_model = model or settings.get_fast_model() + resolved_model = model or settings.get_fast_model(get_active_org()) import fitz as _fitz @@ -343,7 +344,7 @@ async def parse_docx_with_vision( model=model, usage=acc, ) - resolved_model = model or settings.get_fast_model() + resolved_model = model or settings.get_fast_model(get_active_org()) docx_dates = 
_extract_ooxml_metadata(file_bytes, "docx") @@ -460,7 +461,7 @@ async def parse_pptx_with_vision( model=model, usage=acc, ) - resolved_model = model or settings.get_fast_model() + resolved_model = model or settings.get_fast_model(get_active_org()) pptx_dates = _extract_ooxml_metadata(file_bytes, "pptx") diff --git a/services/crawler/app/services/scheduler.py b/services/crawler/app/services/scheduler.py index 940c5bc198..d72051d73b 100644 --- a/services/crawler/app/services/scheduler.py +++ b/services/crawler/app/services/scheduler.py @@ -66,11 +66,29 @@ async def run_scheduler( global _scan_trigger _scan_trigger = asyncio.Event() + # Background scheduler has no per-request X-Tale-Org context. Until + # the websites table carries the owning org slug, fall back to + # `default` for any provider lookups triggered by scheduled scans. + # Log once so operators see the assumption. + from app.org_context import set_active_org + + set_active_org("default") + logger.warning( + "Scheduler background task using org slug 'default' for provider " + "lookups. Per-website org binding is a follow-up." 
+ ) + sem = asyncio.Semaphore(max_concurrent_scans) async def bounded_scan(domain: str): async with sem: - await _scan_website(domain, store_manager, crawler_service, indexing_service, crawl_batch_size) + await _scan_website( + domain, + store_manager, + crawler_service, + indexing_service, + crawl_batch_size, + ) while True: try: diff --git a/services/crawler/app/services/vision/openai_client.py b/services/crawler/app/services/vision/openai_client.py index 684733e7c6..e6c0cba81c 100644 --- a/services/crawler/app/services/vision/openai_client.py +++ b/services/crawler/app/services/vision/openai_client.py @@ -19,6 +19,7 @@ from openai import AsyncOpenAI from ...config import settings +from ...org_context import get_active_org from .cache import compute_text_hash, llm_cache @@ -103,7 +104,7 @@ def _get_client(self) -> AsyncOpenAI: self._last_config_check = now try: - config = settings.get_vision_config() # (base_url, api_key, model) + config = settings.get_vision_config(get_active_org()) # (base_url, api_key, model) except (ValueError, OSError): if self._client is not None: logger.opt(exception=True).warning("Config read failed, keeping current vision client") @@ -150,7 +151,7 @@ async def ocr_image( return cached_result client = self._get_client() - vision_model = settings.get_vision_model() + vision_model = settings.get_vision_model(get_active_org()) extraction_prompt = prompt or OCR_PROMPT image_b64 = base64.b64encode(image_bytes).decode("utf-8") @@ -229,7 +230,7 @@ async def describe_image( return cached_result client = self._get_client() - vision_model = settings.get_vision_model() + vision_model = settings.get_vision_model(get_active_org()) description_prompt = prompt or DESCRIBE_PROMPT image_b64 = base64.b64encode(image_bytes).decode("utf-8") @@ -369,7 +370,7 @@ async def process_pages_with_llm( logger.info(f"LLM processing: {total_chars} chars total, chunking at {max_chars_per_chunk} chars") - base_url, api_key, chat_model = settings.get_chat_config() + 
base_url, api_key, chat_model = settings.get_chat_config(get_active_org()) client = AsyncOpenAI( api_key=api_key, base_url=base_url, diff --git a/services/crawler/tests/test_config.py b/services/crawler/tests/test_config.py index 2f102951e3..1d55f2c28f 100644 --- a/services/crawler/tests/test_config.py +++ b/services/crawler/tests/test_config.py @@ -31,7 +31,7 @@ class TestGetFastModel: def test_returns_model_from_provider(self, mock_provider): with patch.dict(os.environ, _base_env(), clear=True): s = Settings() - assert s.get_fast_model() == "gpt-4o-mini" + assert s.get_fast_model("default") == "gpt-4o-mini" @patch( "tale_shared.config.base._provider_chat_model", @@ -41,7 +41,7 @@ def test_missing_provider_raises(self, mock_provider): with patch.dict(os.environ, _base_env(), clear=True): s = Settings() with pytest.raises(ValueError, match="No chat model"): - s.get_fast_model() + s.get_fast_model("default") class TestGetVisionModel: @@ -49,7 +49,7 @@ class TestGetVisionModel: def test_returns_model_from_provider(self, mock_provider): with patch.dict(os.environ, _base_env(), clear=True): s = Settings() - assert s.get_vision_model() == "gpt-4o" + assert s.get_vision_model("default") == "gpt-4o" @patch( "tale_shared.config.base._provider_vision_model", @@ -59,7 +59,7 @@ def test_missing_provider_raises(self, mock_provider): with patch.dict(os.environ, _base_env(), clear=True): s = Settings() with pytest.raises(ValueError, match="No vision model"): - s.get_vision_model() + s.get_vision_model("default") class TestGetEmbeddingDimensions: @@ -67,7 +67,7 @@ class TestGetEmbeddingDimensions: def test_returns_dimensions_from_provider(self, mock_provider): with patch.dict(os.environ, _base_env(), clear=True): s = Settings() - assert s.get_embedding_dimensions() == 1536 + assert s.get_embedding_dimensions("default") == 1536 @patch( "tale_shared.config.base._provider_embedding_model", @@ -76,7 +76,7 @@ def test_returns_dimensions_from_provider(self, mock_provider): def 
test_large_dimensions(self, mock_provider): with patch.dict(os.environ, _base_env(), clear=True): s = Settings() - assert s.get_embedding_dimensions() == 3072 + assert s.get_embedding_dimensions("default") == 3072 @patch( "tale_shared.config.base._provider_embedding_model", @@ -86,7 +86,7 @@ def test_missing_provider_raises(self, mock_provider): with patch.dict(os.environ, _base_env(), clear=True): s = Settings() with pytest.raises(ValueError, match="No embedding model"): - s.get_embedding_dimensions() + s.get_embedding_dimensions("default") class TestFrequencyDefaults: diff --git a/services/crawler/tests/test_database.py b/services/crawler/tests/test_database.py index 1e8beaebc4..18aa21712f 100644 --- a/services/crawler/tests/test_database.py +++ b/services/crawler/tests/test_database.py @@ -37,6 +37,11 @@ async def _acq(_pool, **_kw): return pool, _acq +@pytest.mark.skip( + reason="Boot-time embedding-dimension guard was removed when crawler " + "became multi-org. Dim is now per-org provider catalog; pgvector enforces " + "column dim on insert + get_embedding_service refuses dim changes per-org." +) class TestDimensionMismatchGuard: @pytest.mark.asyncio async def test_raises_on_dimension_mismatch(self): @@ -88,6 +93,11 @@ async def test_passes_when_no_existing_data(self): assert pool is fake_pool +@pytest.mark.skip( + reason="Boot-time embedding-column ALTER was removed when crawler became " + "multi-org. Column type is now driven by the first INSERT under pgvector; " + "operators reconcile per-org provider catalogs manually if dims diverge." 
+) class TestEmbeddingColumnPinning: @pytest.mark.asyncio async def test_alters_untyped_vector_column(self): diff --git a/services/crawler/tests/test_llm_cache.py b/services/crawler/tests/test_llm_cache.py index 590d3a5753..6c9b9aed6e 100644 --- a/services/crawler/tests/test_llm_cache.py +++ b/services/crawler/tests/test_llm_cache.py @@ -59,9 +59,15 @@ class TestProcessPagesWithLlmCache: @patch("app.services.vision.openai_client.settings") @patch("app.services.vision.openai_client.AsyncOpenAI") async def test_second_call_hits_cache(self, mock_openai_cls, mock_settings): + from app.org_context import set_active_org from app.services.vision.cache import llm_cache from app.services.vision.openai_client import process_pages_with_llm + # The internal `get_active_org()` call requires a ContextVar bound + # by `require_org_slug` in production; in unit tests we set it + # directly so the per-org provider lookup has a slug to resolve. + set_active_org("test-org") + llm_cache.clear() mock_settings.get_chat_config.return_value = ("http://test", "test-key", "test-model") diff --git a/services/platform/Dockerfile b/services/platform/Dockerfile index e68dfd4c0d..15cb80d3d0 100644 --- a/services/platform/Dockerfile +++ b/services/platform/Dockerfile @@ -227,17 +227,22 @@ ENV NODE_ENV=production \ DO_NOT_TRACK=1 \ # Semantic value of the file-config parent path inside the convex # container. Platform forces this at push time in docker-entrypoint.sh - # (to tombstone any stale host-side `.env` value). Convex derives the - # sub-dirs (agents/workflows/integrations/providers) from it. + # (to tombstone any stale host-side `.env` value). Under the org-first + # layout, every per-domain config dir is derived as + # $TALE_CONFIG_DIR/// — e.g. + # /app/data/default/agents/, /app/data/default/providers/, etc. + # The previous per-domain env vars (AGENTS_DIR, …) are no longer + # honored; the entrypoint actively purges them from Convex on every + # boot. 
TALE_CONFIG_DIR=/app/data \ # Read-only builtin catalog baked into the convex image (see # services/convex/Dockerfile). Declared here because Convex Node # actions only see env vars that this container pushes to Convex's # deployment env via the entrypoint's `convex env set` loop — even # though the path points at files inside the *convex* container. - # Per-domain catalogs are derived as $TALE_CONFIG_BUILTIN_DIR// - # by services/platform/convex/organizations/scaffold.ts (mirrors the - # $TALE_CONFIG_DIR// pattern used for the writable side). + # Per-org catalogs live at $TALE_CONFIG_BUILTIN_DIR///; + # `default` is the canonical template. See + # services/platform/convex/organizations/scaffold.ts. TALE_CONFIG_BUILTIN_DIR=/app/builtin COPY --from=pruner --chown=app:app /app/services/platform/dist ./dist diff --git a/services/platform/convex/_generated/api.d.ts b/services/platform/convex/_generated/api.d.ts index 6b557cb4f2..fa603e7e06 100644 --- a/services/platform/convex/_generated/api.d.ts +++ b/services/platform/convex/_generated/api.d.ts @@ -461,6 +461,7 @@ import type * as lib_helpers_audit_hash from "../lib/helpers/audit_hash.js"; import type * as lib_helpers_build_audit_context from "../lib/helpers/build_audit_context.js"; import type * as lib_helpers_count_items_in_org from "../lib/helpers/count_items_in_org.js"; import type * as lib_helpers_has_records_in_org from "../lib/helpers/has_records_in_org.js"; +import type * as lib_helpers_org_slug from "../lib/helpers/org_slug.js"; import type * as lib_helpers_pii_hash from "../lib/helpers/pii_hash.js"; import type * as lib_helpers_public_storage_url from "../lib/helpers/public_storage_url.js"; import type * as lib_helpers_rag_config from "../lib/helpers/rag_config.js"; @@ -558,7 +559,6 @@ import type * as migrations_backfill_workflow_schedules from "../migrations/back import type * as migrations_merge_audit_retention from "../migrations/merge_audit_retention.js"; import type * as 
migrations_migrate_org_creators from "../migrations/migrate_org_creators.js"; import type * as migrations_remove_deprecated_llm_fields from "../migrations/remove_deprecated_llm_fields.js"; -import type * as migrations_rename_org_slug from "../migrations/rename_org_slug.js"; import type * as migrations_seed_applied_bounds from "../migrations/seed_applied_bounds.js"; import type * as migrations_split_personalization_toggle from "../migrations/split_personalization_toggle.js"; import type * as migrations_trigger_steps_to_start from "../migrations/trigger_steps_to_start.js"; @@ -1558,6 +1558,7 @@ declare const fullApi: ApiFromModules<{ "lib/helpers/build_audit_context": typeof lib_helpers_build_audit_context; "lib/helpers/count_items_in_org": typeof lib_helpers_count_items_in_org; "lib/helpers/has_records_in_org": typeof lib_helpers_has_records_in_org; + "lib/helpers/org_slug": typeof lib_helpers_org_slug; "lib/helpers/pii_hash": typeof lib_helpers_pii_hash; "lib/helpers/public_storage_url": typeof lib_helpers_public_storage_url; "lib/helpers/rag_config": typeof lib_helpers_rag_config; @@ -1655,7 +1656,6 @@ declare const fullApi: ApiFromModules<{ "migrations/merge_audit_retention": typeof migrations_merge_audit_retention; "migrations/migrate_org_creators": typeof migrations_migrate_org_creators; "migrations/remove_deprecated_llm_fields": typeof migrations_remove_deprecated_llm_fields; - "migrations/rename_org_slug": typeof migrations_rename_org_slug; "migrations/seed_applied_bounds": typeof migrations_seed_applied_bounds; "migrations/split_personalization_toggle": typeof migrations_split_personalization_toggle; "migrations/trigger_steps_to_start": typeof migrations_trigger_steps_to_start; diff --git a/services/platform/convex/agent_tools/documents/helpers/fetch_document_comparison.ts b/services/platform/convex/agent_tools/documents/helpers/fetch_document_comparison.ts index 4283558be1..626cd84796 100644 --- 
a/services/platform/convex/agent_tools/documents/helpers/fetch_document_comparison.ts +++ b/services/platform/convex/agent_tools/documents/helpers/fetch_document_comparison.ts @@ -200,6 +200,7 @@ export async function fetchDocumentComparisonByUrls( baseFileName: string, comparisonFileUrl: string, comparisonFileName: string, + orgSlug: string, maxChanges?: number, ): Promise { const [baseResponse, compResponse] = await Promise.all([ @@ -232,6 +233,7 @@ export async function fetchDocumentComparisonByUrls( method: 'POST', body: formData, timeoutMs: FETCH_TIMEOUT_MS, + orgSlug, }); if (!response.ok) { diff --git a/services/platform/convex/agent_tools/rag/query_rag_context.ts b/services/platform/convex/agent_tools/rag/query_rag_context.ts index 84cd25679c..ea10b46eb6 100644 --- a/services/platform/convex/agent_tools/rag/query_rag_context.ts +++ b/services/platform/convex/agent_tools/rag/query_rag_context.ts @@ -130,6 +130,12 @@ export interface RagContextResult { export interface RagContextOptions { /** File storage IDs to scope the search to */ fileIds?: string[]; + /** + * Org slug for the X-Tale-Org header. Required by the RAG service's + * `/api/v1/search` endpoint (it picks the org's provider catalog to + * embed the query). Omitting will yield HTTP 400. 
+ */ + orgSlug: string; } /** @@ -197,6 +203,7 @@ export async function queryRagContext( method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify(requestPayload), + orgSlug: options?.orgSlug, signal: fetchSignal, }); diff --git a/services/platform/convex/agent_tools/rag/rag_search_tool.ts b/services/platform/convex/agent_tools/rag/rag_search_tool.ts index c6793d9590..e6f8da1884 100644 --- a/services/platform/convex/agent_tools/rag/rag_search_tool.ts +++ b/services/platform/convex/agent_tools/rag/rag_search_tool.ts @@ -21,6 +21,7 @@ import { fetchJson } from '../../../lib/utils/type-cast-helpers'; import { internal } from '../../_generated/api'; import { stripReservedPromptTags } from '../../lib/agent_response/sanitize_prompt'; import { createDebugLog } from '../../lib/debug_log'; +import { orgSlugFromId } from '../../lib/helpers/org_slug'; import { ragFetch } from '../../lib/helpers/rag_config'; import { toId } from '../../lib/type_cast_helpers'; import { wrapUntrusted } from '../../lib/untrusted_content'; @@ -275,8 +276,10 @@ RESPONSE (list_indexed): chunkEnd: end, }); + const retrieveOrgSlug = await orgSlugFromId(ctx, orgIdRetrieve); const response = await ragFetch( `/api/v1/documents/${encodeURIComponent(args.fileId)}/content?return_chunks=true&chunk_start=${start}&chunk_end=${end}`, + { orgSlug: retrieveOrgSlug }, ); if (!response.ok) { @@ -434,11 +437,17 @@ RESPONSE (list_indexed): }); try { + const orgIdForSearch = ctx.organizationId; + if (!orgIdForSearch) { + throw new Error('rag_search requires organizationId in ToolCtx.'); + } + const searchOrgSlug = await orgSlugFromId(ctx, orgIdForSearch); const response = await ragFetch('/api/v1/search', { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify(payload), timeoutMs: SEARCH_TIMEOUT_MS, + orgSlug: searchOrgSlug, }); if (!response.ok) { diff --git a/services/platform/convex/agent_tools/web/helpers/query_web_context.ts 
b/services/platform/convex/agent_tools/web/helpers/query_web_context.ts index 35b8241481..ae155c7e52 100644 --- a/services/platform/convex/agent_tools/web/helpers/query_web_context.ts +++ b/services/platform/convex/agent_tools/web/helpers/query_web_context.ts @@ -10,6 +10,7 @@ import type { ActionCtx } from '../../../_generated/server'; import { createDebugLog } from '../../../lib/debug_log'; +import { orgSlugFromId } from '../../../lib/helpers/org_slug'; import { formatWebResults } from './format_web_results'; import { getCrawlerServiceUrl } from './get_crawler_service_url'; @@ -58,11 +59,12 @@ export interface WebContextResult { * @returns Formatted context with citation metadata, or undefined if no results / on failure */ export async function queryWebContext( - _ctx: ActionCtx, - _organizationId: string, + ctx: ActionCtx, + organizationId: string, query: string, limit = DEFAULT_LIMIT, ): Promise { + const orgSlug = await orgSlugFromId(ctx, organizationId); try { debugLog('Querying web context', { query: query.slice(0, 100), @@ -79,7 +81,10 @@ export async function queryWebContext( const crawlerUrl = getCrawlerServiceUrl(); const response = await fetch(`${crawlerUrl}/api/v1/search`, { method: 'POST', - headers: { 'Content-Type': 'application/json' }, + headers: { + 'Content-Type': 'application/json', + 'x-tale-org': orgSlug, + }, body: JSON.stringify({ query, limit, diff --git a/services/platform/convex/agent_tools/web/helpers/search_pages.ts b/services/platform/convex/agent_tools/web/helpers/search_pages.ts index 092fc4989a..24cb608dda 100644 --- a/services/platform/convex/agent_tools/web/helpers/search_pages.ts +++ b/services/platform/convex/agent_tools/web/helpers/search_pages.ts @@ -9,6 +9,7 @@ import type { ToolCtx } from '@convex-dev/agent'; import { internal } from '../../../_generated/api'; import { createDebugLog } from '../../../lib/debug_log'; +import { orgSlugFromId } from '../../../lib/helpers/org_slug'; import { formatWebResults } from 
'./format_web_results'; import { formatWebsiteSummaries } from './format_website_summaries'; import { getCrawlerServiceUrl } from './get_crawler_service_url'; @@ -42,6 +43,7 @@ export function isValidDomain(domain: string): boolean { async function fetchSearch( crawlerUrl: string, + orgSlug: string, query: string, domain?: string, ): Promise { @@ -51,7 +53,10 @@ async function fetchSearch( const response = await fetch(endpoint, { method: 'POST', - headers: { 'Content-Type': 'application/json' }, + headers: { + 'Content-Type': 'application/json', + 'x-tale-org': orgSlug, + }, body: JSON.stringify({ query, limit: DEFAULT_LIMIT, @@ -114,7 +119,11 @@ export async function searchPages( } const crawlerUrl = getCrawlerServiceUrl(); - let data = await fetchSearch(crawlerUrl, args.query, validDomain); + if (!ctx.organizationId) { + throw new Error('search_pages requires organizationId in ToolCtx.'); + } + const orgSlug = await orgSlugFromId(ctx, ctx.organizationId); + let data = await fetchSearch(crawlerUrl, orgSlug, args.query, validDomain); let results = data.results; // Fallback to global search if domain-scoped search returns no results @@ -124,7 +133,7 @@ export async function searchPages( query: args.query, domain: validDomain, }); - data = await fetchSearch(crawlerUrl, args.query); + data = await fetchSearch(crawlerUrl, orgSlug, args.query); results = data.results; domainFallback = true; } diff --git a/services/platform/convex/agents/file_utils.ts b/services/platform/convex/agents/file_utils.ts index dbe0cfe7f7..b6ea26927c 100644 --- a/services/platform/convex/agents/file_utils.ts +++ b/services/platform/convex/agents/file_utils.ts @@ -50,7 +50,7 @@ export interface AgentJsonConfig { workflows?: string[]; /** * Slugs of skills available to this agent — a hard allowlist. Each slug - * references a `${SKILLS_DIR}///SKILL.md` bundle. Empty or + * references a `${TALE_CONFIG_DIR}//skills//SKILL.md` bundle. 
Empty or * absent means the agent has zero skills available; there is no implicit * "all org skills" fallback. At chat-turn start, `buildSkillContext` loads * only the intersection of this list with the org's actual skills; slugs diff --git a/services/platform/convex/branding/file_actions.ts b/services/platform/convex/branding/file_actions.ts index a10f5369be..5eb6dd94cc 100644 --- a/services/platform/convex/branding/file_actions.ts +++ b/services/platform/convex/branding/file_actions.ts @@ -29,6 +29,7 @@ import { atomicWrite, atomicWriteBuffer, generateHistoryTimestamp, + errnoCode, pruneHistory, readFileSafe, readJsonFile, @@ -293,12 +294,21 @@ export const resetBranding = action({ try { const entries = await readdir(imagesDir); await Promise.all( - entries.map((entry) => - unlink(path.join(imagesDir, entry)).catch(() => {}), - ), + entries.map((entry) => { + const file = path.join(imagesDir, entry); + return unlink(file).catch((err) => { + // Tolerate ENOENT (race with another deleter) and log + // everything else — silent unlink failures hide permission + // bugs that leak stale branding images. 
+ if (errnoCode(err) === 'ENOENT') return; + console.warn(`[resetBranding] unlink ${file} failed:`, err); + }); + }), ); - } catch { - // Directory may not exist + } catch (err) { + if (errnoCode(err) !== 'ENOENT') { + console.warn(`[resetBranding] readdir ${imagesDir} failed:`, err); + } } return null; diff --git a/services/platform/convex/documents/compare_documents.ts b/services/platform/convex/documents/compare_documents.ts index 9a5507a94f..5bc59da3c2 100644 --- a/services/platform/convex/documents/compare_documents.ts +++ b/services/platform/convex/documents/compare_documents.ts @@ -4,6 +4,7 @@ import { internal } from '../_generated/api'; import { action } from '../_generated/server'; import { fetchDocumentComparisonByUrls } from '../agent_tools/documents/helpers/fetch_document_comparison'; import { authComponent } from '../auth'; +import { orgSlugFromId } from '../lib/helpers/org_slug'; import { toId } from '../lib/type_cast_helpers'; export const compareDocuments = action({ @@ -61,11 +62,13 @@ export const compareDocuments = action({ resolveStorageUrl(ctx, args.comparisonStorageId), ]); + const orgSlug = await orgSlugFromId(ctx, args.organizationId); return await fetchDocumentComparisonByUrls( baseFileUrl, args.baseFileName, compFileUrl, args.comparisonFileName, + orgSlug, ); }, }); diff --git a/services/platform/convex/file_metadata/transcribe_audio.ts b/services/platform/convex/file_metadata/transcribe_audio.ts index 7b95c3f95b..7eb0ef96ac 100644 --- a/services/platform/convex/file_metadata/transcribe_audio.ts +++ b/services/platform/convex/file_metadata/transcribe_audio.ts @@ -8,6 +8,7 @@ import type { ActionCtx } from '../_generated/server'; import { internalAction } from '../_generated/server'; import { estimateTranscriptionCostCents } from '../governance/cost_estimation'; import { classifyError } from '../lib/error_classification'; +import { orgSlugFromId } from '../lib/helpers/org_slug'; import type { ResolvedModelData } from 
'../providers/resolve_model'; import { resolveTranscriptionModel } from '../providers/resolve_model'; import { uploadFile } from '../workflow_engine/action_defs/rag/helpers/upload_file_direct'; @@ -162,6 +163,7 @@ async function indexTranscriptToRag( transcript: string; chunkCount: number; requestId: string; + orgSlug: string; }, ): Promise { if (args.transcript.length === 0) return; @@ -201,6 +203,7 @@ async function indexTranscriptToRag( originalAudioContentType: args.audioContentType, chunkCount: args.chunkCount, }, + orgSlug: args.orgSlug, }); await ctx.runMutation( internal.file_metadata.internal_mutations.updateFileTranscription, @@ -371,6 +374,7 @@ export const transcribeAudio = internalAction({ // transcript was cached from). Duplicates content in RAG but // keeps per-upload citation identity correct; embeddings cost // is tiny compared to the Whisper call we just skipped. + const cachedOrgSlug = await orgSlugFromId(ctx, args.organizationId); await indexTranscriptToRag(ctx, { storageId: args.storageId, fileName: args.fileName, @@ -378,6 +382,7 @@ export const transcribeAudio = internalAction({ transcript: cached.transcript ?? 
'', chunkCount: 0, requestId, + orgSlug: cachedOrgSlug, }); return null; } @@ -535,6 +540,7 @@ export const transcribeAudio = internalAction({ ); } + const indexOrgSlug = await orgSlugFromId(ctx, args.organizationId); await indexTranscriptToRag(ctx, { storageId: args.storageId, fileName: args.fileName, @@ -542,6 +548,7 @@ export const transcribeAudio = internalAction({ transcript: fullTranscript, chunkCount: chunks.length, requestId, + orgSlug: indexOrgSlug, }); return null; diff --git a/services/platform/convex/governance/mutations.ts b/services/platform/convex/governance/mutations.ts index c66e92d277..dfb6f65986 100644 --- a/services/platform/convex/governance/mutations.ts +++ b/services/platform/convex/governance/mutations.ts @@ -195,7 +195,7 @@ export const upsertPolicy = mutation({ throw new ConvexError({ code: 'use_action', message: - 'Use governance/retention_actions.upsertRetentionPolicyAction for retention_policy. The bounds file at $TALE_CONFIG_DIR/retention/{orgSlug}.json must be read before validation.', + 'Use governance/retention_actions.upsertRetentionPolicyAction for retention_policy. The per-org bounds file at $TALE_CONFIG_DIR//retention.json must be read before validation.', }); } diff --git a/services/platform/convex/governance/retention_actions.ts b/services/platform/convex/governance/retention_actions.ts index c86ef5d146..359b71e370 100644 --- a/services/platform/convex/governance/retention_actions.ts +++ b/services/platform/convex/governance/retention_actions.ts @@ -5,9 +5,10 @@ * delegated to `internal.lib.config_store.actions` via `ctx.runAction`. * * Why actions and not a query: - * - Bounds live in `$TALE_CONFIG_DIR/retention/{orgSlug}.json`. V8 - * queries/mutations cannot read fs and cannot await a Node action - * inline. Only V8 actions can `ctx.runAction(internal nodeAction)`. + * - Bounds live in `$TALE_CONFIG_DIR//retention.json` under + * the org-first layout. 
V8 queries/mutations cannot read fs and + * cannot await a Node action inline. Only V8 actions can + * `ctx.runAction(internal nodeAction)`. * - Bounds change rarely (operator edits the file or env), so losing * query reactivity is acceptable. The frontend uses TanStack Query * to one-shot fetch on editor open. diff --git a/services/platform/convex/governance/retention_bounds_proposal.ts b/services/platform/convex/governance/retention_bounds_proposal.ts index b4975f2eee..c160c8bda9 100644 --- a/services/platform/convex/governance/retention_bounds_proposal.ts +++ b/services/platform/convex/governance/retention_bounds_proposal.ts @@ -1,8 +1,9 @@ /** * Public V8 actions for the operator-side retention bounds proposal - * gate. The JSON file under `$TALE_CONFIG_DIR/retention/{orgSlug}.json` - * (and `TALE_RETENTION_*` env tightening) are no longer directives — - * they're proposals. Cleanup uses `retentionAppliedBounds.appliedBounds`, + * gate. The JSON file under `$TALE_CONFIG_DIR//retention.json` + * (org-first layout) and `TALE_RETENTION_*` env tightening are no + * longer directives — they're proposals. Cleanup uses + * `retentionAppliedBounds.appliedBounds`, * which only changes when an admin clicks Apply here. * * Three actions: diff --git a/services/platform/convex/governance/retention_floors.ts b/services/platform/convex/governance/retention_floors.ts index f34802c0c8..ab789a20e9 100644 --- a/services/platform/convex/governance/retention_floors.ts +++ b/services/platform/convex/governance/retention_floors.ts @@ -4,8 +4,9 @@ * importable from V8 mutations / queries / actions. * * Resolution order: - * 1. **Per-org file** at `$TALE_CONFIG_DIR/retention/{orgSlug}.json` - * provides the baseline `{ min, max, default }` per category. The + * 1. **Per-org file** at `$TALE_CONFIG_DIR//retention.json` + * (org-first layout) provides the baseline `{ min, max, default }` + * per category. The * file is the canonical source of truth (no in-code fallback). 
* Loading the file is the caller's responsibility — Node-side * callers (cleanup action) import the store directly; V8-side diff --git a/services/platform/convex/integrations/credentials_schema.ts b/services/platform/convex/integrations/credentials_schema.ts index 3bce59e10a..cdcd1753b8 100644 --- a/services/platform/convex/integrations/credentials_schema.ts +++ b/services/platform/convex/integrations/credentials_schema.ts @@ -7,8 +7,9 @@ import { jsonRecordValidator } from '../lib/validators/json'; * Slim credentials table for installed integrations. * * Integration definitions (operations, connector code, config) live in filesystem - * files under INTEGRATIONS_DIR. This table stores only per-installation runtime - * data: encrypted credentials, status, health metrics, and icon storage. + * files under `$TALE_CONFIG_DIR//integrations//`. This table + * stores only per-installation runtime data: encrypted credentials, status, + * health metrics, and icon storage. * * The `slug` field matches the integration directory name (the canonical identifier). */ diff --git a/services/platform/convex/integrations/load_integration.ts b/services/platform/convex/integrations/load_integration.ts index 665d1a4ed2..675510291f 100644 --- a/services/platform/convex/integrations/load_integration.ts +++ b/services/platform/convex/integrations/load_integration.ts @@ -4,7 +4,7 @@ * Unified integration loader. * * Loads integration data from two sources: - * 1. File system (INTEGRATIONS_DIR): config.json + connector.ts + * 1. File system (`$TALE_CONFIG_DIR//integrations//`): config.json + connector.ts * 2. 
Database (integrationCredentials table): encrypted credentials, status, health * * Merges them into a `LoadedIntegration` object that matches the shape consumers diff --git a/services/platform/convex/lib/agent_response/generate_response.ts b/services/platform/convex/lib/agent_response/generate_response.ts index 24d9c35fe1..edfb61baa2 100644 --- a/services/platform/convex/lib/agent_response/generate_response.ts +++ b/services/platform/convex/lib/agent_response/generate_response.ts @@ -54,6 +54,7 @@ import { RECOVERY_TIMEOUT_MS, estimateTokens, } from '../context_management'; +import { orgSlugFromId } from '../helpers/org_slug'; // Artifacts module removed — workspace context is discoverable via the // `file_list` tool. We keep the call sites but route them through this // no-op shim so the prompt-builder API surface stays intact. @@ -703,13 +704,14 @@ export async function generateAgentResponse( if (accessibleFileIds.length === 0) { debugLog('No accessible RAG documents, skipping knowledge context'); } else { + const orgSlug = await orgSlugFromId(ctx, organizationId); knowledgeContextPromise = queryRagContext( promptMessage, undefined, undefined, undefined, undefined, - { fileIds: accessibleFileIds }, + { fileIds: accessibleFileIds, orgSlug }, ); debugLog('Knowledge context query started', { threadId, diff --git a/services/platform/convex/lib/config_store/actions.ts b/services/platform/convex/lib/config_store/actions.ts index 24adaafa1f..9e784f24d0 100644 --- a/services/platform/convex/lib/config_store/actions.ts +++ b/services/platform/convex/lib/config_store/actions.ts @@ -24,7 +24,6 @@ import { createFileConfigStore } from './store'; const retentionStore = createFileConfigStore( 'retention', retentionDefaultsConfigSchema, - { orgFirst: true }, ); export const readRetentionConfig = internalAction({ diff --git a/services/platform/convex/lib/config_store/store.test.ts b/services/platform/convex/lib/config_store/store.test.ts index 6b82dfb7ca..46857c5661 100644 --- 
a/services/platform/convex/lib/config_store/store.test.ts +++ b/services/platform/convex/lib/config_store/store.test.ts @@ -28,6 +28,18 @@ afterEach(async () => { await rm(tmpRoot, { recursive: true, force: true }); }); +// Org-first layout: each org's area file lives at +// `//.json`. +async function writeOrgAreaFile( + orgSlug: string, + area: string, + content: string, +): Promise { + const dir = path.join(tmpRoot, orgSlug); + await mkdir(dir, { recursive: true }); + await writeFile(path.join(dir, `${area}.json`), content); +} + describe('createFileConfigStore', () => { it('read returns null for missing file', async () => { const store = createFileConfigStore('thing', testSchema); @@ -36,10 +48,9 @@ describe('createFileConfigStore', () => { }); it('read parses + validates a valid file', async () => { - const dir = path.join(tmpRoot, 'thing'); - await mkdir(dir, { recursive: true }); - await writeFile( - path.join(dir, 'default.json'), + await writeOrgAreaFile( + 'default', + 'thing', JSON.stringify({ foo: 'bar', n: 42 }), ); const store = createFileConfigStore('thing', testSchema); @@ -48,18 +59,15 @@ describe('createFileConfigStore', () => { }); it('read throws on corrupted JSON', async () => { - const dir = path.join(tmpRoot, 'thing'); - await mkdir(dir, { recursive: true }); - await writeFile(path.join(dir, 'default.json'), '{ not valid json'); + await writeOrgAreaFile('default', 'thing', '{ not valid json'); const store = createFileConfigStore('thing', testSchema); await expect(store.read('default')).rejects.toThrow(); }); it('read throws on schema violation', async () => { - const dir = path.join(tmpRoot, 'thing'); - await mkdir(dir, { recursive: true }); - await writeFile( - path.join(dir, 'default.json'), + await writeOrgAreaFile( + 'default', + 'thing', JSON.stringify({ foo: 123 }), // foo must be string ); const store = createFileConfigStore('thing', testSchema); @@ -81,22 +89,23 @@ describe('createFileConfigStore', () => { ).rejects.toThrow(/Refusing 
to write invalid/); }); - it('list returns slugs of present *.json files', async () => { - const dir = path.join(tmpRoot, 'thing'); - await mkdir(dir, { recursive: true }); - await writeFile(path.join(dir, 'default.json'), '{}'); - await writeFile(path.join(dir, 'marketing.json'), '{}'); - await writeFile(path.join(dir, 'engineering.json'), '{}'); - // Non-json + dotfile should be ignored - await writeFile(path.join(dir, 'notes.txt'), 'ignored'); - await writeFile(path.join(dir, '.history.json'), 'ignored'); + it('list returns slugs of orgs with a .json file', async () => { + await writeOrgAreaFile('default', 'thing', '{}'); + await writeOrgAreaFile('marketing', 'thing', '{}'); + await writeOrgAreaFile('engineering', 'thing', '{}'); + // An org without the area file should not appear. + await mkdir(path.join(tmpRoot, 'unrelated'), { recursive: true }); + await writeFile(path.join(tmpRoot, 'unrelated', 'other.json'), '{}'); const store = createFileConfigStore('thing', testSchema); const list = await store.list(); const slugs = list.map((e) => e.orgSlug).sort(); expect(slugs).toEqual(['default', 'engineering', 'marketing']); }); - it('list returns empty array when area dir does not exist', async () => { + it('list returns empty array when config root does not exist', async () => { + // Stub to a non-existent path so the readdir() in list() takes the + // ENOENT branch. + await rm(tmpRoot, { recursive: true, force: true }); const store = createFileConfigStore('thing', testSchema); const list = await store.list(); expect(list).toEqual([]); diff --git a/services/platform/convex/lib/config_store/store.ts b/services/platform/convex/lib/config_store/store.ts index 29b0af8294..1cbb51c6af 100644 --- a/services/platform/convex/lib/config_store/store.ts +++ b/services/platform/convex/lib/config_store/store.ts @@ -3,13 +3,9 @@ /** * Generic typed read/write helper for area-specific JSON config files. 
* - * Two layout shapes are supported, selected via `orgFirst`: - * - * - `orgFirst: false` (default): `$TALE_CONFIG_DIR/{area}/{orgSlug}.json`. - * The legacy per-area-dir shape; org slugs live in the filename. - * - `orgFirst: true`: `$TALE_CONFIG_DIR/{orgSlug}/{area}.json`. - * Used by retention under the uniform org-first layout — each org has - * one file per area, alongside its `agents/`, `providers/`, etc. + * Path shape is the uniform org-first layout: + * `$TALE_CONFIG_DIR//.json`. Each org has one file per + * area, alongside its `agents/`, `providers/`, etc. * * Wraps `readJsonFile` + `atomicWrite` so callers don't reinvent path * resolution, symlink/size guards, or atomic-rename semantics. @@ -38,7 +34,6 @@ import type { z } from 'zod/v4'; import { atomicWrite, readJsonFile, validateOrgSlug } from '../file_io'; const MAX_FILE_SIZE_BYTES = 256 * 1024; -const ORG_FILE_REGEX = /^[a-z0-9][a-z0-9_-]*\.json$/; export interface ConfigStore { /** @@ -53,16 +48,6 @@ export interface ConfigStore { list(): Promise>; } -export interface CreateFileConfigStoreOptions { - /** - * When true, paths follow the org-first layout: - * `$TALE_CONFIG_DIR//.json`. List enumerates per-org - * directories that contain `.json`. When false (default), paths - * follow `$TALE_CONFIG_DIR//.json`. - */ - orgFirst?: boolean; -} - function getConfigRoot(area: string): string { const configDir = process.env.TALE_CONFIG_DIR; if (!configDir) { @@ -76,17 +61,13 @@ function getConfigRoot(area: string): string { return configDir; } -function resolveFilePath( - area: string, - orgSlug: string, - orgFirst: boolean, -): string { +function resolveFilePath(area: string, orgSlug: string): string { if (!validateOrgSlug(orgSlug)) { throw new Error(`Invalid org slug: ${orgSlug}`); } const root = getConfigRoot(area); - const dir = orgFirst ? path.join(root, orgSlug) : path.join(root, area); - const fileName = orgFirst ? 
`${area}.json` : `${orgSlug}.json`; + const dir = path.join(root, orgSlug); + const fileName = `${area}.json`; const resolved = path.resolve(dir, fileName); const expectedPrefix = path.resolve(dir); if ( @@ -106,10 +87,7 @@ function resolveFilePath( export function createFileConfigStore( area: string, schema: z.ZodType, - options: CreateFileConfigStoreOptions = {}, ): ConfigStore { - const orgFirst = options.orgFirst ?? false; - const parse = (content: string): T => { // oxlint-disable-next-line typescript/no-unsafe-type-assertion -- raw JSON before Zod validation const parsed = JSON.parse(content) as unknown; @@ -122,17 +100,16 @@ export function createFileConfigStore( return { async read(orgSlug) { - const filePath = resolveFilePath(area, orgSlug, orgFirst); + const filePath = resolveFilePath(area, orgSlug); const result = await readJsonFile(filePath, MAX_FILE_SIZE_BYTES, parse); if (result.ok) return result.data; if (result.error === 'not_found') return null; - const display = orgFirst - ? `${orgSlug}/${area}.json` - : `${area}/${orgSlug}.json`; - throw new Error(`Failed to read ${display}: ${result.message}`); + throw new Error( + `Failed to read ${orgSlug}/${area}.json: ${result.message}`, + ); }, async write(orgSlug, value) { - const filePath = resolveFilePath(area, orgSlug, orgFirst); + const filePath = resolveFilePath(area, orgSlug); // Re-parse before write to surface schema errors to the caller // rather than silently corrupting the file. Cheap relative to fs. const parsed = schema.safeParse(value); @@ -146,44 +123,27 @@ export function createFileConfigStore( }, async list() { const root = getConfigRoot(area); - if (orgFirst) { - // Each org's file lives at `//.json`. - // Enumerate org subdirs (validated by slug regex) and probe each - // for the area file. Missing root → return empty rather than - // throwing — operator hasn't seeded anything yet. 
- let entries: string[]; - try { - entries = await readdir(root); - } catch (err) { - if (err instanceof Error && 'code' in err && err.code === 'ENOENT') { - return []; - } - throw err; - } - const results: Array<{ orgSlug: string }> = []; - for (const name of entries) { - if (!validateOrgSlug(name)) continue; - const filePath = path.join(root, name, `${area}.json`); - const info = await stat(filePath).catch(() => null); - if (info?.isFile()) results.push({ orgSlug: name }); - } - return results; - } - - // Legacy per-area-dir layout: list `*.json` files under `//`. - const dir = path.join(root, area); + // Each org's file lives at `//.json`. + // Enumerate org subdirs (validated by slug regex) and probe each + // for the area file. Missing root → return empty rather than + // throwing — operator hasn't seeded anything yet. let entries: string[]; try { - entries = await readdir(dir); + entries = await readdir(root); } catch (err) { if (err instanceof Error && 'code' in err && err.code === 'ENOENT') { return []; } throw err; } - return entries - .filter((name) => ORG_FILE_REGEX.test(name)) - .map((name) => ({ orgSlug: name.slice(0, -'.json'.length) })); + const results: Array<{ orgSlug: string }> = []; + for (const name of entries) { + if (!validateOrgSlug(name)) continue; + const filePath = path.join(root, name, `${area}.json`); + const info = await stat(filePath).catch(() => null); + if (info?.isFile()) results.push({ orgSlug: name }); + } + return results; }, }; } diff --git a/services/platform/convex/lib/helpers/org_slug.ts b/services/platform/convex/lib/helpers/org_slug.ts new file mode 100644 index 0000000000..61f8366274 --- /dev/null +++ b/services/platform/convex/lib/helpers/org_slug.ts @@ -0,0 +1,48 @@ +/** + * Look up the `slug` for an organization given its Better Auth `_id`. + * + * RAG/crawler require the slug (not the id) on the X-Tale-Org header + * because their per-org provider catalog is keyed by slug on disk at + * `$TALE_CONFIG_DIR//providers/`. 
Most Convex action contexts + * carry `organizationId`; this helper bridges to the slug. + */ + +import { getString, isRecord } from '../../../lib/utils/type-guards'; +import { components } from '../../_generated/api'; + +// Loose ctx shape so all of: Convex ActionCtx, ToolCtx, query/mutation +// ctxs can pass through. The runQuery signature on the real Convex +// types is generic over FunctionReference — using a narrower stub here +// would force every caller to cast. +type CtxWithRunQuery = { + // oxlint-disable-next-line typescript/no-explicit-any -- structural-only typing for cross-ctx compatibility + runQuery: (...args: any[]) => Promise; +}; + +/** + * Resolve an organizationId to its slug via Better Auth. + * + * Throws if no matching org row exists — callers should ensure the + * organizationId came from a verified-membership check upstream. + */ +export async function orgSlugFromId( + ctx: CtxWithRunQuery, + organizationId: string, +): Promise { + const row = await ctx.runQuery(components.betterAuth.adapter.findOne, { + model: 'organization', + where: [{ field: '_id', value: organizationId, operator: 'eq' }], + }); + if (!isRecord(row)) { + throw new Error( + `[orgSlugFromId] no organization row found for id ${JSON.stringify(organizationId)}`, + ); + } + const slug = getString(row, 'slug'); + if (!slug) { + throw new Error( + `[orgSlugFromId] organization ${JSON.stringify(organizationId)} has no slug`, + ); + } + return slug; +} diff --git a/services/platform/convex/lib/helpers/rag_config.ts b/services/platform/convex/lib/helpers/rag_config.ts index 74a817c35b..c4ab9ade96 100644 --- a/services/platform/convex/lib/helpers/rag_config.ts +++ b/services/platform/convex/lib/helpers/rag_config.ts @@ -194,20 +194,34 @@ export function _resetRagConfigForTests(): void { } /** - * Fetch against the RAG service. Sets `Authorization: Bearer ${authToken}` - * when `RAG_AUTH_TOKEN` is configured; otherwise sends no Authorization - * header (RAG runs open). 
Applies a default per-request timeout and - * accepts a path starting with `/`. + * Fetch against the RAG service. + * + * Sets `Authorization: Bearer ${authToken}` when `RAG_AUTH_TOKEN` is + * configured; otherwise sends no Authorization header (RAG runs open). + * + * `orgSlug` is required for endpoints whose service-side handler reads + * the org's provider catalog (search, generate, upload, compare-files). + * The RAG service enforces this via per-router `Depends(require_org_slug)`, + * so callers MUST pass `orgSlug` for those endpoints — a missing header + * yields 400 from RAG. Status / delete / content / compare-by-id + * endpoints are org-agnostic and accept calls without the header. + * + * When `orgSlug` is supplied, it sets `X-Tale-Org: ${orgSlug}` and + * cannot be overridden via a header in `init.headers` — preventing + * a caller from spoofing another org's identity. * * Works in both V8 and Node Convex runtimes (uses the global `fetch`). * * @example - * const res = await ragFetch('/api/v1/documents/abc', { method: 'DELETE' }); - * if (res.status === 404 || res.ok) { ...treat as success... } + * const res = await ragFetch('/api/v1/search', { + * method: 'POST', + * body: JSON.stringify(payload), + * orgSlug: 'acme', + * }); */ export async function ragFetch( path: string, - init: RequestInit & { timeoutMs?: number } = {}, + init: RequestInit & { timeoutMs?: number; orgSlug?: string } = {}, ): Promise { const { serviceUrl, authToken } = getRagConfig(); // The legacy `path.startsWith('http')` override branch was a future-bypass @@ -231,11 +245,19 @@ export async function ragFetch( if (authToken !== undefined && !headers.has('authorization')) { headers.set('authorization', `Bearer ${authToken}`); } + // When supplied, always overwrite — callers must not be able to + // spoof another org's identity by setting the header in `init.headers` + // directly. 
When omitted, the RAG endpoint either runs org-agnostic + // (status/delete/content/compare-by-id) or returns 400 from its + // `Depends(require_org_slug)` dep (search/generate/upload/compare-files). + if (init.orgSlug) { + headers.set('x-tale-org', init.orgSlug); + } const timeoutMs = init.timeoutMs ?? 10_000; const signal = init.signal ?? AbortSignal.timeout(timeoutMs); - const { timeoutMs: _drop, ...rest } = init; + const { timeoutMs: _drop, orgSlug: _dropOrg, ...rest } = init; // `redirect: 'manual'` so a compromised RAG returning a 30x to // `http://169.254.169.254/...` (cloud IMDS) doesn't get auto-followed // past the SSRF guard. Callers handle 30x as a hard error. Round-2 v15 F1. diff --git a/services/platform/convex/migrations/rename_org_slug.ts b/services/platform/convex/migrations/rename_org_slug.ts deleted file mode 100644 index 8439ef3d89..0000000000 --- a/services/platform/convex/migrations/rename_org_slug.ts +++ /dev/null @@ -1,69 +0,0 @@ -/** - * Migration: Rename all organization slugs to "default". - * - * Self-hosted Tale deployments use a single organization. - * This migration normalizes the slug to "default" for consistency. - * - * Idempotent: skips organizations that already have slug "default". - * - * Usage: - * bunx convex run migrations/rename_org_slug:renameOrgSlug - */ - -import { isRecord, getString } from '../../lib/utils/type-guards'; -import { components } from '../_generated/api'; -import { internalMutation } from '../_generated/server'; - -const TARGET_SLUG = 'default'; -const TARGET_NAME = 'Default'; - -export const renameOrgSlug = internalMutation({ - args: {}, - handler: async (ctx) => { - const result = await ctx.runQuery(components.betterAuth.adapter.findMany, { - model: 'organization', - paginationOpts: { cursor: null, numItems: 100 }, - where: [], - }); - - const orgs = - result && - typeof result === 'object' && - 'page' in result && - Array.isArray(result.page) - ? 
result.page - : []; - - let updated = 0; - let skipped = 0; - - for (const org of orgs) { - if (!isRecord(org)) continue; - - const id = getString(org, '_id'); - const slug = getString(org, 'slug'); - - if (!id) continue; - - if (slug === TARGET_SLUG) { - skipped++; - continue; - } - - await ctx.runMutation(components.betterAuth.adapter.updateMany, { - input: { - model: 'organization', - where: [{ field: '_id', value: id, operator: 'eq' }], - update: { slug: TARGET_SLUG, name: TARGET_NAME }, - }, - paginationOpts: { cursor: null, numItems: 1 }, - }); - updated++; - console.log( - `Updated organization ${id}: slug "${slug}" → "${TARGET_SLUG}"`, - ); - } - - console.log(`Done. Updated: ${updated}, Skipped: ${skipped}`); - }, -}); diff --git a/services/platform/convex/organizations/reseed_all_orgs.ts b/services/platform/convex/organizations/reseed_all_orgs.ts index 4bb196cf58..19361126b5 100644 --- a/services/platform/convex/organizations/reseed_all_orgs.ts +++ b/services/platform/convex/organizations/reseed_all_orgs.ts @@ -1,24 +1,34 @@ /** - * Operator-triggered re-seed: enumerate every org (incl. `default`) and - * re-invoke `scaffoldNewOrganization` with `override:true`. Driven by - * `tale deploy --override-all` via `bunx convex run organizations/reseed_all_orgs:reseedAllOrgsFromBuiltin`. + * Operator-triggered re-seed: enumerate every registered org (incl. + * `default`) and re-invoke `scaffoldNewOrganization({override:true, + * strict:true})`. Driven by `tale deploy --override-all` via + * `bunx convex run organizations/reseed_all_orgs:reseedAllOrgsFromBuiltin`. * * Semantics: * - Always reseeds `default` even if absent from the org list (canonical * template org). - * - Per-org try/catch: one failure logs + continues; the full result - * map is returned so the CLI surfaces succeeded/failed counts and - * exits non-zero on any failure. 
+ * - Per-org try/catch records errors into the result map AND THEN the + * action throws at the end if any org failed, so `bunx convex run` + * exits non-zero. Without the final throw, the CLI would see exit-0 + * from docker exec and report success on partial failure. + * - Per-org call uses `strict:true` so scaffold's per-domain failures + * surface as a thrown error here (instead of silent + * `console.error`-and-continue). * - Deterministic order: collected slugs are sorted before processing * so logs and partial-failure reruns are reproducible. * - Cursor-paginated org enumeration (200/page) instead of the - * 500-page-cap pattern in older backfills — avoids silently capping - * deployments with many orgs. + * 500-page-cap pattern in older backfills. + * + * Note: enumerates Better Auth `organization` rows. Filesystem-only org + * subtrees (no DB row) are intentionally skipped — `--override-all` is + * "reseed all registered orgs", not "reseed every dir on disk". * * Note: this is an ops re-runnable tool, not a one-shot migration. Lives * next to `scaffold.ts` (the thing it reinvokes), not in `migrations/`. 
*/ +import { v } from 'convex/values'; + import { getString, isRecord } from '../../lib/utils/type-guards'; import { components, internal } from '../_generated/api'; import { internalAction } from '../_generated/server'; @@ -37,6 +47,21 @@ type OrgReseedResult = export const reseedAllOrgsFromBuiltin = internalAction({ args: {}, + returns: v.object({ + total: v.number(), + succeeded: v.number(), + failed: v.number(), + results: v.array( + v.union( + v.object({ slug: v.string(), status: v.literal('ok') }), + v.object({ + slug: v.string(), + status: v.literal('error'), + error: v.string(), + }), + ), + ), + }), handler: async (ctx) => { const slugSet = new Set(['default']); @@ -79,7 +104,7 @@ export const reseedAllOrgsFromBuiltin = internalAction({ try { await ctx.runAction( internal.organizations.scaffold.scaffoldNewOrganization, - { orgSlug: slug, override: true }, + { orgSlug: slug, override: true, strict: true }, ); results.push({ slug, status: 'ok' }); console.log(`[reseedAllOrgs] reseeded "${slug}"`); @@ -96,6 +121,24 @@ export const reseedAllOrgsFromBuiltin = internalAction({ `[reseedAllOrgs] done: total=${results.length} succeeded=${succeeded} failed=${failed}`, ); + // CRITICAL: throw on any per-org failure so `bunx convex run` exits + // non-zero. The aggregated `results` are also printed to console + // above so per-org detail survives. Without this throw, the CLI + // wrapper sees exit-0 from `docker exec` and reports + // `success('Reseed complete.')` on partial failure. 
+ if (failed > 0) { + const failedSlugs = results + .filter( + (r): r is Extract => + r.status === 'error', + ) + .map((r) => `${r.slug} (${r.error.split('\n')[0]})`) + .join(', '); + throw new Error( + `reseedAllOrgs: ${failed}/${results.length} orgs failed — ${failedSlugs}`, + ); + } + return { total: results.length, succeeded, diff --git a/services/platform/convex/organizations/scaffold.ts b/services/platform/convex/organizations/scaffold.ts index e784d3b6e5..61e47391c0 100644 --- a/services/platform/convex/organizations/scaffold.ts +++ b/services/platform/convex/organizations/scaffold.ts @@ -28,6 +28,7 @@ * fail with ENOENT rather than racing the recursive delete. */ +import { randomUUID } from 'node:crypto'; import { lstat, readdir, @@ -58,6 +59,12 @@ import { resolveWorkflowsDir } from '../workflows/file_utils'; type DirResolver = (orgSlug: string) => string; +export type DomainResult = { + domain: string; + ok: boolean; + error?: string; +}; + type Domain = { name: string; resolve: DirResolver; @@ -129,7 +136,10 @@ async function pathsOverlap(a: string, b: string): Promise { const resolveReal = async (p: string): Promise => { try { return await realpath(p); - } catch { + } catch (err) { + if (errnoCode(err) !== 'ENOENT') { + console.warn('[scaffold.pathsOverlap] realpath failed:', p, err); + } return path.resolve(p); } }; @@ -222,15 +232,19 @@ async function copyTree( /** * Seed a single domain for an org. Source is `/default/` * (canonical template) when `TALE_CONFIG_BUILTIN_DIR` is set, falling back - * to `resolve('default')` for local dev. Returns true on success, false on - * skip/failure. + * to `resolve('default')` for local dev. + * + * Returns `{ok:true}` on success (including the legitimate + * "already scaffolded, skipped" case) and `{ok:false, error}` on + * real failure so the handler can surface or aggregate. Per-domain + * errors are also logged here for operator visibility. 
*/ async function seedDomain( domain: Domain, catalogRoot: string | undefined, orgSlug: string, override: boolean, -): Promise { +): Promise { const sourceDir = catalogRoot ? path.join(catalogRoot, 'default', domain.name) : domain.resolve('default'); @@ -238,24 +252,23 @@ async function seedDomain( if (catalogRoot) { // Operator-set catalog path must exist; missing = deploy misconfig - // (platform/convex image version skew). Surface in logs instead of - // silent zero-seed. + // (platform/convex image version skew). Surface in logs AND return + // an error so reseed-all-orgs can fail loudly. + let statErr: unknown; const sourceExists = await stat(sourceDir) .then(() => true) .catch((err) => { - if (errnoCode(err) === 'ENOENT') { - console.error( - `[scaffold] ${domain.name}: ${BUILTIN_ENV}=${catalogRoot} is set but ${sourceDir} does not exist; org "${orgSlug}" will receive zero seed data for this domain`, - ); - } else { - console.error( - `[scaffold] ${domain.name}: stat ${sourceDir} failed:`, - err instanceof Error ? err.message : err, - ); - } + statErr = err; return false; }); - if (!sourceExists) return; + if (!sourceExists) { + const msg = + errnoCode(statErr) === 'ENOENT' + ? `${BUILTIN_ENV}=${catalogRoot} is set but ${sourceDir} does not exist` + : `stat ${sourceDir} failed: ${statErr instanceof Error ? statErr.message : String(statErr)}`; + console.error(`[scaffold] ${domain.name}: ${msg}`); + return { domain: domain.name, ok: false, error: msg }; + } } // copy-onto-self guard: realpath-aware. 
Fires for default-org reseed @@ -265,7 +278,7 @@ async function seedDomain( console.warn( `[scaffold] ${domain.name}: source and target overlap (${sourceDir} ↔ ${targetDir}); skipping`, ); - return; + return { domain: domain.name, ok: true }; } if (!override) { @@ -274,7 +287,7 @@ async function seedDomain( console.warn( `[scaffold] ${domain.name}: target ${targetDir} already has files, skipping (use override:true to reseed)`, ); - return; + return { domain: domain.name, ok: true }; } } @@ -294,7 +307,8 @@ async function seedDomain( try { bundles = await readdir(sourceDir); } catch (err) { - if (errnoCode(err) === 'ENOENT') return; + if (errnoCode(err) === 'ENOENT') + return { domain: domain.name, ok: true }; throw err; } for (const bundleName of bundles) { @@ -302,12 +316,47 @@ async function seedDomain( if (SKIP_DIR_NAMES.has(bundleName)) continue; const bundleSrc = path.join(sourceDir, bundleName); const bundleDst = path.join(targetDir, bundleName); - const info = await lstat(bundleSrc).catch(() => null); + const info = await lstat(bundleSrc).catch((err) => { + if (errnoCode(err) !== 'ENOENT') { + console.warn( + `[scaffold] ${domain.name}: lstat ${bundleSrc} failed:`, + err, + ); + } + return null; + }); if (!info || info.isSymbolicLink() || !info.isDirectory()) continue; if (override) { - await rm(bundleDst, { recursive: true, force: true }); + // Write into a sibling staging dir then atomic-rename onto the + // target. Eliminates the "rm before copy" window where an + // interrupt would leave an empty bundle on disk. `force` dropped + // so EACCES / EBUSY surface as real errors. The cleanup-on-exit + // path below also drops the staging dir to avoid leakage. + const staging = `${bundleDst}.staging-${randomUUID().slice(0, 8)}`; + try { + await copyTree(bundleSrc, staging, /* allowSubdirs */ true); + // Best-effort old-dir removal before rename. If the old dir + // exists and is non-empty, `rename` will fail on most platforms + // — surface that. 
+ await rm(bundleDst, { recursive: true }).catch((err) => { + if (errnoCode(err) !== 'ENOENT') throw err; + }); + await rename(staging, bundleDst); + } catch (err) { + // If anything went wrong, scrub the staging dir. + await rm(staging, { recursive: true }).catch((scrubErr) => { + if (errnoCode(scrubErr) !== 'ENOENT') { + console.warn( + `[scaffold] ${domain.name}: failed to scrub staging ${staging}:`, + scrubErr, + ); + } + }); + throw err; + } + } else { + await copyTree(bundleSrc, bundleDst, /* allowSubdirs */ true); } - await copyTree(bundleSrc, bundleDst, /* allowSubdirs */ true); } } else { // 'tree' — workflows + branding. Per-file overwrite, no rm. User-only @@ -316,65 +365,120 @@ async function seedDomain( await copyTree(sourceDir, targetDir, /* allowSubdirs */ true); } } catch (err) { + const message = err instanceof Error ? err.message : String(err); console.error( `[scaffold] ${domain.name}: copy failed for org "${orgSlug}":`, - err instanceof Error ? err.message : err, + message, ); - // Continue with other domains; partial scaffolding is better than none. + return { domain: domain.name, ok: false, error: message }; } + + return { domain: domain.name, ok: true }; } /** * Retention is one JSON object per org (`/retention.json`), not a - * subtree. Special-cased outside the DOMAINS loop. + * subtree. Special-cased outside the DOMAINS loop. Returns a `DomainResult` + * shaped like seedDomain's so the handler can aggregate uniformly. + * + * Assumes `TALE_CONFIG_DIR` is set + absolute (validated by the handler). */ async function seedRetention( catalogRoot: string | undefined, + configRoot: string, orgSlug: string, override: boolean, -): Promise { +): Promise { const sourceFile = catalogRoot ? path.join(catalogRoot, 'default', 'retention.json') - : path.join(process.env.TALE_CONFIG_DIR ?? '', 'default', 'retention.json'); - const targetFile = path.join( - process.env.TALE_CONFIG_DIR ?? 
'', - orgSlug, - 'retention.json', - ); + : path.join(configRoot, 'default', 'retention.json'); + const targetFile = path.join(configRoot, orgSlug, 'retention.json'); + let statErr: unknown; const sourceExists = await stat(sourceFile) .then(() => true) .catch((err) => { - if (errnoCode(err) !== 'ENOENT') { - console.warn('[scaffold] retention: stat failed:', sourceFile, err); - } + statErr = err; return false; }); - if (!sourceExists) return; + if (!sourceExists) { + if (errnoCode(statErr) === 'ENOENT') { + // Missing catalog retention is expected in some test fixtures; treat + // as no-op (no error to propagate). + return { domain: 'retention', ok: true }; + } + const msg = `stat ${sourceFile} failed: ${statErr instanceof Error ? statErr.message : String(statErr)}`; + console.warn('[scaffold] retention:', msg); + return { domain: 'retention', ok: false, error: msg }; + } if (await pathsOverlap(sourceFile, targetFile)) { console.warn(`[scaffold] retention: source and target overlap; skipping`); - return; + return { domain: 'retention', ok: true }; } + let targetStatErr: unknown; const targetExists = await stat(targetFile) .then(() => true) - .catch(() => false); + .catch((err) => { + targetStatErr = err; + return false; + }); + if (!targetExists && errnoCode(targetStatErr) !== 'ENOENT' && targetStatErr) { + console.warn( + `[scaffold] retention: stat ${targetFile} failed:`, + targetStatErr, + ); + } if (targetExists && !override) { console.warn( `[scaffold] retention: target ${targetFile} exists, skipping (use override:true to reseed)`, ); - return; + return { domain: 'retention', ok: true }; } try { const buf = await readFile(sourceFile); await atomicWrite(targetFile, buf.toString('utf-8')); + return { domain: 'retention', ok: true }; } catch (err) { + const message = err instanceof Error ? err.message : String(err); console.error( `[scaffold] retention: copy failed for org "${orgSlug}":`, - err instanceof Error ? 
err.message : err, + message, ); + return { domain: 'retention', ok: false, error: message }; + } +} + +/** + * Best-effort opportunistic sweep of `.deleted-*` siblings older than + * 24h that survived a prior failed `rm`. Called at the top of + * `cleanupOrgFilesystem`. Errors are swallowed (the main op shouldn't + * fail because of a leftover dir we couldn't clean). + */ +const CONDEMNED_TTL_MS = 24 * 60 * 60 * 1000; +async function sweepStaleCondemnedDirs(root: string): Promise { + let entries: string[]; + try { + entries = await readdir(root); + } catch (err) { + if (errnoCode(err) === 'ENOENT') return; + throw err; + } + const now = Date.now(); + for (const name of entries) { + if (!name.startsWith('.deleted-')) continue; + const p = path.join(root, name); + const info = await lstat(p).catch(() => null); + if (!info || info.isSymbolicLink()) continue; + if (now - info.mtimeMs < CONDEMNED_TTL_MS) continue; + await rm(p, { recursive: true }).catch((err) => { + console.warn( + `[cleanupOrgFilesystem] janitor: rm ${p} failed:`, + err instanceof Error ? err.message : err, + ); + }); } } @@ -412,6 +516,12 @@ export const cleanupOrgFilesystem = internalAction({ return null; } + // Opportunistic janitor: sweep stale `.deleted-*` siblings older than + // 24h that survived a prior failed rm. Best-effort; failures only log. + await sweepStaleCondemnedDirs(root).catch((err) => { + console.warn('[cleanupOrgFilesystem] janitor sweep failed:', err); + }); + if (args.orgSlug === 'default') { console.warn( '[cleanupOrgFilesystem] refusing to delete the default org filesystem', @@ -466,8 +576,12 @@ export const cleanupOrgFilesystem = internalAction({ // Two-phase rename-then-delete. The rename is atomic within a // filesystem; any concurrent writer of the original path fails with - // ENOENT instead of racing the recursive delete. - const condemned = path.join(root, `.deleted-${args.orgSlug}-${Date.now()}`); + // ENOENT instead of racing the recursive delete. 
UUID suffix avoids + // collisions if two cleanups land in the same millisecond. + const condemned = path.join( + root, + `.deleted-${args.orgSlug}-${Date.now()}-${randomUUID().slice(0, 8)}`, + ); try { await rename(orgDir, condemned); } catch (err) { @@ -502,24 +616,78 @@ export const scaffoldNewOrganization = internalAction({ * files (idempotent org-create path). */ override: v.optional(v.boolean()), + /** + * When true, throw an aggregated error if any domain or retention + * copy failed. Used by `reseedAllOrgsFromBuiltin` so partial failures + * surface as non-zero CLI exit. + * + * When false (default), continue past per-domain failures and return + * the per-domain result map. Used by `auth.afterCreateOrganization` + * where partial-scaffold-on-org-create is preferable to blocking the + * UX. + */ + strict: v.optional(v.boolean()), }, - returns: v.null(), + returns: v.object({ + ok: v.boolean(), + skipped: v.boolean(), + results: v.array( + v.object({ + domain: v.string(), + ok: v.boolean(), + error: v.optional(v.string()), + }), + ), + }), handler: async (_ctx, args) => { if (!validateOrgSlug(args.orgSlug)) { console.warn( `[scaffoldNewOrganization] refusing invalid slug "${args.orgSlug}"`, ); - return null; + return { ok: false, skipped: true, results: [] }; + } + + // Symmetric guard to cleanupOrgFilesystem: refuse to operate on a + // non-absolute or unset config root rather than writing relative + // paths into the action's CWD. + const configRoot = process.env.TALE_CONFIG_DIR; + if (!configRoot || !path.isAbsolute(configRoot)) { + const msg = + '[scaffoldNewOrganization] TALE_CONFIG_DIR is unset or not absolute; refusing to proceed'; + console.error(msg); + if (args.strict) { + throw new Error(msg); + } + return { ok: false, skipped: true, results: [] }; } const catalogRoot = process.env[BUILTIN_ENV]; const override = args.override ?? 
false; + const results: DomainResult[] = []; for (const domain of DOMAINS) { - await seedDomain(domain, catalogRoot, args.orgSlug, override); + results.push( + await seedDomain(domain, catalogRoot, args.orgSlug, override), + ); } - await seedRetention(catalogRoot, args.orgSlug, override); + results.push( + await seedRetention(catalogRoot, configRoot, args.orgSlug, override), + ); - return null; + const failed = results.filter((r) => !r.ok); + if (failed.length > 0 && args.strict) { + const detail = failed + .map((r) => `${r.domain}: ${r.error ?? 'unknown error'}`) + .join('; '); + throw new Error( + `scaffold "${args.orgSlug}": ${failed.length}/${results.length} domains failed — ${detail}`, + ); + } + + return { + ok: failed.length === 0, + skipped: false, + results, + }; }, }); diff --git a/services/platform/convex/skills/file_actions.ts b/services/platform/convex/skills/file_actions.ts index a46b693269..fba67e2ef0 100644 --- a/services/platform/convex/skills/file_actions.ts +++ b/services/platform/convex/skills/file_actions.ts @@ -5,7 +5,7 @@ * for the runtime engine's snapshot read). * * Storage model mirrors agents/integrations: SKILL.md + bundle assets on - * disk under `${SKILLS_DIR}///`. There is NO Convex + * disk under `${TALE_CONFIG_DIR}//skills//`. There is NO Convex * DB table for skills — the file is the source of truth, team scoping and * role restriction live in YAML frontmatter, author/timestamps come from * audit_logs (see Phase 5c follow-up). 
diff --git a/services/platform/convex/workflow_engine/action_defs/document/document_action.ts b/services/platform/convex/workflow_engine/action_defs/document/document_action.ts index e13499a176..9c49f699e8 100644 --- a/services/platform/convex/workflow_engine/action_defs/document/document_action.ts +++ b/services/platform/convex/workflow_engine/action_defs/document/document_action.ts @@ -17,6 +17,7 @@ import { fetchDocumentComparisonByUrls } from '../../../agent_tools/documents/he import { fetchDocumentContent } from '../../../agent_tools/documents/helpers/fetch_document_content'; import { getDocumentEffectiveDate } from '../../../documents/transform_to_document_item'; import type { DocumentMetadata } from '../../../documents/types'; +import { orgSlugFromId } from '../../../lib/helpers/org_slug'; import { toConvexJsonRecord, toId } from '../../../lib/type_cast_helpers'; import { wrapUntrusted } from '../../../lib/untrusted_content'; import { jsonRecordValidator } from '../../../lib/validators/json'; @@ -384,11 +385,13 @@ export const documentAction: ActionDefinition = { resolveFileName(ctx, params.comparisonFileId), ]); + const compareOrgSlug = await orgSlugFromId(ctx, organizationId); return await fetchDocumentComparisonByUrls( baseFileUrl, baseFileName, compFileUrl, compFileName, + compareOrgSlug, params.maxChanges, ); } diff --git a/services/platform/convex/workflow_engine/action_defs/rag/helpers/upload_document.test.ts b/services/platform/convex/workflow_engine/action_defs/rag/helpers/upload_document.test.ts index 3cf2328a6e..33bbc326d8 100644 --- a/services/platform/convex/workflow_engine/action_defs/rag/helpers/upload_document.test.ts +++ b/services/platform/convex/workflow_engine/action_defs/rag/helpers/upload_document.test.ts @@ -16,17 +16,26 @@ const uploadFileMock = vi.mocked(uploadFile); const DEFAULT_METADATA = { fileName: 'document.pdf', contentType: 'application/pdf', + organizationId: 'org-1', }; +const DEFAULT_ORG_ROW = { _id: 'org-1', slug: 
'default' }; + function createCtx( getUrlResult: string | null = 'https://storage.example.com/file', metadataResult: Record | null = DEFAULT_METADATA, ) { + // uploadDocument issues two runQuery calls in order: + // 1. internal.file_metadata.internal_queries.getByStorageId + // 2. components.betterAuth.adapter.findOne (via orgSlugFromId) return { storage: { getUrl: vi.fn().mockResolvedValue(getUrlResult), }, - runQuery: vi.fn().mockResolvedValue(metadataResult), + runQuery: vi + .fn() + .mockResolvedValueOnce(metadataResult) + .mockResolvedValueOnce(DEFAULT_ORG_ROW), }; } diff --git a/services/platform/convex/workflow_engine/action_defs/rag/helpers/upload_document.ts b/services/platform/convex/workflow_engine/action_defs/rag/helpers/upload_document.ts index 01ae1e4726..8cb1fdf1d5 100644 --- a/services/platform/convex/workflow_engine/action_defs/rag/helpers/upload_document.ts +++ b/services/platform/convex/workflow_engine/action_defs/rag/helpers/upload_document.ts @@ -4,6 +4,7 @@ import { } from '../../../../../lib/shared/file-types'; import { internal } from '../../../../_generated/api'; import type { ActionCtx } from '../../../../_generated/server'; +import { orgSlugFromId } from '../../../../lib/helpers/org_slug'; import { toId } from '../../../../lib/type_cast_helpers'; import type { RagUploadResult } from './types'; import { uploadFile } from './upload_file_direct'; @@ -67,6 +68,8 @@ export async function uploadDocument( contentType, ); + const orgSlug = await orgSlugFromId(ctx, metadata.organizationId); + return uploadFile({ file, filename: fileName, @@ -74,5 +77,6 @@ export async function uploadDocument( fileId, metadata: options?.metadata, sync: options?.sync ?? 
false, + orgSlug, }); } diff --git a/services/platform/convex/workflow_engine/action_defs/rag/helpers/upload_file_direct.test.ts b/services/platform/convex/workflow_engine/action_defs/rag/helpers/upload_file_direct.test.ts index f382ef1cf1..6124589290 100644 --- a/services/platform/convex/workflow_engine/action_defs/rag/helpers/upload_file_direct.test.ts +++ b/services/platform/convex/workflow_engine/action_defs/rag/helpers/upload_file_direct.test.ts @@ -26,6 +26,7 @@ function defaultArgs() { filename: 'test.txt', contentType: 'text/plain', fileId: FILE_ID, + orgSlug: 'default', }; } diff --git a/services/platform/convex/workflow_engine/action_defs/rag/helpers/upload_file_direct.ts b/services/platform/convex/workflow_engine/action_defs/rag/helpers/upload_file_direct.ts index 8bfd5b386c..d3d0b5b62c 100644 --- a/services/platform/convex/workflow_engine/action_defs/rag/helpers/upload_file_direct.ts +++ b/services/platform/convex/workflow_engine/action_defs/rag/helpers/upload_file_direct.ts @@ -9,6 +9,8 @@ export interface UploadFileArgs { metadata?: Record; timeoutMs?: number; sync?: boolean; + /** Required: RAG resolves the org's provider catalog from this slug. */ + orgSlug: string; } interface RagApiUploadResponse { @@ -32,6 +34,7 @@ export async function uploadFile({ metadata, timeoutMs, sync = false, + orgSlug, }: UploadFileArgs): Promise { const effectiveTimeout = timeoutMs ?? (sync ? 
SYNC_TIMEOUT_MS : DEFAULT_TIMEOUT_MS); @@ -53,6 +56,7 @@ export async function uploadFile({ method: 'POST', body: formData, timeoutMs: effectiveTimeout, + orgSlug, }); if (!response.ok) { diff --git a/services/platform/convex/workflow_engine/action_defs/rag/rag_action.ts b/services/platform/convex/workflow_engine/action_defs/rag/rag_action.ts index 021ff255d3..7a41435aa7 100644 --- a/services/platform/convex/workflow_engine/action_defs/rag/rag_action.ts +++ b/services/platform/convex/workflow_engine/action_defs/rag/rag_action.ts @@ -5,6 +5,7 @@ import { internal } from '../../../_generated/api'; import type { ActionCtx } from '../../../_generated/server'; import type { SearchResponse } from '../../../agent_tools/rag/format_search_results'; import { fetchDocumentChunks } from '../../../agent_tools/rag/helpers/fetch_document_chunks'; +import { orgSlugFromId } from '../../../lib/helpers/org_slug'; import { ragFetch } from '../../../lib/helpers/rag_config'; import { toId } from '../../../lib/type_cast_helpers'; import { wrapUntrusted } from '../../../lib/untrusted_content'; @@ -100,7 +101,12 @@ export const ragAction: ActionDefinition = { // fileIds must be verified against the workflow's organizationId // before reaching the RAG service, which would otherwise serve // any file by id regardless of tenant. - await assertStorageIdsInOrg(ctx, _variables, migratedParams.fileIds); + const orgId = await assertStorageIdsInOrg( + ctx, + _variables, + migratedParams.fileIds, + ); + const orgSlug = await orgSlugFromId(ctx, orgId); try { const response = await ragFetch('/api/v1/search', { method: 'POST', @@ -113,6 +119,7 @@ export const ragAction: ActionDefinition = { include_metadata: true, }), timeoutMs: SEARCH_TIMEOUT_MS, + orgSlug, }); if (!response.ok) { @@ -195,7 +202,7 @@ async function assertStorageIdsInOrg( ctx: ActionCtx, variables: Record, storageIds: string[], -): Promise { +): Promise { const organizationId = typeof variables.organizationId === 'string' ? 
variables.organizationId @@ -205,7 +212,7 @@ async function assertStorageIdsInOrg( 'organizationId is required in workflow variables for RAG operations', ); } - if (storageIds.length === 0) return; + if (storageIds.length === 0) return organizationId; const ownsStorage = await ctx.runQuery( internal.documents.internal_queries.verifyStorageIdsBelongToOrg, { organizationId, storageIds }, @@ -213,6 +220,7 @@ async function assertStorageIdsInOrg( if (!ownsStorage) { throw new Error('One or more file ids do not belong to this organization'); } + return organizationId; } /** diff --git a/services/platform/docker-entrypoint.sh b/services/platform/docker-entrypoint.sh index 0ac3c78b6d..230bfcf1b3 100644 --- a/services/platform/docker-entrypoint.sh +++ b/services/platform/docker-entrypoint.sh @@ -243,9 +243,15 @@ deploy_convex_functions() { ) for legacy in "${LEGACY_DOMAIN_VARS[@]}"; do if [ "${CONVEX_ENV_MAP[$legacy]+_}" ]; then - if bunx convex env remove "$legacy" --url "$CONVEX_URL" --admin-key "$ADMIN_KEY" >/dev/null 2>&1; then + # Match the surrounding env-sync loop's aggregation pattern: track + # failures in `failed_vars` later, never swallow with `>/dev/null` + # so a real CLI error doesn't leave the legacy var lingering in + # Convex without an operator-visible signal. + if bunx convex env remove "$legacy" --url "$CONVEX_URL" --admin-key "$ADMIN_KEY" >/dev/null; then echo " ✓ $legacy removed (no longer honored under org-first layout)" unset 'CONVEX_ENV_MAP[$legacy]' + else + log_warn "Failed to remove legacy env var $legacy from Convex; will retry on next boot" fi fi done @@ -299,7 +305,7 @@ deploy_convex_functions() { # 5b. Remove vars from Convex that are no longer set on the platform. # Without this, env vars unset on the platform side linger in Convex. - # Skip orphans we already removed above (in the ORPHAN_DERIVED block). + # Skip orphans we already removed above (in the LEGACY_DOMAIN_VARS block). 
for convex_var in "${!CONVEX_ENV_MAP[@]}"; do local found=false local sv diff --git a/services/platform/vite-plugins/serve-branding-images.ts b/services/platform/vite-plugins/serve-branding-images.ts index e05fe95e4d..bbc6c7d9a7 100644 --- a/services/platform/vite-plugins/serve-branding-images.ts +++ b/services/platform/vite-plugins/serve-branding-images.ts @@ -52,7 +52,24 @@ export function serveBrandingImages(): Plugin { res.setHeader('Cache-Control', 'no-cache, must-revalidate'); res.end(data); }) - .catch(() => { + .catch((err: unknown) => { + // ENOENT is the expected miss — fall through to the next + // middleware so Vite's static handler / 404 page kicks in. + // Other errors (EACCES, EISDIR) are worth a warning so a + // misconfigured branding dir doesn't silently 404 forever. + const code = + err !== null && + typeof err === 'object' && + 'code' in err && + typeof err.code === 'string' + ? err.code + : undefined; + if (code !== 'ENOENT') { + console.warn( + `[serve-branding-images] readFile ${filePath} failed:`, + err, + ); + } next(); }); }); diff --git a/services/rag/app/auth.py b/services/rag/app/auth.py index 8f5dd58818..dc46007a46 100644 --- a/services/rag/app/auth.py +++ b/services/rag/app/auth.py @@ -12,12 +12,18 @@ """ import hmac +import re from fastapi import Header, HTTPException, status from loguru import logger from .config import settings +# Org-slug regex aligned with services/platform/convex/lib/file_io.ts:25 +# plus the literal "default". Capped at 64 chars to match the platform's +# migrate-script regex (script.sh:134). Keep these in sync. +_ORG_SLUG_RE = re.compile(r"^[a-z0-9][a-z0-9_-]{0,63}$") + def _extract_bearer(header_value: str | None) -> str | None: if not header_value: @@ -53,6 +59,29 @@ async def verify_auth_token( ) +async def require_org_slug( + x_tale_org: str | None = Header(default=None), +) -> str: + """FastAPI dependency: extract + validate the `X-Tale-Org` header. + + Every protected RAG endpoint requires this header. 
Caller-supplied; + the platform sets it from the authenticated user's selected org. + No fallback to `default` — a missing header is a caller bug that we + surface as 400 rather than silently serve another org's providers. + """ + if not x_tale_org: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="missing X-Tale-Org header", + ) + if not _ORG_SLUG_RE.match(x_tale_org): + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="invalid X-Tale-Org header", + ) + return x_tale_org + + def warn_if_auth_disabled() -> None: """Emit a loud SECURITY warning when `RAG_AUTH_TOKEN` is unset. diff --git a/services/rag/app/config.py b/services/rag/app/config.py index ec3a9504fa..9d081e894e 100644 --- a/services/rag/app/config.py +++ b/services/rag/app/config.py @@ -75,10 +75,10 @@ def get_database_url(self) -> str: return self.database_url raise ValueError("RAG_DATABASE_URL must be set in environment") - def get_llm_config(self) -> dict: - """Get LLM configuration from provider files.""" - base_url, api_key, model = self.get_chat_config() - emb_base_url, emb_api_key, embedding_model, _dims = self.get_embedding_config() + def get_llm_config(self, org_slug: str) -> dict: + """Get LLM configuration for an org from provider files.""" + base_url, api_key, model = self.get_chat_config(org_slug) + emb_base_url, emb_api_key, embedding_model, _dims = self.get_embedding_config(org_slug) config: dict = { "provider": "openai", diff --git a/services/rag/app/routers/documents.py b/services/rag/app/routers/documents.py index bc77df3de2..a59b791ef1 100644 --- a/services/rag/app/routers/documents.py +++ b/services/rag/app/routers/documents.py @@ -6,11 +6,21 @@ from typing import Any from uuid import uuid4 -from fastapi import APIRouter, File, Form, HTTPException, Query, UploadFile, status +from fastapi import ( + APIRouter, + Depends, + File, + Form, + HTTPException, + Query, + UploadFile, + status, +) from fastapi.background import BackgroundTasks 
from loguru import logger from tale_shared.db import acquire_with_retry +from ..auth import require_org_slug from ..config import settings from ..models import ( DocumentAddResponse, @@ -195,6 +205,7 @@ def _sanitize_error(exc: Exception, max_length: int = 500) -> str: async def _background_ingest( + org_slug: str, content: bytes, file_id: str, filename: str, @@ -204,6 +215,7 @@ async def _background_ingest( """Run document ingestion in the background, recording status in documents table.""" try: result = await rag_service.add_document( + org_slug, content=content, file_id=file_id, filename=filename, @@ -325,6 +337,7 @@ def _ms_timestamp_to_datetime(value: Any) -> dt.datetime | None: @router.post("/documents/upload", response_model=DocumentAddResponse) async def upload_document( background_tasks: BackgroundTasks, + org_slug: str = Depends(require_org_slug), file: UploadFile = _FILE_UPLOAD, metadata: str | None = Form(None, description="Optional metadata as JSON string"), file_id: str | None = Form(None, description="Optional custom file ID"), @@ -370,6 +383,7 @@ async def upload_document( if sync: try: result = await rag_service.add_document( + org_slug, content=file_bytes, file_id=doc_id, filename=file.filename, @@ -397,6 +411,7 @@ async def upload_document( background_tasks.add_task( _background_ingest, + org_slug, file_bytes, doc_id, file.filename, @@ -530,6 +545,7 @@ async def compare_documents(request: DocumentCompareRequest): @router.post("/documents/compare-files", response_model=DocumentCompareResponse) async def compare_files( + org_slug: str = Depends(require_org_slug), base_file: UploadFile = _BASE_FILE, comparison_file: UploadFile = _COMPARISON_FILE, max_changes: int = _MAX_CHANGES_FORM, @@ -551,6 +567,7 @@ async def compare_files( try: result = await rag_service.compare_files( + org_slug, base_bytes, base_file.filename, comparison_bytes, diff --git a/services/rag/app/routers/search.py b/services/rag/app/routers/search.py index 1993199163..4a99031154 
100644 --- a/services/rag/app/routers/search.py +++ b/services/rag/app/routers/search.py @@ -2,9 +2,10 @@ import time -from fastapi import APIRouter, HTTPException, status +from fastapi import APIRouter, Depends, HTTPException, status from loguru import logger +from ..auth import require_org_slug from ..models import ( GenerateRequest, GenerateResponse, @@ -19,12 +20,16 @@ @router.post("/search", response_model=QueryResponse) -async def search(request: QueryRequest): +async def search( + request: QueryRequest, + org_slug: str = Depends(require_org_slug), +): """Search the knowledge base using hybrid BM25 + vector search.""" try: start_time = time.time() results = await rag_service.search( + org_slug, query=request.query, top_k=request.top_k, similarity_threshold=request.similarity_threshold, @@ -73,7 +78,10 @@ async def search(request: QueryRequest): @router.post("/generate", response_model=GenerateResponse) -async def generate(request: GenerateRequest): +async def generate( + request: GenerateRequest, + org_slug: str = Depends(require_org_slug), +): """Generate a response using RAG. Retrieves top 30 most relevant chunks, uses temperature 0.3 @@ -81,6 +89,7 @@ async def generate(request: GenerateRequest): """ try: result = await rag_service.generate( + org_slug, query=request.query, file_ids=request.file_ids, ) diff --git a/services/rag/app/services/rag_service.py b/services/rag/app/services/rag_service.py index ca93ba275c..a804e468f8 100644 --- a/services/rag/app/services/rag_service.py +++ b/services/rag/app/services/rag_service.py @@ -2,6 +2,18 @@ Provides: add_document, search, generate, delete_document. All operations use the private_knowledge schema in tale_knowledge database. + +Multi-org: each provider-backed public method requires an `org_slug` so +the LLM / embedding / vision clients used for the call come from THAT +org's provider catalog at `<TALE_CONFIG_DIR>/<org>/providers/`. Per-org client +state is built lazily and cached for `_CONFIG_CHECK_INTERVAL` seconds.
+ +Embedding **dimensions** are global: the underlying knowledge DB uses +one vector column, so all orgs sharing this RAG instance must use the +same embedding dimensions. The first org to initialize pins the value; +subsequent orgs that disagree raise loudly rather than silently storing +mis-dimensioned vectors. (Per-org dims would require per-org DB schemas +— out of scope.) """ from __future__ import annotations @@ -9,6 +21,7 @@ import asyncio import datetime as dt import time +from dataclasses import dataclass from typing import Any import asyncpg @@ -60,21 +73,46 @@ async def _safe_close(coro) -> None: logger.warning("Failed to close old client", exc_info=True) +@dataclass +class _OrgClients: + """Per-org cached LLM/embedding/vision clients. + + Lifecycle: built lazily on first call for an org, refreshed if older + than `_CONFIG_CHECK_INTERVAL` AND the underlying provider config has + changed on disk. + """ + + llm_config: dict + vision_config: tuple | None + embedding_service: EmbeddingService + openai_client: AsyncOpenAI + vision_client: VisionClient | None + search_service: RagSearchService + last_check: float + + class RagService: def __init__(self) -> None: self.initialized = False self._init_lock = asyncio.Lock() self._pool: asyncpg.Pool | None = None - self._embedding_service: EmbeddingService | None = None - self._vision_client: VisionClient | None = None - self._openai_client: AsyncOpenAI | None = None - self._search_service: RagSearchService | None = None - self._llm_config: dict | None = None - self._vision_config: tuple | None = None - self._last_config_check: float = 0 + # Embedding dimensions are pinned globally; see module docstring. + self._pinned_dims: int | None = None + # Per-org client cache and per-org locks (so concurrent first-calls + # for the same org don't both build clients). 
+ self._org_clients: dict[str, _OrgClients] = {} + self._org_locks: dict[str, asyncio.Lock] = {} + # Per-search-call usage propagation — set by search(), read by + # generate(). Single-threaded asyncio so no need for per-org isolation. + self.last_search_usage: Any = None async def initialize(self) -> None: - """Initialize database pool, embedding service, vision client, and LLM client.""" + """Initialize the shared database pool. + + Per-org client construction is deferred until the first call for + that org. The DB pool is global — all orgs share one + knowledge-DB connection pool because the schema is global. + """ if self.initialized: return @@ -82,138 +120,113 @@ async def initialize(self) -> None: if self.initialized: return - await self._do_initialize() + self._pool = await init_pool() + self.initialized = True + logger.info("RagService initialized (DB pool ready; per-org clients lazy)") - async def _do_initialize(self) -> None: + @property + def embedding_service(self) -> EmbeddingService | None: + """Deprecated: kept for any callers that haven't been threaded + with `org_slug` yet. Returns None; callers must migrate. + """ + return None - # Database pool - self._pool = await init_pool() + def _get_org_lock(self, org_slug: str) -> asyncio.Lock: + lock = self._org_locks.get(org_slug) + if lock is None: + lock = asyncio.Lock() + self._org_locks[org_slug] = lock + return lock - # Embedding service - llm_config = settings.get_llm_config() - embedding_model = llm_config["embedding_model"] - dimensions = settings.get_embedding_dimensions() + async def _ensure_org_clients(self, org_slug: str) -> _OrgClients: + """Lazy-init or refresh an org's clients. 
- self._embedding_service = EmbeddingService( - api_key=llm_config["embedding_api_key"], - base_url=llm_config["embedding_base_url"], - model=embedding_model, - dimensions=dimensions, - ) - self._llm_config = llm_config + Refresh is gated on `_CONFIG_CHECK_INTERVAL` so a busy org doesn't + re-read its provider files on every call. + """ + if not self.initialized: + await self.initialize() + if self._pool is None: + raise RuntimeError("RagService not initialized: database pool is None") - # Pin embedding dimensions and create HNSW index (runtime config, not a migration) - await pin_embedding_dimensions(self._pool, dimensions) + cached = self._org_clients.get(org_slug) + if cached is not None: + now = time.monotonic() + if (now - cached.last_check) < _CONFIG_CHECK_INTERVAL: + return cached - # Vision client (optional — only if model is configured) - try: - vision_config = settings.get_vision_config() - v_base_url, v_api_key, v_model = vision_config - self._vision_client = VisionClient( - api_key=v_api_key, - model=v_model, - base_url=v_base_url, - timeout=120.0, - request_timeout=float(settings.vision_request_timeout), - max_concurrent_pages=settings.vision_max_concurrent_pages, - pdf_dpi=settings.vision_pdf_dpi, - ocr_prompt=settings.vision_extraction_prompt, + lock = self._get_org_lock(org_slug) + async with lock: + cached = self._org_clients.get(org_slug) + if cached is not None: + now = time.monotonic() + if (now - cached.last_check) < _CONFIG_CHECK_INTERVAL: + return cached + + return await self._build_or_refresh_org_clients(org_slug, cached) + + async def _build_or_refresh_org_clients( + self, + org_slug: str, + previous: _OrgClients | None, + ) -> _OrgClients: + """Construct fresh clients for org_slug, atomic-swapping if existing.""" + assert self._pool is not None + + llm_config = settings.get_llm_config(org_slug) + if previous is not None and llm_config == previous.llm_config: + # No change — refresh the timestamp and reuse. 
+ previous.last_check = time.monotonic() + return previous + + if not llm_config.get("api_key") or not llm_config.get("embedding_api_key"): + if previous is not None: + logger.warning( + "Skipping LLM config reload for org '{}': empty API key", + org_slug, + ) + previous.last_check = time.monotonic() + return previous + raise ValueError(f"Org '{org_slug}' has empty chat or embedding API key in provider config.") + + _b, _a, _m, dims = settings.get_embedding_config(org_slug) + + if self._pinned_dims is None: + self._pinned_dims = dims + await pin_embedding_dimensions(self._pool, dims) + logger.info( + "Pinned RAG embedding dimensions to {} (set by org '{}')", + dims, + org_slug, ) - self._vision_config = vision_config - logger.info("Vision client initialized with model: {}", v_model) - except ValueError: - logger.info("No vision model configured, Vision features disabled") - self._vision_client = None - - # OpenAI client for generation. Explicit timeout: the SDK - # default is 600 s, which can hold the asyncio event loop for - # 10 minutes on a stuck provider endpoint and starve the DB - # pool. Round-2 review MEDIUM (E.4.7). - self._openai_client = AsyncOpenAI( + elif dims != self._pinned_dims: + raise ValueError( + f"Org '{org_slug}' embedding dimensions ({dims}) do not match the " + f"pinned RAG schema dimensions ({self._pinned_dims}). All orgs " + f"sharing this RAG instance must use the same embedding model " + f"dimensions. Reconcile provider configs or run RAG per-org." 
+ ) + + embedding_service = EmbeddingService( + api_key=llm_config["embedding_api_key"], + base_url=llm_config["embedding_base_url"], + model=llm_config["embedding_model"], + dimensions=dims, + ) + openai_client = AsyncOpenAI( api_key=llm_config["api_key"], base_url=llm_config["base_url"], timeout=httpx.Timeout(connect=10.0, read=120.0, write=30.0, pool=5.0), ) - # Search service - self._search_service = RagSearchService(self._pool, self._embedding_service) - - self._last_config_check = time.monotonic() - self.initialized = True - logger.info("RagService initialized") - - @property - def embedding_service(self) -> EmbeddingService | None: - return self._embedding_service - - def _maybe_refresh_clients(self) -> None: - """Check provider config freshness; rebuild clients if changed. - - This method is synchronous (no await) so that all attribute swaps - happen atomically from asyncio's cooperative-scheduling perspective. - """ - if not self.initialized: - return - now = time.monotonic() - if (now - self._last_config_check) < _CONFIG_CHECK_INTERVAL: - return - self._last_config_check = now - - # Check chat/embedding config - new_llm_config = settings.get_llm_config() - if new_llm_config != self._llm_config: - if not new_llm_config.get("api_key") or not new_llm_config.get("embedding_api_key"): - logger.warning("Skipping LLM config reload: empty API key") - else: - new_dims = settings.get_embedding_dimensions() - if self._embedding_service and new_dims != self._embedding_service.dimensions: - logger.error( - "Embedding dimensions changed ({} -> {}). 
Restart required.", - self._embedding_service.dimensions, - new_dims, - ) - else: - # Prepare new clients before swapping any state - new_emb = EmbeddingService( - api_key=new_llm_config["embedding_api_key"], - base_url=new_llm_config["embedding_base_url"], - model=new_llm_config["embedding_model"], - dimensions=new_dims, - ) - new_oai = AsyncOpenAI( - api_key=new_llm_config["api_key"], - base_url=new_llm_config["base_url"], - timeout=httpx.Timeout(connect=10.0, read=120.0, write=30.0, pool=5.0), - ) - - # Swap all at once (atomic from asyncio's cooperative perspective) - old_emb = self._embedding_service - old_oai = self._openai_client - self._embedding_service = new_emb - self._openai_client = new_oai - if self._pool: - self._search_service = RagSearchService(self._pool, new_emb) - self._llm_config = new_llm_config - logger.info("RAG LLM clients refreshed: model={}", new_llm_config.get("embedding_model")) - - # Close old clients (fire-and-forget with grace period) - loop = asyncio.get_running_loop() - if old_emb: - task = loop.create_task(_safe_close(old_emb.close())) - _background_tasks.add(task) - task.add_done_callback(_background_tasks.discard) - if old_oai: - task = loop.create_task(_safe_close(old_oai.close())) - _background_tasks.add(task) - task.add_done_callback(_background_tasks.discard) - - # Check vision config + # Vision client (optional — only if the org has a vision-tagged model) + vision_client: VisionClient | None = None + vision_config: tuple | None = None try: - new_vision_config = settings.get_vision_config() - v_base_url, v_api_key, v_model = new_vision_config - if new_vision_config != self._vision_config and v_api_key: - old_vision = self._vision_client - self._vision_client = VisionClient( + vision_config = settings.get_vision_config(org_slug) + v_base_url, v_api_key, v_model = vision_config + if v_api_key: + vision_client = VisionClient( api_key=v_api_key, model=v_model, base_url=v_base_url, @@ -223,18 +236,58 @@ def 
_maybe_refresh_clients(self) -> None: pdf_dpi=settings.vision_pdf_dpi, ocr_prompt=settings.vision_extraction_prompt, ) - self._vision_config = new_vision_config - logger.info("RAG vision client refreshed: model={}", v_model) - if old_vision: - loop = asyncio.get_running_loop() - task = loop.create_task(_safe_close(old_vision.close())) - _background_tasks.add(task) - task.add_done_callback(_background_tasks.discard) + logger.info( + "Vision client initialized for org '{}' with model {}", + org_slug, + v_model, + ) except ValueError: - logger.debug("No vision model in provider config, skipping vision refresh") + logger.debug( + "No vision model configured for org '{}', Vision disabled", + org_slug, + ) + + search_service = RagSearchService(self._pool, embedding_service) + + new_clients = _OrgClients( + llm_config=llm_config, + vision_config=vision_config, + embedding_service=embedding_service, + openai_client=openai_client, + vision_client=vision_client, + search_service=search_service, + last_check=time.monotonic(), + ) + self._org_clients[org_slug] = new_clients + + # Best-effort close of old clients after a grace period so in-flight + # requests on the old clients finish cleanly. 
+ if previous is not None: + loop = asyncio.get_running_loop() + if previous.embedding_service is not embedding_service: + task = loop.create_task(_safe_close(previous.embedding_service.close())) + _background_tasks.add(task) + task.add_done_callback(_background_tasks.discard) + if previous.openai_client is not openai_client: + task = loop.create_task(_safe_close(previous.openai_client.close())) + _background_tasks.add(task) + task.add_done_callback(_background_tasks.discard) + if previous.vision_client is not None and previous.vision_client is not vision_client: + task = loop.create_task(_safe_close(previous.vision_client.close())) + _background_tasks.add(task) + task.add_done_callback(_background_tasks.discard) + + logger.info( + "RAG clients {} for org '{}': model={}", + "refreshed" if previous else "initialized", + org_slug, + llm_config.get("model"), + ) + return new_clients async def add_document( self, + org_slug: str, content: bytes, file_id: str, filename: str, @@ -242,23 +295,19 @@ async def add_document( source_created_at: dt.datetime | None = None, source_modified_at: dt.datetime | None = None, ) -> dict[str, Any]: - """Add a document to the knowledge base.""" - if not self.initialized: - await self.initialize() - self._maybe_refresh_clients() + """Add a document to the knowledge base for the given org.""" + clients = await self._ensure_org_clients(org_slug) if self._pool is None: raise RuntimeError("RagService not initialized: database pool is None") - if self._embedding_service is None: - raise RuntimeError("RagService not initialized: embedding service is None") return await index_document( self._pool, file_id, content, filename, - embedding_service=self._embedding_service, - vision_client=self._vision_client, + embedding_service=clients.embedding_service, + vision_client=clients.vision_client, chunk_size=settings.chunk_size, chunk_overlap=settings.chunk_overlap, source_created_at=source_created_at, @@ -267,6 +316,7 @@ async def add_document( async 
def search( self, + org_slug: str, query: str, *, top_k: int | None = None, @@ -277,24 +327,19 @@ async def search( Embedding token usage available via `self.last_search_usage` after call. """ - if not self.initialized: - await self.initialize() - self._maybe_refresh_clients() - - if self._search_service is None: - raise RuntimeError("RagService not initialized: search service is None") + clients = await self._ensure_org_clients(org_slug) effective_top_k = top_k if top_k is not None else settings.top_k threshold = similarity_threshold if similarity_threshold is not None else settings.similarity_threshold - results = await self._search_service.search( + results = await clients.search_service.search( query, file_ids=file_ids, top_k=effective_top_k, similarity_threshold=threshold, ) - self.last_search_usage = getattr(self._search_service, "last_search_usage", None) + self.last_search_usage = getattr(clients.search_service, "last_search_usage", None) # If no results and some files are still indexing, wait and retry once if not results and file_ids: @@ -303,33 +348,29 @@ async def search( if has_processing: logger.info("No results and some files still indexing, retrying in 3s") await asyncio.sleep(3) - results = await self._search_service.search( + results = await clients.search_service.search( query, file_ids=file_ids, top_k=effective_top_k, similarity_threshold=threshold, ) - self.last_search_usage = getattr(self._search_service, "last_search_usage", None) + self.last_search_usage = getattr(clients.search_service, "last_search_usage", None) return results async def generate( self, + org_slug: str, query: str, file_ids: list[str] | None = None, ) -> dict[str, Any]: """Generate a response using RAG: search -> context assembly -> LLM.""" - if not self.initialized: - await self.initialize() - self._maybe_refresh_clients() - - if self._openai_client is None: - raise RuntimeError("RagService not initialized: OpenAI client is None") + clients = await 
self._ensure_org_clients(org_slug) try: start_time = time.time() - search_results = await self.search(query, top_k=RAG_TOP_K, file_ids=file_ids) + search_results = await self.search(org_slug, query, top_k=RAG_TOP_K, file_ids=file_ids) if not search_results: return { @@ -363,9 +404,9 @@ async def generate( context = "\n\n".join(context_parts) user_message = f"Context:\n{context}\n\nQuestion: {query}" - llm_config = settings.get_llm_config() + llm_config = clients.llm_config - completion = await self._openai_client.chat.completions.create( + completion = await clients.openai_client.chat.completions.create( model=llm_config["model"], messages=[ {"role": "system", "content": SYSTEM_PROMPT}, @@ -417,14 +458,9 @@ async def get_document_content( ) -> dict[str, Any] | None: """Retrieve document content by reassembling stored chunks. - Args: - file_id: Logical file identifier. - chunk_start: First chunk to return (1-indexed). - chunk_end: Last chunk to return (1-indexed, inclusive). None = capped by MAX_CHUNK_WINDOW. - return_chunks: If True, include individual chunks as a list. - - Returns: - Response dict with content and metadata, or None if not found. + Does not require an org slug: documents are looked up by file_id + in the shared knowledge schema. Access control / tenancy is + enforced at the platform → RAG boundary. """ if not self.initialized: await self.initialize() @@ -474,20 +510,7 @@ async def get_document_content( "source_modified_at": doc["source_modified_at"], } - # Reassembly: concatenate each chunk's forward-owning `core_content` - # span. By construction, "".join(core_content) equals the original - # ingested text (see tale_knowledge.chunking.splitter tests), so - # overlap regions between adjacent chunks appear exactly once — - # fixing the duplicate-content bug the old "\n\n".join(chunk_content) - # exhibited. 
- # - # Per-document reindex is atomic (see _do_store), so a document's - # chunks are either all migrated (core_content populated) or all - # legacy (core_content == ''). Mixed state within one document is - # not possible. Falling back to the old stitching for legacy docs - # preserves correctness (no lost text) with today's known - # duplicate-content behavior until reindex completes. - # The fallback + chunk_content column disappear in Phase 5. + # Reassembly: see chunking docs. all_migrated = all(row["core_content"] for row in rows) if all_migrated: combined = "".join(row["core_content"] for row in rows) @@ -521,11 +544,7 @@ async def get_document_statuses( """Get statuses for multiple documents by file_id. Returns a dict mapping file_id to status info or None if not found. - When a document has multiple scope rows, priority is: processing > failed > completed. - - If ANY scope row is still processing, the document is considered processing. - This ensures reindex operations are visible even when other scope rows - remain completed. + Org-agnostic (status lookup uses the shared knowledge schema). """ if not self.initialized: await self.initialize() @@ -571,7 +590,11 @@ async def delete_document( self, file_id: str, ) -> dict[str, Any]: - """Delete a document and its chunks from the knowledge base.""" + """Delete a document and its chunks from the knowledge base. + + Org-agnostic: file_id is globally unique in this schema. Access + control is enforced at the platform → RAG boundary. + """ if not self.initialized: await self.initialize() @@ -627,8 +650,7 @@ async def compare_documents( ) -> dict[str, Any] | None: """Compare two documents using deterministic paragraph-level diffing. - Fetches both documents in parallel. Returns structured diff with - change blocks, or an error dict when a document is not found. + Org-agnostic — operates on stored documents by file_id. 
""" from .diff_service import compute_diff @@ -640,7 +662,11 @@ async def compare_documents( if base is None: return {"error": "not_found", "file_id": base_file_id, "role": "base"} if comp is None: - return {"error": "not_found", "file_id": comparison_file_id, "role": "comparison"} + return { + "error": "not_found", + "file_id": comparison_file_id, + "role": "comparison", + } diff_result = compute_diff( base["content"], @@ -663,6 +689,7 @@ async def compare_documents( async def compare_files( self, + org_slug: str, base_bytes: bytes, base_filename: str, comparison_bytes: bytes, @@ -672,10 +699,10 @@ async def compare_files( ) -> dict[str, Any]: """Compare two uploaded files using deterministic paragraph-level diffing. - Extracts text directly from file bytes — no database storage or embedding. - Text extraction runs in parallel for both files via asyncio.gather. + Extracts text directly from file bytes — uses the org's vision + client for OCR-able formats. No database storage or embedding. """ - self._maybe_refresh_clients() + clients = await self._ensure_org_clients(org_slug) from tale_knowledge.extraction import extract_text @@ -684,8 +711,12 @@ async def compare_files( t0 = time.time() (base_text, _), (comp_text, _) = await asyncio.gather( - extract_text(base_bytes, base_filename, vision_client=self._vision_client), - extract_text(comparison_bytes, comparison_filename, vision_client=self._vision_client), + extract_text(base_bytes, base_filename, vision_client=clients.vision_client), + extract_text( + comparison_bytes, + comparison_filename, + vision_client=clients.vision_client, + ), ) extraction_ms = (time.time() - t0) * 1000 @@ -713,7 +744,31 @@ async def compare_files( return result async def shutdown(self) -> None: - """Clean shutdown — close pool.""" + """Clean shutdown — close pool and all per-org clients.""" + # Best-effort close of each org's clients before tearing down the pool. 
+ for org_slug, clients in list(self._org_clients.items()): + try: + await clients.embedding_service.close() + except Exception: + logger.warning( + "Failed to close embedding_service for org '{}'", + org_slug, + exc_info=True, + ) + try: + await clients.openai_client.close() + except Exception: + logger.warning("Failed to close openai_client for org '{}'", org_slug, exc_info=True) + if clients.vision_client is not None: + try: + await clients.vision_client.close() + except Exception: + logger.warning( + "Failed to close vision_client for org '{}'", + org_slug, + exc_info=True, + ) + self._org_clients.clear() await close_pool() self.initialized = False diff --git a/services/rag/tests/test_background_ingest.py b/services/rag/tests/test_background_ingest.py index 819fb81d7c..f7357dd15b 100644 --- a/services/rag/tests/test_background_ingest.py +++ b/services/rag/tests/test_background_ingest.py @@ -17,6 +17,8 @@ import pytest +TEST_ORG = "test-org" + pytestmark = pytest.mark.asyncio @@ -198,6 +200,7 @@ async def test_successful_ingestion(self): ): mock_rag.add_document = AsyncMock(return_value=add_result) await _background_ingest( + TEST_ORG, b"content", "doc-1", "test.txt", @@ -223,6 +226,7 @@ async def test_skipped_content_marks_completed(self): ): mock_rag.add_document = AsyncMock(return_value=add_result) await _background_ingest( + TEST_ORG, b"content", "doc-1", "test.txt", @@ -247,6 +251,7 @@ async def test_non_skipped_does_not_call_mark_completed(self): ): mock_rag.add_document = AsyncMock(return_value=add_result) await _background_ingest( + TEST_ORG, b"content", "doc-1", "test.txt", @@ -264,6 +269,7 @@ async def test_ingestion_failure_records_sanitized_error(self): ): mock_rag.add_document = AsyncMock(side_effect=RuntimeError("x" * 1000)) await _background_ingest( + TEST_ORG, b"content", "doc-1", "test.txt", @@ -287,6 +293,7 @@ async def test_record_failure_error_does_not_propagate(self): ): mock_rag.add_document = AsyncMock(side_effect=ValueError("ingestion 
failed")) await _background_ingest( + TEST_ORG, b"content", "doc-1", "test.txt", @@ -313,6 +320,7 @@ async def test_forwards_source_timestamps_to_add_document(self): ): mock_rag.add_document = AsyncMock(return_value=add_result) await _background_ingest( + TEST_ORG, b"content", "doc-1", "test.txt", @@ -321,6 +329,7 @@ async def test_forwards_source_timestamps_to_add_document(self): ) mock_rag.add_document.assert_awaited_once_with( + TEST_ORG, content=b"content", file_id="doc-1", filename="test.txt", @@ -338,6 +347,7 @@ async def test_cleanup_memory_always_called(self): ): mock_rag.add_document = AsyncMock(side_effect=RuntimeError("boom")) await _background_ingest( + TEST_ORG, b"content", "doc-1", "test.txt", diff --git a/services/rag/tests/test_compare_files.py b/services/rag/tests/test_compare_files.py index fc1e383c3e..29c8efe157 100644 --- a/services/rag/tests/test_compare_files.py +++ b/services/rag/tests/test_compare_files.py @@ -15,21 +15,49 @@ pytestmark = pytest.mark.asyncio +TEST_ORG = "test-org" + def _make_service(): - """Create a RagService with all internal dependencies pre-mocked.""" - from app.services.rag_service import RagService + """Create a RagService with all internal dependencies pre-mocked. + + Pre-seeds the per-org client cache for `TEST_ORG` so compare_files + doesn't trigger the lazy-init / provider-catalog path. 
+ """ + from app.services.rag_service import RagService, _OrgClients service = RagService() service.initialized = True service._pool = MagicMock() - service._embedding_service = AsyncMock() - service._vision_client = MagicMock() - service._search_service = AsyncMock() - service._openai_client = AsyncMock() - service._llm_config = {} - service._vision_config = None - service._last_config_check = time.monotonic() + service._pinned_dims = 1536 + + embedding = AsyncMock() + embedding.dimensions = 1536 + openai_client = AsyncMock() + vision_client = MagicMock() + search_service = AsyncMock() + + service._org_clients[TEST_ORG] = _OrgClients( + llm_config={ + "model": "gpt-test", + "embedding_model": "embed-test", + "api_key": "k", + "base_url": "http://test", + "embedding_api_key": "k", + "embedding_base_url": "http://test", + }, + vision_config=None, + embedding_service=embedding, + openai_client=openai_client, + vision_client=vision_client, + search_service=search_service, + last_check=time.monotonic(), + ) + # Back-compat aliases for tests that grab mocks off the service. 
+ service._search_service = search_service + service._openai_client = openai_client + service._embedding_service = embedding + service._vision_client = vision_client return service @@ -45,6 +73,7 @@ async def mock_extract(content_bytes, filename, *, vision_client=None): with patch("tale_knowledge.extraction.extract_text", side_effect=mock_extract): result = await service.compare_files( + TEST_ORG, b"Section 1\n\nOriginal clause.", "base.txt", b"Section 1\n\nModified clause.", @@ -66,6 +95,7 @@ async def mock_extract(content_bytes, filename, *, vision_client=None): with patch("tale_knowledge.extraction.extract_text", side_effect=mock_extract): result = await service.compare_files( + TEST_ORG, content, "a.txt", content, @@ -90,6 +120,7 @@ async def mock_extract(content_bytes, filename, *, vision_client=None): pytest.raises(ValueError, match="No text could be extracted from base file"), ): await service.compare_files( + TEST_ORG, b"", "empty.txt", b"content", @@ -109,6 +140,7 @@ async def mock_extract(content_bytes, filename, *, vision_client=None): pytest.raises(ValueError, match="No text could be extracted from comparison file"), ): await service.compare_files( + TEST_ORG, b"content", "base.txt", b"", @@ -130,6 +162,7 @@ async def mock_extract(content_bytes, filename, *, vision_client=None): with patch("tale_knowledge.extraction.extract_text", side_effect=mock_extract): result = await service.compare_files( + TEST_ORG, b"Section 1\n\nParagraph A.", "base.txt", b"Section 1\n\nParagraph B.", @@ -152,6 +185,7 @@ async def mock_extract(content_bytes, filename, *, vision_client=None): with patch("tale_knowledge.extraction.extract_text", side_effect=mock_extract): result = await service.compare_files( + TEST_ORG, base.encode(), "base.txt", comp.encode(), @@ -205,6 +239,7 @@ async def test_happy_path(self): async with AsyncClient(transport=transport, base_url="http://test") as client: response = await client.post( "/api/v1/documents/compare-files", + headers={"X-Tale-Org": 
TEST_ORG}, files={ "base_file": ("base.txt", b"Hello\n\nWorld", "text/plain"), "comparison_file": ("comp.txt", b"Hello\n\nEarth", "text/plain"), @@ -222,6 +257,7 @@ async def test_unsupported_extension(self): async with AsyncClient(transport=transport, base_url="http://test") as client: response = await client.post( "/api/v1/documents/compare-files", + headers={"X-Tale-Org": TEST_ORG}, files={ "base_file": ("base.exe", b"binary", "application/octet-stream"), "comparison_file": ("comp.txt", b"text", "text/plain"), @@ -241,6 +277,7 @@ async def test_extraction_failure_returns_422(self): async with AsyncClient(transport=transport, base_url="http://test") as client: response = await client.post( "/api/v1/documents/compare-files", + headers={"X-Tale-Org": TEST_ORG}, files={ "base_file": ("empty.pdf", b"fake-pdf", "application/pdf"), "comparison_file": ("comp.pdf", b"fake-pdf", "application/pdf"), diff --git a/services/rag/tests/test_config.py b/services/rag/tests/test_config.py index 0b46d913bb..4b9f964047 100644 --- a/services/rag/tests/test_config.py +++ b/services/rag/tests/test_config.py @@ -26,7 +26,7 @@ class TestGetLlmConfig: def test_returns_valid_config(self, mock_chat, mock_embed): with patch.dict(os.environ, {}, clear=True): s = Settings() - config = s.get_llm_config() + config = s.get_llm_config("default") assert config["provider"] == "openai" assert config["api_key"] == "sk-test" assert config["base_url"] == "https://openrouter.ai/api/v1" @@ -42,7 +42,7 @@ def test_missing_chat_model_raises(self, mock_chat, mock_embed): with patch.dict(os.environ, {}, clear=True): s = Settings() with pytest.raises(ValueError, match="No chat model"): - s.get_llm_config() + s.get_llm_config("default") @patch( "tale_shared.config.base._provider_embedding_model", @@ -53,14 +53,14 @@ def test_missing_embedding_model_raises(self, mock_chat, mock_embed): with patch.dict(os.environ, {}, clear=True): s = Settings() with pytest.raises(ValueError, match="No embedding model"): - 
s.get_llm_config() + s.get_llm_config("default") @patch("tale_shared.config.base._provider_embedding_model", return_value=_mock_embedding_model()) @patch("tale_shared.config.base._provider_chat_model", return_value=_mock_chat_model()) def test_optional_max_tokens_included_when_set(self, mock_chat, mock_embed): with patch.dict(os.environ, {"RAG_OPENAI_MAX_TOKENS": "4096"}, clear=True): s = Settings() - config = s.get_llm_config() + config = s.get_llm_config("default") assert config["max_tokens"] == 4096 @patch("tale_shared.config.base._provider_embedding_model", return_value=_mock_embedding_model()) @@ -68,7 +68,7 @@ def test_optional_max_tokens_included_when_set(self, mock_chat, mock_embed): def test_optional_temperature_included_when_set(self, mock_chat, mock_embed): with patch.dict(os.environ, {"RAG_OPENAI_TEMPERATURE": "0.7"}, clear=True): s = Settings() - config = s.get_llm_config() + config = s.get_llm_config("default") assert config["temperature"] == pytest.approx(0.7) @patch("tale_shared.config.base._provider_embedding_model", return_value=_mock_embedding_model()) @@ -76,7 +76,7 @@ def test_optional_temperature_included_when_set(self, mock_chat, mock_embed): def test_max_tokens_omitted_when_not_set(self, mock_chat, mock_embed): with patch.dict(os.environ, {}, clear=True): s = Settings() - config = s.get_llm_config() + config = s.get_llm_config("default") assert "max_tokens" not in config @patch("tale_shared.config.base._provider_embedding_model", return_value=_mock_embedding_model()) @@ -84,7 +84,7 @@ def test_max_tokens_omitted_when_not_set(self, mock_chat, mock_embed): def test_temperature_omitted_when_not_set(self, mock_chat, mock_embed): with patch.dict(os.environ, {}, clear=True): s = Settings() - config = s.get_llm_config() + config = s.get_llm_config("default") assert "temperature" not in config @@ -93,7 +93,7 @@ class TestGetVisionModel: def test_returns_model_from_provider(self, mock_provider): with patch.dict(os.environ, {}, clear=True): s = 
Settings() - assert s.get_vision_model() == "gpt-4o" + assert s.get_vision_model("default") == "gpt-4o" @patch( "tale_shared.config.base._provider_vision_model", @@ -103,4 +103,4 @@ def test_missing_provider_raises(self, mock_provider): with patch.dict(os.environ, {}, clear=True): s = Settings() with pytest.raises(ValueError, match="No vision model"): - s.get_vision_model() + s.get_vision_model("default") diff --git a/services/rag/tests/test_document_helpers.py b/services/rag/tests/test_document_helpers.py index 512dc8cf27..5f2a710413 100644 --- a/services/rag/tests/test_document_helpers.py +++ b/services/rag/tests/test_document_helpers.py @@ -4,8 +4,8 @@ - _validate_file_extension: supported, unsupported, no extension - _parse_metadata: valid JSON, invalid JSON, non-dict JSON, None - SUPPORTED_EXTENSIONS: excludes legacy Office formats (.doc, .ppt, .xls) -- Settings.get_embedding_dimensions(): via provider files -- Settings.get_llm_config(): via provider files +- Settings.get_embedding_dimensions("default"): via provider files +- Settings.get_llm_config("default"): via provider files """ import os @@ -204,13 +204,13 @@ def _mock_embedding_model(): class TestGetEmbeddingDimensions: - """Settings.get_embedding_dimensions() from provider files.""" + """Settings.get_embedding_dimensions("default") from provider files.""" @patch("tale_shared.config.base._provider_embedding_model", return_value=_mock_embedding_model()) def test_valid_dimensions(self, mock_provider): with patch.dict(os.environ, {}, clear=True): s = Settings() - assert s.get_embedding_dimensions() == 1536 + assert s.get_embedding_dimensions("default") == 1536 @patch( "tale_shared.config.base._provider_embedding_model", @@ -219,7 +219,7 @@ def test_valid_dimensions(self, mock_provider): def test_large_dimensions(self, mock_provider): with patch.dict(os.environ, {}, clear=True): s = Settings() - assert s.get_embedding_dimensions() == 3072 + assert s.get_embedding_dimensions("default") == 3072 @patch( 
"tale_shared.config.base._provider_embedding_model", @@ -229,18 +229,18 @@ def test_missing_provider_raises(self, mock_provider): with patch.dict(os.environ, {}, clear=True): s = Settings() with pytest.raises(ValueError, match="No embedding model"): - s.get_embedding_dimensions() + s.get_embedding_dimensions("default") class TestGetLlmConfig: - """Settings.get_llm_config() from provider files.""" + """Settings.get_llm_config("default") from provider files.""" @patch("tale_shared.config.base._provider_embedding_model", return_value=_mock_embedding_model()) @patch("tale_shared.config.base._provider_chat_model", return_value=_mock_chat_model()) def test_all_present_returns_valid_config(self, mock_chat, mock_embed): with patch.dict(os.environ, {}, clear=True): s = Settings() - config = s.get_llm_config() + config = s.get_llm_config("default") assert config["provider"] == "openai" assert config["api_key"] == "sk-test" assert config["base_url"] == "https://openrouter.ai/api/v1" @@ -256,7 +256,7 @@ def test_missing_chat_model_raises(self, mock_chat, mock_embed): with patch.dict(os.environ, {}, clear=True): s = Settings() with pytest.raises(ValueError, match="No chat model"): - s.get_llm_config() + s.get_llm_config("default") @patch( "tale_shared.config.base._provider_embedding_model", @@ -267,14 +267,14 @@ def test_missing_embedding_model_raises(self, mock_chat, mock_embed): with patch.dict(os.environ, {}, clear=True): s = Settings() with pytest.raises(ValueError, match="No embedding model"): - s.get_llm_config() + s.get_llm_config("default") @patch("tale_shared.config.base._provider_embedding_model", return_value=_mock_embedding_model()) @patch("tale_shared.config.base._provider_chat_model", return_value=_mock_chat_model()) def test_optional_max_tokens_included_when_set(self, mock_chat, mock_embed): with patch.dict(os.environ, {"RAG_OPENAI_MAX_TOKENS": "4096"}, clear=True): s = Settings() - config = s.get_llm_config() + config = s.get_llm_config("default") assert 
config["max_tokens"] == 4096 @patch("tale_shared.config.base._provider_embedding_model", return_value=_mock_embedding_model()) @@ -282,7 +282,7 @@ def test_optional_max_tokens_included_when_set(self, mock_chat, mock_embed): def test_optional_temperature_included_when_set(self, mock_chat, mock_embed): with patch.dict(os.environ, {"RAG_OPENAI_TEMPERATURE": "0.7"}, clear=True): s = Settings() - config = s.get_llm_config() + config = s.get_llm_config("default") assert config["temperature"] == pytest.approx(0.7) @patch("tale_shared.config.base._provider_embedding_model", return_value=_mock_embedding_model()) @@ -290,7 +290,7 @@ def test_optional_temperature_included_when_set(self, mock_chat, mock_embed): def test_max_tokens_omitted_when_not_set(self, mock_chat, mock_embed): with patch.dict(os.environ, {}, clear=True): s = Settings() - config = s.get_llm_config() + config = s.get_llm_config("default") assert "max_tokens" not in config @patch("tale_shared.config.base._provider_embedding_model", return_value=_mock_embedding_model()) @@ -298,5 +298,5 @@ def test_max_tokens_omitted_when_not_set(self, mock_chat, mock_embed): def test_temperature_omitted_when_not_set(self, mock_chat, mock_embed): with patch.dict(os.environ, {}, clear=True): s = Settings() - config = s.get_llm_config() + config = s.get_llm_config("default") assert "temperature" not in config diff --git a/services/rag/tests/test_rag_service.py b/services/rag/tests/test_rag_service.py index e80c594d84..e5b26bccb7 100644 --- a/services/rag/tests/test_rag_service.py +++ b/services/rag/tests/test_rag_service.py @@ -18,24 +18,52 @@ pytestmark = pytest.mark.asyncio +TEST_ORG = "test-org" + def _make_service(): """Create a RagService with all internal dependencies pre-mocked. - Bypasses initialize() by directly setting the internal state. + Bypasses initialize() by directly setting the internal state, and + pre-seeds the per-org client cache for `TEST_ORG` so tests don't + have to drive the lazy-init path. 
""" - from app.services.rag_service import RagService + from app.services.rag_service import RagService, _OrgClients service = RagService() service.initialized = True service._pool = MagicMock() - service._embedding_service = AsyncMock() - service._vision_client = MagicMock() - service._search_service = AsyncMock() - service._openai_client = AsyncMock() - service._llm_config = {} - service._vision_config = None - service._last_config_check = time.monotonic() + service._pinned_dims = 1536 + + embedding = AsyncMock() + embedding.dimensions = 1536 + openai_client = AsyncMock() + vision_client = MagicMock() + search_service = AsyncMock() + + service._org_clients[TEST_ORG] = _OrgClients( + llm_config={ + "model": "gpt-test", + "embedding_model": "embed-test", + "api_key": "k", + "base_url": "http://test", + "embedding_api_key": "k", + "embedding_base_url": "http://test", + }, + vision_config=None, + embedding_service=embedding, + openai_client=openai_client, + vision_client=vision_client, + search_service=search_service, + last_check=time.monotonic(), + ) + # Back-compat aliases for tests that grab the mocks directly off the + # service. Both names point at the SAME mock instance the per-org + # cache uses, so setup-then-assert via either attribute works. 
+ service._search_service = search_service + service._openai_client = openai_client + service._embedding_service = embedding + service._vision_client = vision_client return service @@ -77,6 +105,7 @@ async def test_user_calls_index_document(self): "app.services.rag_service.index_document", new_callable=AsyncMock, return_value=index_result ) as mock_idx: result = await service.add_document( + TEST_ORG, b"content bytes", "doc-1", "report.pdf", @@ -99,6 +128,7 @@ async def test_skipped_returns_skipped(self): with patch("app.services.rag_service.index_document", new_callable=AsyncMock, return_value=index_result): result = await service.add_document( + TEST_ORG, b"content", "doc-skip", "file.txt", @@ -108,7 +138,12 @@ async def test_skipped_returns_skipped(self): assert result["skip_reason"] == "content_unchanged" async def test_initializes_if_not_initialized(self): - from app.services.rag_service import RagService + """`add_document` triggers `initialize()` (sets up the DB pool) + on the first call. Under the multi-org refactor, per-org client + construction is deferred even further (lazy on first call for + that org), so we pre-seed the cache to bypass _ensure_org_clients + and only verify the DB-pool initialize gate fires.""" + from app.services.rag_service import RagService, _OrgClients service = RagService() assert service.initialized is False @@ -117,13 +152,31 @@ async def test_initializes_if_not_initialized(self): def _fake_init(): service.initialized = True - service._last_config_check = time.monotonic() - service._llm_config = {} - service._vision_config = None + # Pre-seed per-org cache so the inner _ensure_org_clients + # call inside add_document doesn't try to read a real + # provider catalog from disk. 
+ embedding = AsyncMock() + embedding.dimensions = 1536 + service._pinned_dims = 1536 + service._org_clients[TEST_ORG] = _OrgClients( + llm_config={ + "model": "gpt", + "embedding_model": "embed", + "api_key": "k", + "base_url": "u", + "embedding_api_key": "k", + "embedding_base_url": "u", + }, + vision_config=None, + embedding_service=embedding, + openai_client=AsyncMock(), + vision_client=MagicMock(), + search_service=AsyncMock(), + last_check=time.monotonic(), + ) mock_init.side_effect = _fake_init service._pool = MagicMock() - service._embedding_service = AsyncMock() with patch( "app.services.rag_service.index_document", @@ -136,7 +189,7 @@ def _fake_init(): "skip_reason": "x", }, ): - await service.add_document(b"x", "d", "f.txt") + await service.add_document(TEST_ORG, b"x", "d", "f.txt") mock_init.assert_awaited_once() @@ -156,7 +209,7 @@ async def test_delegates_to_search_service(self): with patch("app.services.rag_service.settings") as mock_settings: mock_settings.top_k = 10 mock_settings.similarity_threshold = 0.0 - results = await service.search("test query", file_ids=["doc-1"]) + results = await service.search(TEST_ORG, "test query", file_ids=["doc-1"]) assert len(results) == 2 service._search_service.search.assert_awaited_once_with( @@ -173,7 +226,7 @@ async def test_applies_similarity_threshold(self): with patch("app.services.rag_service.settings") as mock_settings: mock_settings.top_k = 10 mock_settings.similarity_threshold = 0.7 - await service.search("query") + await service.search(TEST_ORG, "query") # Threshold is now passed to search_service for vector pre-filtering service._search_service.search.assert_awaited_once_with( @@ -190,7 +243,7 @@ async def test_custom_top_k_overrides_settings(self): with patch("app.services.rag_service.settings") as mock_settings: mock_settings.top_k = 5 mock_settings.similarity_threshold = 0.0 - await service.search("query", top_k=20) + await service.search(TEST_ORG, "query", top_k=20) 
service._search_service.search.assert_awaited_once_with( "query", @@ -210,7 +263,7 @@ async def test_custom_threshold_overrides_settings(self): with patch("app.services.rag_service.settings") as mock_settings: mock_settings.top_k = 10 mock_settings.similarity_threshold = 0.9 - results = await service.search("query", similarity_threshold=0.3) + results = await service.search(TEST_ORG, "query", similarity_threshold=0.3) assert len(results) == 1 @@ -225,7 +278,7 @@ async def test_zero_threshold_returns_all(self): with patch("app.services.rag_service.settings") as mock_settings: mock_settings.top_k = 10 mock_settings.similarity_threshold = 0.0 - results = await service.search("query") + results = await service.search(TEST_ORG, "query") assert len(results) == 1 @@ -237,7 +290,7 @@ async def test_passes_file_ids(self): with patch("app.services.rag_service.settings") as mock_settings: mock_settings.top_k = 10 mock_settings.similarity_threshold = 0.0 - await service.search("q", file_ids=["doc-1", "doc-2"]) + await service.search(TEST_ORG, "q", file_ids=["doc-1", "doc-2"]) service._search_service.search.assert_awaited_once_with( "q", @@ -272,7 +325,7 @@ async def test_generates_response_with_search_results(self): patch("app.services.rag_service.settings") as mock_settings, ): mock_settings.get_llm_config.return_value = {"model": "gpt-4o-mini"} - result = await service.generate("What is X?", file_ids=["doc-1"]) + result = await service.generate(TEST_ORG, "What is X?", file_ids=["doc-1"]) assert result["success"] is True assert result["response"] == "Generated answer based on context." 
@@ -288,7 +341,7 @@ async def test_empty_search_results_returns_no_info_message(self): new_callable=AsyncMock, return_value=[], ): - result = await service.generate("Unknown topic?") + result = await service.generate(TEST_ORG, "Unknown topic?") assert result["success"] is False assert "No relevant information" in result["response"] @@ -316,7 +369,7 @@ async def test_llm_receives_system_prompt_and_context(self): patch("app.services.rag_service.settings") as mock_settings, ): mock_settings.get_llm_config.return_value = {"model": "test-model"} - await service.generate("What?") + await service.generate(TEST_ORG, "What?") create_call = service._openai_client.chat.completions.create messages = create_call.call_args[1]["messages"] @@ -344,7 +397,7 @@ async def test_empty_llm_choices_raises(self): ): mock_settings.get_llm_config.return_value = {"model": "m"} with pytest.raises(ValueError, match="empty choices"): - await service.generate("question") + await service.generate(TEST_ORG, "question") async def test_context_truncated_at_max_chars(self): from app.services.rag_service import RAG_MAX_CONTEXT_CHARS @@ -364,7 +417,7 @@ async def test_context_truncated_at_max_chars(self): patch("app.services.rag_service.settings") as mock_settings, ): mock_settings.get_llm_config.return_value = {"model": "m"} - result = await service.generate("query") + result = await service.generate(TEST_ORG, "query") create_call = service._openai_client.chat.completions.create user_msg = create_call.call_args[1]["messages"][1]["content"] @@ -374,7 +427,7 @@ async def test_passes_file_ids_to_search(self): service = _make_service() with patch.object(service, "search", new_callable=AsyncMock, return_value=[]) as mock_search: - await service.generate("q", file_ids=["doc-1"]) + await service.generate(TEST_ORG, "q", file_ids=["doc-1"]) mock_search.assert_awaited_once() call_kwargs = mock_search.call_args[1] @@ -399,7 +452,7 @@ async def test_none_content_from_llm_returns_empty_string(self): 
patch("app.services.rag_service.settings") as mock_settings, ): mock_settings.get_llm_config.return_value = {"model": "m"} - result = await service.generate("q") + result = await service.generate(TEST_ORG, "q") assert result["response"] == "" assert result["success"] is True diff --git a/tools/cli/src/lib/actions/deploy.ts b/tools/cli/src/lib/actions/deploy.ts index 6f235fe576..3166dde853 100644 --- a/tools/cli/src/lib/actions/deploy.ts +++ b/tools/cli/src/lib/actions/deploy.ts @@ -757,9 +757,15 @@ async function syncProjectFiles( `${containerName}:/app/data/`, ]); + // docker cp is non-atomic across the multi-org staging dir: a failure + // here means a partial push may have landed in the container. Throw + // so the outer `deployToContainer` flow exits non-zero instead of + // printing `success('Deployment complete!')` over a half-pushed state. if (!result.success) { - logger.error(`Failed to override config: ${result.stderr}`); - return; + throw new Error( + `--override docker cp into ${containerName} failed: ${result.stderr?.trim() ?? '(no stderr)'}. ` + + `Partial push possible; re-run --override after addressing the cause.`, + ); } // docker cp copies files as root — fix ownership so the app user can write @@ -772,6 +778,9 @@ async function syncProjectFiles( `/app/data/`, ]); if (!chownResult.success) { + // Ownership fix failure isn't necessarily a push failure (files + // landed, just wrong owner), but warn loudly — the app user won't + // be able to write to its own data tree. logger.warn( `Failed to fix ownership on /app/data: ${chownResult.stderr}`, ); @@ -799,23 +808,59 @@ async function syncProjectFiles( // // All directory exclusions prune the entire subtree; `fs.cp` recurses past // the filter for any directory the filter returned `true` for. Root-level -// non-org junk (`.tale/`, `.git/`, `.env`, IDE configs, dotfiles, etc.) 
is -// excluded one level up — only org-shaped dirs from `findOrgDirs` reach -// this function — so the filter here only handles depth-1+ skips. +// non-org junk is excluded one level up, BUT the same kinds of junk can +// also appear INSIDE an org dir (e.g. operator commits their workspace as +// a git repo → `default/.git/`; macOS sprinkles `default/.DS_Store`; +// someone runs `npm i` in their providers folder → `default/node_modules/`). +// Filter them here so they never reach `/app/data//`. +const STAGED_DOTFILE_DENYLIST = new Set([ + // Belt-and-suspenders for things we already filter via startsWith('.'), + // but listing them makes intent explicit. + '.git', + '.tale', + '.vscode', + '.idea', + '.DS_Store', +]); +const STAGED_NAME_DENYLIST = new Set(['node_modules', '__pycache__']); async function stageOrgIntoDir(srcDir: string, destDir: string): Promise { await cp(srcDir, destDir, { recursive: true, filter: (src) => { const base = src.split(/[\\/]/).pop() ?? ''; + // `.history` and `*.secrets.json` are content-preserving filters by + // design — survive overwrites on the server side, so we never push + // them. Dotfiles (including `.git/`, `.DS_Store`, editor swap files, + // etc.) are operator-host junk that should never reach the data + // tree. node_modules / __pycache__ catch any non-dotfile package-mgr + // litter inside an org dir. if (base === '.history') return false; if (base.endsWith('.secrets.json')) return false; + if (base.startsWith('.')) return false; + if (STAGED_DOTFILE_DENYLIST.has(base)) return false; + if (STAGED_NAME_DENYLIST.has(base)) return false; // lstat is sync here because fs.cp's filter is sync. Symlinks at // any depth are skipped; missing entries (ENOENT) also skip rather // than throw — fs.cp re-races stat() so any race is benign. 
try { const info = lstatSync(src); if (info.isSymbolicLink()) return false; - } catch { + } catch (err: unknown) { + // ENOENT on a sibling stat is benign; anything else is worth a + // warning so a real permission/IO problem doesn't silently drop + // a file. + const code = + err !== null && + typeof err === 'object' && + 'code' in err && + typeof err.code === 'string' + ? err.code + : undefined; + if (code !== 'ENOENT') { + console.warn( + `[deploy.stageOrgIntoDir] lstat ${src} failed (${code ?? 'unknown'}); skipping`, + ); + } return false; } return true; diff --git a/tools/cli/src/lib/actions/init.ts b/tools/cli/src/lib/actions/init.ts index 4cffcbdc8c..c69c23fb67 100644 --- a/tools/cli/src/lib/actions/init.ts +++ b/tools/cli/src/lib/actions/init.ts @@ -292,7 +292,9 @@ export async function init(options: InitOptions): Promise { { apiKey: envResult.openrouterKey }, envResult.agePublicKey, ); - await writeFile(secretsPath, encrypted); + // 0600: SOPS-encrypted, but least-privilege convention for any + // `*.secrets.*` file. Limits readability to the owner. + await writeFile(secretsPath, encrypted, { mode: 0o600 }); logger.success( 'Encrypted provider API key into default/providers/openrouter.secrets.json', ); @@ -353,7 +355,21 @@ async function detectTaleProjectFiles(dir: string): Promise { try { const entries = await readdir(dir); return entries.filter((entry) => TALE_PROJECT_MARKERS.has(entry)); - } catch { + } catch (err: unknown) { + // Most common case: target dir does not exist yet (`tale init` in a + // fresh empty dir, or a path the operator just typed). Treat as + // empty — non-ENOENT errors are worth a warning so a perms issue + // doesn't masquerade as a clean slate. + const code = + err !== null && + typeof err === 'object' && + 'code' in err && + typeof err.code === 'string' + ? 
err.code + : undefined; + if (code !== 'ENOENT') { + console.warn(`[init.detectTaleProjectFiles] readdir ${dir} failed:`, err); + } return []; } } diff --git a/tools/cli/src/lib/actions/reseed-all-orgs.ts b/tools/cli/src/lib/actions/reseed-all-orgs.ts index 170c819a19..a3e9699001 100644 --- a/tools/cli/src/lib/actions/reseed-all-orgs.ts +++ b/tools/cli/src/lib/actions/reseed-all-orgs.ts @@ -5,12 +5,20 @@ * scripts/2026-03-28-migrate-convex-data.sh:120-131 (source env.sh, * ensure_instance_secret, compute admin key inline, run convex CLI). * - * Destructive: factory-reseeds every org's non-secret config from the - * builtin catalog. `*.secrets.json` files and `.history/` trails are - * preserved server-side by `scaffoldNewOrganization({override:true})`. - * Uploaded branding `images/` survive (branding is treated as a tree - * with per-file overwrite). Everything else under each `//` - * is overwritten with builtin content. + * Destructive: factory-reseeds every registered org's non-secret config + * from the builtin catalog. `*.secrets.json` files and `.history/` trails + * are preserved server-side by `scaffoldNewOrganization({override:true, + * strict:true})`. Uploaded branding `images/` survive (branding is + * treated as a tree with per-file overwrite). Everything else under each + * `//` is overwritten with builtin content. + * + * Filesystem-only org subtrees (no Better Auth row) are NOT touched — + * `--override-all` means "all registered orgs", not "every dir on disk". + * + * Failure semantics: the convex-side action throws on any per-org failure + * (so `bunx convex run` exits non-zero), which surfaces as + * `result.success === false` here and is converted to a CLI throw with + * the per-org detail attached. */ import { confirm } from '../../utils/confirm'; @@ -29,6 +37,12 @@ export interface ReseedAllOrgsOptions { * `INSTANCE_SECRET` is guaranteed populated and the admin key derivation * matches the entrypoint's own runtime computation. 
* + * `--no-push` skips a redundant push step (we're calling an existing + * deployed action). The trailing `grep -v` strips `bunx convex run`'s + * decorative banner output ("Admin key", "📋", "✅ Admin", separators, + * blank lines, etc.) so the final stdout is the action's JSON return + * value alone — parseable in TypeScript. + * * Runtime workdir is `/app` (services/platform/Dockerfile sets * `WORKDIR /app`; flattens services/platform/{convex,lib,env.sh,…} into * `/app/`). No `cd /app/services/platform` — that path does not exist @@ -44,15 +58,60 @@ cd /app HOME=/home/app timeout 1800 bunx convex run \\ organizations/reseed_all_orgs:reseedAllOrgsFromBuiltin \\ --url "\${CONVEX_URL:-http://convex:3210}" \\ - --admin-key "$ADMIN_KEY" + --admin-key "$ADMIN_KEY" \\ + --no-push 2>&1 \\ + | grep -v "^Admin key\\|^📋\\|^✅ Admin\\|^━\\|^🌐\\|^$\\|Steps:\\|Open\\|Enter\\|Paste" `; const CONFIRM_MESSAGE = - '--override-all will factory-reset every org from the builtin catalog. ' + + '--override-all will factory-reset every registered org from the builtin catalog. ' + '*.secrets.json files, .history/ trails, and uploaded branding/images/ are preserved; ' + 'all other config (model lists, agents, workflows, skills, integrations, branding.json, retention.json) ' + 'is overwritten. Proceed?'; +type ReseedResult = { + total: number; + succeeded: number; + failed: number; + results: Array< + | { slug: string; status: 'ok' } + | { slug: string; status: 'error'; error: string } + >; +}; + +/** + * Extract the last JSON object from a stream of mixed-output stdout. + * `bunx convex run` prints `null` for void-returning actions or the + * action's return value for value-returning ones. Either way, the JSON + * payload is on its own line(s) at the very end. + */ +function parseTrailingJson(stdout: string): ReseedResult | null { + const trimmed = stdout.trim(); + if (!trimmed) return null; + + // Walk backwards from the end looking for the start of a JSON value. 
+ // The action returns an object, so look for the matching `{`. + const lastBrace = trimmed.lastIndexOf('{'); + if (lastBrace < 0) return null; + + try { + const parsed = JSON.parse(trimmed.slice(lastBrace)); + if ( + parsed && + typeof parsed === 'object' && + typeof parsed.total === 'number' && + typeof parsed.succeeded === 'number' && + typeof parsed.failed === 'number' && + Array.isArray(parsed.results) + ) { + return parsed as ReseedResult; + } + return null; + } catch { + return null; + } +} + export async function reseedAllOrgsFromBuiltin( options: ReseedAllOrgsOptions, ): Promise { @@ -78,16 +137,16 @@ export async function reseedAllOrgsFromBuiltin( if (dryRun) { logger.blank(); logger.info('[DRY-RUN] Would run:'); - logger.info(` docker exec ${container} bash -lc ''`); - logger.info('Reseed script body (would be piped into bash):'); + logger.info(` docker exec -i ${container} bash -s <<'EOF'`); for (const line of RESEED_SCRIPT.split('\n')) { logger.info(` ${line}`); } + logger.info(` EOF`); return; } logger.blank(); - logger.step('Reseeding builtin catalog into all orgs...'); + logger.step('Reseeding builtin catalog into all registered orgs...'); // Pipe the script via stdin instead of embedding in argv — avoids shell // escaping pitfalls and keeps the script source readable. @@ -95,21 +154,33 @@ export async function reseedAllOrgsFromBuiltin( stdin: RESEED_SCRIPT, }); + // The convex action throws on any per-org failure, which propagates to + // `bunx convex run`'s exit code, which propagates to `docker exec`'s + // exit code, which becomes `result.success === false` here. if (!result.success) { + if (result.stdout) { + logger.info(result.stdout.trim()); + } if (result.stderr) { logger.error(result.stderr.trim()); } throw new Error( - `--override-all failed (docker exec into ${container} returned non-zero).`, + `--override-all failed: reseed action raised in ${container}. 
` + + `Per-org detail above; partial state on disk — re-run --override-all ` + + `after addressing failures (the action is idempotent).`, ); } - // The action's return value is printed to stdout by `bunx convex run`. - if (result.stdout) { - const trimmed = result.stdout.trim(); - if (trimmed) { - logger.info(trimmed); - } + // All orgs succeeded. Parse and summarize. + const parsed = parseTrailingJson(result.stdout); + if (parsed) { + logger.info( + `Reseeded ${parsed.succeeded}/${parsed.total} orgs from builtin catalog.`, + ); + } else if (result.stdout) { + // Couldn't parse — surface raw stdout so the operator isn't flying + // blind. Should be rare given the grep strip above. + logger.info(result.stdout.trim()); } logger.success('Reseed complete.'); diff --git a/tools/cli/src/lib/actions/start.ts b/tools/cli/src/lib/actions/start.ts index 47cdecdf9a..2f86fa701e 100644 --- a/tools/cli/src/lib/actions/start.ts +++ b/tools/cli/src/lib/actions/start.ts @@ -147,6 +147,38 @@ export async function start(options: StartOptions): Promise { } } + // Detect legacy flat-layout dirs at the project root (`agents/`, + // `workflows/`, …). Under the org-first layout these belong under + // `default//` instead — the platform's resolvers won't read + // anything at the old paths. Surface the runbook so the operator + // doesn't boot into a "nothing's working" state. 
+ const LEGACY_FLAT_DOMAINS = [ + 'agents', + 'workflows', + 'integrations', + 'branding', + 'providers', + 'skills', + ]; + const legacyDirsFound = LEGACY_FLAT_DOMAINS.filter((d) => + existsSync(join(projectDir, d)), + ); + if (legacyDirsFound.length > 0) { + logger.warn( + `Legacy flat layout detected at project root: ${legacyDirsFound.map((d) => `${d}/`).join(', ')}`, + ); + logger.info( + ' The org-first layout expects these under `default//` (or another org subtree).', + ); + logger.info( + ' Migrate with: `tale migrate config-layout` then `tale deploy --override-all -y`.', + ); + logger.info( + ' See docs//self-hosted/operate/upgrades.md for the full runbook.', + ); + logger.blank(); + } + await assertDockerAvailable(); // Resolve project ID from tale.json before any Docker-resource naming. @@ -242,10 +274,10 @@ export async function start(options: StartOptions): Promise { } logger.blank(); logger.info( - 'Agents, workflows, integrations, and branding are bind-mounted from your project.', + 'Per-org config (`/agents/`, `/workflows/`, `/integrations/`, `/branding/`, `/providers/`, `/skills/`)', ); logger.info( - 'Edits to agents/, workflows/, integrations/, and branding/ will auto-refresh the browser.', + 'is bind-mounted from your project. Edits to those paths auto-refresh the browser.', ); logger.blank(); logger.info(`Stop with: docker compose -p ${getProjectId()}-dev down`); diff --git a/tools/cli/src/lib/actions/update.ts b/tools/cli/src/lib/actions/update.ts index 11f4ad7fbb..8d4161d363 100644 --- a/tools/cli/src/lib/actions/update.ts +++ b/tools/cli/src/lib/actions/update.ts @@ -73,43 +73,84 @@ export async function update(options: UpdateOptions): Promise { await fetchReference(projectDir); } - // Regenerate AI rules files + // Read existing checksums BEFORE rewriting rules so we can apply the + // same modified/unmodified policy as example files. + const oldChecksums = await readChecksums(projectDir); + const oldFiles = oldChecksums?.files ?? 
{}; + + // Regenerate AI rules files. Same protection policy as examples: + // - new file → write + // - deleted by user → skip + // - unmodified-since-last-update → overwrite + // - locally modified + no --force → keep, warn + // - locally modified + --force → overwrite logger.step(`${prefix}Updating AI rules files...`); - if (!options.dryRun) { - const rulesFiles = generateAllRules(); - for (const { relativePath, content } of rulesFiles) { - const destPath = join(projectDir, relativePath); - await mkdir(dirname(destPath), { recursive: true }); - await writeFile(destPath, content); + const rulesFiles = generateAllRules(); + const rulesUpdates: Record = {}; + for (const { relativePath, content } of rulesFiles) { + const destPath = join(projectDir, relativePath); + const newHash = computeContentHash(content); + const oldHash = oldFiles[relativePath]; + + if (!oldHash) { + logger.info(`${prefix}+ ${relativePath} (new)`); + if (!options.dryRun) { + await mkdir(dirname(destPath), { recursive: true }); + await writeFile(destPath, content); + } + rulesUpdates[relativePath] = newHash; + } else if (!existsSync(destPath)) { + logger.info(`${prefix}- ${relativePath} (deleted by user, skipping)`); + } else { + const currentHash = await computeFileHash(destPath); + if (currentHash === oldHash) { + logger.info(`${prefix}~ ${relativePath} (updated)`); + if (!options.dryRun) { + await writeFile(destPath, content); + } + rulesUpdates[relativePath] = newHash; + } else if (options.force) { + logger.warn( + `${prefix}~ ${relativePath} (overwritten, was locally modified)`, + ); + if (!options.dryRun) { + await writeFile(destPath, content); + } + rulesUpdates[relativePath] = newHash; + } else { + logger.warn( + `${prefix}⚠ Skipped ${relativePath} (locally modified). Re-run with --force to overwrite.`, + ); + rulesUpdates[relativePath] = oldHash; + } } } - // Read existing checksums - const oldChecksums = await readChecksums(projectDir); - const oldFiles = oldChecksums?.files ?? 
{}; - - // Get new example files from embedded data + // Get new example files from embedded data. Paths land under + // `default//...` to match the org-first layout that + // `tale init` scaffolds. const newExampleFiles = new Map(); + const DEFAULT_ORG = 'default'; for (const [relPath, content] of getEmbeddedExamples('agents')) { - newExampleFiles.set(join('agents', relPath), content); + newExampleFiles.set(join(DEFAULT_ORG, 'agents', relPath), content); } for (const [relPath, content] of getEmbeddedExamples('workflows')) { - newExampleFiles.set(join('workflows', relPath), content); + newExampleFiles.set(join(DEFAULT_ORG, 'workflows', relPath), content); } for (const [relPath, content] of getEmbeddedExamples('integrations')) { - newExampleFiles.set(join('integrations', relPath), content); + newExampleFiles.set(join(DEFAULT_ORG, 'integrations', relPath), content); } for (const [relPath, content] of getEmbeddedExamples('branding')) { - newExampleFiles.set(join('branding', relPath), content); + newExampleFiles.set(join(DEFAULT_ORG, 'branding', relPath), content); } for (const [relPath, content] of getEmbeddedExamples('providers')) { if (!relPath.endsWith('.secrets.json')) { - newExampleFiles.set(join('providers', relPath), content); + newExampleFiles.set(join(DEFAULT_ORG, 'providers', relPath), content); } } for (const [relPath, content] of getEmbeddedExamples('skills')) { - newExampleFiles.set(join('skills', relPath), content); + newExampleFiles.set(join(DEFAULT_ORG, 'skills', relPath), content); } // Classify and apply changes @@ -120,7 +161,9 @@ export async function update(options: UpdateOptions): Promise { removed: [], }; - const newChecksumFiles: Record = {}; + // Seed checksum map with the rules-file decisions so the final write + // includes their hashes (so future updates can detect local edits). 
+ const newChecksumFiles: Record = { ...rulesUpdates }; for (const [relPath, content] of newExampleFiles) { const destPath = join(projectDir, relPath); diff --git a/tools/cli/src/lib/compose/generators/constants.ts b/tools/cli/src/lib/compose/generators/constants.ts index 616e81ae61..a192ff98df 100644 --- a/tools/cli/src/lib/compose/generators/constants.ts +++ b/tools/cli/src/lib/compose/generators/constants.ts @@ -5,8 +5,12 @@ export const DEV_VOLUME_NAMES = [ 'db-data', 'db-backup', 'rag-data', - // Retained for legacy migration (used by `tale migrate split-convex` to - // locate pre-split data). Not mounted by any container after Phase 2. + // Legacy: pre-0.3.0 deployments split platform and convex data; today + // everything lives in `convex-data`. The volume is retained as an + // unused stub so the detect() probe in start.ts can identify pre-0.3.0 + // deployments and produce a coherent diff. Operators can delete it + // by hand once they're past the upgrade window. Do not remove this + // entry without coordinating with that detect() heuristic. 'platform-data', 'convex-data', 'caddy-data', @@ -18,9 +22,7 @@ export const DEV_VOLUME_NAMES = [ // Every volume declared as `external: true` in the stateful or color compose // must appear here so `ensureVolumes` pre-creates it. export const REQUIRED_VOLUMES = [ - // platform-data is kept for upgrade scenarios where split-convex migrates - // its contents into convex-data; on fresh installs it is an unused empty - // volume. Removing it would break detect() for pre-0.3.0 deployments. + // See DEV_VOLUME_NAMES for the `platform-data` rationale. 
'platform-data', 'convex-data', 'caddy-data', diff --git a/tools/cli/src/lib/compose/generators/generate-dev-compose.ts b/tools/cli/src/lib/compose/generators/generate-dev-compose.ts index 83e5344940..73f0c11913 100644 --- a/tools/cli/src/lib/compose/generators/generate-dev-compose.ts +++ b/tools/cli/src/lib/compose/generators/generate-dev-compose.ts @@ -1,4 +1,4 @@ -import { existsSync } from 'node:fs'; +import { existsSync, readdirSync, statSync } from 'node:fs'; import { join } from 'node:path'; import { stringify } from 'yaml'; @@ -17,14 +17,18 @@ import type { ComposeConfig, ServiceConfig } from '../types'; import { DEV_VOLUME_NAMES } from './constants'; const DEV_COLOR = 'blue' as const; -/** Project-root subdirs that `tale init` populates from embedded examples. */ -const HOST_CONFIG_DIRS = [ +/** Domain dirs that the org-first layout uses under `//`. */ +const HOST_DOMAIN_DIRS = [ 'agents', 'workflows', 'integrations', 'branding', 'providers', + 'skills', ] as const; +/** Org-slug regex aligned with the platform-side validator. Refuses dotfiles + * and any non-org-shaped dir at the project root (`.tale`, `.git`, etc.). */ +const ORG_SLUG_RE = /^[a-z0-9][a-z0-9_-]{0,63}$/; interface DevComposeOptions { /** Project root, used to verify host bind-mount sources exist before @@ -33,23 +37,59 @@ interface DevComposeOptions { projectDir?: string; } -/** Return host bind-mount fragments (e.g. './agents:/app/data/agents{ro}') - * only for directories that actually exist on the host, with one warning - * per missing directory so the operator can fix it without docker emitting - * a confusing 'no such file or directory' error. */ +/** Discover org subdirectories (`//`) by enumerating the + * project root. Every direct subdir whose name matches the org-slug regex + * is an org. `tale init` always creates at least `default/`. 
*/ +function findOrgDirs(projectDir: string): string[] { + let entries: string[]; + try { + entries = readdirSync(projectDir); + } catch { + return []; + } + const orgs: string[] = []; + for (const name of entries) { + if (!ORG_SLUG_RE.test(name)) continue; + let stats: ReturnType; + try { + stats = statSync(join(projectDir, name)); + } catch { + continue; + } + if (!stats.isDirectory()) continue; + orgs.push(name); + } + return orgs; +} + +/** Return host bind-mount fragments for the org-first layout. + * + * For each org `//`, emits one mount per domain dir that + * actually exists: `.//://{ro}`. + * Missing per-domain dirs are skipped silently (operators don't have to + * populate every domain), but a `tale init` workspace with no org dirs + * at all logs a single warning. */ function existingHostMounts( projectDir: string, containerBase: string, suffix = '', ): string[] { + const orgs = findOrgDirs(projectDir); + if (orgs.length === 0) { + logger.warn( + `No org directories found under ${projectDir}. Container will fall back to convex-data volume contents — host edits will not hot-reload.`, + ); + return []; + } const mounts: string[] = []; - for (const dir of HOST_CONFIG_DIRS) { - if (existsSync(join(projectDir, dir))) { - mounts.push(`./${dir}:${containerBase}/${dir}${suffix}`); - } else { - logger.warn( - `Skipping host bind mount for ./${dir} (directory not found in project root). Container will fall back to convex-data volume contents.`, - ); + for (const org of orgs) { + for (const domain of HOST_DOMAIN_DIRS) { + const src = join(projectDir, org, domain); + if (existsSync(src)) { + mounts.push( + `./${org}/${domain}:${containerBase}/${org}/${domain}${suffix}`, + ); + } } } return mounts; @@ -106,19 +146,18 @@ export function generateDevCompose( convex: { condition: 'service_healthy' }, }; - const providersBindMount = existsSync(join(projectDir, 'providers')) - ? 
'./providers:/app/platform-config/providers:ro' - : null; - - // RAG/crawler need convex-data:/app/platform-config:ro for non-provider - // config (integrations, branding, …). The providers bind mount is a more - // specific path and shadows just providers/ for host-edit hot reload. + // RAG/crawler need convex-data:/app/platform-config:ro for per-org + // provider config (and integrations, branding, …). The org-first + // layout has paths like `default/providers/foo.json`, all under one + // root, so the previous standalone `./providers:/app/platform-config/providers:ro` + // shadow is no longer needed — the per-org bind mounts below cover + // host-edit hot reload for every org's provider catalog. const rag = createRagService(config, DEV_COLOR); rag.container_name = `${getProjectId()}-rag`; rag.volumes = [ 'rag-data:/app/data', 'convex-data:/app/platform-config:ro', - ...(providersBindMount ? [providersBindMount] : []), + ...existingHostMounts(projectDir, '/app/platform-config', ':ro'), ]; const crawler = createCrawlerService(config, DEV_COLOR); @@ -126,7 +165,7 @@ export function generateDevCompose( crawler.volumes = [ 'crawler-data:/app/data', 'convex-data:/app/platform-config:ro', - ...(providersBindMount ? [providersBindMount] : []), + ...existingHostMounts(projectDir, '/app/platform-config', ':ro'), ]; const proxy = createProxyService(config, hostAlias); diff --git a/tools/cli/src/lib/migrate-config-layout/script.sh b/tools/cli/src/lib/migrate-config-layout/script.sh index 40f2f9a850..e69cf4c1e2 100644 --- a/tools/cli/src/lib/migrate-config-layout/script.sh +++ b/tools/cli/src/lib/migrate-config-layout/script.sh @@ -18,7 +18,7 @@ # image, old code paths still active). cp leaves old paths in place so # old code keeps reading providers correctly until the operator runs # `tale deploy --override-all -y` to recreate convex with the new code. 
-set -eo pipefail +set -euo pipefail DRY_RUN=0 CLEANUP_OLD=0 @@ -30,7 +30,13 @@ for arg in "$@"; do esac done +# Defense in depth: `set -u` already aborts on unset $DATA, but ${VAR:?…} +# gives a clearer error message and won't be defeated by a future `set +# +u` somewhere downstream. Critical because some branches below build +# absolute paths from $DATA and rm them — a silent empty would operate +# from the container's filesystem root. DATA="${TALE_CONFIG_DIR:-/app/data}" +: "${DATA:?DATA must be a non-empty absolute path}" APP_UID=1001 APP_GID=1001 diff --git a/tools/cli/src/lib/rules/content.ts b/tools/cli/src/lib/rules/content.ts index fd08762ad3..a9d592438e 100644 --- a/tools/cli/src/lib/rules/content.ts +++ b/tools/cli/src/lib/rules/content.ts @@ -1,32 +1,57 @@ const RULES_CONTENT = `# Tale Project -This is a Tale project. Edit configs in \`agents/\`, \`workflows/\`, \`integrations/\`, and \`branding/\`. +This is a Tale project. Config is namespaced **per organization** under +\`//\`, with \`default\` as the canonical (and only required) org +on a fresh \`tale init\`. Multi-org deployments add sibling subtrees +(\`acme/\`, \`globex/\`, …) with the same internal shape. 
## Project structure \`\`\` -agents/ — Agent JSON configs (one file per agent) -workflows/ — Workflow JSON configs (organized by category subdirectories) -integrations/ — Integration directories (config.json + connector.ts + icon.svg each) -branding/ — Branding config (branding.json + images/) -.tale/reference/ — Read-only implementation source code, read before creating or editing configs +default/ — Canonical/template org (created by 'tale init') + agents/ — Agent JSON configs (one file per agent) + workflows/ — Workflow JSON configs (organized by category) + integrations/ — Integration bundles (config.json + connector.ts + icon.svg) + branding/ — Branding config (branding.json + images/) + providers/ — LLM provider configs (and *.secrets.json sidecars) + skills/ — Skill bundles (per-skill subdirs) + retention.json — Per-org data-retention overrides +/ — Same shape; one tree per registered org +.tale/reference/ — Read-only implementation source code (read before + creating or editing configs) \`\`\` ## Working with configs -Before creating or editing any config, read the relevant schemas and implementation code in \`.tale/reference/\` to understand the valid structure, fields, and constraints. Use existing config files in the project as examples. +Before creating or editing any config, read the relevant schemas and +implementation code in \`.tale/reference/\` to understand the valid +structure, fields, and constraints. Use existing config files in the +project as examples. 
## How modules connect -- Agents can simultaneously bind integrations (\`integrationBindings\`), delegate to other agents (\`delegates\`), and attach workflows (\`workflows\`) -- Workflows use integration operations within their steps and can be triggered by agents -- Check existing configs to understand available bindings before creating new ones +- Agents can simultaneously bind integrations (\`integrationBindings\`), + delegate to other agents (\`delegates\`), and attach workflows + (\`workflows\`) +- Workflows use integration operations within their steps and can be + triggered by agents +- Check existing configs to understand available bindings before creating + new ones ## Naming conventions +- Org slug (top-level directory name): \`[a-z0-9][a-z0-9_-]{0,63}\` (or + the literal \`default\`) - Agent filenames: \`[a-z0-9][a-z0-9_-]*\\.json\` - Workflow step slugs: \`[a-z0-9][a-z0-9_-]*\` - Integration directory names: lowercase alphanumeric with hyphens/underscores + +## Secrets + +\`*.secrets.json\` sidecars (e.g. \`providers/openrouter.secrets.json\`) +are SOPS-encrypted and gitignored. Never commit them; never include them +in PR diffs. The repo's root \`.gitignore\` covers \`**/*.secrets.json\` +and \`**/.history/\` at all depths. `; export function buildRulesContent(): string { diff --git a/tools/cli/src/lib/rules/generators.ts b/tools/cli/src/lib/rules/generators.ts index 377ff7734c..77af35c73a 100644 --- a/tools/cli/src/lib/rules/generators.ts +++ b/tools/cli/src/lib/rules/generators.ts @@ -6,10 +6,13 @@ interface RulesFile { } function buildCursorMdc(content: string): string { + // Globs match the org-first layout: any direct subdir of the project + // root that contains the canonical domain dirs. Covers `default/` and + // any additional org subtree (`acme/`, etc.) without listing each. 
const frontmatter = [ '---', 'description: Tale project configuration rules', - 'globs: agents/**,workflows/**,integrations/**,branding/**', + 'globs: */agents/**,*/workflows/**,*/integrations/**,*/branding/**,*/providers/**,*/skills/**,*/retention.json', '---', '', ].join('\n'); From 2d421ec71618bac70bf7eb444919d23657761d99 Mon Sep 17 00:00:00 2001 From: larryro <371767072@qq.com> Date: Thu, 28 May 2026 10:33:59 +0800 Subject: [PATCH 03/41] fix(crawler,platform,rag,cli): close P0 gaps from org-aware review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two-round review of the org-first refactor surfaced 10 P0 issues where the org-aware line only finished header propagation and left the data layer, error layer, reserved-name layer, and caller-update layer incomplete. This commit closes all 10 without expanding scope to P1. Crawler - Add `website_org_memberships` junction table + migration with backfill to `default`. websites/website_urls/chunks/page_paragraph_hashes remain deployment-shared content; the per-org boundary lives in the new table. Delete is ref-counted — last membership purges the website, others only drop the row. - `_fts_search` / `_vector_search` filter by EXISTS on the membership table so org A can't see chunks from a domain only org B added. - Scheduler binds `set_active_org()` to each website's oldest member per-task instead of hard-coding `default`. - Re-add boot-time `ALTER TABLE chunks ALTER COLUMN embedding TYPE vector(N)` pin (resolved from default-org provider catalog); fail-loudly on missing provider, atomic pool rollback on failure. Platform - New `UpstreamHttpError` typed wrapper with status/retryable/ safeMessage/sanitized body snippet; replace 8 raw `errorText` embeds across rag_action, rag_search_tool, fetch_document_*, upload_file_direct, delete_document. 
- Reserve `'default'` slug in `beforeCreateOrganization` (with first-run seed exception via `betterAuth.organization` count), in zod refine on the create-org form, and narrow `isCallerAdmin` to admins of the `default` org (branding owner). - config-watcher drops the `.endsWith('.json')` early gate so SKILL.md / scripts/*.py invalidate skill queries as the doc comment promises; per-domain extension filters inside `parseConfigChange`; 100ms tail-debounce per (type, org, slug) key prevents SSE storms during bulk migrations. - Thread `organizationId` + `x-tale-org` header through 10+ crawler callers that previously hit the now-globally-required dependency without the header: fetch_and_extract, websites/internal_actions (8 sites), file_metadata/internal_actions, apply/extract_docx, generate_docx, generate_document, crawler_action (3 sites). CLI - deploy.ts: move success log after sync + reseed; legacy-flat-layout detection now throws with `tale migrate config-layout` guidance and runs at deploy entry (not just under --override); --override-all implies forceRecreate so the reseed targets the new binary. - start.ts: pass `projectDir` to `generateDevCompose` so running from a subdirectory finds the right org dirs. RAG - /config no longer 500s — drop the per-org `get_llm_config()` call that the multi-org refactor made impossible from an org-less health endpoint; remove the corresponding fields from ConfigResponse. Misc - Remove Phase 2 (renameOrgSlug) from the dated convex-data migration script — the underlying Convex function was deleted on the parent refactor and any re-run would fail. Tests: new `test_website_membership.py` (9 cases), updated `test_websites_router.py` for ref-counted delete + first-membership trigger semantics, `test_database.py` rewritten around the new pin contract, new `upstream_http_error.test.ts` for the typed wrapper. Adjusted 4 existing platform test files whose assertions hard-coded the old raw-body error message format. 
`bun run check` green (70932 platform + 481 crawler tests + RAG suite). --- scripts/2026-03-28-migrate-convex-data.sh | 64 +---- services/crawler/app/routers/pages.py | 21 ++ services/crawler/app/routers/websites.py | 57 +++-- services/crawler/app/services/database.py | 60 +++-- .../crawler/app/services/pg_website_store.py | 150 ++++++++++-- services/crawler/app/services/scheduler.py | 23 +- .../crawler/app/services/search_service.py | 82 +++++-- ...0528000000_add_website_org_memberships.sql | 36 +++ services/crawler/tests/conftest.py | 30 ++- services/crawler/tests/test_database.py | 134 ++++------ .../crawler/tests/test_website_membership.py | 156 ++++++++++++ .../crawler/tests/test_websites_router.py | 229 ++++++------------ .../components/organization-form.tsx | 12 + services/platform/convex/_generated/api.d.ts | 2 + .../documents/document_retrieve_tool.test.ts | 2 +- .../fetch_document_comparison.test.ts | 2 +- .../documents/fetch_document_content.test.ts | 4 +- .../helpers/fetch_document_comparison.ts | 15 +- .../helpers/fetch_document_content.ts | 5 +- .../convex/agent_tools/rag/rag_search_tool.ts | 22 +- .../web/helpers/fetch_and_extract.ts | 11 +- services/platform/convex/auth.ts | 22 ++ .../convex/branding/internal_queries.ts | 26 +- .../convex/documents/generate_document.ts | 7 +- .../convex/documents/generate_docx.ts | 7 +- .../convex/file_metadata/internal_actions.ts | 5 + .../file_metadata/internal_mutations.ts | 1 + .../convex/file_metadata/mutations.ts | 1 + .../__tests__/upstream_http_error.test.ts | 95 ++++++++ .../convex/lib/errors/upstream_http_error.ts | 108 +++++++++ services/platform/convex/websites/actions.ts | 38 ++- .../convex/websites/internal_actions.ts | 70 +++++- services/platform/convex/websites/rest_api.ts | 4 + .../action_defs/crawler/crawler_action.ts | 30 ++- .../action_defs/document/document_action.ts | 16 +- .../document/helpers/apply_docx_structured.ts | 32 +-- .../helpers/extract_docx_structured.ts | 4 + 
.../rag/helpers/delete_document.ts | 8 +- .../rag/helpers/upload_file_direct.test.ts | 25 +- .../rag/helpers/upload_file_direct.ts | 5 +- .../action_defs/rag/rag_action.ts | 8 +- services/platform/lib/config-watcher.ts | 64 ++++- .../shared/constants/reserved-org-slugs.ts | 19 ++ services/rag/app/models.py | 9 +- services/rag/app/routers/health.py | 15 +- tools/cli/src/commands/deploy/index.ts | 8 +- tools/cli/src/lib/actions/deploy.ts | 38 ++- tools/cli/src/lib/actions/start.ts | 1 + 48 files changed, 1280 insertions(+), 503 deletions(-) create mode 100644 services/crawler/migrations/20260528000000_add_website_org_memberships.sql create mode 100644 services/crawler/tests/test_website_membership.py create mode 100644 services/platform/convex/lib/errors/__tests__/upstream_http_error.test.ts create mode 100644 services/platform/convex/lib/errors/upstream_http_error.ts create mode 100644 services/platform/lib/shared/constants/reserved-org-slugs.ts diff --git a/scripts/2026-03-28-migrate-convex-data.sh b/scripts/2026-03-28-migrate-convex-data.sh index 09984260ec..7ad741cf29 100755 --- a/scripts/2026-03-28-migrate-convex-data.sh +++ b/scripts/2026-03-28-migrate-convex-data.sh @@ -2,18 +2,22 @@ # ============================================================================ # Migration: Convex data migration (2026-03-28) # ============================================================================ -# Handles two tasks: -# 1. Copy Convex storage data from old volume to new volume -# 2. Rename organization slug to "default" +# Copies Convex storage data from old volume to new volume. # # Background: # The platform volume was renamed from platform-convex-data to platform-data. # Old Convex storage files (modules, user uploads) need to be copied to the # new volume so the Convex backend can find them. 
# +# Note: +# A prior version of this script also called `convex run +# migrations/rename_org_slug:renameOrgSlug` (Phase 2) — that migration was +# removed in v1.0 along with the upgrade framework; the Phase 2 step is +# no longer needed and would now fail with "function not found". +# # Prerequisites: # - Docker must be running -# - Platform container should be stopped for phase 1 +# - Platform container should be stopped before running # # Usage: # ./scripts/2026-03-28-migrate-convex-data.sh @@ -83,56 +87,8 @@ else echo "" fi -# ============================================================================ -# Phase 2: Rename organization slug to "default" -# ============================================================================ - -find_platform_container() { - docker ps --filter "name=tale-platform" --filter "status=running" --format '{{.Names}}' | head -1 -} - -echo "" -echo "── Phase 2: Organization slug rename ──" -echo "" - -container=$(find_platform_container) - -if [ -z "$container" ]; then - echo "❌ Platform container is not running." - echo " Please start it first:" - echo "" - echo " docker compose up --build -d platform" - echo "" - echo " Then re-run this script." - exit 1 -fi - -status=$(docker inspect --format='{{.State.Health.Status}}' "$container" 2>/dev/null || echo "unknown") -if [ "$status" != "healthy" ]; then - echo "❌ Platform container '$container' is not healthy (status: $status)." - echo " Wait for it to become healthy, then re-run this script." - exit 1 -fi - -echo " ✅ $container is healthy." -echo " Running organization slug migration..." 
- -docker exec "$container" bash -c ' - source /app/env.sh - env_normalize_common - source /app/generate-admin-key.sh - ensure_instance_secret - ADMIN_KEY=$(generate_key "$INSTANCE_NAME" "$INSTANCE_SECRET") - cd /app - HOME=/home/tanstack bunx convex run \ - migrations/rename_org_slug:renameOrgSlug \ - --url "http://localhost:3210" \ - --admin-key "$ADMIN_KEY" \ - --no-push 2>&1 -' | grep -v "^Admin key\|^📋\|^✅ Admin\|^━\|^🌐\|^$\|Steps:\|Open\|Enter\|Paste" +# Phase 2 (renameOrgSlug) removed in v1.0 — the underlying Convex migration +# function no longer exists in the platform codebase. echo "" echo "✅ Migration complete!" -echo "" -echo "You can verify the organization slug with:" -echo " docker exec $container bash -c 'source /app/env.sh && env_normalize_common && source /app/generate-admin-key.sh && ensure_instance_secret && ADMIN_KEY=\$(generate_key \"\$INSTANCE_NAME\" \"\$INSTANCE_SECRET\") && cd /app && HOME=/home/tanstack bunx convex data --component betterAuth organization --url \"http://localhost:3210\" --admin-key \"\$ADMIN_KEY\"'" diff --git a/services/crawler/app/routers/pages.py b/services/crawler/app/routers/pages.py index 378e8cb692..6268cedaae 100644 --- a/services/crawler/app/routers/pages.py +++ b/services/crawler/app/routers/pages.py @@ -6,8 +6,27 @@ from loguru import logger from app.models import PageChunkItem, PageChunksResponse, PageListItem, PageListResponse +from app.org_context import get_active_org from app.services.database import get_pool + +async def _require_org_membership(pool, domain: str, org_slug: str) -> None: + """Caller's org must have a membership on `domain`, else 404. + + Routers below operate on shared chunks/website_urls tables — without + this gate, any authenticated request would be able to read any + domain's pages just by knowing the name. 
+ """ + async with pool.acquire() as conn: + row = await conn.fetchrow( + "SELECT 1 FROM website_org_memberships WHERE domain = $1 AND org_slug = $2", + domain, + org_slug, + ) + if row is None: + raise HTTPException(status_code=404, detail=f"Website not found: {domain}") + + router = APIRouter(prefix="/api/v1/pages", tags=["Pages"]) @@ -22,6 +41,7 @@ async def list_pages( """List all crawled pages for a website with indexing status.""" try: pool = get_pool() + await _require_org_membership(pool, domain, get_active_org()) sort_columns = { "last_crawled_at": "wu.last_crawled_at", @@ -104,6 +124,7 @@ async def get_page_chunks( """Get all indexed chunks for a specific page URL.""" try: pool = get_pool() + await _require_org_membership(pool, domain, get_active_org()) async with pool.acquire() as conn: rows = await conn.fetch( diff --git a/services/crawler/app/routers/websites.py b/services/crawler/app/routers/websites.py index 6a8bf4d0a7..a51ea850a9 100644 --- a/services/crawler/app/routers/websites.py +++ b/services/crawler/app/routers/websites.py @@ -16,6 +16,7 @@ WebsiteUrl, WebsiteUrlsResponse, ) +from app.org_context import get_active_org from app.services.pg_website_store import PgWebsiteStoreManager from app.services.scheduler import cancel_scan, trigger_scan @@ -74,6 +75,7 @@ def _format_timestamp(val) -> str | None: async def register_website(request: RegisterWebsiteRequest, http_request: Request): try: manager = _get_manager(http_request) + org_slug = get_active_org() # Reject registration if domain is currently being deleted website = await manager.get_website(request.domain) @@ -83,20 +85,21 @@ async def register_website(request: RegisterWebsiteRequest, http_request: Reques detail=f"Domain {request.domain} is currently being deleted. 
Please retry later.", ) - await manager.register_website( + result = await manager.register_website( domain=request.domain, scan_interval=request.scan_interval, + org_slug=org_slug, ) - # Wake the scheduler — newly registered sites have last_scanned_at=NULL - # and will be picked up immediately by get_due_websites(). - # The scheduler handles URL discovery, crawling, and metadata extraction - # with proper concurrency control (max_concurrent_scans semaphore). - trigger_scan() + # Wake the scheduler only when this membership creates new work + # (first org to register this domain). Subsequent orgs joining an + # already-tracked domain reuse the existing crawl cadence. + if result.get("first_membership"): + trigger_scan() return WebsiteInfoResponse( domain=request.domain, - status="scanning", + status="scanning" if result.get("first_membership") else (website.get("status") if website else "idle"), scan_interval=request.scan_interval, ) except HTTPException: @@ -110,6 +113,11 @@ async def register_website(request: RegisterWebsiteRequest, http_request: Reques async def update_website(domain: str, request: UpdateWebsiteRequest, http_request: Request): try: manager = _get_manager(http_request) + org_slug = get_active_org() + # Caller's org must have a membership on this domain or it doesn't + # exist (from their viewpoint). 
+ if not await manager.org_has_membership(domain, org_slug): + raise HTTPException(status_code=404, detail=f"Website not found: {domain}") website = await manager.get_website(domain) if not website: raise HTTPException(status_code=404, detail=f"Website not found: {domain}") @@ -145,6 +153,9 @@ async def update_website(domain: str, request: UpdateWebsiteRequest, http_reques async def get_website_info(domain: str, http_request: Request): try: manager = _get_manager(http_request) + org_slug = get_active_org() + if not await manager.org_has_membership(domain, org_slug): + raise HTTPException(status_code=404, detail=f"Website not found: {domain}") website = await manager.get_website(domain) if not website: @@ -173,19 +184,28 @@ async def get_website_info(domain: str, http_request: Request): @router.delete("/{domain}") async def deregister_website(domain: str, http_request: Request): try: - cancel_scan(domain) manager = _get_manager(http_request) - marked = await manager.begin_delete(domain) - if not marked: - # Already deleting — return 202 idempotently - website = await manager.get_website(domain) - if website and website.get("status") == "deleting": - return JSONResponse( - status_code=202, - content={"domain": domain, "status": "deleting"}, - ) + org_slug = get_active_org() + + result = await manager.begin_delete(domain, org_slug) + if not result["removed_membership"]: + # The caller's org wasn't tracking this domain. From their + # viewpoint, the website doesn't exist — return 404 instead + # of leaking whether another org has it. raise HTTPException(status_code=404, detail=f"Website not found: {domain}") + if not result["removed_website"]: + # Other orgs are still using this domain; only the caller's + # membership was removed. Domain data stays in place. + return JSONResponse( + status_code=200, + content={"domain": domain, "status": "membership_removed"}, + ) + + # We dropped the last membership and the website was marked for + # deletion. 
Cancel any in-flight scan and start the CASCADE in + # the background. + cancel_scan(domain) _spawn_delete_task(manager, domain) return JSONResponse( status_code=202, @@ -208,6 +228,9 @@ async def get_website_urls( ): try: manager = _get_manager(http_request) + org_slug = get_active_org() + if not await manager.org_has_membership(domain, org_slug): + raise HTTPException(status_code=404, detail=f"Website not found: {domain}") website = await manager.get_website(domain) if not website: diff --git a/services/crawler/app/services/database.py b/services/crawler/app/services/database.py index 160a536a5d..6a00853bfd 100644 --- a/services/crawler/app/services/database.py +++ b/services/crawler/app/services/database.py @@ -46,8 +46,28 @@ async def init_pool(*, max_size: int = 10) -> asyncpg.Pool: if _pool is not None: return _pool + # Resolve the deployment-wide embedding dim BEFORE creating the + # pool. This way, a missing `default` org provider fails fast + # with no pool resource to clean up — and the module-level + # `_pool` stays None so a follow-up retry can re-enter cleanly. + # + # Background: the baseline migration declares `embedding vector` + # (no dim) so pgvector accepts mixed-dim inserts silently and + # `create_chunks_hnsw_index()` raises ("has no dimensions"). + # All orgs on this deployment must share embedding dims (single + # chunks table); we pin from the `default` org's catalog. + try: + _, _, _, dims = settings.get_embedding_config("default") + except Exception as e: + raise RuntimeError( + "Cannot resolve embedding dims for the 'default' org " + "(needed to pin public_web.chunks.embedding at boot). " + "Ensure providers are configured for the default org " + "before starting crawler." 
+ ) from e + dsn = _get_database_url() - _pool = await asyncpg.create_pool( + pool = await asyncpg.create_pool( dsn, min_size=min(2, max_size), max_size=max_size, @@ -62,26 +82,26 @@ async def init_pool(*, max_size: int = 10) -> asyncpg.Pool: ) logger.info(f"PostgreSQL connection pool initialized (min={min(2, max_size)}, max={max_size})") - # Note: the previous boot-time embedding-dimension guard was - # removed when crawler became multi-org. Dim is now an attribute - # of the org's provider catalog, not a global setting, and there - # is no org context at lifespan start. `get_embedding_service()` - # refuses dim changes per-org at request time; pgvector enforces - # column dim on insert. - # - # The column type and HNSW index are pinned lazily on the first - # insert (pgvector errors loudly on dim mismatch). All orgs - # sharing this crawler instance must agree on embedding dims. - - # Create HNSW index if it doesn't exist yet. The index targets - # whatever the column type is set to; if no rows have been - # inserted, the call is cheap. try: - async with acquire_with_retry(_pool) as conn: - await conn.execute(f"SELECT {SCHEMA}.create_chunks_hnsw_index()") - except Exception as e: - logger.warning(f"HNSW index creation deferred: {e}") - + async with acquire_with_retry(pool) as conn: + await conn.execute(f"ALTER TABLE {SCHEMA}.chunks ALTER COLUMN embedding TYPE vector({dims})") + logger.info(f"Pinned {SCHEMA}.chunks.embedding to vector({dims})") + + # Create HNSW index if it doesn't exist yet. After the pin + # above this is the normal path; the function raises if the + # dim is still unset, which would now indicate a deeper + # invariant break. + try: + async with acquire_with_retry(pool) as conn: + await conn.execute(f"SELECT {SCHEMA}.create_chunks_hnsw_index()") + except Exception as e: + logger.warning(f"HNSW index creation deferred: {e}") + except Exception: + # Roll back the pool we just opened so a retry hits a clean state. 
+ await pool.close() + raise + + _pool = pool return _pool diff --git a/services/crawler/app/services/pg_website_store.py b/services/crawler/app/services/pg_website_store.py index b47d1b83ad..75ee0e8ded 100644 --- a/services/crawler/app/services/pg_website_store.py +++ b/services/crawler/app/services/pg_website_store.py @@ -253,8 +253,25 @@ def __init__(self, pool: asyncpg.Pool): self._pool = pool self._stores: dict[str, PgWebsiteStore] = {} - async def register_website(self, domain: str, scan_interval: int = 21600) -> dict: - async with acquire_with_retry(self._pool) as conn: + async def register_website( + self, + domain: str, + scan_interval: int = 21600, + *, + org_slug: str, + ) -> dict: + """Register a domain on behalf of `org_slug`. + + websites is deployment-shared content storage; the per-org + boundary lives in `website_org_memberships`. The first org to + register a domain creates the website row; subsequent orgs + simply join the membership table without re-fetching. + + Returns a dict that includes `first_membership=True` only when + this call is the first to register the domain — callers use it + to decide whether to trigger an immediate scan. + """ + async with acquire_with_retry(self._pool) as conn, conn.transaction(): await conn.execute( """INSERT INTO websites (domain, scan_interval, created_at, updated_at) VALUES ($1, $2, NOW(), NOW()) @@ -264,8 +281,37 @@ async def register_website(self, domain: str, scan_interval: int = 21600) -> dic domain, scan_interval, ) - logger.info(f"Registered website: {domain} (interval={scan_interval}s)") - return {"domain": domain, "scan_interval": scan_interval, "status": "idle"} + # ON CONFLICT DO NOTHING — re-registering from the same org is a no-op. + # `xmax = 0` is true on a row INSERTed in this command; non-zero on + # an existing row that hit ON CONFLICT. We use it to tell the caller + # whether this was the very first membership for the domain. 
+ row = await conn.fetchrow( + """INSERT INTO website_org_memberships (domain, org_slug) + VALUES ($1, $2) + ON CONFLICT DO NOTHING + RETURNING (xmax = 0) AS inserted""", + domain, + org_slug, + ) + membership_inserted = bool(row and row["inserted"]) + total_members = await conn.fetchval( + "SELECT COUNT(*) FROM website_org_memberships WHERE domain = $1", + domain, + ) + first_membership = membership_inserted and total_members == 1 + logger.info( + "Registered website: %s for org=%s (interval=%ss, first_membership=%s)", + domain, + org_slug, + scan_interval, + first_membership, + ) + return { + "domain": domain, + "scan_interval": scan_interval, + "status": "idle", + "first_membership": first_membership, + } async def update_website_metadata( self, @@ -288,16 +334,50 @@ async def update_website_metadata( page_count, ) - async def begin_delete(self, domain: str) -> bool: - """Mark a website for deletion. Returns True if the domain was found and marked.""" - self._stores.pop(domain, None) - async with acquire_with_retry(self._pool) as conn: - row = await conn.fetchrow( - "UPDATE websites SET status = 'deleting', updated_at = NOW() " - "WHERE domain = $1 AND status != 'deleting' RETURNING domain", + async def begin_delete(self, domain: str, org_slug: str) -> dict: + """Remove org's membership of `domain`. If no orgs remain after + the removal, mark the website itself for deletion (the actual + CASCADE happens in `execute_delete`, called from a background + task). + + Returns a dict with: + - `removed_membership`: True if the (domain, org) row existed + and was removed. + - `removed_website`: True if this caller dropped the last + membership and the website was marked for deletion. 
+ """ + async with acquire_with_retry(self._pool) as conn, conn.transaction(): + deleted = await conn.execute( + "DELETE FROM website_org_memberships WHERE domain = $1 AND org_slug = $2", domain, + org_slug, ) - return row is not None + # asyncpg returns "DELETE N" as the tag; "DELETE 0" means no row matched. + removed_membership = deleted != "DELETE 0" + remaining = await conn.fetchval( + "SELECT COUNT(*) FROM website_org_memberships WHERE domain = $1", + domain, + ) + removed_website = False + if remaining == 0: + self._stores.pop(domain, None) + row = await conn.fetchrow( + "UPDATE websites SET status = 'deleting', updated_at = NOW() " + "WHERE domain = $1 AND status != 'deleting' RETURNING domain", + domain, + ) + removed_website = row is not None + logger.info( + "begin_delete: domain=%s org=%s removed_membership=%s removed_website=%s", + domain, + org_slug, + removed_membership, + removed_website, + ) + return { + "removed_membership": removed_membership, + "removed_website": removed_website, + } async def execute_delete(self, domain: str) -> None: """Run the actual CASCADE DELETE. Intended for background execution.""" @@ -320,19 +400,51 @@ async def get_due_websites(self) -> list[dict]: - Its scan interval has elapsed and it is not currently scanning/deleting, OR - It has been stuck in 'scanning' for >2 hours (no heartbeat progress), indicating the previous scanner crashed or was replaced. + + Each returned row also includes `owner_org_slug` — the slug of the + org that registered the domain earliest. The scheduler uses this to + bind `set_active_org()` so the per-org provider catalog can resolve + API keys for the embed/fetch path. Domains with no remaining + memberships (a transient race during delete) are skipped. 
""" async with acquire_with_retry(self._pool) as conn: rows = await conn.fetch( - """SELECT domain, status, scan_interval, last_scanned_at, error - FROM websites - WHERE (status NOT IN ('scanning', 'deleting') - AND (last_scanned_at IS NULL - OR last_scanned_at + make_interval(secs => scan_interval) < NOW())) - OR (status = 'scanning' - AND updated_at < NOW() - INTERVAL '2 hours')""" + """SELECT w.domain, w.status, w.scan_interval, w.last_scanned_at, w.error, + m.org_slug AS owner_org_slug + FROM websites w + JOIN LATERAL ( + SELECT org_slug FROM website_org_memberships + WHERE domain = w.domain + ORDER BY added_at ASC, org_slug ASC + LIMIT 1 + ) m ON true + WHERE (w.status NOT IN ('scanning', 'deleting') + AND (w.last_scanned_at IS NULL + OR w.last_scanned_at + make_interval(secs => w.scan_interval) < NOW())) + OR (w.status = 'scanning' + AND w.updated_at < NOW() - INTERVAL '2 hours')""" ) return [dict(r) for r in rows] + async def org_has_membership(self, domain: str, org_slug: str) -> bool: + """True if `org_slug` has registered `domain` (used by per-org views).""" + async with acquire_with_retry(self._pool) as conn: + row = await conn.fetchrow( + "SELECT 1 FROM website_org_memberships WHERE domain = $1 AND org_slug = $2", + domain, + org_slug, + ) + return row is not None + + async def list_domains_for_org(self, org_slug: str) -> list[str]: + """Return all domains the given org has registered.""" + async with acquire_with_retry(self._pool) as conn: + rows = await conn.fetch( + "SELECT domain FROM website_org_memberships WHERE org_slug = $1 ORDER BY domain", + org_slug, + ) + return [r["domain"] for r in rows] + async def update_scan_interval(self, domain: str, scan_interval: int) -> None: async with acquire_with_retry(self._pool) as conn: await conn.execute( diff --git a/services/crawler/app/services/scheduler.py b/services/crawler/app/services/scheduler.py index d72051d73b..edc83742fb 100644 --- a/services/crawler/app/services/scheduler.py +++ 
b/services/crawler/app/services/scheduler.py @@ -12,6 +12,7 @@ import httpx +from app.org_context import set_active_org from app.services.crawler_service import CrawlerService from app.services.indexing_service import IndexingService from app.services.pg_website_store import PgWebsiteStore, PgWebsiteStoreManager @@ -66,21 +67,15 @@ async def run_scheduler( global _scan_trigger _scan_trigger = asyncio.Event() - # Background scheduler has no per-request X-Tale-Org context. Until - # the websites table carries the owning org slug, fall back to - # `default` for any provider lookups triggered by scheduled scans. - # Log once so operators see the assumption. - from app.org_context import set_active_org - - set_active_org("default") - logger.warning( - "Scheduler background task using org slug 'default' for provider " - "lookups. Per-website org binding is a follow-up." - ) - sem = asyncio.Semaphore(max_concurrent_scans) - async def bounded_scan(domain: str): + async def bounded_scan(domain: str, owner_org_slug: str): + # ContextVar is per-asyncio-task: asyncio.create_task copies the + # parent context at spawn, then any `set` inside the task only + # affects this task. Setting here binds provider lookups for the + # embed/fetch path to the website's owning org for the duration + # of the scan. 
+ set_active_org(owner_org_slug) async with sem: await _scan_website( domain, @@ -95,7 +90,7 @@ async def bounded_scan(domain: str): due = await store_manager.get_due_websites() if due: logger.info(f"Scheduler: {len(due)} website(s) due for scanning") - tasks = [asyncio.create_task(bounded_scan(w["domain"])) for w in due] + tasks = [asyncio.create_task(bounded_scan(w["domain"], w["owner_org_slug"])) for w in due] results = await asyncio.gather(*tasks, return_exceptions=True) for website, result in zip(due, results, strict=True): if isinstance(result, BaseException): diff --git a/services/crawler/app/services/search_service.py b/services/crawler/app/services/search_service.py index 99bd9a2e55..9526afcdb7 100644 --- a/services/crawler/app/services/search_service.py +++ b/services/crawler/app/services/search_service.py @@ -9,6 +9,7 @@ import asyncpg +from app.org_context import get_active_org from app.services.database import acquire_with_retry from app.services.embedding_service import get_embedding_service @@ -41,13 +42,18 @@ async def search( limit: int = 10, similarity_threshold: float = 0.4, ) -> list[SearchResult]: + # Resolve the active org once and pass to both helpers — chunks + # data is shared across orgs, but each search is restricted to + # domains the caller's org has registered (membership filter). + org_slug = get_active_org() + # Generate query embedding and run both searches in parallel embedding_task = asyncio.create_task(get_embedding_service().embed_query(query)) - fts_task = asyncio.create_task(self._fts_search(query, domain, limit * 3)) + fts_task = asyncio.create_task(self._fts_search(query, org_slug, domain, limit * 3)) query_embedding = await embedding_task fts_results = await fts_task - vector_results = await self._vector_search(query_embedding, domain, limit * 3) + vector_results = await self._vector_search(query_embedding, org_slug, domain, limit * 3) # Pre-filter vector results by cosine similarity (matches RAG pipeline). 
# If ALL vector results fall below the threshold the query is considered @@ -69,57 +75,81 @@ async def search( return self._merge_rrf([fts_results, vector_results], limit) - async def _fts_search(self, query: str, domain: str | None, limit: int) -> list[dict]: + async def _fts_search(self, query: str, org_slug: str, domain: str | None, limit: int) -> list[dict]: + # Membership filter restricts the org's view to domains it has + # registered. chunks/websites are deployment-shared content, but + # org A must not see search hits from a domain only org B added. async with acquire_with_retry(self._pool) as conn: if domain: rows = await conn.fetch( - """SELECT id, url, title, chunk_content, core_content, chunk_index, - paradedb.score(id) AS score - FROM chunks - WHERE id @@@ paradedb.match('chunk_content', $1) AND domain = $2 + """SELECT c.id, c.url, c.title, c.chunk_content, c.core_content, c.chunk_index, + paradedb.score(c.id) AS score + FROM chunks c + WHERE c.id @@@ paradedb.match('chunk_content', $1) + AND c.domain = $2 + AND EXISTS ( + SELECT 1 FROM website_org_memberships m + WHERE m.domain = c.domain AND m.org_slug = $3 + ) ORDER BY score DESC - LIMIT $3""", + LIMIT $4""", query, domain, + org_slug, limit, ) else: rows = await conn.fetch( - """SELECT id, url, title, chunk_content, core_content, chunk_index, - paradedb.score(id) AS score - FROM chunks - WHERE id @@@ paradedb.match('chunk_content', $1) + """SELECT c.id, c.url, c.title, c.chunk_content, c.core_content, c.chunk_index, + paradedb.score(c.id) AS score + FROM chunks c + WHERE c.id @@@ paradedb.match('chunk_content', $1) + AND EXISTS ( + SELECT 1 FROM website_org_memberships m + WHERE m.domain = c.domain AND m.org_slug = $2 + ) ORDER BY score DESC - LIMIT $2""", + LIMIT $3""", query, + org_slug, limit, ) return [dict(r) for r in rows] - async def _vector_search(self, embedding: list[float], domain: str | None, limit: int) -> list[dict]: + async def _vector_search(self, embedding: list[float], org_slug: 
str, domain: str | None, limit: int) -> list[dict]: vec_str = json.dumps(embedding) async with acquire_with_retry(self._pool) as conn: if domain: rows = await conn.fetch( - """SELECT id, url, title, chunk_content, core_content, chunk_index, - 1 - (embedding <=> $1::vector) AS score - FROM chunks - WHERE domain = $2 AND embedding IS NOT NULL - ORDER BY embedding <=> $1::vector - LIMIT $3""", + """SELECT c.id, c.url, c.title, c.chunk_content, c.core_content, c.chunk_index, + 1 - (c.embedding <=> $1::vector) AS score + FROM chunks c + WHERE c.domain = $2 AND c.embedding IS NOT NULL + AND EXISTS ( + SELECT 1 FROM website_org_memberships m + WHERE m.domain = c.domain AND m.org_slug = $3 + ) + ORDER BY c.embedding <=> $1::vector + LIMIT $4""", vec_str, domain, + org_slug, limit, ) else: rows = await conn.fetch( - """SELECT id, url, title, chunk_content, core_content, chunk_index, - 1 - (embedding <=> $1::vector) AS score - FROM chunks - WHERE embedding IS NOT NULL - ORDER BY embedding <=> $1::vector - LIMIT $2""", + """SELECT c.id, c.url, c.title, c.chunk_content, c.core_content, c.chunk_index, + 1 - (c.embedding <=> $1::vector) AS score + FROM chunks c + WHERE c.embedding IS NOT NULL + AND EXISTS ( + SELECT 1 FROM website_org_memberships m + WHERE m.domain = c.domain AND m.org_slug = $2 + ) + ORDER BY c.embedding <=> $1::vector + LIMIT $3""", vec_str, + org_slug, limit, ) return [dict(r) for r in rows] diff --git a/services/crawler/migrations/20260528000000_add_website_org_memberships.sql b/services/crawler/migrations/20260528000000_add_website_org_memberships.sql new file mode 100644 index 0000000000..b2047a6cb4 --- /dev/null +++ b/services/crawler/migrations/20260528000000_add_website_org_memberships.sql @@ -0,0 +1,36 @@ +-- migrate:up +-- Per-org website membership layer. +-- +-- websites / website_urls / chunks / page_paragraph_hashes remain +-- deployment-shared content storage (one canonical fetch + embed per +-- domain, independent of which org requested it). 
This junction table +-- tracks WHICH orgs have asked the crawler to track a given domain. +-- +-- Register: insert (domain, org_slug) ON CONFLICT DO NOTHING. First +-- membership for a never-seen domain implies UPSERT into websites. +-- Delete: delete the (domain, org_slug) row; the website itself is +-- only purged when no memberships remain (ref-counted). +-- Search/list: JOIN this table filtered by current X-Tale-Org so org A +-- only sees domains it registered (or another member of org A did). +-- +-- Backfill: every existing website row is treated as belonging to the +-- 'default' org, which is the only org in use at the demo stage. +-- ON CONFLICT DO NOTHING keeps the migration idempotent on re-run. + +CREATE TABLE IF NOT EXISTS public_web.website_org_memberships ( + domain TEXT NOT NULL REFERENCES public_web.websites(domain) ON DELETE CASCADE, + org_slug TEXT NOT NULL, + added_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + PRIMARY KEY (domain, org_slug) +); + +CREATE INDEX IF NOT EXISTS idx_website_org_memberships_by_org + ON public_web.website_org_memberships (org_slug); + +INSERT INTO public_web.website_org_memberships (domain, org_slug) +SELECT domain, 'default' +FROM public_web.websites +ON CONFLICT DO NOTHING; + +-- migrate:down +DROP TABLE IF EXISTS public_web.website_org_memberships; diff --git a/services/crawler/tests/conftest.py b/services/crawler/tests/conftest.py index 2c149549bf..55be3f34c3 100644 --- a/services/crawler/tests/conftest.py +++ b/services/crawler/tests/conftest.py @@ -1 +1,29 @@ -"""Test configuration for the crawler service.""" +"""Test configuration for the crawler service. + +Provides an autouse fixture that binds the active-org ContextVar to +`"test-org"` for the duration of each test. Crawler routers and +services now read `get_active_org()` to scope work per-org; without +a binding they raise RuntimeError on first use. + +The same fixture also resets the ContextVar after each test to keep +tests isolated under parallel runners. 
+""" + +from collections.abc import Iterator + +import pytest + +from app.org_context import _active_org, set_active_org + + +@pytest.fixture(autouse=True) +def _bind_test_active_org() -> Iterator[None]: + """Bind `set_active_org("test-org")` for the test, then reset.""" + token = _active_org.set("test-org") + try: + yield + finally: + _active_org.reset(token) + + +__all__ = ["_bind_test_active_org", "set_active_org"] diff --git a/services/crawler/tests/test_database.py b/services/crawler/tests/test_database.py index 18aa21712f..763aabf6a6 100644 --- a/services/crawler/tests/test_database.py +++ b/services/crawler/tests/test_database.py @@ -1,4 +1,12 @@ -"""Tests for database pool initialization, including dimension mismatch guard.""" +"""Tests for database pool initialization, including the boot-time +embedding-column dimension pin. + +The baseline migration declares `chunks.embedding` as bare `vector` +(no dim). Without an explicit pin pgvector accepts mixed-dim inserts +silently and the HNSW index can't be built. `init_pool` resolves +the deployment-wide dim from the `default` org's provider catalog +and `ALTER TABLE`-pins the column at boot. +""" from contextlib import asynccontextmanager from unittest.mock import AsyncMock, patch @@ -16,14 +24,9 @@ def _reset_pool(): db_mod._pool = None -def _fake_pool(stored_dims: int | None, col_type: str = "vector(1536)"): - """Build a mock asyncpg pool. - - *stored_dims* is returned for the first fetchval (dimension check). - *col_type* is returned for the second fetchval (column type check). - """ +def _fake_pool(): + """Build a mock asyncpg pool with a tracked single connection.""" conn = AsyncMock() - conn.fetchval = AsyncMock(side_effect=[stored_dims, col_type]) conn.execute = AsyncMock() pool = AsyncMock() @@ -37,122 +40,75 @@ async def _acq(_pool, **_kw): return pool, _acq -@pytest.mark.skip( - reason="Boot-time embedding-dimension guard was removed when crawler " - "became multi-org. 
Dim is now per-org provider catalog; pgvector enforces " - "column dim on insert + get_embedding_service refuses dim changes per-org." -) -class TestDimensionMismatchGuard: - @pytest.mark.asyncio - async def test_raises_on_dimension_mismatch(self): - fake_pool, acq = _fake_pool(stored_dims=3072) - - with ( - patch("app.services.database.asyncpg.create_pool", AsyncMock(return_value=fake_pool)), - patch("app.services.database.acquire_with_retry", acq), - patch("app.services.database.settings") as mock_settings, - ): - mock_settings.get_embedding_dimensions.return_value = 1536 - mock_settings.database_url = "postgresql://test:test@localhost/test" - - with pytest.raises(RuntimeError, match="dimension mismatch"): - await db_mod.init_pool() - - assert db_mod._pool is None - +class TestEmbeddingColumnPin: @pytest.mark.asyncio - async def test_passes_when_dimensions_match(self): - fake_pool, acq = _fake_pool(stored_dims=1536, col_type="vector(1536)") + async def test_pins_column_at_boot(self): + """init_pool issues ALTER TABLE … TYPE vector(N) using default-org dim.""" + fake_pool, acq = _fake_pool() with ( patch("app.services.database.asyncpg.create_pool", AsyncMock(return_value=fake_pool)), patch("app.services.database.acquire_with_retry", acq), patch("app.services.database.settings") as mock_settings, ): - mock_settings.get_embedding_dimensions.return_value = 1536 - mock_settings.database_url = "postgresql://test:test@localhost/test" - - pool = await db_mod.init_pool() - - assert pool is fake_pool - - @pytest.mark.asyncio - async def test_passes_when_no_existing_data(self): - fake_pool, acq = _fake_pool(stored_dims=None, col_type="vector") - - with ( - patch("app.services.database.asyncpg.create_pool", AsyncMock(return_value=fake_pool)), - patch("app.services.database.acquire_with_retry", acq), - patch("app.services.database.settings") as mock_settings, - ): - mock_settings.get_embedding_dimensions.return_value = 1536 - mock_settings.database_url = 
"postgresql://test:test@localhost/test" - - pool = await db_mod.init_pool() - - assert pool is fake_pool - - -@pytest.mark.skip( - reason="Boot-time embedding-column ALTER was removed when crawler became " - "multi-org. Column type is now driven by the first INSERT under pgvector; " - "operators reconcile per-org provider catalogs manually if dims diverge." -) -class TestEmbeddingColumnPinning: - @pytest.mark.asyncio - async def test_alters_untyped_vector_column(self): - """When column is bare `vector`, init_pool pins it to vector(N).""" - fake_pool, acq = _fake_pool(stored_dims=None, col_type="vector") - - with ( - patch("app.services.database.asyncpg.create_pool", AsyncMock(return_value=fake_pool)), - patch("app.services.database.acquire_with_retry", acq), - patch("app.services.database.settings") as mock_settings, - ): - mock_settings.get_embedding_dimensions.return_value = 768 + mock_settings.get_embedding_config.return_value = ( + "https://api.example.com", + "sk-test", + "text-embedding-3-small", + 1536, + ) mock_settings.database_url = "postgresql://test:test@localhost/test" await db_mod.init_pool() + mock_settings.get_embedding_config.assert_called_once_with("default") conn = fake_pool._test_conn execute_calls = [str(c) for c in conn.execute.call_args_list] - assert any("ALTER TABLE" in c and "vector(768)" in c for c in execute_calls) + assert any("ALTER TABLE" in c and "vector(1536)" in c for c in execute_calls) + # HNSW index creation is attempted after the pin. 
+ assert any("create_chunks_hnsw_index" in c for c in execute_calls) @pytest.mark.asyncio - async def test_skips_alter_when_already_typed(self): - """When column already has dimensions, no ALTER is issued.""" - fake_pool, acq = _fake_pool(stored_dims=1536, col_type="vector(1536)") + async def test_uses_default_org_dim(self): + """ALTER TABLE uses whatever dim the default org's provider returns.""" + fake_pool, acq = _fake_pool() with ( patch("app.services.database.asyncpg.create_pool", AsyncMock(return_value=fake_pool)), patch("app.services.database.acquire_with_retry", acq), patch("app.services.database.settings") as mock_settings, ): - mock_settings.get_embedding_dimensions.return_value = 1536 + mock_settings.get_embedding_config.return_value = ( + "https://api.example.com", + "sk-test", + "nomic-embed-text", + 768, + ) mock_settings.database_url = "postgresql://test:test@localhost/test" await db_mod.init_pool() conn = fake_pool._test_conn execute_calls = [str(c) for c in conn.execute.call_args_list] - assert not any("ALTER TABLE" in c for c in execute_calls) + assert any("ALTER TABLE" in c and "vector(768)" in c for c in execute_calls) @pytest.mark.asyncio - async def test_repins_column_when_dimension_changed(self): - """When column is pinned to a different dimension and table is empty, re-pin.""" - fake_pool, acq = _fake_pool(stored_dims=None, col_type="vector(2560)") + async def test_raises_when_default_org_provider_unconfigured(self): + """Without a default-org provider, boot fails loudly rather than + proceeding with an unpinned column (silent regression risk).""" + fake_pool, acq = _fake_pool() with ( patch("app.services.database.asyncpg.create_pool", AsyncMock(return_value=fake_pool)), patch("app.services.database.acquire_with_retry", acq), patch("app.services.database.settings") as mock_settings, ): - mock_settings.get_embedding_dimensions.return_value = 1536 + mock_settings.get_embedding_config.side_effect = ValueError( + "no embedding provider 
configured for org 'default'" + ) mock_settings.database_url = "postgresql://test:test@localhost/test" - await db_mod.init_pool() + with pytest.raises(RuntimeError, match="default"): + await db_mod.init_pool() - conn = fake_pool._test_conn - execute_calls = [str(c) for c in conn.execute.call_args_list] - assert any("DROP INDEX" in c and "idx_pw_chunks_embedding_hnsw" in c for c in execute_calls) - assert any("ALTER TABLE" in c and "vector(1536)" in c for c in execute_calls) + assert db_mod._pool is None diff --git a/services/crawler/tests/test_website_membership.py b/services/crawler/tests/test_website_membership.py new file mode 100644 index 0000000000..c14214b53c --- /dev/null +++ b/services/crawler/tests/test_website_membership.py @@ -0,0 +1,156 @@ +"""Tests for the per-org website_org_memberships layer. + +Covers `PgWebsiteStoreManager.register_website` / `begin_delete` / +`get_due_websites` / `org_has_membership` against an in-memory +asyncpg pool stand-in. The aim is to lock in the ref-counted delete +semantics — websites/chunks rows are deployment-shared, but the +"who can see this domain" decision is org-local. +""" + +from contextlib import asynccontextmanager +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + +from app.services.pg_website_store import PgWebsiteStoreManager + +pytestmark = pytest.mark.asyncio + + +def _make_conn(*, fetchval_return=0, execute_return="DELETE 1", fetchrow_return=None): + """Build a per-test asyncpg connection stub with configurable returns.""" + conn = AsyncMock() + conn.execute = AsyncMock(return_value=execute_return) + conn.fetchval = AsyncMock(return_value=fetchval_return) + conn.fetchrow = AsyncMock(return_value=fetchrow_return) + # Transactions are no-ops at this layer; just yield the same conn. 
+ conn.transaction = MagicMock() + conn.transaction.return_value.__aenter__ = AsyncMock(return_value=None) + conn.transaction.return_value.__aexit__ = AsyncMock(return_value=None) + return conn + + +def _patch_acquire(conn): + """Patch `acquire_with_retry` to yield our stub connection.""" + + @asynccontextmanager + async def _acq(_pool, **_kw): + yield conn + + return patch("app.services.pg_website_store.acquire_with_retry", _acq) + + +class TestRegisterWebsite: + async def test_first_membership_reports_first_membership_true(self): + conn = _make_conn( + fetchval_return=1, # total members after insert = 1 + fetchrow_return={"inserted": True}, + ) + with _patch_acquire(conn): + manager = PgWebsiteStoreManager(pool=MagicMock()) + result = await manager.register_website(domain="example.com", scan_interval=3600, org_slug="acme") + + assert result["first_membership"] is True + assert result["domain"] == "example.com" + assert result["scan_interval"] == 3600 + + async def test_second_org_joining_does_not_report_first_membership(self): + conn = _make_conn( + fetchval_return=2, # total members after insert = 2 + fetchrow_return={"inserted": True}, + ) + with _patch_acquire(conn): + manager = PgWebsiteStoreManager(pool=MagicMock()) + result = await manager.register_website(domain="example.com", scan_interval=3600, org_slug="beta") + + assert result["first_membership"] is False + + async def test_idempotent_when_same_org_re_registers(self): + # ON CONFLICT DO NOTHING → no RETURNING row, total stays as-is. 
+ conn = _make_conn( + fetchval_return=1, + fetchrow_return=None, + ) + with _patch_acquire(conn): + manager = PgWebsiteStoreManager(pool=MagicMock()) + result = await manager.register_website(domain="example.com", scan_interval=3600, org_slug="acme") + + assert result["first_membership"] is False + + +class TestBeginDelete: + async def test_removes_website_when_last_membership(self): + conn = _make_conn( + fetchval_return=0, # no memberships left after delete + execute_return="DELETE 1", # the membership row was deleted + fetchrow_return={"domain": "example.com"}, # website marked deleting + ) + with _patch_acquire(conn): + manager = PgWebsiteStoreManager(pool=MagicMock()) + result = await manager.begin_delete("example.com", "acme") + + assert result == {"removed_membership": True, "removed_website": True} + + async def test_keeps_website_when_other_orgs_remain(self): + conn = _make_conn( + fetchval_return=2, # 2 other orgs still tracking + execute_return="DELETE 1", + fetchrow_return=None, + ) + with _patch_acquire(conn): + manager = PgWebsiteStoreManager(pool=MagicMock()) + result = await manager.begin_delete("example.com", "acme") + + assert result == {"removed_membership": True, "removed_website": False} + + async def test_no_membership_returns_false_false(self): + """Caller's org never tracked this domain — neither rm-membership nor rm-website fires.""" + conn = _make_conn( + fetchval_return=3, + execute_return="DELETE 0", # no row matched the (domain, org) tuple + fetchrow_return=None, + ) + with _patch_acquire(conn): + manager = PgWebsiteStoreManager(pool=MagicMock()) + result = await manager.begin_delete("example.com", "ghost") + + assert result == {"removed_membership": False, "removed_website": False} + + +class TestOrgHasMembership: + async def test_returns_true_when_row_exists(self): + conn = _make_conn(fetchrow_return={"?column?": 1}) + with _patch_acquire(conn): + manager = PgWebsiteStoreManager(pool=MagicMock()) + assert await 
manager.org_has_membership("example.com", "acme") is True + + async def test_returns_false_when_row_missing(self): + conn = _make_conn(fetchrow_return=None) + with _patch_acquire(conn): + manager = PgWebsiteStoreManager(pool=MagicMock()) + assert await manager.org_has_membership("example.com", "ghost") is False + + +class TestGetDueWebsites: + async def test_includes_owner_org_slug(self): + conn = _make_conn() + # fetch() returns rows; the test cares about shape, not SQL. + conn.fetch = AsyncMock( + return_value=[ + { + "domain": "example.com", + "status": "idle", + "scan_interval": 3600, + "last_scanned_at": None, + "error": None, + "owner_org_slug": "acme", + } + ] + ) + with _patch_acquire(conn): + manager = PgWebsiteStoreManager(pool=MagicMock()) + due = await manager.get_due_websites() + + assert len(due) == 1 + assert due[0]["domain"] == "example.com" + assert due[0]["owner_org_slug"] == "acme" diff --git a/services/crawler/tests/test_websites_router.py b/services/crawler/tests/test_websites_router.py index 53df8804f7..91f107e03d 100644 --- a/services/crawler/tests/test_websites_router.py +++ b/services/crawler/tests/test_websites_router.py @@ -16,6 +16,9 @@ def mock_manager(): manager = AsyncMock() manager.get_site_store = MagicMock() + # Default: caller's org has membership (tests that exercise the + # 404-on-missing-membership path can override this). 
+ manager.org_has_membership.return_value = True app.state.pg_store_manager = manager yield manager del app.state.pg_store_manager @@ -40,12 +43,13 @@ def _website_row(domain="example.com", scan_interval=21600, **overrides): class TestRegisterWebsite: - async def test_success(self, mock_manager): + async def test_success_first_membership_triggers_scan(self, mock_manager): mock_manager.get_website.return_value = None mock_manager.register_website.return_value = { "domain": "example.com", "status": "idle", "scan_interval": 21600, + "first_membership": True, } with patch("app.routers.websites.trigger_scan") as mock_trigger: @@ -59,77 +63,60 @@ async def test_success(self, mock_manager): data = response.json() assert data["domain"] == "example.com" assert data["status"] == "scanning" - assert data["scan_interval"] == 21600 mock_manager.register_website.assert_awaited_once_with( domain="example.com", scan_interval=21600, + org_slug="test-org", ) mock_trigger.assert_called_once() - async def test_normalizes_full_url_to_domain(self, mock_manager): + async def test_second_org_joining_does_not_retrigger_scan(self, mock_manager): + """If the domain is already tracked by another org, the new + membership reuses the existing crawl; trigger_scan should NOT fire.""" + mock_manager.get_website.return_value = _website_row(status="active") mock_manager.register_website.return_value = { - "domain": "www.wisekey.com", + "domain": "example.com", "status": "idle", "scan_interval": 21600, + "first_membership": False, } - mock_manager.get_website.return_value = _website_row(domain="www.wisekey.com") - with patch("app.routers.websites.trigger_scan"): + with patch("app.routers.websites.trigger_scan") as mock_trigger: async with AsyncClient(transport=ASGITransport(app=app), base_url="http://test") as client: response = await client.post( "/api/v1/websites", - json={"domain": "https://www.wisekey.com", "scan_interval": 21600}, + json={"domain": "example.com", "scan_interval": 21600}, ) 
assert response.status_code == 200 - mock_manager.register_website.assert_awaited_once_with( - domain="www.wisekey.com", - scan_interval=21600, - ) + data = response.json() + # Status reflects the already-tracked website, not "scanning" + assert data["status"] == "active" + mock_trigger.assert_not_called() - async def test_uses_default_scan_interval(self, mock_manager): + async def test_normalizes_full_url_to_domain(self, mock_manager): mock_manager.register_website.return_value = { - "domain": "example.com", + "domain": "www.wisekey.com", "status": "idle", "scan_interval": 21600, + "first_membership": True, } - mock_manager.get_website.return_value = _website_row() + mock_manager.get_website.return_value = _website_row(domain="www.wisekey.com") with patch("app.routers.websites.trigger_scan"): async with AsyncClient(transport=ASGITransport(app=app), base_url="http://test") as client: response = await client.post( "/api/v1/websites", - json={"domain": "example.com"}, + json={"domain": "https://www.wisekey.com", "scan_interval": 21600}, ) assert response.status_code == 200 mock_manager.register_website.assert_awaited_once_with( - domain="example.com", + domain="www.wisekey.com", scan_interval=21600, + org_slug="test-org", ) - async def test_returns_scanning_status_immediately(self, mock_manager): - mock_manager.get_website.return_value = None - mock_manager.register_website.return_value = { - "domain": "example.com", - "status": "idle", - "scan_interval": 21600, - } - - with patch("app.routers.websites.trigger_scan"): - async with AsyncClient(transport=ASGITransport(app=app), base_url="http://test") as client: - response = await client.post( - "/api/v1/websites", - json={"domain": "example.com"}, - ) - - assert response.status_code == 200 - data = response.json() - assert data["title"] is None - assert data["page_count"] == 0 - assert data["crawled_count"] == 0 - assert data["status"] == "scanning" - async def test_409_when_domain_is_deleting(self, mock_manager): 
mock_manager.get_website.return_value = _website_row(status="deleting") @@ -173,12 +160,23 @@ async def test_success(self, mock_manager): assert data["domain"] == "example.com" assert data["scan_interval"] == 3600 assert data["status"] == "active" - mock_manager.get_website.assert_awaited_once_with("example.com") mock_manager.update_scan_interval.assert_awaited_once_with( domain="example.com", scan_interval=3600, ) + async def test_404_when_caller_org_has_no_membership(self, mock_manager): + mock_manager.org_has_membership.return_value = False + + async with AsyncClient(transport=ASGITransport(app=app), base_url="http://test") as client: + response = await client.patch( + "/api/v1/websites/example.com", + json={"scan_interval": 3600}, + ) + + assert response.status_code == 404 + mock_manager.update_scan_interval.assert_not_awaited() + async def test_404_when_not_found(self, mock_manager): mock_manager.get_website.return_value = None @@ -205,19 +203,6 @@ async def test_409_when_domain_is_deleting(self, mock_manager): assert "currently being deleted" in response.json()["detail"] mock_manager.update_scan_interval.assert_not_awaited() - async def test_500_on_error(self, mock_manager): - mock_manager.get_website.return_value = _website_row() - mock_manager.update_scan_interval.side_effect = RuntimeError("db error") - - async with AsyncClient(transport=ASGITransport(app=app), base_url="http://test") as client: - response = await client.patch( - "/api/v1/websites/example.com", - json={"scan_interval": 3600}, - ) - - assert response.status_code == 500 - assert response.json()["detail"] == "Failed to update website" - class TestGetWebsiteInfo: async def test_success(self, mock_manager): @@ -243,39 +228,33 @@ async def test_success(self, mock_manager): data = response.json() assert data["domain"] == "example.com" assert data["title"] == "Example" - assert data["description"] == "An example site" - assert data["page_count"] == 50 - assert data["crawled_count"] == 42 assert 
data["status"] == "active" - assert data["scan_interval"] == 3600 - assert data["last_scanned_at"] is not None - assert data["error"] is None - assert data["created_at"] is not None - assert data["updated_at"] is not None - mock_manager.get_website.assert_awaited_once_with("example.com") - async def test_404_when_not_found(self, mock_manager): - mock_manager.get_website.return_value = None + async def test_404_when_caller_org_has_no_membership(self, mock_manager): + mock_manager.org_has_membership.return_value = False async with AsyncClient(transport=ASGITransport(app=app), base_url="http://test") as client: - response = await client.get("/api/v1/websites/unknown.com") + response = await client.get("/api/v1/websites/example.com") assert response.status_code == 404 - assert response.json()["detail"] == "Website not found: unknown.com" + mock_manager.get_website.assert_not_awaited() - async def test_500_on_error(self, mock_manager): - mock_manager.get_website.side_effect = RuntimeError("db error") + async def test_404_when_not_found(self, mock_manager): + mock_manager.get_website.return_value = None async with AsyncClient(transport=ASGITransport(app=app), base_url="http://test") as client: - response = await client.get("/api/v1/websites/example.com") + response = await client.get("/api/v1/websites/unknown.com") - assert response.status_code == 500 - assert response.json()["detail"] == "Failed to get website info" + assert response.status_code == 404 + assert response.json()["detail"] == "Website not found: unknown.com" class TestDeregisterWebsite: - async def test_returns_202_accepted(self, mock_manager): - mock_manager.begin_delete.return_value = True + async def test_removes_website_when_last_membership(self, mock_manager): + mock_manager.begin_delete.return_value = { + "removed_membership": True, + "removed_website": True, + } with patch("app.routers.websites._spawn_delete_task") as mock_spawn: async with AsyncClient(transport=ASGITransport(app=app), 
base_url="http://test") as client: @@ -285,12 +264,32 @@ async def test_returns_202_accepted(self, mock_manager): data = response.json() assert data["domain"] == "example.com" assert data["status"] == "deleting" - mock_manager.begin_delete.assert_awaited_once_with("example.com") + mock_manager.begin_delete.assert_awaited_once_with("example.com", "test-org") mock_spawn.assert_called_once_with(mock_manager, "example.com") - async def test_404_when_not_found(self, mock_manager): - mock_manager.begin_delete.return_value = False - mock_manager.get_website.return_value = None + async def test_membership_only_when_other_orgs_remain(self, mock_manager): + """Other orgs still track this domain: only the caller's membership + is removed; website data and crawl schedule stay intact.""" + mock_manager.begin_delete.return_value = { + "removed_membership": True, + "removed_website": False, + } + + with patch("app.routers.websites._spawn_delete_task") as mock_spawn: + async with AsyncClient(transport=ASGITransport(app=app), base_url="http://test") as client: + response = await client.delete("/api/v1/websites/example.com") + + assert response.status_code == 200 + data = response.json() + assert data["status"] == "membership_removed" + # Importantly: no background delete task — data must survive. 
+ mock_spawn.assert_not_called() + + async def test_404_when_caller_never_had_membership(self, mock_manager): + mock_manager.begin_delete.return_value = { + "removed_membership": False, + "removed_website": False, + } async with AsyncClient(transport=ASGITransport(app=app), base_url="http://test") as client: response = await client.delete("/api/v1/websites/unknown.com") @@ -298,18 +297,6 @@ async def test_404_when_not_found(self, mock_manager): assert response.status_code == 404 assert response.json()["detail"] == "Website not found: unknown.com" - async def test_already_deleting_returns_202(self, mock_manager): - mock_manager.begin_delete.return_value = False - mock_manager.get_website.return_value = _website_row(status="deleting") - - async with AsyncClient(transport=ASGITransport(app=app), base_url="http://test") as client: - response = await client.delete("/api/v1/websites/example.com") - - assert response.status_code == 202 - data = response.json() - assert data["domain"] == "example.com" - assert data["status"] == "deleting" - async def test_500_on_error(self, mock_manager): mock_manager.begin_delete.side_effect = RuntimeError("db error") @@ -332,12 +319,6 @@ async def test_success_with_pagination(self, mock_manager): "status": "active", "last_crawled_at": 1700000000.0, }, - { - "url": "https://example.com/page2", - "content_hash": "def456", - "status": "active", - "last_crawled_at": 1700001000.0, - }, ] mock_site_store.get_total_count.return_value = 50 @@ -347,66 +328,12 @@ async def test_success_with_pagination(self, mock_manager): assert response.status_code == 200 data = response.json() assert data["domain"] == "example.com" - assert len(data["urls"]) == 2 - assert data["urls"][0]["url"] == "https://example.com/page1" - assert data["urls"][0]["content_hash"] == "abc123" - assert data["urls"][1]["url"] == "https://example.com/page2" assert data["total"] == 50 - assert data["offset"] == 0 - assert data["has_more"] is True - 
mock_site_store.get_urls_page.assert_awaited_once_with(offset=0, limit=2, status=None) - mock_site_store.get_total_count.assert_awaited_once_with(status=None) - async def test_has_more_false_when_at_end(self, mock_manager): - mock_manager.get_website.return_value = {"domain": "example.com"} - mock_site_store = AsyncMock() - mock_manager.get_site_store.return_value = mock_site_store - mock_site_store.get_urls_page.return_value = [ - { - "url": "https://example.com/last", - "content_hash": "xyz", - "status": "active", - "last_crawled_at": None, - }, - ] - mock_site_store.get_total_count.return_value = 1 - - async with AsyncClient(transport=ASGITransport(app=app), base_url="http://test") as client: - response = await client.get("/api/v1/websites/example.com/urls?offset=0&limit=100") - - assert response.status_code == 200 - data = response.json() - assert data["has_more"] is False - assert data["total"] == 1 - - async def test_status_filter(self, mock_manager): - mock_manager.get_website.return_value = {"domain": "example.com"} - mock_site_store = AsyncMock() - mock_manager.get_site_store.return_value = mock_site_store - mock_site_store.get_urls_page.return_value = [] - mock_site_store.get_total_count.return_value = 0 - - async with AsyncClient(transport=ASGITransport(app=app), base_url="http://test") as client: - response = await client.get("/api/v1/websites/example.com/urls?status=active") - - assert response.status_code == 200 - mock_site_store.get_urls_page.assert_awaited_once_with(offset=0, limit=100, status="active") - mock_site_store.get_total_count.assert_awaited_once_with(status="active") - - async def test_404_when_website_not_found(self, mock_manager): - mock_manager.get_website.return_value = None - - async with AsyncClient(transport=ASGITransport(app=app), base_url="http://test") as client: - response = await client.get("/api/v1/websites/unknown.com/urls") - - assert response.status_code == 404 - assert response.json()["detail"] == "Website not found: 
unknown.com" - - async def test_500_on_error(self, mock_manager): - mock_manager.get_website.side_effect = RuntimeError("db error") + async def test_404_when_caller_org_has_no_membership(self, mock_manager): + mock_manager.org_has_membership.return_value = False async with AsyncClient(transport=ASGITransport(app=app), base_url="http://test") as client: response = await client.get("/api/v1/websites/example.com/urls") - assert response.status_code == 500 - assert response.json()["detail"] == "Failed to get website URLs" + assert response.status_code == 404 diff --git a/services/platform/app/features/organization/components/organization-form.tsx b/services/platform/app/features/organization/components/organization-form.tsx index 6d9631230a..559c2a1a7b 100644 --- a/services/platform/app/features/organization/components/organization-form.tsx +++ b/services/platform/app/features/organization/components/organization-form.tsx @@ -20,6 +20,7 @@ import { toast } from '@/app/hooks/use-toast'; import { api } from '@/convex/_generated/api'; import { authClient } from '@/lib/auth-client'; import { useT } from '@/lib/i18n/client'; +import { isReservedOrgSlug } from '@/lib/shared/constants/reserved-org-slugs'; import { useInitializeDefaultWorkflows } from '../hooks/actions'; @@ -49,6 +50,17 @@ export function OrganizationForm() { .regex( /^[A-Za-z0-9][A-Za-z0-9 _-]*$/, 'Use letters, digits, spaces, hyphens, and underscores only, starting with a letter or digit.', + ) + .refine( + (name) => { + const derived = name + .trim() + .toLowerCase() + .replace(/[^a-z0-9]+/g, '-') + .replace(/^-+|-+$/g, ''); + return !isReservedOrgSlug(derived); + }, + { message: 'This name is reserved by the platform.' 
}, ), }), [t], diff --git a/services/platform/convex/_generated/api.d.ts b/services/platform/convex/_generated/api.d.ts index fa603e7e06..de08adf12d 100644 --- a/services/platform/convex/_generated/api.d.ts +++ b/services/platform/convex/_generated/api.d.ts @@ -452,6 +452,7 @@ import type * as lib_crypto_hex_to_bytes from "../lib/crypto/hex_to_bytes.js"; import type * as lib_crypto_internal_actions from "../lib/crypto/internal_actions.js"; import type * as lib_debug_log from "../lib/debug_log.js"; import type * as lib_error_classification from "../lib/error_classification.js"; +import type * as lib_errors_upstream_http_error from "../lib/errors/upstream_http_error.js"; import type * as lib_file_io from "../lib/file_io.js"; import type * as lib_fnv1a from "../lib/fnv1a.js"; import type * as lib_fuzzy_match from "../lib/fuzzy_match.js"; @@ -1549,6 +1550,7 @@ declare const fullApi: ApiFromModules<{ "lib/crypto/internal_actions": typeof lib_crypto_internal_actions; "lib/debug_log": typeof lib_debug_log; "lib/error_classification": typeof lib_error_classification; + "lib/errors/upstream_http_error": typeof lib_errors_upstream_http_error; "lib/file_io": typeof lib_file_io; "lib/fnv1a": typeof lib_fnv1a; "lib/fuzzy_match": typeof lib_fuzzy_match; diff --git a/services/platform/convex/agent_tools/documents/document_retrieve_tool.test.ts b/services/platform/convex/agent_tools/documents/document_retrieve_tool.test.ts index d24f8c079f..00c6284b27 100644 --- a/services/platform/convex/agent_tools/documents/document_retrieve_tool.test.ts +++ b/services/platform/convex/agent_tools/documents/document_retrieve_tool.test.ts @@ -347,7 +347,7 @@ describe('retrieveDocument helper', () => { await expect( retrieveDocument(ctx as never, { fileId: 'file-storage-123' }), - ).rejects.toThrow('RAG service error (500)'); + ).rejects.toThrow(/HTTP 500/); }); it('wraps non-JSON response parse error', async () => { diff --git 
a/services/platform/convex/agent_tools/documents/fetch_document_comparison.test.ts b/services/platform/convex/agent_tools/documents/fetch_document_comparison.test.ts index fa26878886..1e880b455e 100644 --- a/services/platform/convex/agent_tools/documents/fetch_document_comparison.test.ts +++ b/services/platform/convex/agent_tools/documents/fetch_document_comparison.test.ts @@ -245,7 +245,7 @@ describe('fetchDocumentComparison', () => { await expect( fetchDocumentComparison(BASE_FILE_ID, COMP_FILE_ID), - ).rejects.toThrow('RAG service error (500)'); + ).rejects.toThrow(/HTTP 500/); }); it('throws timeout error when fetch is aborted', async () => { diff --git a/services/platform/convex/agent_tools/documents/fetch_document_content.test.ts b/services/platform/convex/agent_tools/documents/fetch_document_content.test.ts index 49af0c4af3..a5282d378a 100644 --- a/services/platform/convex/agent_tools/documents/fetch_document_content.test.ts +++ b/services/platform/convex/agent_tools/documents/fetch_document_content.test.ts @@ -199,9 +199,7 @@ describe('fetchDocumentContent', () => { { preconnect: vi.fn() }, ); - await expect(fetchDocumentContent(FILE_ID)).rejects.toThrow( - 'RAG service error (500)', - ); + await expect(fetchDocumentContent(FILE_ID)).rejects.toThrow(/HTTP 500/); }); it('includes error body text in non-ok error message', async () => { diff --git a/services/platform/convex/agent_tools/documents/helpers/fetch_document_comparison.ts b/services/platform/convex/agent_tools/documents/helpers/fetch_document_comparison.ts index 626cd84796..abe10b45c2 100644 --- a/services/platform/convex/agent_tools/documents/helpers/fetch_document_comparison.ts +++ b/services/platform/convex/agent_tools/documents/helpers/fetch_document_comparison.ts @@ -1,4 +1,5 @@ import { fetchJson } from '../../../../lib/utils/type-cast-helpers'; +import { UpstreamHttpError } from '../../../lib/errors/upstream_http_error'; import { ragFetch } from '../../../lib/helpers/rag_config'; const 
FETCH_TIMEOUT_MS = 120_000; @@ -143,8 +144,11 @@ export async function fetchDocumentComparison( if (!response.ok) { const errorText = await response.text().catch(() => ''); - throw new Error( - `RAG service error (${response.status}): ${errorText || 'Unknown error'}`, + throw UpstreamHttpError.fromResponse( + 'rag', + response, + errorText, + '/api/v1/documents/compare', ); } @@ -238,8 +242,11 @@ export async function fetchDocumentComparisonByUrls( if (!response.ok) { const errorText = await response.text().catch(() => ''); - throw new Error( - `RAG service error (${response.status}): ${errorText || 'Unknown error'}`, + throw UpstreamHttpError.fromResponse( + 'rag', + response, + errorText, + '/api/v1/documents/compare-files', ); } diff --git a/services/platform/convex/agent_tools/documents/helpers/fetch_document_content.ts b/services/platform/convex/agent_tools/documents/helpers/fetch_document_content.ts index 208aa81f57..5e2e50f47d 100644 --- a/services/platform/convex/agent_tools/documents/helpers/fetch_document_content.ts +++ b/services/platform/convex/agent_tools/documents/helpers/fetch_document_content.ts @@ -1,4 +1,5 @@ import { fetchJson } from '../../../../lib/utils/type-cast-helpers'; +import { UpstreamHttpError } from '../../../lib/errors/upstream_http_error'; import { ragFetch } from '../../../lib/helpers/rag_config'; const MAX_CONTENT_CHARS = 50_000; @@ -64,9 +65,7 @@ export async function fetchDocumentContent( if (!response.ok) { const errorText = await response.text().catch(() => ''); - throw new Error( - `RAG service error (${response.status}): ${errorText || 'Unknown error'}`, - ); + throw UpstreamHttpError.fromResponse('rag', response, errorText, path); } let result: RagContentResponse; diff --git a/services/platform/convex/agent_tools/rag/rag_search_tool.ts b/services/platform/convex/agent_tools/rag/rag_search_tool.ts index e6f8da1884..aa0e834aa4 100644 --- a/services/platform/convex/agent_tools/rag/rag_search_tool.ts +++ 
b/services/platform/convex/agent_tools/rag/rag_search_tool.ts @@ -21,6 +21,7 @@ import { fetchJson } from '../../../lib/utils/type-cast-helpers'; import { internal } from '../../_generated/api'; import { stripReservedPromptTags } from '../../lib/agent_response/sanitize_prompt'; import { createDebugLog } from '../../lib/debug_log'; +import { UpstreamHttpError } from '../../lib/errors/upstream_http_error'; import { orgSlugFromId } from '../../lib/helpers/org_slug'; import { ragFetch } from '../../lib/helpers/rag_config'; import { toId } from '../../lib/type_cast_helpers'; @@ -284,10 +285,16 @@ RESPONSE (list_indexed): if (!response.ok) { const errorText = await response.text().catch(() => ''); - return { - success: false, - response: `Failed to retrieve document: ${response.status} ${errorText}`, - }; + const err = UpstreamHttpError.fromResponse( + 'rag', + response, + errorText, + `/api/v1/documents/${args.fileId}/content`, + ); + // Agent-facing tool path: return the safe summary instead of throwing + // so the agent can recover (e.g. show the user "not found" rather than + // an opaque tool error). 
+ return { success: false, response: err.safeMessage }; } interface RetrieveResponse { @@ -452,7 +459,12 @@ RESPONSE (list_indexed): if (!response.ok) { const errorText = await response.text(); - throw new Error(`RAG service error: ${response.status} ${errorText}`); + throw UpstreamHttpError.fromResponse( + 'rag', + response, + errorText, + '/api/v1/search', + ); } const result = await fetchJson(response); diff --git a/services/platform/convex/agent_tools/web/helpers/fetch_and_extract.ts b/services/platform/convex/agent_tools/web/helpers/fetch_and_extract.ts index 1f412d152a..4e63e05221 100644 --- a/services/platform/convex/agent_tools/web/helpers/fetch_and_extract.ts +++ b/services/platform/convex/agent_tools/web/helpers/fetch_and_extract.ts @@ -9,6 +9,7 @@ import type { ToolCtx } from '@convex-dev/agent'; import { fetchJson } from '../../../../lib/utils/type-cast-helpers'; import { createDebugLog } from '../../../lib/debug_log'; +import { orgSlugFromId } from '../../../lib/helpers/org_slug'; import { getCrawlerServiceUrl } from './get_crawler_service_url'; import type { WebFetchUrlResult, WebFetchExtractApiResponse } from './types'; @@ -26,6 +27,11 @@ export async function fetchAndExtract( const crawlerServiceUrl = getCrawlerServiceUrl(ctx.variables); const apiUrl = `${crawlerServiceUrl}/api/v1/web/fetch-and-extract`; + if (!ctx.organizationId) { + throw new Error('fetch_and_extract requires organizationId in ToolCtx.'); + } + const orgSlug = await orgSlugFromId(ctx, ctx.organizationId); + debugLog('tool:web:fetch_and_extract start', { url: args.url, hasInstruction: !!args.instruction, @@ -37,7 +43,10 @@ export async function fetchAndExtract( const response = await fetch(apiUrl, { method: 'POST', - headers: { 'Content-Type': 'application/json' }, + headers: { + 'Content-Type': 'application/json', + 'x-tale-org': orgSlug, + }, body: JSON.stringify({ url: args.url, instruction: args.instruction, diff --git a/services/platform/convex/auth.ts 
b/services/platform/convex/auth.ts index 733f566f16..37b5102648 100644 --- a/services/platform/convex/auth.ts +++ b/services/platform/convex/auth.ts @@ -12,6 +12,7 @@ import { ownerAc, } from 'better-auth/plugins/organization/access'; +import { isReservedOrgSlug } from '../lib/shared/constants/reserved-org-slugs'; import { isRecord, getString } from '../lib/utils/type-guards'; import { components, internal } from './_generated/api'; import { DataModel } from './_generated/dataModel'; @@ -575,6 +576,27 @@ export const getAuthOptions = (ctx: GenericCtx) => { beforeCreateOrganization: async (data) => { const slug = data.organization.slug; if (!slug) return; + // Refuse reserved slugs ("default") that the platform pins + // global resources to (branding, retention defaults). + // Without this, an open-signup user could claim "default" + // before the platform seed runs and inherit branding-admin. + // Exception: the platform's own first-run seed creates + // `default` when no orgs exist yet — let that one through. + if (isReservedOrgSlug(slug)) { + const anyOrg = await ctx.runQuery( + components.betterAuth.adapter.findMany, + { + model: 'organization', + paginationOpts: { cursor: null, numItems: 1 }, + where: [], + }, + ); + if (anyOrg && anyOrg.page.length > 0) { + throw new APIError('BAD_REQUEST', { + message: `Organization slug "${slug}" is reserved by the platform.`, + }); + } + } // Convex has no unique-index primitive, so enforce slug uniqueness // at application level before Better Auth's adapter writes the row. 
const existing = await ctx.runQuery( diff --git a/services/platform/convex/branding/internal_queries.ts b/services/platform/convex/branding/internal_queries.ts index 4b604b6b6f..6c1269ce33 100644 --- a/services/platform/convex/branding/internal_queries.ts +++ b/services/platform/convex/branding/internal_queries.ts @@ -1,22 +1,44 @@ import { v } from 'convex/values'; +import { getString, isRecord } from '../../lib/utils/type-guards'; import { components } from '../_generated/api'; import { internalQuery } from '../_generated/server'; import { toPublicUrl } from '../lib/helpers/public_storage_url'; import { isAdmin } from '../lib/rls/helpers/role_helpers'; const GLOBAL_BINDING_KEY = 'global'; +const DEFAULT_ORG_SLUG = 'default'; +/** + * Branding is pinned to the `default` org (see `branding/file_actions.ts` + * doc comment) — so admin authority over branding must require admin role + * IN THE DEFAULT ORG SPECIFICALLY, not "admin in any org". Without this + * narrowing, an admin in any user-created org could mutate the platform's + * global branding. 
+ */ export const isCallerAdmin = internalQuery({ args: { userId: v.string() }, returns: v.boolean(), handler: async (ctx, args) => { + const orgRes = await ctx.runQuery(components.betterAuth.adapter.findMany, { + model: 'organization', + paginationOpts: { cursor: null, numItems: 1 }, + where: [{ field: 'slug', value: DEFAULT_ORG_SLUG, operator: 'eq' }], + }); + const orgRow = orgRes?.page?.[0]; + if (!isRecord(orgRow)) return false; + const defaultOrgId = getString(orgRow, '_id'); + if (!defaultOrgId) return false; + const memberRes = await ctx.runQuery( components.betterAuth.adapter.findMany, { model: 'member', - paginationOpts: { cursor: null, numItems: 10 }, - where: [{ field: 'userId', value: args.userId, operator: 'eq' }], + paginationOpts: { cursor: null, numItems: 1 }, + where: [ + { field: 'userId', value: args.userId, operator: 'eq' }, + { field: 'organizationId', value: defaultOrgId, operator: 'eq' }, + ], }, ); for (const member of memberRes?.page ?? []) { diff --git a/services/platform/convex/documents/generate_document.ts b/services/platform/convex/documents/generate_document.ts index ca2abdbf4d..a26e637d30 100644 --- a/services/platform/convex/documents/generate_document.ts +++ b/services/platform/convex/documents/generate_document.ts @@ -10,6 +10,7 @@ import { internal } from '../_generated/api'; import type { Id } from '../_generated/dataModel'; import type { ActionCtx } from '../_generated/server'; import { createDebugLog } from '../lib/debug_log'; +import { orgSlugFromId } from '../lib/helpers/org_slug'; import { buildDownloadUrl, buildRequestBody, @@ -32,6 +33,7 @@ export async function generateDocument( const endpointPath = getEndpointPath(args.sourceType, args.outputFormat); const apiUrl = `${crawlerUrl}${endpointPath}`; + const orgSlug = await orgSlugFromId(ctx, args.organizationId); const requestBody = buildRequestBody( args.sourceType, @@ -55,7 +57,10 @@ export async function generateDocument( const response = await fetch(apiUrl, { method: 
'POST', - headers: { 'Content-Type': 'application/json' }, + headers: { + 'Content-Type': 'application/json', + 'x-tale-org': orgSlug, + }, body: JSON.stringify(requestBody), }); diff --git a/services/platform/convex/documents/generate_docx.ts b/services/platform/convex/documents/generate_docx.ts index 2f7a652360..32d4b5c96e 100644 --- a/services/platform/convex/documents/generate_docx.ts +++ b/services/platform/convex/documents/generate_docx.ts @@ -12,6 +12,7 @@ import { internal } from '../_generated/api'; import type { Id } from '../_generated/dataModel'; import type { ActionCtx } from '../_generated/server'; import { createDebugLog } from '../lib/debug_log'; +import { orgSlugFromId } from '../lib/helpers/org_slug'; import { buildDownloadUrl, getCrawlerUrl } from './generate_document_helpers'; const debugLog = createDebugLog('DEBUG_DOCUMENTS', '[Documents]'); @@ -62,6 +63,7 @@ export async function generateDocx( ): Promise { const crawlerUrl = getCrawlerUrl(); const apiUrl = `${crawlerUrl}/api/v1/docx`; + const orgSlug = await orgSlugFromId(ctx, args.organizationId); const requestBody = { content: args.content, @@ -74,7 +76,10 @@ export async function generateDocx( const response = await fetch(apiUrl, { method: 'POST', - headers: { 'Content-Type': 'application/json' }, + headers: { + 'Content-Type': 'application/json', + 'x-tale-org': orgSlug, + }, body: JSON.stringify(requestBody), }); diff --git a/services/platform/convex/file_metadata/internal_actions.ts b/services/platform/convex/file_metadata/internal_actions.ts index 8b615fe30b..2fb5e85fa4 100644 --- a/services/platform/convex/file_metadata/internal_actions.ts +++ b/services/platform/convex/file_metadata/internal_actions.ts @@ -7,6 +7,7 @@ import { isRecord, getNumber } from '../../lib/utils/type-guards'; import { internal } from '../_generated/api'; import { internalAction } from '../_generated/server'; import { getCrawlerUrl } from '../documents/generate_document_helpers'; +import { orgSlugFromId } from 
'../lib/helpers/org_slug'; import { ragAction } from '../workflow_engine/action_defs/rag/rag_action'; /** @@ -73,6 +74,7 @@ export const extractFileMetadata = internalAction({ storageId: v.id('_storage'), fileName: v.string(), contentType: v.string(), + organizationId: v.string(), attempt: v.optional(v.number()), }, returns: v.null(), @@ -117,12 +119,14 @@ export const extractFileMetadata = internalAction({ const fileBlob = await fileResponse.blob(); const crawlerUrl = getCrawlerUrl(); const endpoint = `${crawlerUrl}/api/v1/${ext}/extract-metadata`; + const orgSlug = await orgSlugFromId(ctx, args.organizationId); const formData = new FormData(); formData.append('file', fileBlob, args.fileName); const metadataResponse = await fetch(endpoint, { method: 'POST', + headers: { 'x-tale-org': orgSlug }, body: formData, signal: AbortSignal.timeout(30_000), }); @@ -196,6 +200,7 @@ export const extractFileMetadata = internalAction({ storageId: args.storageId, fileName: args.fileName, contentType: args.contentType, + organizationId: args.organizationId, attempt: attempt + 1, }, ); diff --git a/services/platform/convex/file_metadata/internal_mutations.ts b/services/platform/convex/file_metadata/internal_mutations.ts index 96f162636e..42c8f250fd 100644 --- a/services/platform/convex/file_metadata/internal_mutations.ts +++ b/services/platform/convex/file_metadata/internal_mutations.ts @@ -130,6 +130,7 @@ export const saveFileMetadata = internalMutation({ storageId: args.storageId, fileName: args.fileName, contentType: args.contentType, + organizationId: args.organizationId, }, ); diff --git a/services/platform/convex/file_metadata/mutations.ts b/services/platform/convex/file_metadata/mutations.ts index dae28fb083..1c8d9a8167 100644 --- a/services/platform/convex/file_metadata/mutations.ts +++ b/services/platform/convex/file_metadata/mutations.ts @@ -197,6 +197,7 @@ export const saveFileMetadata = mutation({ storageId: args.storageId, fileName: args.fileName, contentType: 
args.contentType, + organizationId: args.organizationId, }, ); diff --git a/services/platform/convex/lib/errors/__tests__/upstream_http_error.test.ts b/services/platform/convex/lib/errors/__tests__/upstream_http_error.test.ts new file mode 100644 index 0000000000..1e784fb7f1 --- /dev/null +++ b/services/platform/convex/lib/errors/__tests__/upstream_http_error.test.ts @@ -0,0 +1,95 @@ +import { describe, expect, it } from 'vitest'; + +import { + isRetryableStatus, + isUpstreamHttpError, + UpstreamHttpError, +} from '../upstream_http_error'; + +function makeResponse(status: number): Response { + // Minimal Response stand-in — UpstreamHttpError.fromResponse only reads `.status`. + return new Response(null, { status }); +} + +describe('UpstreamHttpError', () => { + it('scrubs Bearer tokens and sk- API keys from body snippet', () => { + const body = + 'Upstream complained: Authorization: Bearer sk-abcdefgh1234567890ABCDEF'; + const err = UpstreamHttpError.fromResponse( + 'rag', + makeResponse(500), + body, + '/api/v1/search', + ); + expect(err.bodySnippet).not.toMatch(/sk-abcdefgh/); + expect(err.bodySnippet).not.toMatch(/Bearer\s+sk-/); + expect(err.bodySnippet).toMatch(/REDACTED/); + // Engineer-facing message still embeds the (now-scrubbed) snippet for triage. + expect(err.message).toMatch(/REDACTED/); + // Safe message is clean of any body content. 
+ expect(err.safeMessage).not.toMatch(/REDACTED/); + expect(err.safeMessage).toMatch(/RAG/); + }); + + it('truncates very long bodies to ~400 chars', () => { + const body = 'X'.repeat(2000); + const err = UpstreamHttpError.fromResponse( + 'rag', + makeResponse(500), + body, + '/api/v1/search', + ); + expect(err.bodySnippet.length).toBeLessThanOrEqual(401); // 400 + ellipsis + }); + + it('marks 5xx / 408 / 429 as retryable; 4xx (other) as not', () => { + expect(isRetryableStatus(500)).toBe(true); + expect(isRetryableStatus(503)).toBe(true); + expect(isRetryableStatus(429)).toBe(true); + expect(isRetryableStatus(408)).toBe(true); + expect(isRetryableStatus(400)).toBe(false); + expect(isRetryableStatus(401)).toBe(false); + expect(isRetryableStatus(404)).toBe(false); + + const fiveHundred = UpstreamHttpError.fromResponse( + 'crawler', + makeResponse(500), + 'down', + '/api/v1/web/fetch-and-extract', + ); + expect(fiveHundred.retryable).toBe(true); + + const fourHundred = UpstreamHttpError.fromResponse( + 'crawler', + makeResponse(400), + 'bad request', + '/api/v1/urls/discover', + ); + expect(fourHundred.retryable).toBe(false); + }); + + it('safe message includes service, endpoint, and status', () => { + const err = UpstreamHttpError.fromResponse( + 'crawler', + makeResponse(503), + '', + '/api/v1/web/fetch-and-extract', + ); + expect(err.safeMessage).toContain('CRAWLER'); + expect(err.safeMessage).toContain('/api/v1/web/fetch-and-extract'); + expect(err.safeMessage).toContain('503'); + }); + + it('isUpstreamHttpError narrows correctly', () => { + const err = UpstreamHttpError.fromResponse( + 'rag', + makeResponse(500), + '', + '/x', + ); + expect(isUpstreamHttpError(err)).toBe(true); + expect(isUpstreamHttpError(new Error('other'))).toBe(false); + expect(isUpstreamHttpError(null)).toBe(false); + expect(isUpstreamHttpError('string')).toBe(false); + }); +}); diff --git a/services/platform/convex/lib/errors/upstream_http_error.ts 
b/services/platform/convex/lib/errors/upstream_http_error.ts new file mode 100644 index 0000000000..6157044622 --- /dev/null +++ b/services/platform/convex/lib/errors/upstream_http_error.ts @@ -0,0 +1,108 @@ +/** + * Typed wrapper for non-2xx HTTP responses from upstream services + * (RAG, Crawler). Centralizes: + * + * - Body truncation + secret scrubbing (via `sanitizeError`) so raw + * provider errors with embedded API keys, filenames, or stack + * fragments never reach a thrown Error message. + * - `retryable` flag derived from status, so callers can decide + * without re-parsing the message. + * - A `safeMessage` field with a user-presentable one-liner that + * omits the body snippet entirely; UI surfaces should prefer this. + * + * Use the static factory `UpstreamHttpError.fromResponse(...)`; raw + * `new UpstreamHttpError({...})` is reserved for tests. + */ + +import { sanitizeError } from '../utils/sanitize_secrets'; + +export type UpstreamService = 'rag' | 'crawler'; + +const BODY_SNIPPET_MAX = 400; + +export interface UpstreamErrorInit { + service: UpstreamService; + status: number; + endpoint: string; + bodySnippet: string; + retryable: boolean; + safeMessage: string; +} + +/** Status codes the platform should retry on (transient upstream). */ +export function isRetryableStatus(status: number): boolean { + return status === 408 || status === 429 || (status >= 500 && status < 600); +} + +function safeMessageFor( + service: UpstreamService, + status: number, + endpoint: string, +): string { + // User-facing summary: never includes body, never includes secrets. + // Operators get the full picture from logs + the thrown Error message. 
+ const where = `${service.toUpperCase()} ${endpoint}`; + if (status === 401 || status === 403) { + return `${where} authentication failed (HTTP ${status}).`; + } + if (status === 404) { + return `${where} returned not found (HTTP 404).`; + } + if (status === 408 || status === 429) { + return `${where} is throttling (HTTP ${status}); retry shortly.`; + } + if (status >= 500) { + return `${where} is unavailable (HTTP ${status}); retry shortly.`; + } + return `${where} returned HTTP ${status}.`; +} + +export class UpstreamHttpError extends Error { + readonly service: UpstreamService; + readonly status: number; + readonly endpoint: string; + readonly bodySnippet: string; + readonly retryable: boolean; + readonly safeMessage: string; + + constructor(init: UpstreamErrorInit) { + // Engineer-facing message: includes the scrubbed snippet for log + // triage. UI code MUST read `.safeMessage` instead of `.message` + // to keep this snippet out of user-visible surfaces. + const snippet = init.bodySnippet ? ` — ${init.bodySnippet}` : ''; + super(`${init.safeMessage}${snippet}`); + this.name = 'UpstreamHttpError'; + this.service = init.service; + this.status = init.status; + this.endpoint = init.endpoint; + this.bodySnippet = init.bodySnippet; + this.retryable = init.retryable; + this.safeMessage = init.safeMessage; + } + + /** + * Build an UpstreamHttpError from a non-2xx Response and its already-read + * body text. Callers should always `await response.text()` first (don't + * pass the unread Response — single-use body). 
+ */ + static fromResponse( + service: UpstreamService, + response: Response, + bodyText: string, + endpoint: string, + ): UpstreamHttpError { + return new UpstreamHttpError({ + service, + status: response.status, + endpoint, + bodySnippet: sanitizeError(bodyText, BODY_SNIPPET_MAX), + retryable: isRetryableStatus(response.status), + safeMessage: safeMessageFor(service, response.status, endpoint), + }); + } +} + +/** Narrow `unknown` to UpstreamHttpError for catch-block branching. */ +export function isUpstreamHttpError(err: unknown): err is UpstreamHttpError { + return err instanceof UpstreamHttpError; +} diff --git a/services/platform/convex/websites/actions.ts b/services/platform/convex/websites/actions.ts index 67ba2dea19..c4aac91d0d 100644 --- a/services/platform/convex/websites/actions.ts +++ b/services/platform/convex/websites/actions.ts @@ -4,6 +4,7 @@ import { internal } from '../_generated/api'; import type { Id } from '../_generated/dataModel'; import { action } from '../_generated/server'; import { authComponent } from '../auth'; +import { orgSlugFromId } from '../lib/helpers/org_slug'; import { toWebsiteDomain } from './create_website'; import { deregisterDomainFromCrawler, @@ -56,7 +57,12 @@ export const createWebsite = action({ await ctx.scheduler.runAfter( 0, internal.websites.internal_actions.registerAndSync, - { websiteId, domain, scanInterval: args.scanInterval }, + { + websiteId, + domain, + scanInterval: args.scanInterval, + organizationId: args.organizationId, + }, ); return websiteId; @@ -88,8 +94,9 @@ export const deleteWebsite = action({ }, ); + const orgSlug = await orgSlugFromId(ctx, website.organizationId); // Deregister from crawler first — if this fails, the user can retry - await deregisterDomainFromCrawler(website.domain); + await deregisterDomainFromCrawler(orgSlug, website.domain); await ctx.runMutation(internal.websites.internal_mutations.deleteWebsite, { websiteId: args.websiteId, @@ -130,8 +137,13 @@ export const updateWebsite = 
action({ // Sync scan interval to crawler if (args.scanInterval && args.scanInterval !== website.scanInterval) { + const orgSlug = await orgSlugFromId(ctx, website.organizationId); try { - await updateCrawlerScanInterval(website.domain, args.scanInterval); + await updateCrawlerScanInterval( + orgSlug, + website.domain, + args.scanInterval, + ); } catch (error) { if ( error instanceof Error && @@ -236,13 +248,18 @@ export const fetchPages = action({ await ctx.scheduler.runAfter( 0, internal.websites.internal_actions.syncSingleWebsite, - { websiteId: args.websiteId, domain: website.domain }, + { + websiteId: args.websiteId, + domain: website.domain, + organizationId: website.organizationId, + }, ); return await ctx.runAction( internal.websites.internal_actions.fetchWebsitePages, { domain: website.domain, + organizationId: website.organizationId, offset: args.offset, limit: args.limit, }, @@ -267,7 +284,11 @@ export const fetchChunks = action({ return await ctx.runAction( internal.websites.internal_actions.fetchPageChunks, - { domain: website.domain, url: args.url }, + { + domain: website.domain, + url: args.url, + organizationId: website.organizationId, + }, ); }, }); @@ -290,7 +311,12 @@ export const searchContent = action({ return await ctx.runAction( internal.websites.internal_actions.searchWebsiteContent, - { domain: website.domain, query: args.query, limit: args.limit }, + { + domain: website.domain, + query: args.query, + organizationId: website.organizationId, + limit: args.limit, + }, ); }, }); diff --git a/services/platform/convex/websites/internal_actions.ts b/services/platform/convex/websites/internal_actions.ts index 7f0ea485d5..7b34b031b8 100644 --- a/services/platform/convex/websites/internal_actions.ts +++ b/services/platform/convex/websites/internal_actions.ts @@ -4,6 +4,7 @@ import { internal } from '../_generated/api'; import type { Id } from '../_generated/dataModel'; import { internalAction } from '../_generated/server'; import { getCrawlerUrl } 
from '../documents/generate_document_helpers'; +import { orgSlugFromId } from '../lib/helpers/org_slug'; import type { CrawlerChunksResponse, CrawlerPagesResponse, @@ -14,16 +15,27 @@ import type { const CRAWLER_TIMEOUT_MS = 15_000; const SYNC_INTERVAL_MS = 60 * 60 * 1000; // 1 hour +/** + * Wrap `fetch` with a timeout and inject the required `x-tale-org` + * header so every call to the crawler service routes to the correct + * org's provider catalog. Crawler enforces this header at the router + * level (`require_org_slug`); missing it returns HTTP 400. + */ function fetchWithTimeout( url: string, + orgSlug: string, init?: RequestInit, timeoutMs = CRAWLER_TIMEOUT_MS, ): Promise { const controller = new AbortController(); const timer = setTimeout(() => controller.abort(), timeoutMs); - return fetch(url, { ...init, signal: controller.signal }).finally(() => - clearTimeout(timer), - ); + const mergedHeaders = new Headers(init?.headers); + mergedHeaders.set('x-tale-org', orgSlug); + return fetch(url, { + ...init, + headers: mergedHeaders, + signal: controller.signal, + }).finally(() => clearTimeout(timer)); } export function scanIntervalToSeconds(interval: string): number { @@ -48,12 +60,14 @@ export function scanIntervalToSeconds(interval: string): number { } export async function registerDomainWithCrawler( + orgSlug: string, domain: string, scanInterval: string, ): Promise { const crawlerUrl = getCrawlerUrl(); const res = await fetchWithTimeout( `${crawlerUrl}/api/v1/websites`, + orgSlug, { method: 'POST', headers: { 'Content-Type': 'application/json' }, @@ -73,12 +87,14 @@ export async function registerDomainWithCrawler( } export async function updateCrawlerScanInterval( + orgSlug: string, domain: string, scanInterval: string, ): Promise { const crawlerUrl = getCrawlerUrl(); const res = await fetchWithTimeout( `${crawlerUrl}/api/v1/websites/${encodeURIComponent(domain)}`, + orgSlug, { method: 'PATCH', headers: { 'Content-Type': 'application/json' }, @@ -98,11 
+114,13 @@ export async function updateCrawlerScanInterval( } export async function deregisterDomainFromCrawler( + orgSlug: string, domain: string, ): Promise { const crawlerUrl = getCrawlerUrl(); const res = await fetchWithTimeout( `${crawlerUrl}/api/v1/websites/${encodeURIComponent(domain)}`, + orgSlug, { method: 'DELETE' }, ); if (!res.ok && res.status !== 404) { @@ -113,11 +131,13 @@ export async function deregisterDomainFromCrawler( } export async function fetchWebsiteInfo( + orgSlug: string, domain: string, ): Promise { const crawlerUrl = getCrawlerUrl(); const res = await fetchWithTimeout( `${crawlerUrl}/api/v1/websites/${encodeURIComponent(domain)}`, + orgSlug, ); if (res.ok) { return await res.json(); @@ -136,11 +156,13 @@ interface WebsiteForSync { } async function fetchHomepageMetadata( + orgSlug: string, domain: string, ): Promise<{ title?: string; description?: string } | null> { const crawlerUrl = getCrawlerUrl(); const res = await fetchWithTimeout( `${crawlerUrl}/api/v1/urls/fetch`, + orgSlug, { method: 'POST', headers: { 'Content-Type': 'application/json' }, @@ -170,9 +192,11 @@ export const fetchAndPatchHomepage = internalAction({ args: { websiteId: v.id('websites'), domain: v.string(), + organizationId: v.string(), }, handler: async (ctx, args): Promise => { - const info = await fetchHomepageMetadata(args.domain); + const orgSlug = await orgSlugFromId(ctx, args.organizationId); + const info = await fetchHomepageMetadata(orgSlug, args.domain); if (!info) return; await ctx.runMutation(internal.websites.internal_mutations.patchWebsite, { @@ -188,6 +212,7 @@ export const syncWebsiteStatuses = internalAction({ organizationId: v.string(), }, handler: async (ctx, args): Promise => { + const orgSlug = await orgSlugFromId(ctx, args.organizationId); const websites: WebsiteForSync[] = await ctx.runQuery( internal.websites.internal_queries.listWebsitesForSync, { organizationId: args.organizationId }, @@ -202,7 +227,7 @@ export const syncWebsiteStatuses = 
internalAction({ } try { - const websiteInfo = await fetchWebsiteInfo(website.domain); + const websiteInfo = await fetchWebsiteInfo(orgSlug, website.domain); if (websiteInfo) { await ctx.runMutation( @@ -264,10 +289,12 @@ export const registerAndSync = internalAction({ websiteId: v.id('websites'), domain: v.string(), scanInterval: v.string(), + organizationId: v.string(), }, handler: async (ctx, args): Promise => { + const orgSlug = await orgSlugFromId(ctx, args.organizationId); try { - await registerDomainWithCrawler(args.domain, args.scanInterval); + await registerDomainWithCrawler(orgSlug, args.domain, args.scanInterval); } catch (error) { const message = error instanceof Error ? error.message : String(error); console.error( @@ -286,14 +313,22 @@ export const registerAndSync = internalAction({ await ctx.scheduler.runAfter( 0, internal.websites.internal_actions.fetchAndPatchHomepage, - { websiteId: args.websiteId, domain: args.domain }, + { + websiteId: args.websiteId, + domain: args.domain, + organizationId: args.organizationId, + }, ); // Schedule a delayed sync to pick up scan results await ctx.scheduler.runAfter( 600_000, internal.websites.internal_actions.syncSingleWebsite, - { websiteId: args.websiteId, domain: args.domain }, + { + websiteId: args.websiteId, + domain: args.domain, + organizationId: args.organizationId, + }, ); }, }); @@ -302,8 +337,10 @@ export const syncSingleWebsite = internalAction({ args: { websiteId: v.id('websites'), domain: v.string(), + organizationId: v.string(), }, handler: async (ctx, args): Promise => { + const orgSlug = await orgSlugFromId(ctx, args.organizationId); const website = await ctx.runQuery( internal.websites.internal_queries.getWebsite, { websiteId: args.websiteId }, @@ -311,7 +348,7 @@ export const syncSingleWebsite = internalAction({ if (!website) return; try { - const info = await fetchWebsiteInfo(args.domain); + const info = await fetchWebsiteInfo(orgSlug, args.domain); if (info) { await ctx.runMutation( @@ 
-366,16 +403,19 @@ export const syncSingleWebsite = internalAction({ export const fetchWebsitePages = internalAction({ args: { domain: v.string(), + organizationId: v.string(), offset: v.optional(v.number()), limit: v.optional(v.number()), }, - handler: async (_ctx, args) => { + handler: async (ctx, args) => { + const orgSlug = await orgSlugFromId(ctx, args.organizationId); const crawlerUrl = getCrawlerUrl(); const offset = args.offset ?? 0; const limit = args.limit ?? 100; const res = await fetchWithTimeout( `${crawlerUrl}/api/v1/pages/${encodeURIComponent(args.domain)}?offset=${offset}&limit=${limit}`, + orgSlug, ); if (!res.ok) { @@ -396,12 +436,15 @@ export const fetchPageChunks = internalAction({ args: { domain: v.string(), url: v.string(), + organizationId: v.string(), }, - handler: async (_ctx, args) => { + handler: async (ctx, args) => { + const orgSlug = await orgSlugFromId(ctx, args.organizationId); const crawlerUrl = getCrawlerUrl(); const res = await fetchWithTimeout( `${crawlerUrl}/api/v1/pages/${encodeURIComponent(args.domain)}/chunks?url=${encodeURIComponent(args.url)}`, + orgSlug, ); if (!res.ok) { @@ -421,14 +464,17 @@ export const searchWebsiteContent = internalAction({ args: { domain: v.string(), query: v.string(), + organizationId: v.string(), limit: v.optional(v.number()), }, - handler: async (_ctx, args) => { + handler: async (ctx, args) => { + const orgSlug = await orgSlugFromId(ctx, args.organizationId); const crawlerUrl = getCrawlerUrl(); const limit = args.limit ?? 
10; const res = await fetchWithTimeout( `${crawlerUrl}/api/v1/search/${encodeURIComponent(args.domain)}`, + orgSlug, { method: 'POST', headers: { 'Content-Type': 'application/json' }, diff --git a/services/platform/convex/websites/rest_api.ts b/services/platform/convex/websites/rest_api.ts index b7c34b24b3..2c3a1a959a 100644 --- a/services/platform/convex/websites/rest_api.ts +++ b/services/platform/convex/websites/rest_api.ts @@ -76,6 +76,7 @@ export const createWebsite = withRestAuth('rest:api', async (rc, request) => { websiteId, domain, scanInterval: body.scanInterval, + organizationId: rc.org.organizationId, }); return jsonCreated({ id: websiteId }); @@ -127,6 +128,7 @@ export const getWebsite = withRestAuth('rest:api', async (rc, request) => { internal.websites.internal_actions.fetchWebsitePages, { domain: website.domain, + organizationId: website.organizationId, offset, limit, }, @@ -231,6 +233,7 @@ export const websiteSubActions = withRestAuth( internal.websites.internal_actions.fetchWebsitePages, { domain: website.domain, + organizationId: website.organizationId, offset, limit, }, @@ -287,6 +290,7 @@ export const websitePostActions = withRestAuth( { domain: website.domain, query: body.query, + organizationId: website.organizationId, limit: body.limit, }, ); diff --git a/services/platform/convex/workflow_engine/action_defs/crawler/crawler_action.ts b/services/platform/convex/workflow_engine/action_defs/crawler/crawler_action.ts index f153033682..78d6bc7db3 100644 --- a/services/platform/convex/workflow_engine/action_defs/crawler/crawler_action.ts +++ b/services/platform/convex/workflow_engine/action_defs/crawler/crawler_action.ts @@ -1,6 +1,7 @@ import { v } from 'convex/values'; import { createDebugLog } from '../../../lib/debug_log'; +import { orgSlugFromId } from '../../../lib/helpers/org_slug'; import type { ActionDefinition } from '../../helpers/nodes/action/types'; import type { CrawlerActionParams, @@ -51,17 +52,28 @@ export const crawlerAction: 
ActionDefinition = { }), ), - async execute(_ctx, params) { + async execute(ctx, params, variables) { const serviceUrl = process.env.CRAWLER_URL || 'http://localhost:8002'; const timeout = params.timeout || 1800000; + const organizationId = + typeof variables.organizationId === 'string' + ? variables.organizationId + : undefined; + if (!organizationId) { + throw new Error( + 'crawler action requires organizationId in workflow _variables.', + ); + } + const orgSlug = await orgSlugFromId(ctx, organizationId); + switch (params.operation) { case 'discover_urls': - return await discoverUrls(params, serviceUrl, timeout); + return await discoverUrls(params, serviceUrl, orgSlug, timeout); case 'fetch_urls': - return await fetchUrls(params, serviceUrl, timeout); + return await fetchUrls(params, serviceUrl, orgSlug, timeout); case 'query_urls': - return await queryUrls(params, serviceUrl, timeout); + return await queryUrls(params, serviceUrl, orgSlug, timeout); default: throw new Error( `Unknown crawler operation: ${(params as { operation: string }).operation}`, @@ -88,6 +100,7 @@ type QueryUrlsParams = Extract< async function discoverUrls( params: DiscoverUrlsParams, serviceUrl: string, + orgSlug: string, timeout: number, ): Promise { let domain = params.domain; @@ -119,6 +132,7 @@ async function discoverUrls( method: 'POST', headers: { 'Content-Type': 'application/json', + 'x-tale-org': orgSlug, }, body: JSON.stringify(payload), signal: controller.signal, @@ -158,6 +172,7 @@ async function discoverUrls( async function fetchUrls( params: FetchUrlsParams, serviceUrl: string, + orgSlug: string, timeout: number, ): Promise { const payload = { @@ -175,6 +190,7 @@ async function fetchUrls( method: 'POST', headers: { 'Content-Type': 'application/json', + 'x-tale-org': orgSlug, }, body: JSON.stringify(payload), signal: controller.signal, @@ -206,6 +222,7 @@ async function fetchUrls( async function queryUrls( params: QueryUrlsParams, serviceUrl: string, + orgSlug: string, timeout: 
number, ): Promise { const searchParams = new URLSearchParams(); @@ -224,7 +241,10 @@ async function queryUrls( const response = await fetch( `${serviceUrl}/api/v1/websites/${encodeURIComponent(params.domain)}/urls?${searchParams}`, - { signal: controller.signal }, + { + headers: { 'x-tale-org': orgSlug }, + signal: controller.signal, + }, ); clearTimeout(timeoutId); diff --git a/services/platform/convex/workflow_engine/action_defs/document/document_action.ts b/services/platform/convex/workflow_engine/action_defs/document/document_action.ts index 9c49f699e8..42c567c370 100644 --- a/services/platform/convex/workflow_engine/action_defs/document/document_action.ts +++ b/services/platform/convex/workflow_engine/action_defs/document/document_action.ts @@ -582,7 +582,16 @@ export const documentAction: ActionDefinition = { } case 'extract_docx_structured': { - return await extractDocxStructured(ctx, params.fileId); + const organizationId = + typeof _variables.organizationId === 'string' + ? _variables.organizationId + : undefined; + if (!organizationId) { + throw new Error( + 'extract_docx_structured requires organizationId in workflow _variables.', + ); + } + return await extractDocxStructured(ctx, params.fileId, organizationId); } case 'apply_docx_structured': { @@ -590,6 +599,11 @@ export const documentAction: ActionDefinition = { typeof _variables.organizationId === 'string' ? 
_variables.organizationId : undefined; + if (!organizationId) { + throw new Error( + 'apply_docx_structured requires organizationId in workflow _variables.', + ); + } return await applyDocxStructured(ctx, { templateFileId: params.templateFileId, diff --git a/services/platform/convex/workflow_engine/action_defs/document/helpers/apply_docx_structured.ts b/services/platform/convex/workflow_engine/action_defs/document/helpers/apply_docx_structured.ts index c4dddcf758..3ea45ce39f 100644 --- a/services/platform/convex/workflow_engine/action_defs/document/helpers/apply_docx_structured.ts +++ b/services/platform/convex/workflow_engine/action_defs/document/helpers/apply_docx_structured.ts @@ -19,6 +19,7 @@ import { getCrawlerUrl, } from '../../../../documents/generate_document_helpers'; import { createDebugLog } from '../../../../lib/debug_log'; +import { orgSlugFromId } from '../../../../lib/helpers/org_slug'; import { toId } from '../../../../lib/type_cast_helpers'; const debugLog = createDebugLog('DEBUG_DOCUMENTS', '[Documents]'); @@ -53,9 +54,9 @@ export interface ApplyDocxStructuredArgs { sourceHash: string; modifications: Modification[]; fileName: string; + organizationId: string; trackChanges?: boolean; author?: string; - organizationId?: string; } export interface ApplyDocxStructuredResult { @@ -116,8 +117,11 @@ export async function applyDocxStructured( const controller = new AbortController(); const timeoutId = setTimeout(() => controller.abort(), 300_000); + const orgSlug = await orgSlugFromId(ctx, args.organizationId); + const response = await fetch(apiUrl, { method: 'POST', + headers: { 'x-tale-org': orgSlug }, body: formData, signal: controller.signal, }); @@ -162,20 +166,18 @@ export async function applyDocxStructured( ? 
args.fileName : `${args.fileName}.docx`; - // Save file metadata if organizationId is available - if (args.organizationId) { - await ctx.runMutation( - internal.file_metadata.internal_mutations.saveFileMetadata, - { - organizationId: args.organizationId, - storageId, - fileName: finalFileName, - contentType: DOCX_CONTENT_TYPE, - size: docxBytes.length, - source: 'agent', - }, - ); - } + // Save file metadata so the file shows up in the org's library. + await ctx.runMutation( + internal.file_metadata.internal_mutations.saveFileMetadata, + { + organizationId: args.organizationId, + storageId, + fileName: finalFileName, + contentType: DOCX_CONTENT_TYPE, + size: docxBytes.length, + source: 'agent', + }, + ); const downloadUrl = buildDownloadUrl(storageId, finalFileName); diff --git a/services/platform/convex/workflow_engine/action_defs/document/helpers/extract_docx_structured.ts b/services/platform/convex/workflow_engine/action_defs/document/helpers/extract_docx_structured.ts index a057e9bffc..4a7d1fc5b1 100644 --- a/services/platform/convex/workflow_engine/action_defs/document/helpers/extract_docx_structured.ts +++ b/services/platform/convex/workflow_engine/action_defs/document/helpers/extract_docx_structured.ts @@ -11,6 +11,7 @@ import { fetchJson } from '../../../../../lib/utils/type-cast-helpers'; import type { ActionCtx } from '../../../../_generated/server'; import { getCrawlerUrl } from '../../../../documents/generate_document_helpers'; import { createDebugLog } from '../../../../lib/debug_log'; +import { orgSlugFromId } from '../../../../lib/helpers/org_slug'; import { toId } from '../../../../lib/type_cast_helpers'; const debugLog = createDebugLog('DEBUG_DOCUMENTS', '[Documents]'); @@ -38,9 +39,11 @@ export interface ExtractDocxStructuredResult { export async function extractDocxStructured( ctx: ActionCtx, fileId: string, + organizationId: string, ): Promise { const crawlerUrl = getCrawlerUrl(); const apiUrl = `${crawlerUrl}/api/v1/docx/extract-structured`; + 
const orgSlug = await orgSlugFromId(ctx, organizationId); debugLog('extractDocxStructured start', { fileId }); @@ -59,6 +62,7 @@ export async function extractDocxStructured( const response = await fetch(apiUrl, { method: 'POST', + headers: { 'x-tale-org': orgSlug }, body: formData, signal: controller.signal, }); diff --git a/services/platform/convex/workflow_engine/action_defs/rag/helpers/delete_document.ts b/services/platform/convex/workflow_engine/action_defs/rag/helpers/delete_document.ts index b71a449288..29d033c86b 100644 --- a/services/platform/convex/workflow_engine/action_defs/rag/helpers/delete_document.ts +++ b/services/platform/convex/workflow_engine/action_defs/rag/helpers/delete_document.ts @@ -8,6 +8,7 @@ import { isRecord, } from '../../../../../lib/utils/type-guards'; import { internalAction } from '../../../../_generated/server'; +import { UpstreamHttpError } from '../../../../lib/errors/upstream_http_error'; import { ragFetch } from '../../../../lib/helpers/rag_config'; import type { RagDeleteResult } from './types'; @@ -54,7 +55,12 @@ export async function deleteDocumentById({ if (!response.ok) { const errorText = await response.text(); - throw new Error(`RAG service error: ${response.status} ${errorText}`); + throw UpstreamHttpError.fromResponse( + 'rag', + response, + errorText, + `/api/v1/documents/${fileId}`, + ); } const rawResult: unknown = await response.json(); diff --git a/services/platform/convex/workflow_engine/action_defs/rag/helpers/upload_file_direct.test.ts b/services/platform/convex/workflow_engine/action_defs/rag/helpers/upload_file_direct.test.ts index 6124589290..25473ca191 100644 --- a/services/platform/convex/workflow_engine/action_defs/rag/helpers/upload_file_direct.test.ts +++ b/services/platform/convex/workflow_engine/action_defs/rag/helpers/upload_file_direct.test.ts @@ -141,20 +141,33 @@ describe('uploadFile', () => { expect(calledUrl).toBe(`${RAG_URL}/api/v1/documents/upload`); }); - it('throws on non-ok response with 
status info', async () => { + it('throws UpstreamHttpError with sanitized body snippet on non-ok response', async () => { mockFetchError(500, 'Internal Server Error', 'something broke'); - await expect(uploadFile(defaultArgs())).rejects.toThrow( - 'RAG service error: 500 Internal Server Error - something broke', + const err = await uploadFile(defaultArgs()).then( + () => null, + (e: unknown) => e, ); + expect(err).toBeInstanceOf(Error); + expect((err as Error).name).toBe('UpstreamHttpError'); + // Engineer-facing .message embeds the safe-summary + sanitized body. + expect((err as Error).message).toMatch(/HTTP 500/); + expect((err as Error).message).toMatch(/something broke/); + // Retryable for 5xx — caller can decide whether to bounce. + expect((err as { retryable?: boolean }).retryable).toBe(true); }); - it('throws on non-ok response without body', async () => { + it('throws UpstreamHttpError on non-ok response with empty body', async () => { mockFetchError(502, 'Bad Gateway'); - await expect(uploadFile(defaultArgs())).rejects.toThrow( - 'RAG service error: 502 Bad Gateway', + const err = await uploadFile(defaultArgs()).then( + () => null, + (e: unknown) => e, ); + expect(err).toBeInstanceOf(Error); + expect((err as Error).name).toBe('UpstreamHttpError'); + expect((err as Error).message).toMatch(/HTTP 502/); + expect((err as { retryable?: boolean }).retryable).toBe(true); }); it('returns correct RagUploadResult shape on success', async () => { diff --git a/services/platform/convex/workflow_engine/action_defs/rag/helpers/upload_file_direct.ts b/services/platform/convex/workflow_engine/action_defs/rag/helpers/upload_file_direct.ts index d3d0b5b62c..3444b127fe 100644 --- a/services/platform/convex/workflow_engine/action_defs/rag/helpers/upload_file_direct.ts +++ b/services/platform/convex/workflow_engine/action_defs/rag/helpers/upload_file_direct.ts @@ -1,3 +1,4 @@ +import { UpstreamHttpError } from '../../../../lib/errors/upstream_http_error'; import { ragFetch } 
from '../../../../lib/helpers/rag_config'; import type { RagUploadResult } from './types'; @@ -61,9 +62,7 @@ export async function uploadFile({ if (!response.ok) { const errorText = await response.text(); - throw new Error( - `RAG service error: ${response.status} ${response.statusText}${errorText ? ` - ${errorText}` : ''}`, - ); + throw UpstreamHttpError.fromResponse('rag', response, errorText, path); } // oxlint-disable-next-line typescript/no-unsafe-type-assertion -- typed response diff --git a/services/platform/convex/workflow_engine/action_defs/rag/rag_action.ts b/services/platform/convex/workflow_engine/action_defs/rag/rag_action.ts index 7a41435aa7..d9a036d942 100644 --- a/services/platform/convex/workflow_engine/action_defs/rag/rag_action.ts +++ b/services/platform/convex/workflow_engine/action_defs/rag/rag_action.ts @@ -5,6 +5,7 @@ import { internal } from '../../../_generated/api'; import type { ActionCtx } from '../../../_generated/server'; import type { SearchResponse } from '../../../agent_tools/rag/format_search_results'; import { fetchDocumentChunks } from '../../../agent_tools/rag/helpers/fetch_document_chunks'; +import { UpstreamHttpError } from '../../../lib/errors/upstream_http_error'; import { orgSlugFromId } from '../../../lib/helpers/org_slug'; import { ragFetch } from '../../../lib/helpers/rag_config'; import { toId } from '../../../lib/type_cast_helpers'; @@ -124,8 +125,11 @@ export const ragAction: ActionDefinition = { if (!response.ok) { const errorText = await response.text().catch(() => ''); - throw new Error( - `RAG search error (${response.status}): ${errorText || 'Unknown error'}`, + throw UpstreamHttpError.fromResponse( + 'rag', + response, + errorText, + '/api/v1/search', ); } diff --git a/services/platform/lib/config-watcher.ts b/services/platform/lib/config-watcher.ts index 1ee647ed3a..b9ac31862e 100644 --- a/services/platform/lib/config-watcher.ts +++ b/services/platform/lib/config-watcher.ts @@ -18,10 +18,25 @@ const 
ATOMIC_WRITE_TMP_RE = /\.\d+\.[a-f0-9]{8}\.tmp$/; // Must match validateOrgSlug at services/platform/convex/lib/file_io.ts. const ORG_SLUG_REGEX = /^[a-z0-9][a-z0-9_-]*$/; +/** + * Tail-debounce window for SSE invalidations: events arriving within this + * window for the same (type, orgSlug, slug) key collapse to a single + * delivery. Bulk operations (org migrations, mass `git mv`) would + * otherwise fan out one SSE frame per file × per connected client. + */ +const EMIT_DEBOUNCE_MS = 100; + /** * Parse a relative path within the config directory into a structured event, * under the uniform org-first layout `${TALE_CONFIG_DIR}///...`. * + * Per-domain file filter (a write must match the domain's content shape; + * otherwise the event is dropped): + * - agents / workflows / providers / branding / integrations: `.json` only + * - skills: any file (`SKILL.md`, `scripts/*.py`, assets) — skill query + * keys are invalidated at slug granularity, so any write under the slug + * dir must emit. + * * Examples (with `default` as one possible orgSlug): * default/agents/my-agent.json → { type: 'agents', orgSlug: 'default', slug: 'my-agent' } * acme/agents/my-agent.json → { type: 'agents', orgSlug: 'acme', slug: 'my-agent' } @@ -29,11 +44,16 @@ const ORG_SLUG_REGEX = /^[a-z0-9][a-z0-9_-]*$/; * default/integrations/slack/config.json → { type: 'integrations', orgSlug: 'default', slug: 'slack' } * default/branding/branding.json → { type: 'branding', orgSlug: 'default' } * default/skills/code-reviewer/SKILL.md → { type: 'skills', orgSlug: 'default', slug: 'code-reviewer' } + * default/skills/code-reviewer/scripts/x.py → { type: 'skills', orgSlug: 'default', slug: 'code-reviewer' } * * Returns null for paths that don't fit the `//` shape - * (org slug must validate; domain must be recognized). + * (org slug must validate; domain must be recognized; per-domain filter must + * pass; secret sidecars dropped). 
*/ function parseConfigChange(relativePath: string): ConfigChangeEvent | null { + // Secret sidecars are written by operators only; never broadcast. + if (relativePath.endsWith('.secrets.json')) return null; + const parts = relativePath.split('/'); if (parts.length < 2) return null; @@ -45,6 +65,7 @@ function parseConfigChange(relativePath: string): ConfigChangeEvent | null { if (domain === 'branding') { // Branding is default-only on the read side, but still emit per-org so // future per-org branding (or operator inspection) sees the event. + if (!relativePath.endsWith('.json')) return null; return { type: 'branding', orgSlug }; } @@ -63,24 +84,28 @@ function parseConfigChange(relativePath: string): ConfigChangeEvent | null { if (rest.length === 0) return null; if (type === 'agents') { + if (!relativePath.endsWith('.json')) return null; // /agents/.json const filename = rest[0]; return { type, orgSlug, slug: filename.replace(/\.json$/, '') }; } if (type === 'workflows') { + if (!relativePath.endsWith('.json')) return null; // /workflows/[folder/]name.json — slug is the path without extension const slug = rest.join('/').replace(/\.json$/, ''); return { type, orgSlug, slug }; } if (type === 'integrations') { + if (!relativePath.endsWith('.json')) return null; // /integrations//config.json (or other bundle files) const slug = rest[0]; return { type, orgSlug, slug }; } if (type === 'providers') { + if (!relativePath.endsWith('.json')) return null; // /providers/.json const filename = rest[0]; return { type, orgSlug, slug: filename.replace(/\.json$/, '') }; @@ -113,27 +138,42 @@ export function createConfigWatcher(configDir: string): ConfigWatcher { ], }); + // Per-key tail debounce: collapses bursts of events for the same + // (type, orgSlug, slug) so a bulk operation (e.g. mass migration) + // doesn't fan out one SSE frame per file per connected client. 
+ const pending = new Map>(); + + const emitDebounced = (event: ConfigChangeEvent) => { + const key = `${event.type}:${event.orgSlug ?? ''}:${event.slug ?? ''}`; + const existing = pending.get(key); + if (existing) clearTimeout(existing); + pending.set( + key, + setTimeout(() => { + pending.delete(key); + for (const cb of callbacks) { + cb(event); + } + }, EMIT_DEBOUNCE_MS), + ); + }; + watcher.on('all', (_eventName, filePath) => { const rel = relative(configDir, filePath); - - // Only react to JSON file changes; ignore secret sidecar files - if (!rel.endsWith('.json')) return; - if (rel.endsWith('.secrets.json')) return; - const event = parseConfigChange(rel); if (!event) return; - - for (const cb of callbacks) { - cb(event); - } + emitDebounced(event); }); return { onChange(callback) { callbacks.push(callback); }, - close() { - return watcher.close(); + async close() { + // Drop any pending debounced events so we don't emit after close. + for (const t of pending.values()) clearTimeout(t); + pending.clear(); + await watcher.close(); }, }; } diff --git a/services/platform/lib/shared/constants/reserved-org-slugs.ts b/services/platform/lib/shared/constants/reserved-org-slugs.ts new file mode 100644 index 0000000000..e2a84342a6 --- /dev/null +++ b/services/platform/lib/shared/constants/reserved-org-slugs.ts @@ -0,0 +1,19 @@ +/** + * Org slugs that the platform reserves and refuses to assign to + * user-created organizations. + * + * `default` is reserved because the platform pins several global + * resources to it (branding, retention defaults, scaffold seed + * target). If a user could claim that slug they'd inherit those + * globals, including the ability to mutate platform branding via + * `isCallerAdmin` (see `convex/branding/internal_queries.ts`). + * + * Importable from both Convex (`convex/auth.ts`) and the React + * organization form — kept in `lib/shared/constants/` so it stays + * Node-runtime-neutral. 
+ */ +export const RESERVED_ORG_SLUGS: ReadonlySet = new Set(['default']); + +export function isReservedOrgSlug(slug: string): boolean { + return RESERVED_ORG_SLUGS.has(slug.toLowerCase()); +} diff --git a/services/rag/app/models.py b/services/rag/app/models.py index fa021223ef..a824f1e4a3 100644 --- a/services/rag/app/models.py +++ b/services/rag/app/models.py @@ -19,13 +19,16 @@ class HealthResponse(BaseModel): class ConfigResponse(BaseModel): - """Configuration response (non-sensitive values only).""" + """Configuration response (non-sensitive values only). + + LLM-specific fields (model name, embedding model) require an + `org_slug` to resolve in the multi-org world and are intentionally + omitted from this endpoint; query the per-org config separately. + """ host: str port: int log_level: str - openai_model: str - openai_embedding_model: str chunk_size: int chunk_overlap: int top_k: int diff --git a/services/rag/app/routers/health.py b/services/rag/app/routers/health.py index 0b5c686969..24a7a08689 100644 --- a/services/rag/app/routers/health.py +++ b/services/rag/app/routers/health.py @@ -13,7 +13,7 @@ from typing import Any -from fastapi import APIRouter, HTTPException, status +from fastapi import APIRouter from loguru import logger from .. import __version__ @@ -87,22 +87,13 @@ async def get_config(): Auth-gated via the protected router; before round-2 v15 this leaked deployment fingerprints (model names, host/port, chunking params) - to any caller with reach to the RAG port. + to any caller with reach to the RAG port. LLM/embedding model names + require an `org_slug` to resolve and are omitted here. 
""" - try: - llm_config = settings.get_llm_config() - except ValueError as exc: - raise HTTPException( - status_code=status.HTTP_503_SERVICE_UNAVAILABLE, - detail="LLM configuration not available", - ) from exc - return ConfigResponse( host=settings.host, port=settings.port, log_level=settings.log_level, - openai_model=llm_config.get("model", ""), - openai_embedding_model=llm_config.get("embedding_model", ""), chunk_size=settings.chunk_size, chunk_overlap=settings.chunk_overlap, top_k=settings.top_k, diff --git a/tools/cli/src/commands/deploy/index.ts b/tools/cli/src/commands/deploy/index.ts index c7d9816dd5..b96da2b814 100644 --- a/tools/cli/src/commands/deploy/index.ts +++ b/tools/cli/src/commands/deploy/index.ts @@ -60,9 +60,13 @@ export function createDeployCommand(): Command { // (typical: a new `SANDBOX_TOKEN` for an existing deployment), // force-recreate the running services so their in-memory env // refreshes to the new value rather than keeping the stale null. + // Also force-recreate on --override-all so the reseed action + // runs against the new binary, not a stale container that the + // image/config-unchanged path would have left running. const forceRecreate = - regeneratedAutoSecrets !== undefined && - regeneratedAutoSecrets.length > 0; + (regeneratedAutoSecrets !== undefined && + regeneratedAutoSecrets.length > 0) || + (options.overrideAll ?? false); const env = loadEnv(projectDir); const version = pkg.version.includes('-dev') ? 'latest' : pkg.version; diff --git a/tools/cli/src/lib/actions/deploy.ts b/tools/cli/src/lib/actions/deploy.ts index 3166dde853..82ee5fcec0 100644 --- a/tools/cli/src/lib/actions/deploy.ts +++ b/tools/cli/src/lib/actions/deploy.ts @@ -145,8 +145,22 @@ export async function deploy(options: DeployOptions): Promise { const prefix = dryRun ? 
'[DRY-RUN] ' : ''; logger.header(`${prefix}Deploying Tale ${version}`); - // (Auto-migration framework removed — `tale migrate config-layout` is - // the only opt-in, manually-run migration now.) + // Auto-migration framework removed — `tale migrate config-layout` is + // the only opt-in, manually-run migration now. Fail fast (before + // pulling images / rolling services) if the project still has the + // pre-refactor flat layout at the root; otherwise a no-op deploy + // could complete while the host config silently never reaches the + // container. + { + const { legacyDirs } = await findOrgDirs(env.DEPLOY_DIR); + if (legacyDirs.length > 0) { + throw new Error( + `Legacy flat layout detected at project root (${legacyDirs.join(', ')}/). ` + + `Run 'tale migrate config-layout' then 'tale deploy --override-all -y' ` + + `(see docs/self-hosted/operate/upgrades.md).`, + ); + } + } // Check if this is a first-time deployment const currentColor = await getCurrentColor(env.DEPLOY_DIR); @@ -572,7 +586,10 @@ export async function deploy(options: DeployOptions): Promise { `${prefix}Dry-run complete! Would deploy version ${version}`, ); } else { - logger.success(`Deployment complete! Version ${version} is now live`); + // Containers are now rolled. Don't print "Deployment complete!" + // yet — that announces success over the wire, but sync + reseed + // still run below and either can abort the deploy. + logger.info(`${prefix}Services updated to version ${version}.`); } // Sync project files to the convex container (owns convex-data volume rw) @@ -594,6 +611,10 @@ export async function deploy(options: DeployOptions): Promise { assumeYes: options.assumeYes ?? false, }); } + + if (!dryRun) { + logger.success(`Deployment complete! 
Version ${version} is now live`); + } }); } finally { process.removeListener('SIGINT', onInterrupt); @@ -681,14 +702,11 @@ async function syncProjectFiles( const { orgDirs, legacyDirs } = await findOrgDirs(projectDir); if (legacyDirs.length > 0) { - logger.error( - `${prefix}Legacy flat layout detected at project root (${legacyDirs.join(', ')}/).`, + throw new Error( + `Legacy flat layout detected at project root (${legacyDirs.join(', ')}/). ` + + `Run 'tale migrate config-layout' then 'tale deploy --override-all -y' ` + + `(see docs/self-hosted/operate/upgrades.md).`, ); - logger.info( - `${prefix} Move config under 'default//' (or run 'tale init --force' to rescaffold).`, - ); - logger.info(`${prefix} Aborting --override push.`); - return; } if (orgDirs.length === 0) { diff --git a/tools/cli/src/lib/actions/start.ts b/tools/cli/src/lib/actions/start.ts index 2f86fa701e..51e2e56572 100644 --- a/tools/cli/src/lib/actions/start.ts +++ b/tools/cli/src/lib/actions/start.ts @@ -223,6 +223,7 @@ export async function start(options: StartOptions): Promise { { version, registry: env.GHCR_REGISTRY }, hostAlias, port, + { projectDir }, ); const overrideFile = findComposeOverride(projectDir); From b2db3161f133466a98ea96b33af6805d635e236e Mon Sep 17 00:00:00 2001 From: larryro <371767072@qq.com> Date: Thu, 28 May 2026 15:01:17 +0800 Subject: [PATCH 04/41] fix(platform,cli,crawler,rag,docs): close P0/P1 gaps from second-round review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two-round multi-agent review of refactor/uniform-org-first-config-layout surfaced 5 P0s and ~30 P1s. This commit closes all of them. P0 — cross-tenant isolation - workflow_engine RAG delete_document, document extract/apply_docx_structured: add verifyStorageIdsBelongToOrg guards (mirror compare/retrieve pattern). 
- crawler vision: refactor VisionClient + process_pages_with_llm to per-org _org_states / _chat_states keyed by get_active_org(); previously a 15s TTL singleton leaked org A's API key to org B's request within the window. llm_cache (OCR/desc/LLM) entries are now org-scoped via _scoped_key so the same content from two orgs never collides. - New test_vision_isolation.py locks the invariant down (6 cases). P1 — abstractions and CLI/upgrade flow - UpstreamHttpError: .message = safeMessage only (snippet kept on .bodySnippet so it doesn't cross the Convex client boundary as a default toast); parse Retry-After into retryAfterMs; endpoint defaults to response.url; new toConvexError() carries structured fields across the wire; 9 new tests covering 401/403/404/429 carve-outs, Retry-After parsing, ConvexError marshalling. - Migrate 8 raw `new Error` sites to UpstreamHttpError.fromResponse (crawler_action ×3, file_metadata, fetch_document_comparison 4xx paths, web fetch_and_extract, docx extract/apply structured helpers). - delete_document re-throws retryable upstream errors instead of folding them into {success:false}, so action retries can recover. - rag_search_tool search-path now mirrors retrieve-path: returns safe summary instead of throwing past the agent runtime. P1 — shared utilities (dedup) - New lib/shared/constants/org-slug.ts owns ORG_SLUG_REGEX, isValidOrgSlug, assertValidOrgSlug — replaces 3 inline copies (file_io.ts, config-watcher.ts, reseed_all_orgs.ts) and tightens the bash regex in migrate-config-layout/script.sh. - lib/file_io.ts gains getConfigRoot(area?) and safeJoinWithinDir helpers; 6 file_utils.ts files + config_store/store.ts drop ~80 lines of copy-pasted path-traversal guards. - organizations/resolve_org_slug.ts now re-exports orgSlugFromId (single implementation across ~46 callers). 
P1 — CLI upgrade flow alignment - migrate-config-layout/script.sh: pre-scan dst-collisions, SKIP notices to stdout (only ERROR on stderr), invalid org slug surfaces as conflict+error not silent skip. - start.ts: import LEGACY_DOMAIN_DIR_NAMES from deploy.ts (closes the missing 'retention' drift), hard-fail on legacy layout (consistent with deploy), ensureEnv unconditional (matches deploy semantics for auto-secret refresh). - migrate-config-layout.ts: actionable error when convex container isn't running; help text says "byte-for-byte" not "sha256" (matches cmp -s implementation). - Three-locale docs/upgrades.md: drop "(and other config)" overpromise, reflect deploy hard-fail (not "starts up empty"), document old- container-must-be-running prereq for step 1, fix DE/FR grammar ("du läufst" → "du ausführst", "neu walkst" → "neu durchgehst", "re-walks" → "reparcours"). P1 — reseed CLI robustness - reseed-all-orgs.ts: line-aware trailing-JSON parser (replaces fragile lastIndexOf('{')); grep `|| true` so grep zero-matches don't poison pipefail; failure branch parses payload too so failed-slug detail reaches CI logs; timeout-124 exit gets a distinct "timed out, safe to re-run" message. - reseed_all_orgs.ts: invalid betterAuth slugs flow into results; pagination guards against stuck cursor + 1000-page cap. P1 — RAG internal concurrency - search() returns (results, usage) tuple — drops the mutable self.last_search_usage singleton that mis-attributed tokens under concurrent calls. - Module-level _pin_dim_lock serializes the first _pinned_dims write across orgs (was racing past `if dims is None`). - _org_locks LRU-capped at 256 to bound memory if a caller ever sprays distinct slugs. - shutdown() drains _background_tasks before close_pool(). P1 — crawler data correctness - DELETE FROM chunks: add `AND domain = $2` so same URL path on two domains doesn't over-delete. - delete_page_chunks now accepts optional domain arg. 
- pg_website_store: parse asyncpg DELETE tag as integer (was literal string compare against "DELETE 0"). P1 — branding hardening - requireBrandingAdmin: trusted-headers branch no longer short- circuits past isCallerAdmin's default-org check. - safeGetUrl in getLegacyBranding now logs instead of swallowing. - saveImage/deleteImage readdir errors: distinguish ENOENT from EACCES/EISDIR. - server.ts branding route: explicit Content-Type allowlist + sep- bounded prefix check (defense in depth over the existing filename validator). P1 — docker entrypoint + 2026-03-28 migration script - FORCE_SEED default ("false") so script stays correct under any future set -u audit. - $data_dir single source of truth — drops /app/data hardcodes that diverged from $TALE_CONFIG_DIR. - chown -R replaced with `find ! -user app -exec chown app:app` so large volumes don't re-walk every startup. - POSTGRES_URL parsing handles bracketed IPv6 ([::1]) and URL- encoded password segments (pure-bash, no python dependency). - mkdir + atomic_cp chained with `&&` instead of `;` so a failed mkdir doesn't cause a misattributed copy diagnostic. - 2026-03-28 migration: drop `2>/dev/null` on cp so I/O errors surface; keep `|| true` only for the empty-glob case. P1 — file_metadata retry classification - extractFileMetadata uses isUpstreamHttpError to distinguish transient (5xx/408/429 → retry) from permanent (4xx, org-slug lookup failure → markFailed). Earlier retried permanent errors N times burning scheduler slots. P1 — auth + org form - beforeCreateOrganization: lowercase-normalize slug BEFORE reservation + uniqueness checks (closes Default/default cased bypass); assertValidOrgSlug on entry. - New beforeUpdateOrganization hook: same guards on rename so owners can't claim reserved slugs post-creation. - organization-form.tsx: extract deriveOrgSlug helper (was inlined three places); route Zod refine messages through useT (was hardcoded English); add three-locale i18n keys. 
P1 — scaffold test coverage - Add tests for invalid-slug skipped:true return, retention override on/off, strict:true aggregated throw, non-strict aggregated result. Verification - bun run check: all lint + type + test suites pass. - Platform: 274 test files, 70941 assertions green. - Crawler: 487 tests, RAG: 298 tests. --- docs/de/self-hosted/operate/upgrades.md | 22 ++- docs/en/self-hosted/operate/upgrades.md | 17 +- docs/fr/self-hosted/operate/upgrades.md | 22 ++- scripts/2026-03-28-migrate-convex-data.sh | 8 +- services/convex/docker-entrypoint.sh | 63 ++++++- .../crawler/app/services/indexing_service.py | 21 ++- .../crawler/app/services/pg_website_store.py | 17 +- services/crawler/app/services/vision/cache.py | 58 ++++-- .../app/services/vision/openai_client.py | 162 +++++++++++----- services/crawler/tests/conftest.py | 18 ++ .../crawler/tests/test_vision_isolation.py | 174 ++++++++++++++++++ .../components/organization-form.tsx | 61 +++--- .../settings/governance/hooks/mutations.ts | 2 +- .../fetch_document_comparison.test.ts | 10 +- .../documents/fetch_document_content.test.ts | 14 +- .../helpers/fetch_document_comparison.ts | 17 +- .../agent_tools/rag/query_rag_context.ts | 36 +++- .../convex/agent_tools/rag/rag_search_tool.ts | 22 ++- .../web/helpers/fetch_and_extract.ts | 17 +- .../web/helpers/query_web_context.ts | 5 +- .../agent_tools/web/helpers/search_pages.ts | 20 +- services/platform/convex/agents/file_utils.ts | 31 +--- services/platform/convex/auth.ts | 66 ++++++- .../platform/convex/branding/file_actions.ts | 29 ++- .../platform/convex/branding/file_utils.ts | 42 +---- .../convex/branding/internal_queries.ts | 11 +- .../convex/file_metadata/internal_actions.ts | 29 ++- .../convex/integrations/file_utils.ts | 31 +--- .../platform/convex/lib/config_store/store.ts | 38 ++-- .../__tests__/upstream_http_error.test.ts | 110 ++++++++++- .../convex/lib/errors/upstream_http_error.ts | 99 +++++++++- services/platform/convex/lib/file_io.ts | 51 ++++- 
.../platform/convex/lib/helpers/rag_config.ts | 12 +- .../convex/organizations/reseed_all_orgs.ts | 43 ++++- .../organizations/resolve_org_slug.test.ts | 4 +- .../convex/organizations/resolve_org_slug.ts | 32 +--- .../convex/organizations/scaffold.test.ts | 96 +++++++++- .../platform/convex/providers/file_utils.ts | 44 ++--- services/platform/convex/skills/file_utils.ts | 41 ++--- .../action_defs/crawler/crawler_action.ts | 28 ++- .../action_defs/document/document_action.ts | 21 +++ .../document/helpers/apply_docx_structured.ts | 8 +- .../helpers/extract_docx_structured.ts | 8 +- .../rag/helpers/delete_document.test.ts | 18 +- .../rag/helpers/delete_document.ts | 35 +++- .../rag/helpers/upload_file_direct.test.ts | 8 +- .../action_defs/rag/rag_action.ts | 3 + .../platform/convex/workflows/file_utils.ts | 34 ++-- services/platform/lib/config-watcher.ts | 4 +- .../platform/lib/shared/constants/org-slug.ts | 32 ++++ services/platform/messages/de.json | 3 + services/platform/messages/en.json | 3 + services/platform/messages/fr.json | 3 + services/platform/server.ts | 31 +++- services/rag/app/routers/search.py | 5 +- services/rag/app/services/rag_service.py | 90 ++++++--- services/rag/tests/test_rag_service.py | 54 ++++-- tools/cli/src/commands/migrate.ts | 4 +- tools/cli/src/lib/actions/deploy.ts | 14 +- .../src/lib/actions/migrate-config-layout.ts | 19 +- tools/cli/src/lib/actions/reseed-all-orgs.ts | 95 +++++++--- tools/cli/src/lib/actions/start.ts | 60 +++--- .../src/lib/migrate-config-layout/script.sh | 49 ++++- 63 files changed, 1668 insertions(+), 556 deletions(-) create mode 100644 services/crawler/tests/test_vision_isolation.py create mode 100644 services/platform/lib/shared/constants/org-slug.ts diff --git a/docs/de/self-hosted/operate/upgrades.md b/docs/de/self-hosted/operate/upgrades.md index db5eee2a08..887b37d01e 100644 --- a/docs/de/self-hosted/operate/upgrades.md +++ b/docs/de/self-hosted/operate/upgrades.md @@ -18,7 +18,7 @@ Zwei Dinge sind es wert, 
zuerst zu bestätigen: ## Die zwei Kommandos -`tale upgrade` aktualisiert das CLI-Binary selbst. Die deployte Plattform-Version stimmt mit der Version des CLI überein — diese Kopplung ist Absicht, damit das CLI, das du läufst, nicht eine Version deployen kann, die es nicht kennt. +`tale upgrade` aktualisiert das CLI-Binary selbst. Die deployte Plattform-Version stimmt mit der Version des CLI überein — diese Kopplung ist Absicht, damit das CLI, das du ausführst, nicht eine Version deployen kann, die es nicht kennt. ```bash # Bewege das CLI auf das letzte Release @@ -79,18 +79,21 @@ Minor-Versionen zu überspringen (von 0.9 auf 0.11 zu gehen) ist unterstützt, s ## Wo das hingehört -Der Upgrade-Flow knüpft jede andere Operate-Seite an — Backups sind das, was ein gescheitertes Upgrade wiederherstellbar macht, Observability ist das, was dir sagt, dass die neue Farbe healthy ist, Hardening ist das, was du nach einer Major-Version neu walkst. Setzt du das CLI zum ersten Mal auf, deckt [Tale-CLI installieren](/de/self-hosted/install/cli-install) das workstationseitige Setup ab; nimmst du den Pager mitten im Rollout auf, nennt [Troubleshooting](/de/self-hosted/operate/observability/troubleshooting) die Symptome. +Der Upgrade-Flow knüpft jede andere Operate-Seite an — Backups sind das, was ein gescheitertes Upgrade wiederherstellbar macht, Observability ist das, was dir sagt, dass die neue Farbe healthy ist, Hardening ist das, was du nach einer Major-Version neu durchgehst. Setzt du das CLI zum ersten Mal auf, deckt [Tale-CLI installieren](/de/self-hosted/install/cli-install) das workstationseitige Setup ab; nimmst du den Pager mitten im Rollout auf, nennt [Troubleshooting](/de/self-hosted/operate/observability/troubleshooting) die Symptome. ## Migration auf das Org-first-Config-Layout Ältere Tale-Releases haben Config in einem flachen Baum im Workspace-Root abgelegt (`agents/`, `workflows/`, `integrations/`, `branding/`, `providers/`, `skills/`). 
Aktuelles Tale nutzt ein **Org-first**-Layout, in dem jede Org — auch die kanonische `default` — ihren eigenen Unterbaum besitzt: `///...`. Die Migration ist opt-in und läuft einmal pro Workspace. Die neue Plattform liest die alten Pfade nicht mehr; bis du migrierst, liegen Provider-Secrets und Anpassungen in Verzeichnissen, die das Runtime nicht mehr anschaut. -Die Migration sind drei Kommandos: +Die Migration sind drei Kommandos. Für Schritt 1 muss der Convex-Container vom **alten** Image noch laufen — halt die Plattform auf der alten Version online und führe Schritt 1 gegen diesen laufenden Container aus, bevor du upgradest. ```bash -# 1. Provider-Secrets (und andere Config) aus dem flachen Layout nach -# `default//...` kopieren. cp statt mv, damit die alten Pfade -# für einen möglichen Rollback intakt bleiben. +# 1. Provider-Secrets aus dem flachen Layout nach +# `default/providers/...` kopieren. cp statt mv, damit die alten +# Pfade für einen möglichen Rollback intakt bleiben. Scope sind +# ausschließlich Provider-Secrets; alle anderen Domains (agents, +# workflows, integrations, skills, branding, retention) werden in +# Schritt 2 server-seitig aus dem Builtin-Katalog re-seedet. tale migrate config-layout # 2. Convex-Container gegen das Org-first-Volume-Layout neu erstellen @@ -100,8 +103,9 @@ tale migrate config-layout tale deploy --override-all -y # 3. Wenn du das neue Layout verifiziert hast, alte Pfade entfernen. -# sha-verifiziert, dass die neue Datei der alten entspricht, bevor -# unlink; bei Mismatch wird das Löschen verweigert. +# Verifiziert byte-für-byte, dass die neue Datei der alten +# entspricht, bevor unlink; bei Mismatch wird das Löschen +# verweigert. tale migrate config-layout --cleanup-old ``` @@ -117,4 +121,4 @@ Nach Schritt 3 (`--cleanup-old`) sind die alten Pfade weg. Downgrade re-seedet d ### Was, wenn ich Schritt 1 überspringe? 
-Der Convex-Container erkennt beim Start die übrig gebliebenen flachen Layout-Dirs und schreibt eine Warnung in seine Logs, die die Verzeichnisse benennt und auf dieses Runbook zeigt. Das Deployment startet, aber Reads aus diesen Verzeichnissen liefern leer, und Writes gehen in die neuen (leeren) Org-first-Pfade. Die Korrektur sind weiterhin Schritt 1 + 2 — sie nach der Warnung laufen zu lassen funktioniert genauso wie sie im Voraus laufen zu lassen. +`tale deploy` und `tale start` verweigern beide den Start, wenn sie übrig gebliebene flache Layout-Dirs (`agents/`, `workflows/`, `integrations/`, `branding/`, `providers/`, `skills/`, `retention/`) im Workspace-Root finden. Der Fehler nennt die betroffenen Verzeichnisse und verweist auf dieses Runbook. Die Korrektur sind Schritt 1 + 2 in dieser Reihenfolge; es gibt keinen "trotzdem deployen und Legacy-Pfade ignorieren"-Modus — die Runtime-Resolver lesen diese Pfade nicht, ein Boot ohne Migration würde die Plattform also mit leerer Config zurücklassen. diff --git a/docs/en/self-hosted/operate/upgrades.md b/docs/en/self-hosted/operate/upgrades.md index 20d8c20dee..b2c8864ecd 100644 --- a/docs/en/self-hosted/operate/upgrades.md +++ b/docs/en/self-hosted/operate/upgrades.md @@ -85,12 +85,15 @@ The upgrade flow ties together every other operate page — backups are what mak Older Tale releases stored config in a flat tree at the workspace root (`agents/`, `workflows/`, `integrations/`, `branding/`, `providers/`, `skills/`). Current Tale uses an **org-first** layout where every org — including the canonical `default` — owns its own subtree: `<root>/<org>/<domain>/...`. The migration is opt-in and runs once per workspace. The new platform refuses to read the legacy paths; until you migrate, your provider secrets and customizations live in directories the runtime no longer looks at. -The migration is three commands: +The migration is three commands. 
The Convex container from the **old** image must still be running for step 1 — keep the platform up on the old version, then run step 1 against that running container before upgrading. ```bash -# 1. Copy provider secrets (and other config) from the flat layout into -# `default/<domain>/...`. cp not mv, so the old paths stay intact in -# case you need to roll back. +# 1. Copy provider secrets from the flat layout into +# `default/providers/...`. cp not mv, so the old paths stay intact +# in case you need to roll back. Scope is provider secrets only; +# every other domain (agents, workflows, integrations, skills, +# branding, retention) is re-seeded server-side by step 2 from the +# builtin catalog. tale migrate config-layout # 2. Recreate the Convex container against the org-first volume layout @@ -100,8 +103,8 @@ tale migrate config-layout tale deploy --override-all -y # 3. Once you have verified the new layout is intact, remove the legacy -# paths. sha-verifies that the new file matches the old before -# unlinking; refuses to delete on any mismatch. +# paths. Verifies that the new file matches the old byte-for-byte +# before unlinking; refuses to delete on any mismatch. tale migrate config-layout --cleanup-old ``` @@ -117,4 +120,4 @@ After step 3 (`--cleanup-old`), the legacy paths are gone. Downgrade still re-se ### What if I skip step 1? -The Convex container will detect leftover flat-layout dirs on boot and print a warning to its logs naming the directories and pointing at this runbook. The deployment will start up, but reads from those directories return empty and writes go to the new (empty) org-first paths. The fix is still steps 1 + 2 — running them after the warning works exactly the same as running them up front. +`tale deploy` and `tale start` both refuse to run when they detect leftover flat-layout dirs (`agents/`, `workflows/`, `integrations/`, `branding/`, `providers/`, `skills/`, `retention/`) at the workspace root. 
The error names the offending directories and points at this runbook. The fix is steps 1 + 2 in order; there is no "deploy anyway and ignore the legacy paths" mode — the runtime resolvers do not read those paths, so booting without migrating would leave the platform with empty config. diff --git a/docs/fr/self-hosted/operate/upgrades.md b/docs/fr/self-hosted/operate/upgrades.md index 0881491c2d..9ce64839f3 100644 --- a/docs/fr/self-hosted/operate/upgrades.md +++ b/docs/fr/self-hosted/operate/upgrades.md @@ -79,19 +79,22 @@ Sauter des versions mineures (passer de 0.9 à 0.11) est supporté tant que les ## Où cela s'inscrit -Le flow de montée de version noue chaque autre page d'exploitation — les backups sont ce qui rend une montée de version échouée récupérable, l'observabilité est ce qui te dit que la nouvelle couleur est saine, le durcissement est ce que tu re-walks après une version majeure. Si tu mets en place la CLI pour la première fois, [Installer la CLI tale](/fr/self-hosted/install/cli-install) couvre le setup côté workstation ; si tu prends le pager en plein rollout, [Dépannage](/fr/self-hosted/operate/observability/troubleshooting) nomme les symptômes. +Le flow de montée de version noue chaque autre page d'exploitation — les backups sont ce qui rend une montée de version échouée récupérable, l'observabilité est ce qui te dit que la nouvelle couleur est saine, le durcissement est ce que tu reparcours après une version majeure. Si tu mets en place la CLI pour la première fois, [Installer la CLI tale](/fr/self-hosted/install/cli-install) couvre le setup côté workstation ; si tu prends le pager en plein rollout, [Dépannage](/fr/self-hosted/operate/observability/troubleshooting) nomme les symptômes. ## Migration vers la disposition de config org-first Les anciennes versions de Tale stockaient la config dans une arborescence plate à la racine du workspace (`agents/`, `workflows/`, `integrations/`, `branding/`, `providers/`, `skills/`). 
La version actuelle utilise une disposition **org-first** où chaque org — y compris la canonique `default` — possède son propre sous-arbre : `<root>/<org>/<domain>/...`. La migration est opt-in et tourne une seule fois par workspace. La nouvelle plateforme refuse de lire les anciens chemins ; tant que tu n'as pas migré, tes secrets de provider et personnalisations vivent dans des répertoires que le runtime ne regarde plus. -La migration tient en trois commandes : +La migration tient en trois commandes. Pour l'étape 1, le conteneur Convex de l'**ancienne** image doit encore tourner — garde la plateforme en ligne sur l'ancienne version et lance l'étape 1 contre ce conteneur en cours avant de monter de version. ```bash -# 1. Copier les secrets de provider (et autres configs) depuis la -# disposition plate vers `default/<domain>/...`. cp et non mv, donc -# les anciens chemins restent intacts au cas où un rollback serait -# nécessaire. +# 1. Copier les secrets de provider depuis la disposition plate vers +# `default/providers/...`. cp et non mv, donc les anciens chemins +# restent intacts au cas où un rollback serait nécessaire. Le scope +# couvre uniquement les secrets de provider ; tous les autres +# domaines (agents, workflows, integrations, skills, branding, +# retention) sont re-seedés côté serveur à l'étape 2 depuis le +# catalogue builtin. tale migrate config-layout # 2. Recréer le conteneur Convex contre la disposition de volume org-first @@ -100,8 +103,9 @@ tale migrate config-layout tale deploy --override-all -y # 3. Une fois la nouvelle disposition vérifiée intacte, supprimer les -# anciens chemins. Vérifie via sha que le nouveau fichier correspond -# à l'ancien avant unlink ; refuse de supprimer en cas de mismatch. +# anciens chemins. Vérifie byte-à-byte que le nouveau fichier +# correspond à l'ancien avant unlink ; refuse de supprimer en cas +# de mismatch. 
tale migrate config-layout --cleanup-old ``` @@ -117,4 +121,4 @@ Après l'étape 3 (`--cleanup-old`), les anciens chemins sont partis. Le downgra ### Et si je saute l'étape 1 ? -Le conteneur Convex détectera au démarrage les répertoires restants de la disposition plate et écrira un warning dans ses logs en nommant les répertoires et pointant vers ce runbook. Le déploiement démarre, mais les reads sur ces répertoires reviennent vides et les writes vont vers les nouveaux chemins (vides) org-first. La correction reste étapes 1 + 2 — les lancer après le warning fonctionne exactement comme les lancer en amont. +`tale deploy` et `tale start` refusent tous les deux de démarrer s'ils détectent des répertoires restants de la disposition plate (`agents/`, `workflows/`, `integrations/`, `branding/`, `providers/`, `skills/`, `retention/`) à la racine du workspace. L'erreur nomme les répertoires concernés et pointe vers ce runbook. La correction reste les étapes 1 + 2 dans cet ordre ; il n'existe pas de mode « déploie quand même et ignore les chemins legacy » — les résolveurs runtime ne lisent pas ces chemins, donc démarrer sans migrer laisserait la plateforme avec une config vide. diff --git a/scripts/2026-03-28-migrate-convex-data.sh b/scripts/2026-03-28-migrate-convex-data.sh index 7ad741cf29..b377e605d7 100755 --- a/scripts/2026-03-28-migrate-convex-data.sh +++ b/scripts/2026-03-28-migrate-convex-data.sh @@ -72,7 +72,13 @@ if [ "$old_exists" = true ]; then mkdir -p "$dst" before=$(ls "$dst" 2>/dev/null | wc -l) - cp -rn "$src/"* "$dst/" 2>/dev/null || true + # `cp -rn` is no-clobber, so re-runs are no-ops on already- + # copied trees. Earlier this swallowed stderr unconditionally, + # which hid disk-full / permission-denied as "0 new items". + # `|| true` is kept only to tolerate the "no files to copy" + # edge case (matched glob with no entries) without aborting + # `set -e`; real I/O errors now surface on stderr. 
+ cp -rn "$src/"* "$dst/" || true after=$(ls "$dst" | wc -l) added=$((after - before)) diff --git a/services/convex/docker-entrypoint.sh b/services/convex/docker-entrypoint.sh index 5bc69b5923..19924b1fb6 100755 --- a/services/convex/docker-entrypoint.sh +++ b/services/convex/docker-entrypoint.sh @@ -6,6 +6,13 @@ # noise it would catch. set -eo pipefail +# Default for the seed-force flag. The script references `$FORCE_SEED` +# in several places (`[ "$FORCE_SEED" = "true" ]`); without this +# default it works only because `set -u` is intentionally off — any +# future audit that enables nounset would break startup. Pin the +# default here so the script stays correct under both modes. +FORCE_SEED="${FORCE_SEED:-false}" + # ============================================================================ # Tale Convex Service Entrypoint # ---------------------------------------------------------------------------- @@ -44,7 +51,13 @@ if [ "$(id -u)" = '0' ]; then # org seed target) up front; per-domain dirs are created on-demand by # `run_seed` and `scaffoldNewOrganization`. mkdir -p "$data_dir/convex" "$data_dir/default" - chown -R app:app "$data_dir" + # Only chown files NOT already owned by `app:app`. On large volumes + # (RAG uploads, Convex storage) the prior unconditional `chown -R` + # walked every inode every boot, adding tens of seconds and racing + # with backend writes during fast restart loops. `find ... -exec + # chown {} +` is idempotent and short-circuits once the volume is + # consistent. + find "$data_dir" \! -user app -exec chown app:app {} + # ---------------------------------------------------------------------------- # SSRF egress firewall (defense-in-depth) @@ -240,8 +253,32 @@ wait_for_http() { } # Extract DB host:port from POSTGRES_URL for a TCP probe. 
-db_host=$(echo "$POSTGRES_URL" | sed -E 's#^postgres(ql)?://([^@/]+@)?([^:/?]+).*#\3#') -db_port=$(echo "$POSTGRES_URL" | sed -nE 's#^postgres(ql)?://([^@/]+@)?[^:/?]+:([0-9]+).*#\3#p') +# +# Strip the scheme + optional `user:pass@` userinfo (greedy match up to +# the LAST `@`, which handles passwords containing `@` correctly), then +# special-case the bracketed IPv6 form `[::1]:5432` before falling +# through to the bare `host:port` form. +hostport="${POSTGRES_URL#*://}" +case "$hostport" in + *@*) hostport="${hostport##*@}" ;; +esac +hostport="${hostport%%/*}" +hostport="${hostport%%\?*}" +case "$hostport" in + '['*']'*) + db_host="${hostport#[}"; db_host="${db_host%%]*}" + tail="${hostport#*]}" + db_port="${tail#:}" + ;; + *:*) + db_host="${hostport%%:*}" + db_port="${hostport##*:}" + ;; + *) + db_host="$hostport" + db_port="" + ;; +esac db_port="${db_port:-5432}" if [ -n "$db_host" ]; then wait_for_port "$db_host" "$db_port" 60 "PostgreSQL" || exit 1 @@ -250,8 +287,13 @@ fi # ============================================================================ # Prepare working directories # ============================================================================ -mkdir -p /app/data/convex -export TMPDIR=/app/data/convex/tmp +# Single source of truth — every path below derives from `data_dir` so +# an operator who sets `TALE_CONFIG_DIR` to a non-default mount gets +# consistent behavior. Previously this block hardcoded `/app/data/...` +# despite the root-priv chown loop above respecting TALE_CONFIG_DIR. +data_dir="${TALE_CONFIG_DIR:-/app/data}" +mkdir -p "$data_dir/convex" +export TMPDIR="$data_dir/convex/tmp" mkdir -p "$TMPDIR" # Orphan video-link tmp dirs from crashed/killed ingest_video_link.ts actions. @@ -287,8 +329,8 @@ fi # an older binary that doesn't recognize this marker re-seeds (idempotently) # into its expected old paths on a hypothetical downgrade. 
# ---------------------------------------------------------------------------- -seed_marker="/app/data/.seeded-${TALE_VERSION:-dev}-orgfirst" -data_dir="/app/data" +seed_marker="$data_dir/.seeded-${TALE_VERSION:-dev}-orgfirst" +# `data_dir` already set above (single source of truth); no re-assign. # Crash-safe file copy: write to a sibling tmp file then rename to dest. # `cp` itself is non-atomic; the value is that an interrupted run leaves @@ -345,13 +387,16 @@ run_seed() { local history_dir="$workflows_dir/.history/$flat_slug" if [ "$FORCE_SEED" = "true" ]; then - mkdir -p "$dest_dir"; atomic_cp "$src" "$dest"; echo " ✓ Seeded workflow $rel_path (forced)"; continue + # `&&` (not `;`) so a failed mkdir aborts the copy attempt + # — otherwise atomic_cp runs against a missing dir and the + # diagnostic attributes the fault to the copy. + mkdir -p "$dest_dir" && atomic_cp "$src" "$dest" && echo " ✓ Seeded workflow $rel_path (forced)"; continue fi if [ -f "$dest" ]; then echo " ⏭ Skipping workflow $rel_path (already exists)"; continue; fi if [ -d "$history_dir" ] && [ "$(ls -A "$history_dir" 2>/dev/null)" ]; then echo " ⏭ Skipping workflow $rel_path (user has modifications in .history)"; continue fi - mkdir -p "$dest_dir"; atomic_cp "$src" "$dest"; echo " ✓ Seeded workflow $rel_path" + mkdir -p "$dest_dir" && atomic_cp "$src" "$dest" && echo " ✓ Seeded workflow $rel_path" done fi diff --git a/services/crawler/app/services/indexing_service.py b/services/crawler/app/services/indexing_service.py index b3f1cc67d2..131dc36cf3 100644 --- a/services/crawler/app/services/indexing_service.py +++ b/services/crawler/app/services/indexing_service.py @@ -162,7 +162,11 @@ async def _hash_update(conn: asyncpg.Connection) -> tuple[dict[str, int], asyncp async def _store_chunks(conn: asyncpg.Connection) -> None: await conn.execute(_UPSERT_WEBSITE_URL, domain, url, title, content_hash, filtered_hash) - await conn.execute("DELETE FROM chunks WHERE url = $1", url) + # Scope by domain 
too: chunks PK is (domain, url, chunk_index) + # so two different domains hosting the same URL path + # (e.g. `/about`) would over-delete each other's chunks + # without this filter. + await conn.execute("DELETE FROM chunks WHERE domain = $1 AND url = $2", domain, url) for i in range(0, len(chunk_rows), _EXECUTEMANY_BATCH_SIZE): await conn.executemany(_chunk_insert, chunk_rows[i : i + _EXECUTEMANY_BATCH_SIZE]) @@ -250,8 +254,19 @@ async def _index_one(row: asyncpg.Record) -> dict: "total_chunks": total_chunks, } - async def delete_page_chunks(self, url: str) -> int: + async def delete_page_chunks(self, url: str, domain: str | None = None) -> int: + # `domain` is optional for backwards compatibility — existing + # callers that don't pass it get the wider (URL-only) delete. + # New callers should pass it so two domains sharing a path + # don't over-delete each other's chunks. async with acquire_with_retry(self._pool) as conn: - result = await conn.execute("DELETE FROM chunks WHERE url = $1", url) + if domain is None: + result = await conn.execute("DELETE FROM chunks WHERE url = $1", url) + else: + result = await conn.execute( + "DELETE FROM chunks WHERE domain = $1 AND url = $2", + domain, + url, + ) count = int(result.split()[-1]) if result else 0 return count diff --git a/services/crawler/app/services/pg_website_store.py b/services/crawler/app/services/pg_website_store.py index 75ee0e8ded..8a675ad6e3 100644 --- a/services/crawler/app/services/pg_website_store.py +++ b/services/crawler/app/services/pg_website_store.py @@ -352,8 +352,21 @@ async def begin_delete(self, domain: str, org_slug: str) -> dict: domain, org_slug, ) - # asyncpg returns "DELETE N" as the tag; "DELETE 0" means no row matched. - removed_membership = deleted != "DELETE 0" + # asyncpg returns "DELETE N" as the documented command tag. + # Parse the integer rather than comparing the literal string + # so a future tag-format change (e.g. 
extra whitespace, OID + # column on older Postgres) doesn't silently flip the flag. + try: + removed_membership = int(deleted.rsplit(" ", 1)[-1]) > 0 + except (ValueError, AttributeError): + # Defensive — should be unreachable given asyncpg's + # contract — but failing loud is better than silently + # mis-classifying. + logger.warning( + "[begin_delete] unexpected command tag from asyncpg: %r", + deleted, + ) + removed_membership = False remaining = await conn.fetchval( "SELECT COUNT(*) FROM website_org_memberships WHERE domain = $1", domain, diff --git a/services/crawler/app/services/vision/cache.py b/services/crawler/app/services/vision/cache.py index 3c1cecd0d8..40425169b7 100644 --- a/services/crawler/app/services/vision/cache.py +++ b/services/crawler/app/services/vision/cache.py @@ -5,9 +5,12 @@ content multiple times. Cache strategy: -- Uses SHA-256 hash as cache key (image bytes for vision, text for LLM) +- Uses SHA-256 hash + active org slug as cache key - In-memory LRU cache for fast access (O(1) operations via OrderedDict) - Separate caches for OCR, image description, and LLM processing results +- Cache entries are scoped per org: two orgs hitting the same input do + NOT share cached output (different providers/prompts could yield + different results, and the result text itself may be sensitive). """ import hashlib @@ -15,6 +18,8 @@ from loguru import logger +from app.org_context import get_active_org + CACHE_SIZE = 5000 @@ -28,6 +33,17 @@ def compute_text_hash(text: str) -> str: return hashlib.sha256(text.encode()).hexdigest() +def _scoped_key(content_hash: str) -> str: + """Prepend active org slug to a content hash so cache entries do not + leak between orgs. + + The org slug is required for any cache lookup; if it cannot be + resolved (caller forgot to set the ContextVar) `get_active_org` + raises and the caller never gets a cross-org hit by accident. 
+ """ + return f"{get_active_org()}:{content_hash}" + + class LlmCache: """Cache for Vision API results. @@ -46,47 +62,61 @@ def _evict_if_needed(self, cache: OrderedDict[str, str]) -> None: def get_ocr(self, image_bytes: bytes) -> tuple[str | None, str]: """Get cached OCR result.""" - image_hash = compute_image_hash(image_bytes) + image_hash = _scoped_key(compute_image_hash(image_bytes)) if image_hash in self._ocr_cache: self._ocr_cache.move_to_end(image_hash) - logger.debug(f"Cache HIT (OCR): {image_hash[:16]}...") + logger.debug(f"Cache HIT (OCR): {image_hash[:24]}...") return self._ocr_cache[image_hash], image_hash return None, image_hash def set_ocr(self, image_hash: str, result: str) -> None: - """Store OCR result in cache.""" + """Store OCR result in cache. + + `image_hash` must be the value returned by `get_ocr` (already + org-scoped). + """ self._evict_if_needed(self._ocr_cache) self._ocr_cache[image_hash] = result self._ocr_cache.move_to_end(image_hash) def get_description(self, image_bytes: bytes) -> tuple[str | None, str]: """Get cached image description.""" - image_hash = compute_image_hash(image_bytes) + image_hash = _scoped_key(compute_image_hash(image_bytes)) if image_hash in self._description_cache: self._description_cache.move_to_end(image_hash) - logger.debug(f"Cache HIT (description): {image_hash[:16]}...") + logger.debug(f"Cache HIT (description): {image_hash[:24]}...") return self._description_cache[image_hash], image_hash return None, image_hash def set_description(self, image_hash: str, result: str) -> None: - """Store image description in cache.""" + """Store image description in cache. + + `image_hash` must be the value returned by `get_description`. 
+ """ self._evict_if_needed(self._description_cache) self._description_cache[image_hash] = result self._description_cache.move_to_end(image_hash) def get_llm(self, cache_key: str) -> str | None: - """Get cached LLM processing result.""" - if cache_key in self._llm_cache: - self._llm_cache.move_to_end(cache_key) - logger.debug(f"Cache HIT (LLM): {cache_key[:16]}...") - return self._llm_cache[cache_key] + """Get cached LLM processing result. + + `cache_key` is treated as caller-supplied content; the active + org slug is prepended internally so the same `(chunk, prompt, + model)` tuple from two orgs never collides. + """ + scoped = _scoped_key(cache_key) + if scoped in self._llm_cache: + self._llm_cache.move_to_end(scoped) + logger.debug(f"Cache HIT (LLM): {scoped[:24]}...") + return self._llm_cache[scoped] return None def set_llm(self, cache_key: str, result: str) -> None: """Store LLM processing result in cache.""" + scoped = _scoped_key(cache_key) self._evict_if_needed(self._llm_cache) - self._llm_cache[cache_key] = result - self._llm_cache.move_to_end(cache_key) + self._llm_cache[scoped] = result + self._llm_cache.move_to_end(scoped) def get_stats(self) -> dict[str, int]: """Get cache statistics.""" diff --git a/services/crawler/app/services/vision/openai_client.py b/services/crawler/app/services/vision/openai_client.py index e6c0cba81c..0f46a89f39 100644 --- a/services/crawler/app/services/vision/openai_client.py +++ b/services/crawler/app/services/vision/openai_client.py @@ -77,6 +77,36 @@ def _detect_mime_type(image_bytes: bytes) -> str: Be extremely concise - omit minor details.""" +_CONFIG_CHECK_INTERVAL = 15 # seconds + + +class _OrgVisionState: + __slots__ = ("client", "config", "last_check") + + def __init__( + self, + client: AsyncOpenAI, + config: tuple, + last_check: float, + ) -> None: + self.client = client + self.config = config + self.last_check = last_check + + +# Per-org cached AsyncOpenAI clients for vision config. 
Keyed by org slug +# so two orgs' requests never share `_client` / `_client_config` (which +# would route org B's traffic through org A's API key when within the +# TTL — the bug this refactor fixes). +_vision_states: dict[str, _OrgVisionState] = {} + +# Same shape for chat config (used by `process_pages_with_llm`). Two +# orgs may legitimately have different chat providers; without an +# explicit per-org cache, the prior code rebuilt the client on every +# call and leaked the httpx pool. +_chat_states: dict[str, _OrgVisionState] = {} + + async def _safe_close_client(client: AsyncOpenAI) -> None: """Close an old client after a grace period for in-flight requests.""" await asyncio.sleep(30) @@ -86,50 +116,93 @@ async def _safe_close_client(client: AsyncOpenAI) -> None: logger.opt(exception=True).warning("Failed to close old vision client") -class VisionClient: - """Async client for OpenAI Vision API calls.""" - - _CONFIG_CHECK_INTERVAL = 15 # seconds - - def __init__(self) -> None: - self._client: AsyncOpenAI | None = None - self._client_config: tuple | None = None - self._last_config_check: float = 0 +def _get_or_build_client( + states: dict[str, _OrgVisionState], + org_slug: str, + config_getter, + *, + timeout: float, + label: str, +) -> AsyncOpenAI: + """Look up or build the per-org AsyncOpenAI client. + + Mirrors `embedding_service.get_embedding_service` so behavior is + consistent across crawler services: + - Within TTL: return cached client without re-reading config. + - Config read fails: keep the existing client; never silently + downgrade to an empty key. + - Config changed: build a new client, schedule the old one to + close after a grace period so in-flight calls finish. 
+ """ + state = states.get(org_slug) + now = time.monotonic() + if state is not None and (now - state.last_check) < _CONFIG_CHECK_INTERVAL: + return state.client - def _get_client(self) -> AsyncOpenAI: - """Get or create the OpenAI client, rebuilding if config changed.""" - now = time.monotonic() - if self._client is not None and (now - self._last_config_check) < self._CONFIG_CHECK_INTERVAL: - return self._client + try: + config = config_getter(org_slug) # (base_url, api_key, model) + except (ValueError, OSError): + if state is not None: + logger.opt(exception=True).warning( + "Config read failed for org '{}', keeping current {} client", + org_slug, + label, + ) + state.last_check = now + return state.client + raise + + if state is not None and config == state.config: + state.last_check = now + return state.client + + base_url, api_key, model = config + + # Never downgrade to empty key + if not api_key and state is not None: + logger.warning( + "Skipping {} reload for org '{}': new config has empty API key", + label, + org_slug, + ) + state.last_check = now + return state.client + + old_client = state.client if state is not None else None + new_client = AsyncOpenAI(api_key=api_key, base_url=base_url, timeout=timeout) + states[org_slug] = _OrgVisionState( + client=new_client, + config=config, + last_check=now, + ) - self._last_config_check = now - try: - config = settings.get_vision_config(get_active_org()) # (base_url, api_key, model) - except (ValueError, OSError): - if self._client is not None: - logger.opt(exception=True).warning("Config read failed, keeping current vision client") - return self._client - raise + if old_client is not None: + logger.info("{} rebuilt for org '{}': model={}", label, org_slug, model) + with contextlib.suppress(RuntimeError): + asyncio.get_running_loop().create_task(_safe_close_client(old_client)) + else: + logger.info("{} created for org '{}': model={}", label, org_slug, model) - if config == self._client_config and self._client is 
not None: - return self._client + return new_client - base_url, api_key, _model = config - # Never downgrade to empty key - if not api_key and self._client is not None: - logger.warning("Skipping vision client reload: new config has empty API key") - return self._client +class VisionClient: + """Async client for OpenAI Vision API calls. - old = self._client - self._client = AsyncOpenAI(api_key=api_key, base_url=base_url, timeout=120.0) - self._client_config = config + Stateless wrapper: per-org AsyncOpenAI instances live in the + module-level `_vision_states` dict, looked up on every call via + `get_active_org()`. This prevents the previous singleton from + handing org A's client to org B's request inside the TTL window. + """ - if old is not None: - logger.info("Vision client rebuilt: model={}", _model) - with contextlib.suppress(RuntimeError): - asyncio.get_running_loop().create_task(_safe_close_client(old)) - return self._client + def _get_client(self) -> AsyncOpenAI: + return _get_or_build_client( + _vision_states, + get_active_org(), + settings.get_vision_config, + timeout=120.0, + label="vision client", + ) async def ocr_image( self, @@ -370,13 +443,18 @@ async def process_pages_with_llm( logger.info(f"LLM processing: {total_chars} chars total, chunking at {max_chars_per_chunk} chars") - base_url, api_key, chat_model = settings.get_chat_config(get_active_org()) - client = AsyncOpenAI( - api_key=api_key, - base_url=base_url, + org_slug = get_active_org() + client = _get_or_build_client( + _chat_states, + org_slug, + settings.get_chat_config, timeout=300.0, + label="chat client", ) - resolved_model = model or chat_model + # `resolved_model` is read from the freshly-cached config to ensure it + # matches the client we just got back from the per-org cache. 
+ cached_chat_model = _chat_states[org_slug].config[2] + resolved_model = model or cached_chat_model semaphore = asyncio.Semaphore(max_concurrent) chunks = _chunk_by_chars(full_text, max_chars_per_chunk) diff --git a/services/crawler/tests/conftest.py b/services/crawler/tests/conftest.py index 55be3f34c3..4e700ec368 100644 --- a/services/crawler/tests/conftest.py +++ b/services/crawler/tests/conftest.py @@ -26,4 +26,22 @@ def _bind_test_active_org() -> Iterator[None]: _active_org.reset(token) +@pytest.fixture(autouse=True) +def _reset_vision_state() -> Iterator[None]: + """Clear per-org AsyncOpenAI caches in vision/openai_client so a + mock patched in test A does not get reused by test B.""" + from app.services.vision.cache import llm_cache + from app.services.vision.openai_client import _chat_states, _vision_states + + _chat_states.clear() + _vision_states.clear() + llm_cache.clear() + try: + yield + finally: + _chat_states.clear() + _vision_states.clear() + llm_cache.clear() + + __all__ = ["_bind_test_active_org", "set_active_org"] diff --git a/services/crawler/tests/test_vision_isolation.py b/services/crawler/tests/test_vision_isolation.py new file mode 100644 index 0000000000..36945bc3cc --- /dev/null +++ b/services/crawler/tests/test_vision_isolation.py @@ -0,0 +1,174 @@ +"""Cross-org isolation for the vision pipeline. + +Two regression suites: + +1. `VisionClient._get_client` (and the chat-config variant used by + `process_pages_with_llm`) must NOT reuse another org's + `AsyncOpenAI` instance. Earlier code held a single module-level + client + config tuple, so within a 15s TTL org B's request would + reuse org A's API key + base_url. + +2. `llm_cache` (OCR / description / LLM) must NOT serve org A's + cached output to org B. Earlier code keyed the cache by + `sha256(content)` only. + +These tests bypass the autouse `test-org` binding via `set_active_org` +to simulate two distinct orgs landing on the same shared crawler +process. 
+""" + +from __future__ import annotations + +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + +from app.org_context import set_active_org +from app.services.vision.cache import llm_cache +from app.services.vision.openai_client import ( + VisionClient, + _chat_states, + _vision_states, + process_pages_with_llm, +) + + +class TestVisionClientPerOrg: + @patch("app.services.vision.openai_client.settings") + @patch("app.services.vision.openai_client.AsyncOpenAI") + def test_two_orgs_get_separate_clients(self, mock_openai_cls: MagicMock, mock_settings: MagicMock) -> None: + # Each org sees its own provider config. + configs = { + "org-a": ("https://a.example", "key-a", "model-a"), + "org-b": ("https://b.example", "key-b", "model-b"), + } + mock_settings.get_vision_config.side_effect = lambda slug: configs[slug] + + # Two distinct AsyncOpenAI instances on the two constructor calls. + client_a = MagicMock(name="client_a") + client_b = MagicMock(name="client_b") + mock_openai_cls.side_effect = [client_a, client_b] + + client = VisionClient() + + set_active_org("org-a") + first = client._get_client() + set_active_org("org-b") + second = client._get_client() + + assert first is client_a + assert second is client_b + assert _vision_states["org-a"].client is client_a + assert _vision_states["org-b"].client is client_b + + # The constructor was called with each org's own api_key — proving + # the singleton-reuse leak is gone. 
+ kwargs_seen = [call.kwargs for call in mock_openai_cls.call_args_list] + assert {kw["api_key"] for kw in kwargs_seen} == {"key-a", "key-b"} + + @patch("app.services.vision.openai_client.settings") + @patch("app.services.vision.openai_client.AsyncOpenAI") + def test_org_a_request_does_not_get_org_b_client_within_ttl( + self, mock_openai_cls: MagicMock, mock_settings: MagicMock + ) -> None: + configs = { + "org-a": ("https://a.example", "key-a", "model-a"), + "org-b": ("https://b.example", "key-b", "model-b"), + } + mock_settings.get_vision_config.side_effect = lambda slug: configs[slug] + + client_a = MagicMock(name="client_a") + client_b = MagicMock(name="client_b") + mock_openai_cls.side_effect = [client_a, client_b] + + client = VisionClient() + set_active_org("org-a") + client._get_client() + # Org B in the same process, even right after org A — must build + # its own client, not reuse the cached one. + set_active_org("org-b") + result = client._get_client() + assert result is client_b + + +class TestProcessPagesWithLlmPerOrg: + @pytest.mark.asyncio + @patch("app.services.vision.openai_client.settings") + @patch("app.services.vision.openai_client.AsyncOpenAI") + async def test_two_orgs_each_build_their_own_chat_client( + self, mock_openai_cls: MagicMock, mock_settings: MagicMock + ) -> None: + configs = { + "org-a": ("https://a.example", "key-a", "chat-a"), + "org-b": ("https://b.example", "key-b", "chat-b"), + } + mock_settings.get_chat_config.side_effect = lambda slug: configs[slug] + + # Two distinct AsyncOpenAI instances; each one returns a tiny + # canned chat completion. 
+ def make_client(label: str) -> MagicMock: + client = AsyncMock(name=f"client_{label}") + response = MagicMock() + response.choices = [MagicMock()] + response.choices[0].message.content = f"out-{label}" + response.usage = None + client.chat.completions.create = AsyncMock(return_value=response) + return client + + client_a = make_client("a") + client_b = make_client("b") + mock_openai_cls.side_effect = [client_a, client_b] + + set_active_org("org-a") + out_a = await process_pages_with_llm(["hello"], "extract") + set_active_org("org-b") + out_b = await process_pages_with_llm(["hello"], "extract") + + assert out_a == ["out-a"] + assert out_b == ["out-b"] + # Each org built its own AsyncOpenAI with its own api_key. + api_keys = [call.kwargs["api_key"] for call in mock_openai_cls.call_args_list] + assert set(api_keys) == {"key-a", "key-b"} + assert _chat_states["org-a"].client is client_a + assert _chat_states["org-b"].client is client_b + + +class TestLlmCacheOrgIsolation: + def test_ocr_cache_miss_across_orgs(self) -> None: + image = b"PNG-like-bytes" + set_active_org("org-a") + _, hash_a = llm_cache.get_ocr(image) + llm_cache.set_ocr(hash_a, "text from A") + assert llm_cache.get_ocr(image)[0] == "text from A" + + # Same image bytes, different org: must miss. 
+ set_active_org("org-b") + cached, hash_b = llm_cache.get_ocr(image) + assert cached is None + assert hash_a != hash_b + + def test_description_cache_miss_across_orgs(self) -> None: + image = b"another-image" + set_active_org("org-a") + _, hash_a = llm_cache.get_description(image) + llm_cache.set_description(hash_a, "desc from A") + assert llm_cache.get_description(image)[0] == "desc from A" + + set_active_org("org-b") + cached, _ = llm_cache.get_description(image) + assert cached is None + + def test_llm_cache_miss_across_orgs(self) -> None: + set_active_org("org-a") + llm_cache.set_llm("shared-key", "result A") + assert llm_cache.get_llm("shared-key") == "result A" + + set_active_org("org-b") + assert llm_cache.get_llm("shared-key") is None + llm_cache.set_llm("shared-key", "result B") + # Org B's value, not org A's. + assert llm_cache.get_llm("shared-key") == "result B" + + # And org A still sees its own value. + set_active_org("org-a") + assert llm_cache.get_llm("shared-key") == "result A" diff --git a/services/platform/app/features/organization/components/organization-form.tsx b/services/platform/app/features/organization/components/organization-form.tsx index 559c2a1a7b..0dc1d23938 100644 --- a/services/platform/app/features/organization/components/organization-form.tsx +++ b/services/platform/app/features/organization/components/organization-form.tsx @@ -26,6 +26,25 @@ import { useInitializeDefaultWorkflows } from '../hooks/actions'; type FormData = { name: string }; +/** + * Derive the on-disk slug from a free-form display name. + * + * Three call sites used to inline the same chain; the helper keeps the + * derivation rule in one place so the live preview, the Zod refine, + * and the submit payload can never drift. + * + * Must produce a slug that matches + * `services/platform/lib/shared/constants/org-slug.ts` ORG_SLUG_REGEX — + * see `assertValidOrgSlug`. 
+ */ +function deriveOrgSlug(name: string): string { + return name + .trim() + .toLowerCase() + .replace(/[^a-z0-9]+/g, '-') + .replace(/^-+|-+$/g, ''); +} + export function OrganizationForm() { const navigate = useNavigate(); const queryClient = useQueryClient(); @@ -36,11 +55,11 @@ export function OrganizationForm() { const { t } = useT('settings'); const { t: tCommon } = useT('common'); - // slug is derived from name via lowercasing + replacing non-alphanumerics - // with hyphens; it's used as a filesystem path component (/examples/{slug}/) - // and must match file_io.ts ORG_SLUG_REGEX: /^[a-z0-9][a-z0-9_-]*$/. - // So the name must contain at least one ASCII letter or digit; pure-CJK or - // pure-symbol names would produce an empty slug and fail at creation. + // slug is derived from name via `deriveOrgSlug`; it's used as a + // filesystem path component (`$TALE_CONFIG_DIR/<slug>/...`) and + // must match the canonical ORG_SLUG_REGEX. Pure-CJK / pure-symbol + // names would produce an empty slug and fail at creation; the + // regex check below rejects them up front. const formSchema = useMemo( () => z.object({ @@ -49,19 +68,11 @@ export function OrganizationForm() { .min(1, t('organization.companyNameRequired')) .regex( /^[A-Za-z0-9][A-Za-z0-9 _-]*$/, - 'Use letters, digits, spaces, hyphens, and underscores only, starting with a letter or digit.', + t('organization.companyNameCharacterError'), ) - .refine( - (name) => { - const derived = name - .trim() - .toLowerCase() - .replace(/[^a-z0-9]+/g, '-') - .replace(/^-+|-+$/g, ''); - return !isReservedOrgSlug(derived); - }, - { message: 'This name is reserved by the platform.'
}, - ), + .refine((name) => !isReservedOrgSlug(deriveOrgSlug(name)), { + message: t('organization.nameReserved'), + }), }), [t], ); @@ -75,11 +86,7 @@ export function OrganizationForm() { }); const nameValue = form.watch('name'); - const slugPreview = nameValue - .trim() - .toLowerCase() - .replace(/[^a-z0-9]+/g, '-') - .replace(/^-+|-+$/g, ''); + const slugPreview = deriveOrgSlug(nameValue); const { mutateAsync: initializeDefaultWorkflows } = useInitializeDefaultWorkflows(); @@ -90,11 +97,7 @@ export function OrganizationForm() { } try { - const slug = data.name - .trim() - .toLowerCase() - .replace(/[^a-z0-9]+/g, '-') - .replace(/^-+|-+$/g, ''); + const slug = deriveOrgSlug(data.name); const result = await authClient.organization.create({ name: data.name.trim(), @@ -165,7 +168,9 @@ export function OrganizationForm() { disabled={form.formState.isSubmitting} errorMessage={form.formState.errors.name?.message} description={ - slugPreview ? `Identifier: ${slugPreview}` : undefined + slugPreview + ? t('organization.identifierPreview', { slug: slugPreview }) + : undefined } />