diff --git a/.changeset/bundle-skills-single-pass.md b/.changeset/bundle-skills-single-pass.md
new file mode 100644
index 00000000000..30b2c428b22
--- /dev/null
+++ b/.changeset/bundle-skills-single-pass.md
@@ -0,0 +1,5 @@
+---
+"trigger.dev": patch
+---
+
+Fix `chat.agent` skills silently missing in `trigger dev` for projects whose task files read `process.env` at module top level (e.g. a third-party SDK client initialized at import). Skill folders now bundle into `.trigger/skills/` reliably regardless of which env vars are set when the CLI launches.
diff --git a/.changeset/chat-start-session-action-typed-client-data.md b/.changeset/chat-start-session-action-typed-client-data.md
new file mode 100644
index 00000000000..acd75037caf
--- /dev/null
+++ b/.changeset/chat-start-session-action-typed-client-data.md
@@ -0,0 +1,22 @@
+---
+"@trigger.dev/sdk": patch
+---
+
+Type `chat.createStartSessionAction` against your chat agent so `clientData` is typed end-to-end on the first turn:
+
+```ts
+import { chat } from "@trigger.dev/sdk/ai";
+import type { myChat } from "@/trigger/chat";
+
+export const startChatSession = chat.createStartSessionAction<typeof myChat>("my-chat");
+
+// In the browser, threaded from the transport's typed startSession callback:
+const transport = useTriggerChatTransport<typeof myChat>({
+  task: "my-chat",
+  startSession: ({ chatId, clientData }) =>
+    startChatSession({ chatId, clientData }),
+  // ...
+});
+```
+
+`ChatStartSessionParams` gains a typed `clientData` field, which is folded into the first run's `payload.metadata` so that `onPreload` / `onChatStart` see the same shape that per-turn `metadata` carries via the transport. The opaque session-level `metadata` field is unchanged.
diff --git a/.changeset/mollifier-redis-worker-primitives.md b/.changeset/mollifier-redis-worker-primitives.md
new file mode 100644
index 00000000000..a209e530c24
--- /dev/null
+++ b/.changeset/mollifier-redis-worker-primitives.md
@@ -0,0 +1,9 @@
+---
+"@trigger.dev/redis-worker": patch
+---
+
+Add MollifierBuffer and MollifierDrainer primitives for trigger burst smoothing.
+
+MollifierBuffer (`accept`, `pop`, `ack`, `requeue`, `fail`, `evaluateTrip`) is a per-env FIFO over Redis with atomic Lua transitions for status tracking. `evaluateTrip` is a sliding-window trip evaluator that the webapp gate uses to detect per-env trigger bursts.
+
+MollifierDrainer pops entries through a polling loop with a user-supplied handler. The loop survives transient Redis errors via capped exponential backoff (up to 5s), and per-env pop failures don't poison the rest of the batch — one env's blip is logged and counted as failed for that tick. Rotation is two-level: orgs at the top, envs within each org. The buffer maintains `mollifier:orgs` and `mollifier:org-envs:${orgId}` atomically with per-env queues, so the drainer walks orgs → envs directly without an in-memory cache. The `maxOrgsPerTick` option (default 500) caps how many orgs are scheduled per tick; for each picked org, one env is popped (rotating round-robin within the org). An org with N envs gets the same per-tick scheduling slot as an org with 1 env, so tenant-level drainage throughput is determined by org count rather than env count.
diff --git a/.changeset/pre.json b/.changeset/pre.json
index a5d1b75f8c7..e4a34aff561 100644
--- a/.changeset/pre.json
+++ b/.changeset/pre.json
@@ -18,5 +18,27 @@
     "@trigger.dev/schema-to-json": "4.4.6",
     "@trigger.dev/sdk": "4.4.6"
   },
-  "changesets": []
+  "changesets": [
+    "agent-skills",
+    "ai-prompts",
+    "ai-tool-helpers",
+    "bundle-skills-single-pass",
+    "cap-idempotency-key-length",
+    "chat-agent-on-boot-hook",
+    "chat-agent",
+    "chat-history-read-primitives",
+    "chat-session-attributes",
+    "chat-start-session-action-typed-client-data",
+    "cli-deploy-skip-rewrite-timestamp",
+    "locals-key-dual-package-fix",
+    "mcp-agent-chat-sessions",
+    "mcp-list-runs-region",
+    "mock-chat-agent-test-harness",
+    "mollifier-redis-worker-primitives",
+    "plugin-auth-path",
+    "resource-catalog-runtime-registration",
+    "retry-sigsegv",
+    "runs-list-region-filter",
+    "sessions-primitive"
+  ]
 }
diff --git a/.changeset/resource-catalog-runtime-registration.md b/.changeset/resource-catalog-runtime-registration.md
new file mode 100644
index 00000000000..5046f09e1f1
--- /dev/null
+++ b/.changeset/resource-catalog-runtime-registration.md
@@ -0,0 +1,6 @@
+---
+"@trigger.dev/core": patch
+"trigger.dev": patch
+---
+
+Fix `COULD_NOT_FIND_EXECUTOR` when a task's definition is loaded via `await import(...)` from inside another task's `run()`. The runtime workers now register such tasks with a sentinel file context, and the catalog logs a one-time warning per task id.
diff --git a/.github/workflows/changesets-pr.yml b/.github/workflows/changesets-pr.yml
index 01c303a95ca..66b7f24c656 100644
--- a/.github/workflows/changesets-pr.yml
+++ b/.github/workflows/changesets-pr.yml
@@ -22,6 +22,7 @@ jobs:
     permissions:
       contents: write
       pull-requests: write
+      checks: write
     if: github.repository == 'triggerdotdev/trigger.dev'
     steps:
       - name: Checkout
@@ -72,3 +73,27 @@ jobs:
                 -f body="$ENHANCED_BODY"
             fi
           fi
+
+      # The changesets bot authors release PRs with GITHUB_TOKEN, which by GitHub
+      # design cannot trigger downstream workflows. That leaves the required
+      # "All PR Checks" status permanently Expected and the PR unmergeable.
+      # The release PR only bumps package.json + lockfile + CHANGELOGs from
+      # changesets already on main, so we self-report the required check as
+      # success. If a human ever pushes to changeset-release/main, the real
+      # pr_checks.yml fires and its result overwrites this one (last write wins
+      # for the same context on the same SHA).
+      - name: Self-report "All PR Checks" success on release PR
+        if: steps.changesets.outputs.published != 'true'
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          PR_NUMBER=$(gh pr list --head changeset-release/main --json number --jq '.[0].number')
+          if [ -z "$PR_NUMBER" ]; then exit 0; fi
+          HEAD_SHA=$(gh pr view "$PR_NUMBER" --json headRefOid --jq '.headRefOid')
+          gh api -X POST repos/${{ github.repository }}/check-runs \
+            -f name="All PR Checks" \
+            -f head_sha="$HEAD_SHA" \
+            -f status=completed \
+            -f conclusion=success \
+            -f 'output[title]=Auto-pass for changeset release PR' \
+            -f 'output[summary]=Required check auto-satisfied for changeset-release/main PRs. Full CI ran on the underlying commits before they landed on main.'
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index d352752fb0d..8ab9a4e3207 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -302,6 +302,13 @@ jobs:
       - name: Generate Prisma Client
         run: pnpm run generate
 
+      - name: Exit changeset pre mode (if active)
+        run: |
+          if [ -f .changeset/pre.json ]; then
+            echo "Repo is in changeset pre mode; exiting so snapshot release can run"
+            pnpm exec changeset pre exit
+          fi
+
       - name: Snapshot version
         run: pnpm exec changeset version --snapshot "${GITHUB_EVENT_INPUTS_PRERELEASE_TAG}"
         env:
diff --git a/.github/workflows/vouch-check-pr.yml b/.github/workflows/vouch-check-pr.yml
index 29090296bb0..d854b1e0ce6 100644
--- a/.github/workflows/vouch-check-pr.yml
+++ b/.github/workflows/vouch-check-pr.yml
@@ -32,7 +32,8 @@ jobs:
       github.event.pull_request.author_association != 'OWNER' &&
       github.event.pull_request.author_association != 'COLLABORATOR' &&
       github.event.pull_request.user.login != 'devin-ai-integration[bot]' &&
-      github.event.pull_request.user.login != 'dependabot[bot]'
+      github.event.pull_request.user.login != 'dependabot[bot]' &&
+      github.event.pull_request.user.login != 'github-actions[bot]'
     runs-on: ubuntu-latest
     steps:
       - name: Close non-draft PR
diff --git a/.server-changes/admin-tabs-preserve-search.md b/.server-changes/admin-tabs-preserve-search.md
deleted file mode 100644
index 7caaa642626..00000000000
--- a/.server-changes/admin-tabs-preserve-search.md
+++ /dev/null
@@ -1,6 +0,0 @@
----
-area: webapp
-type: improvement
----
-
-Preserve search string when switching between the Users and Organizations tabs in the admin dashboard.
diff --git a/.server-changes/agent-playground.md b/.server-changes/agent-playground.md
deleted file mode 100644
index f2e0852add7..00000000000
--- a/.server-changes/agent-playground.md
+++ /dev/null
@@ -1,6 +0,0 @@
----
-area: webapp
-type: feature
----
-
-New Agent Playground for testing `chat.agent` tasks interactively — multi-turn chat with tool-call visualization, a side panel for payload / schema / clientData configuration, and trigger-config controls for `maxDuration`, version pin, and region.
diff --git a/.server-changes/agents-dashboard.md b/.server-changes/agents-dashboard.md
deleted file mode 100644
index 1aca65320bb..00000000000
--- a/.server-changes/agents-dashboard.md
+++ /dev/null
@@ -1,6 +0,0 @@
----
-area: webapp
-type: feature
----
-
-New Agents page in the dashboard listing every `chat.agent` task in the environment with active/inactive status and run counts, plus fuzzy search for navigating large agent catalogs.
diff --git a/.server-changes/ai-span-inspector.md b/.server-changes/ai-span-inspector.md
deleted file mode 100644
index 41f7a5dea90..00000000000
--- a/.server-changes/ai-span-inspector.md
+++ /dev/null
@@ -1,6 +0,0 @@
----
-area: webapp
-type: feature
----
-
-AI generation spans in the run trace get a dedicated inspector showing model, provider, token counts, cost, token speed, finish reason, service tier, tool count, and a link to the prompt version that produced the generation.
diff --git a/.server-changes/fix-ck-queue-length-cap-and-dashboard.md b/.server-changes/fix-ck-queue-length-cap-and-dashboard.md
deleted file mode 100644
index 9b225b29d92..00000000000
--- a/.server-changes/fix-ck-queue-length-cap-and-dashboard.md
+++ /dev/null
@@ -1,6 +0,0 @@
----
-area: webapp
-type: fix
----
-
-Per-queue length limits and the dashboard's "Queued | Running" columns now reflect the true total across all concurrency-key variants. Previously both read 0 for any queue that used concurrency keys, allowing the per-queue cap to be bypassed.
diff --git a/.server-changes/fix-worker-deployment-version-race.md b/.server-changes/fix-worker-deployment-version-race.md
deleted file mode 100644
index b0ad7de9e89..00000000000
--- a/.server-changes/fix-worker-deployment-version-race.md
+++ /dev/null
@@ -1,6 +0,0 @@
----
-area: webapp
-type: fix
----
-
-Retry on unique-constraint collisions when assigning the next worker deployment version so concurrent deploys to the same environment no longer fail with P2002.
diff --git a/.server-changes/google-auth-conflict-warn.md b/.server-changes/google-auth-conflict-warn.md
deleted file mode 100644
index 4e6b630ab21..00000000000
--- a/.server-changes/google-auth-conflict-warn.md
+++ /dev/null
@@ -1,6 +0,0 @@
----
-area: webapp
-type: improvement
----
-
-Downgrade the "Google auth conflict" log from `error` to `warn`. This branch handles an expected user-state mismatch (Google ID belongs to one user, email is on another) by returning the existing auth user — there's no exception to chase, so it shouldn't page on the Sentry error channel.
diff --git a/.server-changes/models-registry.md b/.server-changes/models-registry.md
deleted file mode 100644
index ee87f625868..00000000000
--- a/.server-changes/models-registry.md
+++ /dev/null
@@ -1,6 +0,0 @@
----
-area: webapp
-type: feature
----
-
-New Models page in the dashboard: a provider-grouped catalog of LLMs (OpenAI, Anthropic, Google, etc.) with pricing, capabilities, and cross-tenant usage metrics, plus per-model detail pages with token / cost / latency charts and a side-by-side compare panel.
diff --git a/.server-changes/plugin-auth-path.md b/.server-changes/plugin-auth-path.md
deleted file mode 100644
index c8269125ffc..00000000000
--- a/.server-changes/plugin-auth-path.md
+++ /dev/null
@@ -1,6 +0,0 @@
----
-area: webapp
-type: improvement
----
-
-Webapp now supports a plugin system. Initially consolidates authentication and authorization paths.
diff --git a/.server-changes/prompts-dashboard.md b/.server-changes/prompts-dashboard.md
deleted file mode 100644
index 10397b9da22..00000000000
--- a/.server-changes/prompts-dashboard.md
+++ /dev/null
@@ -1,6 +0,0 @@
----
-area: webapp
-type: feature
----
-
-New Prompts page in the dashboard: list view with per-prompt usage sparklines, detail view with the template alongside Generations / Metrics / Versions tabs, and a dashboard override UI for changing the template text or model without redeploying.
diff --git a/.server-changes/realtimestreams-dedupe.md b/.server-changes/realtimestreams-dedupe.md
deleted file mode 100644
index 69987f7b4b6..00000000000
--- a/.server-changes/realtimestreams-dedupe.md
+++ /dev/null
@@ -1,6 +0,0 @@
----
-area: webapp
-type: fix
----
-
-Dedupe the `realtimeStreams` array push on `PUT /realtime/v1/streams/:runId/:target/:streamId` so repeat stream-init calls for the same `(run, streamId)` skip the row UPDATE, mirroring the existing append handler.
diff --git a/.server-changes/run-agent-view.md b/.server-changes/run-agent-view.md
deleted file mode 100644
index 570351f89ed..00000000000
--- a/.server-changes/run-agent-view.md
+++ /dev/null
@@ -1,6 +0,0 @@
----
-area: webapp
-type: feature
----
-
-Run detail page gains an Agent view alongside the Trace view, rendering the agent's `UIMessage` conversation in real time from the backing Session for any run whose `taskKind` is `AGENT`.
diff --git a/.server-changes/runs-task-source-filter.md b/.server-changes/runs-task-source-filter.md
deleted file mode 100644
index 70c8e2ff895..00000000000
--- a/.server-changes/runs-task-source-filter.md
+++ /dev/null
@@ -1,6 +0,0 @@
----
-area: webapp
-type: feature
----
-
-Task source filter on the Runs list — slice runs by Standard, Scheduled, or Agent so agent runs can be separated from mixed workloads at a glance.
diff --git a/.server-changes/sessions-dashboard.md b/.server-changes/sessions-dashboard.md
deleted file mode 100644
index 7adc299aec6..00000000000
--- a/.server-changes/sessions-dashboard.md
+++ /dev/null
@@ -1,6 +0,0 @@
----
-area: webapp
-type: feature
----
-
-New Sessions page in the dashboard for inspecting `chat.agent` Session rows alongside their underlying runs, with filters by status, type, task identifier, and period, and a detail view that streams the live conversation from the backing Session's `.out` and `.in` channels.
diff --git a/.server-changes/streamdown-v2-upgrade.md b/.server-changes/streamdown-v2-upgrade.md
deleted file mode 100644
index 8a0b3f17af0..00000000000
--- a/.server-changes/streamdown-v2-upgrade.md
+++ /dev/null
@@ -1,6 +0,0 @@
----
-area: webapp
-type: improvement
----
-
-Upgrade streamdown from v1.4.0 to v2.5.0. Custom Shiki syntax highlighting theme matching our CodeMirror dark theme colors. Consolidate duplicated lazy StreamdownRenderer into a shared component.
diff --git a/.server-changes/task-metadata-cache.md b/.server-changes/task-metadata-cache.md
deleted file mode 100644
index a71bbdf347b..00000000000
--- a/.server-changes/task-metadata-cache.md
+++ /dev/null
@@ -1,6 +0,0 @@
----
-area: webapp
-type: improvement
----
-
-Cache task defaults in Redis so the trigger API skips per-request database lookups, restoring the fast trigger path when callers pass queue and TTL options.
diff --git a/.server-changes/webapp-sentry-fingerprint-p1001.md b/.server-changes/webapp-sentry-fingerprint-p1001.md
deleted file mode 100644
index dd2f1ecc55d..00000000000
--- a/.server-changes/webapp-sentry-fingerprint-p1001.md
+++ /dev/null
@@ -1,6 +0,0 @@
----
-area: webapp
-type: improvement
----
-
-Group Prisma P1001 ("Can't reach database server") errors into a single Sentry issue via a `beforeSend` fingerprint rule, so DB outages no longer fan out into hundreds of distinct issues that bury other alerts. Adds a small extensible rule table for future collapsing rules.
diff --git a/apps/webapp/app/components/integrations/VercelBuildSettings.tsx b/apps/webapp/app/components/integrations/VercelBuildSettings.tsx
index 92d0d0a9992..d8e9f3fe3f8 100644
--- a/apps/webapp/app/components/integrations/VercelBuildSettings.tsx
+++ b/apps/webapp/app/components/integrations/VercelBuildSettings.tsx
@@ -23,6 +23,12 @@ type BuildSettingsFieldsProps = {
   disabledEnvSlugs?: Partial<Record<EnvSlug, string>>;
   autoPromote?: boolean;
   onAutoPromoteChange?: (value: boolean) => void;
+  /** The currently pinned TRIGGER_VERSION on Vercel production, if any. Shown under the
+   * Atomic deployments toggle so the user knows what version is set on Vercel right now. */
+  currentTriggerVersion?: string | null;
+  /** True when the Vercel lookup for TRIGGER_VERSION failed. We show this so the user knows
+   * the pin status is unknown — distinct from "not set". */
+  currentTriggerVersionFetchFailed?: boolean;
   /** Hide the section-level master toggles for "Pull env vars" and "Discover new env vars". */
   hideSectionToggles?: boolean;
 };
@@ -39,6 +45,8 @@ export function BuildSettingsFields({
   disabledEnvSlugs,
   autoPromote,
   onAutoPromoteChange,
+  currentTriggerVersion,
+  currentTriggerVersionFetchFailed,
   hideSectionToggles,
 }: BuildSettingsFieldsProps) {
   const isSlugDisabled = (slug: EnvSlug) => !!disabledEnvSlugs?.[slug];
@@ -208,6 +216,20 @@ export function BuildSettingsFields({
           </TextLink>
           .
         </Hint>
+        {currentTriggerVersion && (
+          <Hint className="pr-6">
+            Currently pinned to{" "}
+            <span className="font-mono text-text-bright">{currentTriggerVersion}</span> in Vercel
+            production.
+          </Hint>
+        )}
+        {!currentTriggerVersion && currentTriggerVersionFetchFailed && (
+          <Hint className="pr-6 text-warning">
+            Couldn't read{" "}
+            <span className="font-mono text-text-bright">TRIGGER_VERSION</span> from Vercel —
+            check the Vercel dashboard to confirm the production pin.
+          </Hint>
+        )}
       </div>
 
       {/* Auto promotion — only visible when atomic deployments are on */}
diff --git a/apps/webapp/app/entry.server.tsx b/apps/webapp/app/entry.server.tsx
index 436ec288211..11c3274e865 100644
--- a/apps/webapp/app/entry.server.tsx
+++ b/apps/webapp/app/entry.server.tsx
@@ -6,6 +6,7 @@ import isbot from "isbot";
 import { renderToPipeableStream } from "react-dom/server";
 import { PassThrough } from "stream";
 import * as Worker from "~/services/worker.server";
+import { initMollifierDrainerWorker } from "~/v3/mollifierDrainerWorker.server";
 import { bootstrap } from "./bootstrap";
 import { LocaleContextProvider } from "./components/primitives/LocaleProvider";
 import {
@@ -247,6 +248,8 @@ Worker.init().catch((error) => {
   logError(error);
 });
 
+initMollifierDrainerWorker();
+
 bootstrap().catch((error) => {
   logError(error);
 });
diff --git a/apps/webapp/app/env.server.ts b/apps/webapp/app/env.server.ts
index 8eacb9634e1..6fb6c4ac283 100644
--- a/apps/webapp/app/env.server.ts
+++ b/apps/webapp/app/env.server.ts
@@ -1054,6 +1054,47 @@ const EnvironmentSchema = z
     COMMON_WORKER_REDIS_TLS_DISABLED: z.string().default(process.env.REDIS_TLS_DISABLED ?? "false"),
     COMMON_WORKER_REDIS_CLUSTER_MODE_ENABLED: z.string().default("0"),
 
+    TRIGGER_MOLLIFIER_ENABLED: z.string().default("0"),
+    // Separate switch for the drainer (consumer side) so it can be split
+    // off onto a dedicated worker service. Unset → inherits
+    // TRIGGER_MOLLIFIER_ENABLED, so single-container self-hosters don't have to
+    // flip two switches. In multi-replica deployments, set this to "0"
+    // explicitly on every replica except the one dedicated drainer
+    // service — otherwise every replica's polling loop races for the
+    // same buffer entries. `TRIGGER_MOLLIFIER_ENABLED` is still the master kill
+    // switch; setting this to "1" while `TRIGGER_MOLLIFIER_ENABLED` is "0" is a
+    // no-op because the gate-side singleton refuses to construct a
+    // buffer when the system is off.
+    TRIGGER_MOLLIFIER_DRAINER_ENABLED: z.string().default(process.env.TRIGGER_MOLLIFIER_ENABLED ?? "0"),
+    TRIGGER_MOLLIFIER_SHADOW_MODE: z.string().default("0"),
+    TRIGGER_MOLLIFIER_REDIS_HOST: z
+      .string()
+      .optional()
+      .transform((v) => v ?? process.env.REDIS_HOST),
+    TRIGGER_MOLLIFIER_REDIS_PORT: z.coerce
+      .number()
+      .optional()
+      .transform(
+        (v) => v ?? (process.env.REDIS_PORT ? parseInt(process.env.REDIS_PORT) : undefined),
+      ),
+    TRIGGER_MOLLIFIER_REDIS_USERNAME: z
+      .string()
+      .optional()
+      .transform((v) => v ?? process.env.REDIS_USERNAME),
+    TRIGGER_MOLLIFIER_REDIS_PASSWORD: z
+      .string()
+      .optional()
+      .transform((v) => v ?? process.env.REDIS_PASSWORD),
+    TRIGGER_MOLLIFIER_REDIS_TLS_DISABLED: z.string().default(process.env.REDIS_TLS_DISABLED ?? "false"),
+    TRIGGER_MOLLIFIER_TRIP_WINDOW_MS: z.coerce.number().int().positive().default(200),
+    TRIGGER_MOLLIFIER_TRIP_THRESHOLD: z.coerce.number().int().positive().default(100),
+    TRIGGER_MOLLIFIER_HOLD_MS: z.coerce.number().int().positive().default(500),
+    TRIGGER_MOLLIFIER_DRAIN_CONCURRENCY: z.coerce.number().int().positive().default(50),
+    TRIGGER_MOLLIFIER_ENTRY_TTL_S: z.coerce.number().int().positive().default(600),
+    TRIGGER_MOLLIFIER_DRAIN_MAX_ATTEMPTS: z.coerce.number().int().positive().default(3),
+    TRIGGER_MOLLIFIER_DRAIN_SHUTDOWN_TIMEOUT_MS: z.coerce.number().int().positive().default(30_000),
+    TRIGGER_MOLLIFIER_DRAIN_MAX_ORGS_PER_TICK: z.coerce.number().int().positive().default(500),
+
     BATCH_TRIGGER_PROCESS_JOB_VISIBILITY_TIMEOUT_MS: z.coerce
       .number()
       .int()
diff --git a/apps/webapp/app/models/vercelIntegration.server.ts b/apps/webapp/app/models/vercelIntegration.server.ts
index 82bedc6430f..9b553655671 100644
--- a/apps/webapp/app/models/vercelIntegration.server.ts
+++ b/apps/webapp/app/models/vercelIntegration.server.ts
@@ -960,7 +960,7 @@ export class VercelIntegrationRepository {
               key: "TRIGGER_SECRET_KEY",
               value: runtimeEnv.apiKey,
               target: vercelTarget,
-              type: "encrypted",
+              type: "sensitive",
               environmentType: runtimeEnv.type,
             });
           }
@@ -1061,7 +1061,7 @@ export class VercelIntegrationRepository {
           key: "TRIGGER_SECRET_KEY",
           value: params.apiKey,
           target: vercelTarget,
-          type: "encrypted",
+          type: "sensitive",
         });
 
         logger.info("Synced regenerated API key to Vercel", {
@@ -1115,28 +1115,26 @@ export class VercelIntegrationRepository {
             return (env as any).customEnvironmentIds?.includes(customEnvironmentId);
           });
 
+          // Always delete-then-create rather than editProjectEnv, because Vercel rejects
+          // in-place type changes (e.g. encrypted -> sensitive).
           if (existingEnv && existingEnv.id) {
-            await client.projects.editProjectEnv({
-              idOrName: vercelProjectId,
-              id: existingEnv.id,
-              ...(teamId && { teamId }),
-              requestBody: {
-                value,
-                type,
-              },
-            });
-          } else {
-            await client.projects.createProjectEnv({
+            await client.projects.batchRemoveProjectEnv({
               idOrName: vercelProjectId,
               ...(teamId && { teamId }),
-              requestBody: {
-                key,
-                value,
-                type,
-                customEnvironmentIds: [customEnvironmentId],
-              } as any,
+              requestBody: { ids: [existingEnv.id] },
             });
           }
+
+          await client.projects.createProjectEnv({
+            idOrName: vercelProjectId,
+            ...(teamId && { teamId }),
+            requestBody: {
+              key,
+              value,
+              type,
+              customEnvironmentIds: [customEnvironmentId],
+            } as any,
+          });
         })(),
         (error) => toVercelApiError(error)
       )
@@ -1709,29 +1707,27 @@ export class VercelIntegrationRepository {
       return target.length === envTargets.length && target.every((t) => envTargets.includes(t));
     });
 
+    // Always delete-then-create rather than editProjectEnv, because Vercel rejects
+    // in-place type changes (e.g. encrypted -> sensitive). Same approach used by
+    // syncApiKeysToVercel via removeAllVercelEnvVarsByKey.
     if (existingEnv && existingEnv.id) {
-      await client.projects.editProjectEnv({
-        idOrName: vercelProjectId,
-        id: existingEnv.id,
-        ...(teamId && { teamId }),
-        requestBody: {
-          value,
-          target: target as any,
-          type,
-        },
-      });
-    } else {
-      await client.projects.createProjectEnv({
+      await client.projects.batchRemoveProjectEnv({
         idOrName: vercelProjectId,
         ...(teamId && { teamId }),
-        requestBody: {
-          key,
-          value,
-          target: target as any,
-          type,
-        },
+        requestBody: { ids: [existingEnv.id] },
       });
     }
+
+    await client.projects.createProjectEnv({
+      idOrName: vercelProjectId,
+      ...(teamId && { teamId }),
+      requestBody: {
+        key,
+        value,
+        target: target as any,
+        type,
+      },
+    });
   }
 
   static getAutoAssignCustomDomains(
diff --git a/apps/webapp/app/presenters/v3/VercelSettingsPresenter.server.ts b/apps/webapp/app/presenters/v3/VercelSettingsPresenter.server.ts
index 4a57e3ec0ef..4fa08122adc 100644
--- a/apps/webapp/app/presenters/v3/VercelSettingsPresenter.server.ts
+++ b/apps/webapp/app/presenters/v3/VercelSettingsPresenter.server.ts
@@ -42,6 +42,13 @@ export type VercelSettingsResult = {
   autoAssignCustomDomains?: boolean | null;
   /** URL to manage Vercel integration access (project sharing) on vercel.com */
   vercelManageAccessUrl?: string;
+  /** The currently pinned TRIGGER_VERSION on Vercel production, if set. Used to surface
+   * the pin in the UI and prompt the user to clear it when atomic deployments are disabled. */
+  currentTriggerVersion?: string | null;
+  /** True when the Vercel lookup for TRIGGER_VERSION failed (network/auth/etc). Distinct
+   * from "no pin set" — the UI uses this to warn the user and still prompt them on disable
+   * so they can manually verify that production isn't pinned. */
+  currentTriggerVersionFetchFailed?: boolean;
 };
 
 export type VercelAvailableProject = {
@@ -248,13 +255,17 @@ export class VercelSettingsPresenter extends BasePresenter {
           customEnvironments: VercelCustomEnvironment[];
           autoAssignCustomDomains: boolean | null;
           vercelManageAccessUrl?: string;
+          currentTriggerVersion: string | null;
+          currentTriggerVersionFetchFailed: boolean;
         }> => {
           if (!orgIntegration) {
-            return { customEnvironments: [], autoAssignCustomDomains: null };
+            return { customEnvironments: [], autoAssignCustomDomains: null, currentTriggerVersion: null, currentTriggerVersionFetchFailed: false };
           }
           const clientResult = await VercelIntegrationRepository.getVercelClient(orgIntegration);
           if (clientResult.isErr()) {
-            return { customEnvironments: [], autoAssignCustomDomains: null };
+            // We couldn't even build a Vercel client — treat as fetch failure so the UI
+            // still prompts the user when they disable atomic deployments.
+            return { customEnvironments: [], autoAssignCustomDomains: null, currentTriggerVersion: null, currentTriggerVersionFetchFailed: true };
           }
           const client = clientResult.value;
           const teamId = await VercelIntegrationRepository.getTeamIdFromIntegration(orgIntegration);
@@ -275,10 +286,10 @@ export class VercelSettingsPresenter extends BasePresenter {
           }
 
           if (!connectedProject) {
-            return { customEnvironments: [], autoAssignCustomDomains: null, vercelManageAccessUrl };
+            return { customEnvironments: [], autoAssignCustomDomains: null, vercelManageAccessUrl, currentTriggerVersion: null, currentTriggerVersionFetchFailed: false };
           }
 
-          const [customEnvsResult, autoAssignResult] = await Promise.all([
+          const [customEnvsResult, autoAssignResult, triggerVersionResult] = await Promise.all([
             VercelIntegrationRepository.getVercelCustomEnvironments(
               client,
               connectedProject.vercelProjectId,
@@ -289,18 +300,44 @@ export class VercelSettingsPresenter extends BasePresenter {
               connectedProject.vercelProjectId,
               teamId
             ),
+            VercelIntegrationRepository.getVercelEnvironmentVariableValues(
+              client,
+              connectedProject.vercelProjectId,
+              teamId,
+              "production",
+              (key) => key === "TRIGGER_VERSION"
+            ),
           ]);
+
+          let currentTriggerVersion: string | null = null;
+          let currentTriggerVersionFetchFailed = false;
+          if (triggerVersionResult.isOk()) {
+            const match = triggerVersionResult.value.find(
+              (envVar) => envVar.key === "TRIGGER_VERSION" && envVar.target.includes("production")
+            );
+            currentTriggerVersion = match?.value ?? null;
+          } else {
+            currentTriggerVersionFetchFailed = true;
+            logger.warn("Failed to fetch current TRIGGER_VERSION from Vercel — surfacing as unknown", {
+              projectId,
+              vercelProjectId: connectedProject.vercelProjectId,
+              error: triggerVersionResult.error.message,
+            });
+          }
+
           return {
             customEnvironments: customEnvsResult.isOk() ? customEnvsResult.value : [],
             autoAssignCustomDomains: autoAssignResult.isOk() ? autoAssignResult.value : null,
             vercelManageAccessUrl,
+            currentTriggerVersion,
+            currentTriggerVersionFetchFailed,
           };
         };
 
         return fromPromise(
           fetchVercelData(),
           (error) => ({ type: "other" as const, cause: error })
-        ).map(({ customEnvironments, autoAssignCustomDomains, vercelManageAccessUrl }) => ({
+        ).map(({ customEnvironments, autoAssignCustomDomains, vercelManageAccessUrl, currentTriggerVersion, currentTriggerVersionFetchFailed }) => ({
           enabled: true,
           hasOrgIntegration,
           authInvalid: false,
@@ -311,6 +348,8 @@ export class VercelSettingsPresenter extends BasePresenter {
           customEnvironments,
           autoAssignCustomDomains,
           vercelManageAccessUrl,
+          currentTriggerVersion,
+          currentTriggerVersionFetchFailed,
         } as VercelSettingsResult));
       }).mapErr((error) => {
         // Log the error and return a safe fallback
diff --git a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.batches/route.tsx b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.batches/route.tsx
index 17dcfbc4619..47318edd355 100644
--- a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.batches/route.tsx
+++ b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.batches/route.tsx
@@ -54,6 +54,7 @@ import {
   v3BatchPath,
   v3BatchRunsPath,
 } from "~/utils/pathBuilder";
+import { throwNotFound } from "~/utils/httpErrors";
 
 export const meta: MetaFunction = () => {
   return [
@@ -74,7 +75,7 @@ export const loader = async ({ request, params }: LoaderFunctionArgs) => {
 
   const environment = await findEnvironmentBySlug(project.id, envParam, userId);
   if (!environment) {
-    throw new Error("Environment not found");
+    throwNotFound("Environment not found");
   }
 
   const url = new URL(request.url);
diff --git a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.runs._index/route.tsx b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.runs._index/route.tsx
index f555f98171e..d271e6f2b22 100644
--- a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.runs._index/route.tsx
+++ b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.runs._index/route.tsx
@@ -40,6 +40,7 @@ import { useOrganization } from "~/hooks/useOrganizations";
 import { useProject } from "~/hooks/useProject";
 import { useSearchParams } from "~/hooks/useSearchParam";
 import { useShortcutKeys } from "~/hooks/useShortcutKeys";
+import { redirectWithErrorMessage } from "~/models/message.server";
 import { findProjectBySlug } from "~/models/project.server";
 import { findEnvironmentBySlug } from "~/models/runtimeEnvironment.server";
 import { getRunFiltersFromRequest } from "~/presenters/RunFilters.server";
@@ -59,6 +60,7 @@ import {
   v3TestPath,
   v3TestTaskPath,
 } from "~/utils/pathBuilder";
+import { throwNotFound } from "~/utils/httpErrors";
 import { ListPagination } from "../../components/ListPagination";
 import { CreateBulkActionInspector } from "../resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.runs.bulkaction";
 import { Callout } from "~/components/primitives/Callout";
@@ -77,12 +79,12 @@ export const loader = async ({ request, params }: LoaderFunctionArgs) => {
 
   const project = await findProjectBySlug(organizationSlug, projectParam, userId);
   if (!project) {
-    throw new Error("Project not found");
+    return redirectWithErrorMessage("/", request, "Project not found");
   }
 
   const environment = await findEnvironmentBySlug(project.id, envParam, userId);
   if (!environment) {
-    throw new Error("Environment not found");
+    throwNotFound("Environment not found");
   }
 
   const filters = await getRunFiltersFromRequest(request);
diff --git a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.schedules.$scheduleParam/route.tsx b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.schedules.$scheduleParam/route.tsx
index f4e663b1b7d..a837274222b 100644
--- a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.schedules.$scheduleParam/route.tsx
+++ b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.schedules.$scheduleParam/route.tsx
@@ -55,6 +55,7 @@ import {
   v3SchedulePath,
   v3SchedulesPath,
 } from "~/utils/pathBuilder";
+import { throwNotFound } from "~/utils/httpErrors";
 import { DeleteTaskScheduleService } from "~/v3/services/deleteTaskSchedule.server";
 import { SetActiveOnTaskScheduleService } from "~/v3/services/setActiveOnTaskSchedule.server";
 
@@ -84,7 +85,7 @@ export const loader = async ({ request, params }: LoaderFunctionArgs) => {
   });
 
   if (!result) {
-    throw new Error("Schedule not found");
+    throwNotFound("Schedule not found");
   }
 
   return typedjson({ schedule: result.schedule });
diff --git a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.sessions.$sessionParam/route.tsx b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.sessions.$sessionParam/route.tsx
index c873dd9f406..688477281d6 100644
--- a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.sessions.$sessionParam/route.tsx
+++ b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.sessions.$sessionParam/route.tsx
@@ -51,6 +51,7 @@ import {
   v3RunsPath,
   v3SessionsPath,
 } from "~/utils/pathBuilder";
+import { throwNotFound } from "~/utils/httpErrors";
 
 const ParamsSchema = EnvironmentParamSchema.extend({
   sessionParam: z.string(),
@@ -71,7 +72,7 @@ export const loader = async ({ request, params }: LoaderFunctionArgs) => {
 
   const environment = await findEnvironmentBySlug(project.id, envParam, userId);
   if (!environment) {
-    throw new Error("Environment not found");
+    throwNotFound("Environment not found");
   }
 
   const presenter = new SessionPresenter($replica);
diff --git a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.sessions._index/route.tsx b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.sessions._index/route.tsx
index 99b0a96b5d1..8d2fa6f7961 100644
--- a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.sessions._index/route.tsx
+++ b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.sessions._index/route.tsx
@@ -19,6 +19,7 @@ import { SessionListPresenter } from "~/presenters/v3/SessionListPresenter.serve
 import { clickhouseClient } from "~/services/clickhouseInstance.server";
 import { requireUserId } from "~/services/session.server";
 import { docsPath, EnvironmentParamSchema } from "~/utils/pathBuilder";
+import { throwNotFound } from "~/utils/httpErrors";
 
 export const meta: MetaFunction = () => {
   return [
@@ -39,7 +40,7 @@ export const loader = async ({ request, params }: LoaderFunctionArgs) => {
 
   const environment = await findEnvironmentBySlug(project.id, envParam, userId);
   if (!environment) {
-    throw new Error("Environment not found");
+    throwNotFound("Environment not found");
   }
 
   const filters = getSessionFiltersFromRequest(request);
diff --git a/apps/webapp/app/routes/api.v1.artifacts.ts b/apps/webapp/app/routes/api.v1.artifacts.ts
index 82ae3f53756..c74c66a222d 100644
--- a/apps/webapp/app/routes/api.v1.artifacts.ts
+++ b/apps/webapp/app/routes/api.v1.artifacts.ts
@@ -13,79 +13,85 @@ export async function action({ request }: ActionFunctionArgs) {
     return json({ error: "Method Not Allowed" }, { status: 405 });
   }
 
-  const authenticationResult = await authenticateRequest(request, {
-    apiKey: true,
-    organizationAccessToken: false,
-    personalAccessToken: false,
-  });
+  try {
+    const authenticationResult = await authenticateRequest(request, {
+      apiKey: true,
+      organizationAccessToken: false,
+      personalAccessToken: false,
+    });
 
-  if (!authenticationResult || !authenticationResult.result.ok) {
-    logger.info("Invalid or missing api key", { url: request.url });
-    return json({ error: "Invalid or Missing API key" }, { status: 401 });
-  }
+    if (!authenticationResult || !authenticationResult.result.ok) {
+      logger.info("Invalid or missing api key", { url: request.url });
+      return json({ error: "Invalid or Missing API key" }, { status: 401 });
+    }
 
-  const [, rawBody] = await tryCatch(request.json());
-  const body = CreateArtifactRequestBody.safeParse(rawBody ?? {});
+    const [, rawBody] = await tryCatch(request.json());
+    const body = CreateArtifactRequestBody.safeParse(rawBody ?? {});
 
-  if (!body.success) {
-    return json({ error: "Invalid request body", issues: body.error.issues }, { status: 400 });
-  }
+    if (!body.success) {
+      return json({ error: "Invalid request body", issues: body.error.issues }, { status: 400 });
+    }
 
-  const { environment: authenticatedEnv } = authenticationResult.result;
+    const { environment: authenticatedEnv } = authenticationResult.result;
 
-  const service = new ArtifactsService();
-  return await service
-    .createArtifact(body.data.type, authenticatedEnv, body.data.contentLength)
-    .match(
-      (result) => {
-        return json(
-          {
-            artifactKey: result.artifactKey,
-            uploadUrl: result.uploadUrl,
-            uploadFields: result.uploadFields,
-            expiresAt: result.expiresAt.toISOString(),
-          } satisfies CreateArtifactResponseBody,
-          { status: 201 }
-        );
-      },
-      (error) => {
-        switch (error.type) {
-          case "artifact_size_exceeds_limit": {
-            logger.warn("Artifact size exceeds limit", { error });
-            const sizeMB = parseFloat((error.contentLength / (1024 * 1024)).toFixed(1));
-            const limitMB = parseFloat((error.sizeLimit / (1024 * 1024)).toFixed(1));
+    const service = new ArtifactsService();
+    return await service
+      .createArtifact(body.data.type, authenticatedEnv, body.data.contentLength)
+      .match(
+        (result) => {
+          return json(
+            {
+              artifactKey: result.artifactKey,
+              uploadUrl: result.uploadUrl,
+              uploadFields: result.uploadFields,
+              expiresAt: result.expiresAt.toISOString(),
+            } satisfies CreateArtifactResponseBody,
+            { status: 201 }
+          );
+        },
+        (error) => {
+          switch (error.type) {
+            case "artifact_size_exceeds_limit": {
+              logger.warn("Artifact size exceeds limit", { error });
+              const sizeMB = parseFloat((error.contentLength / (1024 * 1024)).toFixed(1));
+              const limitMB = parseFloat((error.sizeLimit / (1024 * 1024)).toFixed(1));
 
-            let errorMessage;
+              let errorMessage;
 
-            switch (body.data.type) {
-              case "deployment_context":
-                errorMessage = `Artifact size (${sizeMB} MB) exceeds the allowed limit of ${limitMB} MB. Make sure you are in the correct directory of your Trigger.dev project. Reach out to us if you are seeing this error consistently.`;
-                break;
-              default:
-                body.data.type satisfies never;
-                errorMessage = `Artifact size (${sizeMB} MB) exceeds the allowed limit of ${limitMB} MB`;
+              switch (body.data.type) {
+                case "deployment_context":
+                  errorMessage = `Artifact size (${sizeMB} MB) exceeds the allowed limit of ${limitMB} MB. Make sure you are in the correct directory of your Trigger.dev project. Reach out to us if you are seeing this error consistently.`;
+                  break;
+                default:
+                  body.data.type satisfies never;
+                  errorMessage = `Artifact size (${sizeMB} MB) exceeds the allowed limit of ${limitMB} MB`;
+              }
+              return json(
+                {
+                  error: errorMessage,
+                },
+                { status: 400 }
+              );
+            }
+            case "failed_to_create_presigned_post": {
+              logger.error("Failed to create presigned POST", { error });
+              return json({ error: "Failed to generate artifact upload URL" }, { status: 500 });
+            }
+            case "artifacts_bucket_not_configured": {
+              logger.error("Artifacts bucket not configured", { error });
+              return json({ error: "Internal server error" }, { status: 500 });
+            }
+            default: {
+              error satisfies never;
+              logger.error("Failed creating artifact", { error });
+              return json({ error: "Internal server error" }, { status: 500 });
             }
-            return json(
-              {
-                error: errorMessage,
-              },
-              { status: 400 }
-            );
-          }
-          case "failed_to_create_presigned_post": {
-            logger.error("Failed to create presigned POST", { error });
-            return json({ error: "Failed to generate artifact upload URL" }, { status: 500 });
-          }
-          case "artifacts_bucket_not_configured": {
-            logger.error("Artifacts bucket not configured", { error });
-            return json({ error: "Internal server error" }, { status: 500 });
-          }
-          default: {
-            error satisfies never;
-            logger.error("Failed creating artifact", { error });
-            return json({ error: "Internal server error" }, { status: 500 });
           }
         }
-      }
-    );
+      );
+  } catch (error) {
+    if (error instanceof Response) throw error;
+    logger.error("Failed to create artifact", { error });
+    return json({ error: "Internal Server Error" }, { status: 500 });
+  }
 }
diff --git a/apps/webapp/app/routes/api.v1.auth.jwt.claims.ts b/apps/webapp/app/routes/api.v1.auth.jwt.claims.ts
index 0091078dbb5..b62874d5608 100644
--- a/apps/webapp/app/routes/api.v1.auth.jwt.claims.ts
+++ b/apps/webapp/app/routes/api.v1.auth.jwt.claims.ts
@@ -1,19 +1,26 @@
 import type { LoaderFunctionArgs } from "@remix-run/server-runtime";
 import { json } from "@remix-run/server-runtime";
 import { authenticateApiRequest } from "~/services/apiAuth.server";
+import { logger } from "~/services/logger.server";
 
 export async function action({ request }: LoaderFunctionArgs) {
-  // Next authenticate the request
-  const authenticationResult = await authenticateApiRequest(request);
+  try {
+    // Next authenticate the request
+    const authenticationResult = await authenticateApiRequest(request);
 
-  if (!authenticationResult) {
-    return json({ error: "Invalid or Missing API key" }, { status: 401 });
-  }
+    if (!authenticationResult) {
+      return json({ error: "Invalid or Missing API key" }, { status: 401 });
+    }
 
-  const claims = {
-    sub: authenticationResult.environment.id,
-    pub: true,
-  };
+    const claims = {
+      sub: authenticationResult.environment.id,
+      pub: true,
+    };
 
-  return json(claims);
+    return json(claims);
+  } catch (error) {
+    if (error instanceof Response) throw error;
+    logger.error("Failed to read auth jwt claims", { error });
+    return json({ error: "Internal Server Error" }, { status: 500 });
+  }
 }
diff --git a/apps/webapp/app/routes/api.v1.auth.jwt.ts b/apps/webapp/app/routes/api.v1.auth.jwt.ts
index b95b1eb7877..c38cdeb14ac 100644
--- a/apps/webapp/app/routes/api.v1.auth.jwt.ts
+++ b/apps/webapp/app/routes/api.v1.auth.jwt.ts
@@ -1,6 +1,7 @@
 import type { LoaderFunctionArgs } from "@remix-run/server-runtime";
 import { json } from "@remix-run/server-runtime";
 import { authenticateApiRequest } from "~/services/apiAuth.server";
+import { logger } from "~/services/logger.server";
 import { z } from "zod";
 import { generateJWT as internal_generateJWT } from "@trigger.dev/core/v3";
 
@@ -14,36 +15,54 @@ const RequestBodySchema = z.object({
 });
 
+/**
+ * Mints a JWT for the environment resolved from the request's API key.
+ *
+ * Responds 401 when authentication yields no result, 400 when the body is not
+ * valid JSON or fails `RequestBodySchema`, and 500 on any other failure.
+ * Thrown `Response` objects are re-thrown untouched.
+ */
 export async function action({ request }: LoaderFunctionArgs) {
-  // Next authenticate the request
-  const authenticationResult = await authenticateApiRequest(request);
-
-  if (!authenticationResult) {
-    return json({ error: "Invalid or Missing API key" }, { status: 401 });
+  try {
+    // Next authenticate the request
+    const authenticationResult = await authenticateApiRequest(request);
+
+    if (!authenticationResult) {
+      return json({ error: "Invalid or Missing API key" }, { status: 401 });
+    }
+
+    const parsedBody = RequestBodySchema.safeParse(await request.json());
+
+    if (!parsedBody.success) {
+      return json(
+        { error: "Invalid request body", issues: parsedBody.error.issues },
+        { status: 400 }
+      );
+    }
+
+    const claims = {
+      sub: authenticationResult.environment.id,
+      pub: true,
+      ...parsedBody.data.claims,
+    };
+
+    // Sign with the environment's current canonical key, not the raw header key,
+    // so JWTs minted with a revoked (grace-window) key still validate — validation
+    // in jwtAuth.server.ts uses environment.apiKey.
+    const jwt = await internal_generateJWT({
+      secretKey: authenticationResult.environment.apiKey,
+      payload: claims,
+      expirationTime: parsedBody.data.expirationTime ?? "1h",
+    });
+
+    return json({ token: jwt });
+  } catch (error) {
+    if (error instanceof Response) throw error;
+    // request.json() rejects with a SyntaxError when the body is not valid JSON.
+    // That is a client mistake, so answer 400 instead of logging it as a 500.
+    if (error instanceof SyntaxError) {
+      return json({ error: "Invalid JSON body" }, { status: 400 });
+    }
+    logger.error("Failed to mint auth jwt", { error });
+    return json({ error: "Internal Server Error" }, { status: 500 });
   }
-
-  const parsedBody = RequestBodySchema.safeParse(await request.json());
-
-  if (!parsedBody.success) {
-    return json(
-      { error: "Invalid request body", issues: parsedBody.error.issues },
-      { status: 400 }
-    );
-  }
-
-  const claims = {
-    sub: authenticationResult.environment.id,
-    pub: true,
-    ...parsedBody.data.claims,
-  };
-
-  // Sign with the environment's current canonical key, not the raw header key,
-  // so JWTs minted with a revoked (grace-window) key still validate — validation
-  // in jwtAuth.server.ts uses environment.apiKey.
-  const jwt = await internal_generateJWT({
-    secretKey: authenticationResult.environment.apiKey,
-    payload: claims,
-    expirationTime: parsedBody.data.expirationTime ?? "1h",
-  });
-
-  return json({ token: jwt });
 }
diff --git a/apps/webapp/app/routes/api.v1.authorization-code.ts b/apps/webapp/app/routes/api.v1.authorization-code.ts
index b924b67500a..2e5c1aadf25 100644
--- a/apps/webapp/app/routes/api.v1.authorization-code.ts
+++ b/apps/webapp/app/routes/api.v1.authorization-code.ts
@@ -32,7 +32,7 @@ export async function action({ request }: ActionFunctionArgs) {
         error: error.message,
       });
 
-      return json({ error: error.message }, { status: 400 });
+      return json({ error: "Failed to create authorization code" }, { status: 400 });
     }
 
     return json({ error: "Something went wrong" }, { status: 500 });
diff --git a/apps/webapp/app/routes/api.v1.batches.$batchParam.results.ts b/apps/webapp/app/routes/api.v1.batches.$batchParam.results.ts
index 1a5889fab1d..edb19736691 100644
--- a/apps/webapp/app/routes/api.v1.batches.$batchParam.results.ts
+++ b/apps/webapp/app/routes/api.v1.batches.$batchParam.results.ts
@@ -12,32 +12,38 @@ const ParamsSchema = z.object({
 });
 
 export async function loader({ request, params }: LoaderFunctionArgs) {
-  // Authenticate the request
-  const authenticationResult = await authenticateApiRequest(request);
+  try {
+    // Authenticate the request
+    const authenticationResult = await authenticateApiRequest(request);
 
-  if (!authenticationResult) {
-    return json({ error: "Invalid or Missing API Key" }, { status: 401 });
-  }
+    if (!authenticationResult) {
+      return json({ error: "Invalid or Missing API Key" }, { status: 401 });
+    }
 
-  const parsed = ParamsSchema.safeParse(params);
+    const parsed = ParamsSchema.safeParse(params);
 
-  if (!parsed.success) {
-    return json({ error: "Invalid or missing run ID" }, { status: 400 });
-  }
+    if (!parsed.success) {
+      return json({ error: "Invalid or missing run ID" }, { status: 400 });
+    }
 
-  const { batchParam } = parsed.data;
+    const { batchParam } = parsed.data;
 
-  try {
-    const presenter = new ApiBatchResultsPresenter();
-    const result = await presenter.call(batchParam, authenticationResult.environment);
+    try {
+      const presenter = new ApiBatchResultsPresenter();
+      const result = await presenter.call(batchParam, authenticationResult.environment);
 
-    if (!result) {
-      return json({ error: "Batch not found" }, { status: 404 });
-    }
+      if (!result) {
+        return json({ error: "Batch not found" }, { status: 404 });
+      }
 
-    return json(result);
+      return json(result);
+    } catch (error) {
+      logger.error("Failed to load batch results", { error });
+      return json({ error: "Something went wrong, please try again." }, { status: 500 });
+    }
   } catch (error) {
-    logger.error("Failed to load batch results", { error });
-    return json({ error: "Something went wrong, please try again." }, { status: 500 });
+    if (error instanceof Response) throw error;
+    logger.error("Failed to load batch results (outer)", { error });
+    return json({ error: "Internal Server Error" }, { status: 500 });
   }
 }
diff --git a/apps/webapp/app/routes/api.v1.deployments.$deploymentId.background-workers.ts b/apps/webapp/app/routes/api.v1.deployments.$deploymentId.background-workers.ts
index c22399ef60e..26503c14f9d 100644
--- a/apps/webapp/app/routes/api.v1.deployments.$deploymentId.background-workers.ts
+++ b/apps/webapp/app/routes/api.v1.deployments.$deploymentId.background-workers.ts
@@ -23,57 +23,68 @@ export async function action({ request, params }: ActionFunctionArgs) {
     return json({ error: "Invalid params" }, { status: 400 });
   }
 
-  // Next authenticate the request
-  const authenticationResult = await authenticateApiRequest(request);
-
-  if (!authenticationResult) {
-    logger.info("Invalid or missing api key", { url: request.url });
-    return json({ error: "Invalid or Missing API key" }, { status: 401 });
-  }
+  try {
+    // Next authenticate the request
+    const authenticationResult = await authenticateApiRequest(request);
 
-  const authenticatedEnv = authenticationResult.environment;
+    if (!authenticationResult) {
+      logger.info("Invalid or missing api key", { url: request.url });
+      return json({ error: "Invalid or Missing API key" }, { status: 401 });
+    }
 
-  const { deploymentId } = parsedParams.data;
+    const authenticatedEnv = authenticationResult.environment;
 
-  const rawBody = await request.json();
-  const body = CreateBackgroundWorkerRequestBody.safeParse(rawBody);
+    const { deploymentId } = parsedParams.data;
 
-  if (!body.success) {
-    return json({ error: "Invalid body", issues: body.error.issues }, { status: 400 });
-  }
+    const rawBody = await request.json();
+    const body = CreateBackgroundWorkerRequestBody.safeParse(rawBody);
 
-  const service = new CreateDeploymentBackgroundWorkerServiceV4();
+    if (!body.success) {
+      return json({ error: "Invalid body", issues: body.error.issues }, { status: 400 });
+    }
 
-  try {
-    const backgroundWorker = await service.call(authenticatedEnv, deploymentId, body.data);
+    const service = new CreateDeploymentBackgroundWorkerServiceV4();
+
+    try {
+      const backgroundWorker = await service.call(authenticatedEnv, deploymentId, body.data);
+
+      if (!backgroundWorker) {
+        return json({ error: "Failed to create background worker" }, { status: 500 });
+      }
+
+      return json(
+        {
+          id: backgroundWorker.friendlyId,
+          version: backgroundWorker.version,
+          contentHash: backgroundWorker.contentHash,
+        },
+        { status: 200 }
+      );
+    } catch (e) {
+      // Customer-facing validation failures (invalid task config, customer cron
+      // expression, etc.). The handler returns 4xx with the message; system
+      // handles it gracefully, no alert needed.
+      if (e instanceof ServiceValidationError) {
+        logger.warn("Failed to create background worker", { error: e.message });
+        return json({ error: e.message }, { status: e.status ?? 400 });
+      }
+      if (e instanceof CreateDeclarativeScheduleError) {
+        logger.warn("Failed to create background worker", { error: e.message });
+        return json({ error: e.message }, { status: 400 });
+      }
+
+      logger.error("Failed to create background worker", { error: e });
 
-    if (!backgroundWorker) {
       return json({ error: "Failed to create background worker" }, { status: 500 });
     }
-
-    return json(
-      {
-        id: backgroundWorker.friendlyId,
-        version: backgroundWorker.version,
-        contentHash: backgroundWorker.contentHash,
-      },
-      { status: 200 }
-    );
-  } catch (e) {
-    // Customer-facing validation failures (invalid task config, customer cron
-    // expression, etc.). The handler returns 4xx with the message; system
-    // handles it gracefully, no alert needed.
-    if (e instanceof ServiceValidationError) {
-      logger.warn("Failed to create background worker", { error: e.message });
-      return json({ error: e.message }, { status: e.status ?? 400 });
-    }
-    if (e instanceof CreateDeclarativeScheduleError) {
-      logger.warn("Failed to create background worker", { error: e.message });
-      return json({ error: e.message }, { status: 400 });
-    }
-
-    logger.error("Failed to create background worker", { error: e });
-
-    return json({ error: "Failed to create background worker" }, { status: 500 });
+  } catch (error) {
+    if (error instanceof Response) throw error;
+    // Service errors are handled by the inner catch, so a SyntaxError reaching
+    // here comes from request.json() on a malformed body: a 400, not a 500.
+    if (error instanceof SyntaxError) {
+      return json({ error: "Invalid JSON body" }, { status: 400 });
+    }
+    logger.error("Failed to create deployment background worker", { error });
+    return json({ error: "Internal Server Error" }, { status: 500 });
   }
 }
diff --git a/apps/webapp/app/routes/api.v1.deployments.$deploymentId.cancel.ts b/apps/webapp/app/routes/api.v1.deployments.$deploymentId.cancel.ts
index dd209d4494b..56b93fe17e3 100644
--- a/apps/webapp/app/routes/api.v1.deployments.$deploymentId.cancel.ts
+++ b/apps/webapp/app/routes/api.v1.deployments.$deploymentId.cancel.ts
@@ -20,53 +20,59 @@ export async function action({ request, params }: ActionFunctionArgs) {
     return json({ error: "Invalid params" }, { status: 400 });
   }
 
-  const authenticationResult = await authenticateRequest(request, {
-    apiKey: true,
-    organizationAccessToken: false,
-    personalAccessToken: false,
-  });
+  try {
+    const authenticationResult = await authenticateRequest(request, {
+      apiKey: true,
+      organizationAccessToken: false,
+      personalAccessToken: false,
+    });
 
-  if (!authenticationResult || !authenticationResult.result.ok) {
-    logger.info("Invalid or missing api key", { url: request.url });
-    return json({ error: "Invalid or Missing API key" }, { status: 401 });
-  }
+    if (!authenticationResult || !authenticationResult.result.ok) {
+      logger.info("Invalid or missing api key", { url: request.url });
+      return json({ error: "Invalid or Missing API key" }, { status: 401 });
+    }
 
-  const { environment: authenticatedEnv } = authenticationResult.result;
-  const { deploymentId } = parsedParams.data;
+    const { environment: authenticatedEnv } = authenticationResult.result;
+    const { deploymentId } = parsedParams.data;
 
-  const [, rawBody] = await tryCatch(request.json());
-  const body = CancelDeploymentRequestBody.safeParse(rawBody ?? {});
+    const [, rawBody] = await tryCatch(request.json());
+    const body = CancelDeploymentRequestBody.safeParse(rawBody ?? {});
 
-  if (!body.success) {
-    return json({ error: "Invalid request body", issues: body.error.issues }, { status: 400 });
-  }
+    if (!body.success) {
+      return json({ error: "Invalid request body", issues: body.error.issues }, { status: 400 });
+    }
 
-  const deploymentService = new DeploymentService();
+    const deploymentService = new DeploymentService();
 
-  return await deploymentService
-    .cancelDeployment(authenticatedEnv, deploymentId, {
-      canceledReason: body.data.reason,
-    })
-    .match(
-      () => {
-        return new Response(null, { status: 204 });
-      },
-      (error) => {
-        switch (error.type) {
-          case "deployment_not_found":
-            return json({ error: "Deployment not found" }, { status: 404 });
-          case "failed_to_delete_deployment_timeout":
-            return new Response(null, { status: 204 }); // not a critical error, ignore
-          case "deployment_cannot_be_cancelled":
-            return json(
-              { error: "Deployment is already in a final state and cannot be canceled" },
-              { status: 409 }
-            );
-          case "other":
-          default:
-            error.type satisfies "other";
-            return json({ error: "Internal server error" }, { status: 500 });
+    return await deploymentService
+      .cancelDeployment(authenticatedEnv, deploymentId, {
+        canceledReason: body.data.reason,
+      })
+      .match(
+        () => {
+          return new Response(null, { status: 204 });
+        },
+        (error) => {
+          switch (error.type) {
+            case "deployment_not_found":
+              return json({ error: "Deployment not found" }, { status: 404 });
+            case "failed_to_delete_deployment_timeout":
+              return new Response(null, { status: 204 }); // not a critical error, ignore
+            case "deployment_cannot_be_cancelled":
+              return json(
+                { error: "Deployment is already in a final state and cannot be canceled" },
+                { status: 409 }
+              );
+            case "other":
+            default:
+              error.type satisfies "other";
+              return json({ error: "Internal server error" }, { status: 500 });
+          }
         }
-      }
-    );
+      );
+  } catch (error) {
+    if (error instanceof Response) throw error;
+    logger.error("Failed to cancel deployment", { error });
+    return json({ error: "Internal Server Error" }, { status: 500 });
+  }
 }
diff --git a/apps/webapp/app/routes/api.v1.deployments.$deploymentId.fail.ts b/apps/webapp/app/routes/api.v1.deployments.$deploymentId.fail.ts
index 5edea5636e7..43eb45c5364 100644
--- a/apps/webapp/app/routes/api.v1.deployments.$deploymentId.fail.ts
+++ b/apps/webapp/app/routes/api.v1.deployments.$deploymentId.fail.ts
@@ -21,32 +21,41 @@ export async function action({ request, params }: ActionFunctionArgs) {
     return json({ error: "Invalid params" }, { status: 400 });
   }
 
-  // Next authenticate the request
-  const authenticationResult = await authenticateApiRequest(request);
-
-  if (!authenticationResult) {
-    logger.info("Invalid or missing api key", { url: request.url });
-    return json({ error: "Invalid or Missing API key" }, { status: 401 });
-  }
-
-  const authenticatedEnv = authenticationResult.environment;
-
-  const { deploymentId } = parsedParams.data;
-
-  const rawBody = await request.json();
-  const body = FailDeploymentRequestBody.safeParse(rawBody);
-
-  if (!body.success) {
-    return json({ error: "Invalid body", issues: body.error.issues }, { status: 400 });
+  try {
+    // Next authenticate the request
+    const authenticationResult = await authenticateApiRequest(request);
+
+    if (!authenticationResult) {
+      logger.info("Invalid or missing api key", { url: request.url });
+      return json({ error: "Invalid or Missing API key" }, { status: 401 });
+    }
+
+    const authenticatedEnv = authenticationResult.environment;
+
+    const { deploymentId } = parsedParams.data;
+
+    const rawBody = await request.json();
+    const body = FailDeploymentRequestBody.safeParse(rawBody);
+
+    if (!body.success) {
+      return json({ error: "Invalid body", issues: body.error.issues }, { status: 400 });
+    }
+
+    const service = new FailDeploymentService();
+    await service.call(authenticatedEnv, deploymentId, body.data);
+
+    return json(
+      {
+        id: deploymentId,
+      },
+      { status: 200 }
+    );
+  } catch (error) {
+    if (error instanceof Response) throw error;
+    if (error instanceof SyntaxError) {
+      return json({ error: "Invalid JSON body" }, { status: 400 });
+    }
+    logger.error("Failed to fail deployment", { error });
+    return json({ error: "Internal Server Error" }, { status: 500 });
   }
-
-  const service = new FailDeploymentService();
-  await service.call(authenticatedEnv, deploymentId, body.data);
-
-  return json(
-    {
-      id: deploymentId,
-    },
-    { status: 200 }
-  );
 }
diff --git a/apps/webapp/app/routes/api.v1.deployments.$deploymentId.finalize.ts b/apps/webapp/app/routes/api.v1.deployments.$deploymentId.finalize.ts
index 9bafd8644af..c1ce30bbe79 100644
--- a/apps/webapp/app/routes/api.v1.deployments.$deploymentId.finalize.ts
+++ b/apps/webapp/app/routes/api.v1.deployments.$deploymentId.finalize.ts
@@ -22,41 +22,51 @@ export async function action({ request, params }: ActionFunctionArgs) {
     return json({ error: "Invalid params" }, { status: 400 });
   }
 
-  // Next authenticate the request
-  const authenticationResult = await authenticateApiRequest(request);
-
-  if (!authenticationResult) {
-    logger.info("Invalid or missing api key", { url: request.url });
-    return json({ error: "Invalid or Missing API key" }, { status: 401 });
-  }
+  try {
+    // Next authenticate the request
+    const authenticationResult = await authenticateApiRequest(request);
 
-  const authenticatedEnv = authenticationResult.environment;
+    if (!authenticationResult) {
+      logger.info("Invalid or missing api key", { url: request.url });
+      return json({ error: "Invalid or Missing API key" }, { status: 401 });
+    }
 
-  const { deploymentId } = parsedParams.data;
+    const authenticatedEnv = authenticationResult.environment;
 
-  const rawBody = await request.json();
-  const body = FinalizeDeploymentRequestBody.safeParse(rawBody);
+    const { deploymentId } = parsedParams.data;
 
-  if (!body.success) {
-    return json({ error: "Invalid body", issues: body.error.issues }, { status: 400 });
-  }
+    const rawBody = await request.json();
+    const body = FinalizeDeploymentRequestBody.safeParse(rawBody);
 
-  try {
-    const service = new FinalizeDeploymentService();
-    await service.call(authenticatedEnv, deploymentId, body.data);
-
-    return json(
-      {
-        id: deploymentId,
-      },
-      { status: 200 }
-    );
-  } catch (error) {
-    if (error instanceof ServiceValidationError) {
-      return json({ error: error.message }, { status: 400 });
+    if (!body.success) {
+      return json({ error: "Invalid body", issues: body.error.issues }, { status: 400 });
     }
 
-    logger.error("Error finalizing deployment", { error });
-    return json({ error: "Internal server error" }, { status: 500 });
+    try {
+      const service = new FinalizeDeploymentService();
+      await service.call(authenticatedEnv, deploymentId, body.data);
+
+      return json(
+        {
+          id: deploymentId,
+        },
+        { status: 200 }
+      );
+    } catch (error) {
+      if (error instanceof ServiceValidationError) {
+        return json({ error: error.message }, { status: 400 });
+      }
+
+      logger.error("Error finalizing deployment", { error });
+      return json({ error: "Internal server error" }, { status: 500 });
+    }
+  } catch (error) {
+    if (error instanceof Response) throw error;
+    // request.json() rejects with SyntaxError on a malformed body: a client error, not a 500
+    if (error instanceof SyntaxError) {
+      return json({ error: "Invalid JSON body" }, { status: 400 });
+    }
+    logger.error("Failed to finalize deployment", { error });
+    return json({ error: "Internal Server Error" }, { status: 500 });
   }
 }
diff --git a/apps/webapp/app/routes/api.v1.deployments.$deploymentId.generate-registry-credentials.ts b/apps/webapp/app/routes/api.v1.deployments.$deploymentId.generate-registry-credentials.ts
index 161f37f930b..89aaad4301e 100644
--- a/apps/webapp/app/routes/api.v1.deployments.$deploymentId.generate-registry-credentials.ts
+++ b/apps/webapp/app/routes/api.v1.deployments.$deploymentId.generate-registry-credentials.ts
@@ -24,77 +24,83 @@ export async function action({ request, params }: ActionFunctionArgs) {
     return json({ error: "Invalid params" }, { status: 400 });
   }
 
-  const authenticationResult = await authenticateRequest(request, {
-    apiKey: true,
-    organizationAccessToken: false,
-    personalAccessToken: false,
-  });
+  try {
+    const authenticationResult = await authenticateRequest(request, {
+      apiKey: true,
+      organizationAccessToken: false,
+      personalAccessToken: false,
+    });
 
-  if (!authenticationResult || !authenticationResult.result.ok) {
-    logger.info("Invalid or missing api key", { url: request.url });
-    return json({ error: "Invalid or Missing API key" }, { status: 401 });
-  }
+    if (!authenticationResult || !authenticationResult.result.ok) {
+      logger.info("Invalid or missing api key", { url: request.url });
+      return json({ error: "Invalid or Missing API key" }, { status: 401 });
+    }
 
-  const { environment: authenticatedEnv } = authenticationResult.result;
-  const { deploymentId } = parsedParams.data;
+    const { environment: authenticatedEnv } = authenticationResult.result;
+    const { deploymentId } = parsedParams.data;
 
-  const [, rawBody] = await tryCatch(request.json());
-  const body = ProgressDeploymentRequestBody.safeParse(rawBody ?? {});
+    const [, rawBody] = await tryCatch(request.json());
+    const body = ProgressDeploymentRequestBody.safeParse(rawBody ?? {});
 
-  if (!body.success) {
-    return json({ error: "Invalid request body", issues: body.error.issues }, { status: 400 });
-  }
+    if (!body.success) {
+      return json({ error: "Invalid request body", issues: body.error.issues }, { status: 400 });
+    }
 
-  const deploymentService = new DeploymentService();
+    const deploymentService = new DeploymentService();
 
-  return await deploymentService.generateRegistryCredentials(authenticatedEnv, deploymentId).match(
-    (result) => {
-      return json(
-        {
-          username: result.username,
-          password: result.password,
-          expiresAt: result.expiresAt.toISOString(),
-          repositoryUri: result.repositoryUri,
-        } satisfies GenerateRegistryCredentialsResponseBody,
-        { status: 200 }
-      );
-    },
-    (error) => {
-      switch (error.type) {
-        case "deployment_not_found":
-          return json({ error: "Deployment not found" }, { status: 404 });
-        case "deployment_has_no_image_reference":
-          logger.error(
-            "Failed to generate registry credentials: deployment_has_no_image_reference",
-            { deploymentId }
-          );
-          return json({ error: "Deployment has no image reference" }, { status: 409 });
-        case "deployment_is_already_final":
-          return json(
-            { error: "Failed to generate registry credentials: deployment_is_already_final" },
-            { status: 409 }
-          );
-        case "missing_registry_credentials":
-          logger.error("Failed to generate registry credentials: missing_registry_credentials", {
-            deploymentId,
-          });
-          return json({ error: "Missing registry credentials" }, { status: 409 });
-        case "registry_not_supported":
-          logger.error("Failed to generate registry credentials: registry_not_supported", {
-            deploymentId,
-          });
-          return json({ error: "Registry not supported" }, { status: 409 });
-        case "registry_region_not_supported":
-          logger.error("Failed to generate registry credentials: registry_region_not_supported", {
-            deploymentId,
-          });
-          return json({ error: "Registry region not supported" }, { status: 409 });
-        case "other":
-        default:
-          error.type satisfies "other";
-          logger.error("Failed to generate registry credentials", { error: error.cause });
-          return json({ error: "Internal server error" }, { status: 500 });
+    return await deploymentService.generateRegistryCredentials(authenticatedEnv, deploymentId).match(
+      (result) => {
+        return json(
+          {
+            username: result.username,
+            password: result.password,
+            expiresAt: result.expiresAt.toISOString(),
+            repositoryUri: result.repositoryUri,
+          } satisfies GenerateRegistryCredentialsResponseBody,
+          { status: 200 }
+        );
+      },
+      (error) => {
+        switch (error.type) {
+          case "deployment_not_found":
+            return json({ error: "Deployment not found" }, { status: 404 });
+          case "deployment_has_no_image_reference":
+            logger.error(
+              "Failed to generate registry credentials: deployment_has_no_image_reference",
+              { deploymentId }
+            );
+            return json({ error: "Deployment has no image reference" }, { status: 409 });
+          case "deployment_is_already_final":
+            return json(
+              { error: "Failed to generate registry credentials: deployment_is_already_final" },
+              { status: 409 }
+            );
+          case "missing_registry_credentials":
+            logger.error("Failed to generate registry credentials: missing_registry_credentials", {
+              deploymentId,
+            });
+            return json({ error: "Missing registry credentials" }, { status: 409 });
+          case "registry_not_supported":
+            logger.error("Failed to generate registry credentials: registry_not_supported", {
+              deploymentId,
+            });
+            return json({ error: "Registry not supported" }, { status: 409 });
+          case "registry_region_not_supported":
+            logger.error("Failed to generate registry credentials: registry_region_not_supported", {
+              deploymentId,
+            });
+            return json({ error: "Registry region not supported" }, { status: 409 });
+          case "other":
+          default:
+            error.type satisfies "other";
+            logger.error("Failed to generate registry credentials", { error: error.cause });
+            return json({ error: "Internal server error" }, { status: 500 });
+        }
       }
-    }
-  );
+    );
+  } catch (error) {
+    if (error instanceof Response) throw error;
+    logger.error("Failed to generate registry credentials", { error });
+    return json({ error: "Internal Server Error" }, { status: 500 });
+  }
 }
diff --git a/apps/webapp/app/routes/api.v1.deployments.$deploymentId.progress.ts b/apps/webapp/app/routes/api.v1.deployments.$deploymentId.progress.ts
index 2c78c59f552..671f42606a2 100644
--- a/apps/webapp/app/routes/api.v1.deployments.$deploymentId.progress.ts
+++ b/apps/webapp/app/routes/api.v1.deployments.$deploymentId.progress.ts
@@ -20,62 +20,70 @@ export async function action({ request, params }: ActionFunctionArgs) {
     return json({ error: "Invalid params" }, { status: 400 });
   }
 
-  const authenticationResult = await authenticateRequest(request, {
-    apiKey: true,
-    organizationAccessToken: false,
-    personalAccessToken: false,
-  });
+  try {
+    const authenticationResult = await authenticateRequest(request, {
+      apiKey: true,
+      organizationAccessToken: false,
+      personalAccessToken: false,
+    });
 
-  if (!authenticationResult || !authenticationResult.result.ok) {
-    logger.info("Invalid or missing api key", { url: request.url });
-    return json({ error: "Invalid or Missing API key" }, { status: 401 });
-  }
+    if (!authenticationResult || !authenticationResult.result.ok) {
+      logger.info("Invalid or missing api key", { url: request.url });
+      return json({ error: "Invalid or Missing API key" }, { status: 401 });
+    }
 
-  const { environment: authenticatedEnv } = authenticationResult.result;
-  const { deploymentId } = parsedParams.data;
+    const { environment: authenticatedEnv } = authenticationResult.result;
+    const { deploymentId } = parsedParams.data;
 
-  const [, rawBody] = await tryCatch(request.json());
-  const body = ProgressDeploymentRequestBody.safeParse(rawBody ?? {});
+    const [, rawBody] = await tryCatch(request.json());
+    const body = ProgressDeploymentRequestBody.safeParse(rawBody ?? {});
 
-  if (!body.success) {
-    return json({ error: "Invalid request body", issues: body.error.issues }, { status: 400 });
-  }
+    if (!body.success) {
+      return json({ error: "Invalid request body", issues: body.error.issues }, { status: 400 });
+    }
 
-  const deploymentService = new DeploymentService();
+    const deploymentService = new DeploymentService();
 
-  return await deploymentService
-    .progressDeployment(authenticatedEnv, deploymentId, {
-      contentHash: body.data.contentHash,
-      git: body.data.gitMeta,
-      runtime: body.data.runtime,
-      buildServerMetadata: body.data.buildServerMetadata,
-    })
-    .match(
-      () => {
-        return new Response(null, { status: 204 });
-      },
-      (error) => {
-        switch (error.type) {
-          case "failed_to_extend_deployment_timeout": {
-            logger.warn("Failed to extend deployment timeout", { error: error.cause });
-            return new Response(null, { status: 204 }); // ignore these errors for now
+    return await deploymentService
+      .progressDeployment(authenticatedEnv, deploymentId, {
+        contentHash: body.data.contentHash,
+        git: body.data.gitMeta,
+        runtime: body.data.runtime,
+        buildServerMetadata: body.data.buildServerMetadata,
+      })
+      .match(
+        () => {
+          return new Response(null, { status: 204 });
+        },
+        (error) => {
+          switch (error.type) {
+            case "failed_to_extend_deployment_timeout": {
+              logger.warn("Failed to extend deployment timeout", { error: error.cause });
+              return new Response(null, { status: 204 }); // ignore these errors for now
+            }
+            case "deployment_not_found":
+              return json({ error: "Deployment not found" }, { status: 404 });
+            case "deployment_cannot_be_progressed":
+              return json(
+                { error: "Deployment is not in a progressable state (PENDING or INSTALLING)" },
+                { status: 409 }
+              );
+            case "failed_to_create_remote_build": {
+              logger.error("Failed to create remote Depot build", { error: error.cause });
+              return json({ error: "Failed to create remote build" }, { status: 500 });
+            }
+            case "other":
+            default:
+              error.type satisfies "other";
+              // Log the unexpected failure so this 500 is diagnosable from server logs
+              logger.error("Failed to progress deployment", { error });
+              return json({ error: "Internal server error" }, { status: 500 });
           }
-          case "deployment_not_found":
-            return json({ error: "Deployment not found" }, { status: 404 });
-          case "deployment_cannot_be_progressed":
-            return json(
-              { error: "Deployment is not in a progressable state (PENDING or INSTALLING)" },
-              { status: 409 }
-            );
-          case "failed_to_create_remote_build": {
-            logger.error("Failed to create remote Depot build", { error: error.cause });
-            return json({ error: "Failed to create remote build" }, { status: 500 });
-          }
-          case "other":
-          default:
-            error.type satisfies "other";
-            return json({ error: "Internal server error" }, { status: 500 });
         }
-      }
-    );
+      );
+  } catch (error) {
+    if (error instanceof Response) throw error;
+    logger.error("Failed to progress deployment", { error });
+    return json({ error: "Internal Server Error" }, { status: 500 });
+  }
 }
diff --git a/apps/webapp/app/routes/api.v1.deployments.$deploymentId.ts b/apps/webapp/app/routes/api.v1.deployments.$deploymentId.ts
index d0593e564fd..dd16fabf24c 100644
--- a/apps/webapp/app/routes/api.v1.deployments.$deploymentId.ts
+++ b/apps/webapp/app/routes/api.v1.deployments.$deploymentId.ts
@@ -16,70 +16,76 @@ export async function loader({ request, params }: LoaderFunctionArgs) {
     return json({ error: "Invalid params" }, { status: 400 });
   }
 
-  // Next authenticate the request
-  const authenticationResult = await authenticateApiRequest(request);
+  try {
+    // Next authenticate the request
+    const authenticationResult = await authenticateApiRequest(request);
 
-  if (!authenticationResult) {
-    logger.info("Invalid or missing api key", { url: request.url });
-    return json({ error: "Invalid or Missing API key" }, { status: 401 });
-  }
+    if (!authenticationResult) {
+      logger.info("Invalid or missing api key", { url: request.url });
+      return json({ error: "Invalid or Missing API key" }, { status: 401 });
+    }
 
-  const authenticatedEnv = authenticationResult.environment;
+    const authenticatedEnv = authenticationResult.environment;
 
-  const { deploymentId } = parsedParams.data;
+    const { deploymentId } = parsedParams.data;
 
-  const deployment = await prisma.workerDeployment.findFirst({
-    where: {
-      friendlyId: deploymentId,
-      environmentId: authenticatedEnv.id,
-    },
-    include: {
-      worker: {
-        include: {
-          tasks: true,
+    const deployment = await prisma.workerDeployment.findFirst({
+      where: {
+        friendlyId: deploymentId,
+        environmentId: authenticatedEnv.id,
+      },
+      include: {
+        worker: {
+          include: {
+            tasks: true,
+          },
         },
+        integrationDeployments: true,
       },
-      integrationDeployments: true,
-    },
-  });
+    });
 
-  if (!deployment) {
-    return json({ error: "Deployment not found" }, { status: 404 });
-  }
+    if (!deployment) {
+      return json({ error: "Deployment not found" }, { status: 404 });
+    }
 
-  return json({
-    id: deployment.friendlyId,
-    status: deployment.status,
-    contentHash: deployment.contentHash,
-    shortCode: deployment.shortCode,
-    version: deployment.version,
-    imageReference: deployment.imageReference,
-    imagePlatform: deployment.imagePlatform,
-    commitSHA: deployment.commitSHA,
-    externalBuildData:
-      deployment.externalBuildData as GetDeploymentResponseBody["externalBuildData"],
-    errorData: deployment.errorData as GetDeploymentResponseBody["errorData"],
-    worker: deployment.worker
-      ? {
-          id: deployment.worker.friendlyId,
-          version: deployment.worker.version,
-          tasks: deployment.worker.tasks.map((task) => ({
-            id: task.friendlyId,
-            slug: task.slug,
-            filePath: task.filePath,
-            exportName: task.exportName ?? "@deprecated",
-          })),
-        }
-      : undefined,
-    integrationDeployments:
-      deployment.integrationDeployments.length > 0
-        ? deployment.integrationDeployments.map((id) => ({
-            id: id.id,
-            integrationName: id.integrationName,
-            integrationDeploymentId: id.integrationDeploymentId,
-            commitSHA: id.commitSHA,
-            createdAt: id.createdAt,
-          }))
+    return json({
+      id: deployment.friendlyId,
+      status: deployment.status,
+      contentHash: deployment.contentHash,
+      shortCode: deployment.shortCode,
+      version: deployment.version,
+      imageReference: deployment.imageReference,
+      imagePlatform: deployment.imagePlatform,
+      commitSHA: deployment.commitSHA,
+      externalBuildData:
+        deployment.externalBuildData as GetDeploymentResponseBody["externalBuildData"],
+      errorData: deployment.errorData as GetDeploymentResponseBody["errorData"],
+      worker: deployment.worker
+        ? {
+            id: deployment.worker.friendlyId,
+            version: deployment.worker.version,
+            tasks: deployment.worker.tasks.map((task) => ({
+              id: task.friendlyId,
+              slug: task.slug,
+              filePath: task.filePath,
+              exportName: task.exportName ?? "@deprecated",
+            })),
+          }
         : undefined,
-  } satisfies GetDeploymentResponseBody);
+      integrationDeployments:
+        deployment.integrationDeployments.length > 0
+          ? deployment.integrationDeployments.map((id) => ({
+              id: id.id,
+              integrationName: id.integrationName,
+              integrationDeploymentId: id.integrationDeploymentId,
+              commitSHA: id.commitSHA,
+              createdAt: id.createdAt,
+            }))
+          : undefined,
+    } satisfies GetDeploymentResponseBody);
+  } catch (error) {
+    if (error instanceof Response) throw error;
+    logger.error("Failed to load deployment", { error });
+    return json({ error: "Internal Server Error" }, { status: 500 });
+  }
 }
diff --git a/apps/webapp/app/routes/api.v1.deployments.$deploymentVersion.promote.ts b/apps/webapp/app/routes/api.v1.deployments.$deploymentVersion.promote.ts
index 893b260dc82..9f3b4cad185 100644
--- a/apps/webapp/app/routes/api.v1.deployments.$deploymentVersion.promote.ts
+++ b/apps/webapp/app/routes/api.v1.deployments.$deploymentVersion.promote.ts
@@ -22,49 +22,56 @@ export async function action({ request, params }: ActionFunctionArgs) {
     return json({ error: "Invalid params" }, { status: 400 });
   }
 
-  // Next authenticate the request
-  const authenticationResult = await authenticateApiRequest(request);
+  try {
+    // Next authenticate the request
+    const authenticationResult = await authenticateApiRequest(request);
 
-  if (!authenticationResult) {
-    logger.info("Invalid or missing api key", { url: request.url });
-    return json({ error: "Invalid or Missing API key" }, { status: 401 });
-  }
+    if (!authenticationResult) {
+      logger.info("Invalid or missing api key", { url: request.url });
+      return json({ error: "Invalid or Missing API key" }, { status: 401 });
+    }
 
-  const authenticatedEnv = authenticationResult.environment;
+    const authenticatedEnv = authenticationResult.environment;
 
-  const url = new URL(request.url);
-  const allowRollbacks = url.searchParams.get("allowRollbacks") === "true";
+    const url = new URL(request.url);
+    const allowRollbacks = url.searchParams.get("allowRollbacks") === "true";
 
-  const { deploymentVersion } = parsedParams.data;
+    const { deploymentVersion } = parsedParams.data;
 
-  const deployment = await prisma.workerDeployment.findFirst({
-    where: {
-      version: deploymentVersion,
-      environmentId: authenticatedEnv.id,
-    },
-  });
+    const deployment = await prisma.workerDeployment.findFirst({
+      where: {
+        version: deploymentVersion,
+        environmentId: authenticatedEnv.id,
+      },
+    });
 
-  if (!deployment) {
-    return json({ error: "Deployment not found" }, { status: 404 });
-  }
+    if (!deployment) {
+      return json({ error: "Deployment not found" }, { status: 404 });
+    }
 
-  try {
-    const service = new ChangeCurrentDeploymentService();
-    await service.call(deployment, "promote", allowRollbacks);
+    try {
+      const service = new ChangeCurrentDeploymentService();
+      await service.call(deployment, "promote", allowRollbacks);
 
-    return json(
-      {
-        id: deployment.friendlyId,
-        version: deployment.version,
-        shortCode: deployment.shortCode,
-      },
-      { status: 200 }
-    );
-  } catch (error) {
-    if (error instanceof ServiceValidationError) {
-      return json({ error: error.message }, { status: 400 });
-    } else {
-      return json({ error: "Failed to promote deployment" }, { status: 500 });
+      return json(
+        {
+          id: deployment.friendlyId,
+          version: deployment.version,
+          shortCode: deployment.shortCode,
+        },
+        { status: 200 }
+      );
+    } catch (error) {
+      if (error instanceof ServiceValidationError) {
+        return json({ error: error.message }, { status: 400 });
+      }
+      // Log before answering 500 so unexpected service failures are not silently swallowed
+      logger.error("Failed to promote deployment", { error });
+      return json({ error: "Failed to promote deployment" }, { status: 500 });
     }
+  } catch (error) {
+    if (error instanceof Response) throw error;
+    logger.error("Failed to promote deployment", { error });
+    return json({ error: "Internal Server Error" }, { status: 500 });
   }
 }
diff --git a/apps/webapp/app/routes/api.v1.deployments.latest.ts b/apps/webapp/app/routes/api.v1.deployments.latest.ts
index 6f31f58fcc2..b8dcb667856 100644
--- a/apps/webapp/app/routes/api.v1.deployments.latest.ts
+++ b/apps/webapp/app/routes/api.v1.deployments.latest.ts
@@ -5,37 +5,43 @@ import { authenticateApiRequest } from "~/services/apiAuth.server";
 import { logger } from "~/services/logger.server";
 
 export async function loader({ request }: LoaderFunctionArgs) {
-  // Next authenticate the request
-  const authenticationResult = await authenticateApiRequest(request);
+  try {
+    // Next authenticate the request
+    const authenticationResult = await authenticateApiRequest(request);
 
-  if (!authenticationResult) {
-    logger.info("Invalid or missing api key", { url: request.url });
-    return json({ error: "Invalid or Missing API key" }, { status: 401 });
-  }
+    if (!authenticationResult) {
+      logger.info("Invalid or missing api key", { url: request.url });
+      return json({ error: "Invalid or Missing API key" }, { status: 401 });
+    }
 
-  const authenticatedEnv = authenticationResult.environment;
+    const authenticatedEnv = authenticationResult.environment;
 
-  const deployment = await prisma.workerDeployment.findFirst({
-    where: {
-      type: WorkerInstanceGroupType.UNMANAGED,
-      environmentId: authenticatedEnv.id,
-    },
-    orderBy: {
-      createdAt: "desc",
-    },
-  });
+    const deployment = await prisma.workerDeployment.findFirst({
+      where: {
+        type: WorkerInstanceGroupType.UNMANAGED,
+        environmentId: authenticatedEnv.id,
+      },
+      orderBy: {
+        createdAt: "desc",
+      },
+    });
 
-  if (!deployment) {
-    return json({ error: "Deployment not found" }, { status: 404 });
-  }
+    if (!deployment) {
+      return json({ error: "Deployment not found" }, { status: 404 });
+    }
 
-  return json({
-    id: deployment.friendlyId,
-    status: deployment.status,
-    contentHash: deployment.contentHash,
-    shortCode: deployment.shortCode,
-    version: deployment.version,
-    imageReference: deployment.imageReference,
-    errorData: deployment.errorData,
-  });
+    return json({
+      id: deployment.friendlyId,
+      status: deployment.status,
+      contentHash: deployment.contentHash,
+      shortCode: deployment.shortCode,
+      version: deployment.version,
+      imageReference: deployment.imageReference,
+      errorData: deployment.errorData,
+    });
+  } catch (error) {
+    if (error instanceof Response) throw error;
+    logger.error("Failed to load latest deployment", { error });
+    return json({ error: "Internal Server Error" }, { status: 500 });
+  }
 }
diff --git a/apps/webapp/app/routes/api.v1.orgs.$orgParam.projects.ts b/apps/webapp/app/routes/api.v1.orgs.$orgParam.projects.ts
index dc1791dabc5..47eb82c6930 100644
--- a/apps/webapp/app/routes/api.v1.orgs.$orgParam.projects.ts
+++ b/apps/webapp/app/routes/api.v1.orgs.$orgParam.projects.ts
@@ -20,17 +20,72 @@ const ParamsSchema = z.object({
 export async function loader({ request, params }: LoaderFunctionArgs) {
   logger.info("get projects", { url: request.url });
 
-  const authenticationResult = await authenticateApiRequestWithPersonalAccessToken(request);
+  try {
+    const authenticationResult = await authenticateApiRequestWithPersonalAccessToken(request);
+
+    if (!authenticationResult) {
+      return json({ error: "Invalid or Missing Access Token" }, { status: 401 });
+    }
+
+    const { orgParam } = ParamsSchema.parse(params);
+
+    const projects = await prisma.project.findMany({
+      where: {
+        organization: {
+          ...orgParamWhereClause(orgParam),
+          deletedAt: null,
+          members: {
+            some: {
+              userId: authenticationResult.userId,
+            },
+          },
+        },
+        version: "V3",
+        deletedAt: null,
+      },
+      include: {
+        organization: true,
+      },
+    });
+
+    if (!projects) {
+      return json({ error: "Projects not found" }, { status: 404 });
+    }
+
+    const result: GetProjectsResponseBody = projects.map((project) => ({
+      id: project.id,
+      externalRef: project.externalRef,
+      name: project.name,
+      slug: project.slug,
+      createdAt: project.createdAt,
+      organization: {
+        id: project.organization.id,
+        title: project.organization.title,
+        slug: project.organization.slug,
+        createdAt: project.organization.createdAt,
+      },
+    }));
 
-  if (!authenticationResult) {
-    return json({ error: "Invalid or Missing Access Token" }, { status: 401 });
+    return json(result);
+  } catch (error) {
+    if (error instanceof Response) throw error;
+    logger.error("Failed to list org projects", { error });
+    return json({ error: "Internal Server Error" }, { status: 500 });
   }
+}
 
-  const { orgParam } = ParamsSchema.parse(params);
+export async function action({ request, params }: ActionFunctionArgs) {
+  try {
+    const authenticationResult = await authenticateApiRequestWithPersonalAccessToken(request);
 
-  const projects = await prisma.project.findMany({
-    where: {
-      organization: {
+    if (!authenticationResult) {
+      return json({ error: "Invalid or Missing Access Token" }, { status: 401 });
+    }
+
+    const { orgParam } = ParamsSchema.parse(params);
+
+    const organization = await prisma.organization.findFirst({
+      where: {
         ...orgParamWhereClause(orgParam),
         deletedAt: null,
         members: {
@@ -39,95 +94,53 @@ export async function loader({ request, params }: LoaderFunctionArgs) {
           },
         },
       },
-      version: "V3",
-      deletedAt: null,
-    },
-    include: {
-      organization: true,
-    },
-  });
-
-  if (!projects) {
-    return json({ error: "Projects not found" }, { status: 404 });
-  }
-
-  const result: GetProjectsResponseBody = projects.map((project) => ({
-    id: project.id,
-    externalRef: project.externalRef,
-    name: project.name,
-    slug: project.slug,
-    createdAt: project.createdAt,
-    organization: {
-      id: project.organization.id,
-      title: project.organization.title,
-      slug: project.organization.slug,
-      createdAt: project.organization.createdAt,
-    },
-  }));
-
-  return json(result);
-}
-
-export async function action({ request, params }: ActionFunctionArgs) {
-  const authenticationResult = await authenticateApiRequestWithPersonalAccessToken(request);
-
-  if (!authenticationResult) {
-    return json({ error: "Invalid or Missing Access Token" }, { status: 401 });
-  }
-
-  const { orgParam } = ParamsSchema.parse(params);
-
-  const organization = await prisma.organization.findFirst({
-    where: {
-      ...orgParamWhereClause(orgParam),
-      deletedAt: null,
-      members: {
-        some: {
-          userId: authenticationResult.userId,
-        },
+    });
+
+    if (!organization) {
+      return json({ error: "Organization not found" }, { status: 404 });
+    }
+
+    const body = await request.json();
+    const parsedBody = CreateProjectRequestBody.safeParse(body);
+
+    if (!parsedBody.success) {
+      return json({ error: "Invalid request body" }, { status: 400 });
+    }
+
+    const [error, project] = await tryCatch(
+      createProject({
+        organizationSlug: organization.slug,
+        name: parsedBody.data.name,
+        userId: authenticationResult.userId,
+        version: "v3",
+      })
+    );
+
+    if (error) {
+      logger.error("Failed to create project", { error });
+      return json({ error: "Failed to create project" }, { status: 400 });
+    }
+
+    const result: GetProjectResponseBody = {
+      id: project.id,
+      externalRef: project.externalRef,
+      name: project.name,
+      slug: project.slug,
+      createdAt: project.createdAt,
+      organization: {
+        id: project.organization.id,
+        title: project.organization.title,
+        slug: project.organization.slug,
+        createdAt: project.organization.createdAt,
       },
-    },
-  });
-
-  if (!organization) {
-    return json({ error: "Organization not found" }, { status: 404 });
-  }
-
-  const body = await request.json();
-  const parsedBody = CreateProjectRequestBody.safeParse(body);
-
-  if (!parsedBody.success) {
-    return json({ error: "Invalid request body" }, { status: 400 });
-  }
+    };
 
-  const [error, project] = await tryCatch(
-    createProject({
-      organizationSlug: organization.slug,
-      name: parsedBody.data.name,
-      userId: authenticationResult.userId,
-      version: "v3",
-    })
-  );
-
-  if (error) {
-    return json({ error: error.message }, { status: 400 });
+    return json(result);
+  } catch (error) {
+    if (error instanceof Response) throw error;
+    logger.error("Failed to create org project", { error });
+    return json({ error: "Internal Server Error" }, { status: 500 });
   }
-
-  const result: GetProjectResponseBody = {
-    id: project.id,
-    externalRef: project.externalRef,
-    name: project.name,
-    slug: project.slug,
-    createdAt: project.createdAt,
-    organization: {
-      id: project.organization.id,
-      title: project.organization.title,
-      slug: project.organization.slug,
-      createdAt: project.organization.createdAt,
-    },
-  };
-
-  return json(result);
 }
 
 function orgParamWhereClause(orgParam: string) {
diff --git a/apps/webapp/app/routes/api.v1.orgs.$organizationSlug.projects.$projectParam.vercel.projects.ts b/apps/webapp/app/routes/api.v1.orgs.$organizationSlug.projects.$projectParam.vercel.projects.ts
index aaf54685888..6fc85d69cc3 100644
--- a/apps/webapp/app/routes/api.v1.orgs.$organizationSlug.projects.$projectParam.vercel.projects.ts
+++ b/apps/webapp/app/routes/api.v1.orgs.$organizationSlug.projects.$projectParam.vercel.projects.ts
@@ -29,119 +29,125 @@ export async function loader({ request, params }: LoaderFunctionArgs) {
     return apiCors(request, json({}));
   }
 
-  const authenticationResult = await authenticateApiRequestWithPersonalAccessToken(request);
-
-  if (!authenticationResult) {
-    return apiCors(
-      request,
-      json({ error: "Invalid or Missing Access Token" }, { status: 401 })
-    );
-  }
-
-  const parsedParams = ParamsSchema.safeParse(params);
-  if (!parsedParams.success) {
-    return apiCors(
-      request,
-      json({ error: "Invalid parameters" }, { status: 400 })
-    );
-  }
-
-  const { organizationSlug, projectParam } = parsedParams.data;
-
-  const result = await fromPromise(
-    (async () => {
-      // Find the project, verifying org membership
-      const project = await prisma.project.findFirst({
-        where: {
-          slug: projectParam,
-          organization: {
-            slug: organizationSlug,
-            members: {
-              some: {
-                userId: authenticationResult.userId,
+  try {
+    const authenticationResult = await authenticateApiRequestWithPersonalAccessToken(request);
+
+    if (!authenticationResult) {
+      return apiCors(
+        request,
+        json({ error: "Invalid or Missing Access Token" }, { status: 401 })
+      );
+    }
+
+    const parsedParams = ParamsSchema.safeParse(params);
+    if (!parsedParams.success) {
+      return apiCors(
+        request,
+        json({ error: "Invalid parameters" }, { status: 400 })
+      );
+    }
+
+    const { organizationSlug, projectParam } = parsedParams.data;
+
+    const result = await fromPromise(
+      (async () => {
+        // Find the project, verifying org membership
+        const project = await prisma.project.findFirst({
+          where: {
+            slug: projectParam,
+            organization: {
+              slug: organizationSlug,
+              members: {
+                some: {
+                  userId: authenticationResult.userId,
+                },
               },
             },
+            deletedAt: null,
           },
-          deletedAt: null,
-        },
-        select: {
-          id: true,
-          name: true,
-          slug: true,
-          organizationId: true,
-        },
-      });
-
-      if (!project) {
-        return { type: "not_found" as const };
-      }
-
-      // Get Vercel integration for the project
-      const vercelService = new VercelIntegrationService();
-      const integration = await vercelService.getVercelProjectIntegration(project.id);
+          select: {
+            id: true,
+            name: true,
+            slug: true,
+            organizationId: true,
+          },
+        });
 
-      return { type: "success" as const, project, integration };
-    })(),
-    (error) => error
-  );
+        if (!project) {
+          return { type: "not_found" as const };
+        }
 
-  if (result.isErr()) {
-    logger.error("Failed to fetch Vercel projects", {
-      error: result.error,
-      organizationSlug,
-      projectParam,
-    });
+        // Get Vercel integration for the project
+        const vercelService = new VercelIntegrationService();
+        const integration = await vercelService.getVercelProjectIntegration(project.id);
 
-    return apiCors(
-      request,
-      json({ error: "Internal server error" }, { status: 500 })
+        return { type: "success" as const, project, integration };
+      })(),
+      (error) => error
     );
-  }
 
-  if (result.value.type === "not_found") {
-    return apiCors(
-      request,
-      json({ error: "Project not found" }, { status: 404 })
-    );
-  }
+    if (result.isErr()) {
+      logger.error("Failed to fetch Vercel projects", {
+        error: result.error,
+        organizationSlug,
+        projectParam,
+      });
 
-  const { project, integration } = result.value;
+      return apiCors(
+        request,
+        json({ error: "Internal server error" }, { status: 500 })
+      );
+    }
+
+    if (result.value.type === "not_found") {
+      return apiCors(
+        request,
+        json({ error: "Project not found" }, { status: 404 })
+      );
+    }
+
+    const { project, integration } = result.value;
+
+    if (!integration) {
+      return apiCors(
+        request,
+        json({
+          connected: false,
+          vercelProject: null,
+          config: null,
+          syncEnvVarsMapping: null,
+        })
+      );
+    }
+
+    const { parsedIntegrationData } = integration;
 
-  if (!integration) {
     return apiCors(
       request,
       json({
-        connected: false,
-        vercelProject: null,
-        config: null,
-        syncEnvVarsMapping: null,
+        connected: true,
+        vercelProject: {
+          id: parsedIntegrationData.vercelProjectId,
+          name: parsedIntegrationData.vercelProjectName,
+          teamId: parsedIntegrationData.vercelTeamId,
+        },
+        config: {
+          atomicBuilds: parsedIntegrationData.config.atomicBuilds,
+          pullEnvVarsBeforeBuild: parsedIntegrationData.config.pullEnvVarsBeforeBuild,
+          vercelStagingEnvironment: parsedIntegrationData.config.vercelStagingEnvironment,
+        },
+        syncEnvVarsMapping: parsedIntegrationData.syncEnvVarsMapping,
+        triggerProject: {
+          id: project.id,
+          name: project.name,
+          slug: project.slug,
+        },
       })
     );
+  } catch (error) {
+    if (error instanceof Response) throw error;
+    logger.error("Failed to fetch Vercel projects", { error });
+    return apiCors(request, json({ error: "Internal Server Error" }, { status: 500 }));
   }
-
-  const { parsedIntegrationData } = integration;
-
-  return apiCors(
-    request,
-    json({
-      connected: true,
-      vercelProject: {
-        id: parsedIntegrationData.vercelProjectId,
-        name: parsedIntegrationData.vercelProjectName,
-        teamId: parsedIntegrationData.vercelTeamId,
-      },
-      config: {
-        atomicBuilds: parsedIntegrationData.config.atomicBuilds,
-        pullEnvVarsBeforeBuild: parsedIntegrationData.config.pullEnvVarsBeforeBuild,
-        vercelStagingEnvironment: parsedIntegrationData.config.vercelStagingEnvironment,
-      },
-      syncEnvVarsMapping: parsedIntegrationData.syncEnvVarsMapping,
-      triggerProject: {
-        id: project.id,
-        name: project.name,
-        slug: project.slug,
-      },
-    })
-  );
 }
 
diff --git a/apps/webapp/app/routes/api.v1.orgs.ts b/apps/webapp/app/routes/api.v1.orgs.ts
index 626162f234b..31ef3783f3e 100644
--- a/apps/webapp/app/routes/api.v1.orgs.ts
+++ b/apps/webapp/app/routes/api.v1.orgs.ts
@@ -2,36 +2,43 @@ import type { LoaderFunctionArgs } from "@remix-run/server-runtime";
 import { json } from "@remix-run/server-runtime";
 import { GetOrgsResponseBody } from "@trigger.dev/core/v3";
 import { prisma } from "~/db.server";
+import { logger } from "~/services/logger.server";
 import { authenticateApiRequestWithPersonalAccessToken } from "~/services/personalAccessToken.server";
 
 export async function loader({ request }: LoaderFunctionArgs) {
-  const authenticationResult = await authenticateApiRequestWithPersonalAccessToken(request);
+  try {
+    const authenticationResult = await authenticateApiRequestWithPersonalAccessToken(request);
 
-  if (!authenticationResult) {
-    return json({ error: "Invalid or Missing Access Token" }, { status: 401 });
-  }
+    if (!authenticationResult) {
+      return json({ error: "Invalid or Missing Access Token" }, { status: 401 });
+    }
 
-  const orgs = await prisma.organization.findMany({
-    where: {
-      deletedAt: null,
-      members: {
-        some: {
-          userId: authenticationResult.userId,
+    const orgs = await prisma.organization.findMany({
+      where: {
+        deletedAt: null,
+        members: {
+          some: {
+            userId: authenticationResult.userId,
+          },
         },
       },
-    },
-  });
+    });
 
-  if (!orgs) {
-    return json({ error: "Orgs not found" }, { status: 404 });
-  }
+    if (!orgs) {
+      return json({ error: "Orgs not found" }, { status: 404 });
+    }
 
-  const result: GetOrgsResponseBody = orgs.map((org) => ({
-    id: org.id,
-    title: org.title,
-    slug: org.slug,
-    createdAt: org.createdAt,
-  }));
+    const result: GetOrgsResponseBody = orgs.map((org) => ({
+      id: org.id,
+      title: org.title,
+      slug: org.slug,
+      createdAt: org.createdAt,
+    }));
 
-  return json(result);
+    return json(result);
+  } catch (error) {
+    if (error instanceof Response) throw error;
+    logger.error("Failed to list orgs", { error });
+    return json({ error: "Internal Server Error" }, { status: 500 });
+  }
 }
diff --git a/apps/webapp/app/routes/api.v1.projects.$projectRef.$env.jwt.ts b/apps/webapp/app/routes/api.v1.projects.$projectRef.$env.jwt.ts
index e4b48ece05e..b9cb61e1f20 100644
--- a/apps/webapp/app/routes/api.v1.projects.$projectRef.$env.jwt.ts
+++ b/apps/webapp/app/routes/api.v1.projects.$projectRef.$env.jwt.ts
@@ -5,6 +5,7 @@ import {
   authenticatedEnvironmentForAuthentication,
   authenticateRequest,
 } from "~/services/apiAuth.server";
+import { logger } from "~/services/logger.server";
 
 const ParamsSchema = z.object({
   projectRef: z.string(),
@@ -21,52 +22,58 @@ const RequestBodySchema = z.object({
 });
 
 export async function action({ request, params }: ActionFunctionArgs) {
-  const authenticationResult = await authenticateRequest(request, {
-    personalAccessToken: true,
-    organizationAccessToken: true,
-    apiKey: false,
-  });
+  try {
+    const authenticationResult = await authenticateRequest(request, {
+      personalAccessToken: true,
+      organizationAccessToken: true,
+      apiKey: false,
+    });
 
-  if (!authenticationResult) {
-    return json({ error: "Invalid or Missing Access Token" }, { status: 401 });
-  }
+    if (!authenticationResult) {
+      return json({ error: "Invalid or Missing Access Token" }, { status: 401 });
+    }
 
-  const parsedParams = ParamsSchema.safeParse(params);
+    const parsedParams = ParamsSchema.safeParse(params);
 
-  if (!parsedParams.success) {
-    return json({ error: "Invalid Params" }, { status: 400 });
-  }
+    if (!parsedParams.success) {
+      return json({ error: "Invalid Params" }, { status: 400 });
+    }
 
-  const { projectRef, env } = parsedParams.data;
-  const triggerBranch = request.headers.get("x-trigger-branch") ?? undefined;
+    const { projectRef, env } = parsedParams.data;
+    const triggerBranch = request.headers.get("x-trigger-branch") ?? undefined;
 
-  const runtimeEnv = await authenticatedEnvironmentForAuthentication(
-    authenticationResult,
-    projectRef,
-    env,
-    triggerBranch
-  );
+    const runtimeEnv = await authenticatedEnvironmentForAuthentication(
+      authenticationResult,
+      projectRef,
+      env,
+      triggerBranch
+    );
 
-  const parsedBody = RequestBodySchema.safeParse(await request.json());
+    const parsedBody = RequestBodySchema.safeParse(await request.json());
 
-  if (!parsedBody.success) {
-    return json(
-      { error: "Invalid request body", issues: parsedBody.error.issues },
-      { status: 400 }
-    );
-  }
+    if (!parsedBody.success) {
+      return json(
+        { error: "Invalid request body", issues: parsedBody.error.issues },
+        { status: 400 }
+      );
+    }
 
-  const claims = {
-    sub: runtimeEnv.id,
-    pub: true,
-    ...parsedBody.data.claims,
-  };
+    const claims = {
+      sub: runtimeEnv.id,
+      pub: true,
+      ...parsedBody.data.claims,
+    };
 
-  const jwt = await internal_generateJWT({
-    secretKey: runtimeEnv.apiKey,
-    payload: claims,
-    expirationTime: parsedBody.data.expirationTime ?? "1h",
-  });
+    const jwt = await internal_generateJWT({
+      secretKey: runtimeEnv.apiKey,
+      payload: claims,
+      expirationTime: parsedBody.data.expirationTime ?? "1h",
+    });
 
-  return json({ token: jwt });
+    return json({ token: jwt });
+  } catch (error) {
+    if (error instanceof Response) throw error;
+    logger.error("Failed to generate env JWT", { error });
+    return json({ error: "Internal Server Error" }, { status: 500 });
+  }
 }
diff --git a/apps/webapp/app/routes/api.v1.projects.$projectRef.$env.ts b/apps/webapp/app/routes/api.v1.projects.$projectRef.$env.ts
index e0349aab558..218cc580dd3 100644
--- a/apps/webapp/app/routes/api.v1.projects.$projectRef.$env.ts
+++ b/apps/webapp/app/routes/api.v1.projects.$projectRef.$env.ts
@@ -7,6 +7,7 @@ import {
   authenticateRequest,
   branchNameFromRequest,
 } from "~/services/apiAuth.server";
+import { logger } from "~/services/logger.server";
 
 const ParamsSchema = z.object({
   projectRef: z.string(),
@@ -24,25 +25,31 @@ export async function loader({ request, params }: LoaderFunctionArgs) {
 
   const { projectRef, env } = parsedParams.data;
 
-  const authenticationResult = await authenticateRequest(request);
-
-  if (!authenticationResult) {
-    return json({ error: "Invalid or Missing API key" }, { status: 401 });
+  try {
+    const authenticationResult = await authenticateRequest(request);
+
+    if (!authenticationResult) {
+      return json({ error: "Invalid or Missing API key" }, { status: 401 });
+    }
+
+    const environment = await authenticatedEnvironmentForAuthentication(
+      authenticationResult,
+      projectRef,
+      env,
+      branchNameFromRequest(request)
+    );
+
+    const result: GetProjectEnvResponse = {
+      apiKey: environment.apiKey,
+      name: environment.project.name,
+      apiUrl: processEnv.API_ORIGIN ?? processEnv.APP_ORIGIN,
+      projectId: environment.project.id,
+    };
+
+    return json(result);
+  } catch (error) {
+    if (error instanceof Response) throw error;
+    logger.error("Failed to load project env", { error });
+    return json({ error: "Internal Server Error" }, { status: 500 });
   }
-
-  const environment = await authenticatedEnvironmentForAuthentication(
-    authenticationResult,
-    projectRef,
-    env,
-    branchNameFromRequest(request)
-  );
-
-  const result: GetProjectEnvResponse = {
-    apiKey: environment.apiKey,
-    name: environment.project.name,
-    apiUrl: processEnv.API_ORIGIN ?? processEnv.APP_ORIGIN,
-    projectId: environment.project.id,
-  };
-
-  return json(result);
 }
diff --git a/apps/webapp/app/routes/api.v1.projects.$projectRef.$env.workers.$tagName.ts b/apps/webapp/app/routes/api.v1.projects.$projectRef.$env.workers.$tagName.ts
index ddb398b4c21..07774339dc8 100644
--- a/apps/webapp/app/routes/api.v1.projects.$projectRef.$env.workers.$tagName.ts
+++ b/apps/webapp/app/routes/api.v1.projects.$projectRef.$env.workers.$tagName.ts
@@ -9,6 +9,7 @@ import {
   authenticatedEnvironmentForAuthentication,
   authenticateRequest,
 } from "~/services/apiAuth.server";
+import { logger } from "~/services/logger.server";
 
 const ParamsSchema = z.object({
   projectRef: z.string(),
@@ -23,34 +24,37 @@ const HeadersSchema = z.object({
 type ParamsSchema = z.infer<typeof ParamsSchema>;
 
 export async function loader({ request, params }: LoaderFunctionArgs) {
-  const authenticationResult = await authenticateRequest(request, {
-    personalAccessToken: true,
-    organizationAccessToken: true,
-    apiKey: false,
-  });
+  try {
+    const authenticationResult = await authenticateRequest(request, {
+      personalAccessToken: true,
+      organizationAccessToken: true,
+      apiKey: false,
+    });
 
-  if (!authenticationResult) {
-    return json({ error: "Invalid or Missing Access Token" }, { status: 401 });
-  }
+    if (!authenticationResult) {
+      return json({ error: "Invalid or Missing Access Token" }, { status: 401 });
+    }
 
-  const parsedParams = ParamsSchema.safeParse(params);
+    const parsedParams = ParamsSchema.safeParse(params);
 
-  if (!parsedParams.success) {
-    return json({ error: "Invalid Params" }, { status: 400 });
-  }
-  const { projectRef, env } = parsedParams.data;
+    if (!parsedParams.success) {
+      return json({ error: "Invalid Params" }, { status: 400 });
+    }
+    const { projectRef, env } = parsedParams.data;
 
-  const parsedHeaders = HeadersSchema.safeParse(Object.fromEntries(request.headers));
-  const triggerBranch = parsedHeaders.success ? parsedHeaders.data["x-trigger-branch"] : undefined;
+    const parsedHeaders = HeadersSchema.safeParse(Object.fromEntries(request.headers));
+    const triggerBranch = parsedHeaders.success
+      ? parsedHeaders.data["x-trigger-branch"]
+      : undefined;
 
-  const runtimeEnv = await authenticatedEnvironmentForAuthentication(
-    authenticationResult,
-    projectRef,
-    env,
-    triggerBranch
-  );
+    const runtimeEnv = await authenticatedEnvironmentForAuthentication(
+      authenticationResult,
+      projectRef,
+      env,
+      triggerBranch
+    );
 
-  const currentWorker = await findCurrentWorkerFromEnvironment(
+    const currentWorker = await findCurrentWorkerFromEnvironment(
     {
       id: runtimeEnv.id,
       type: runtimeEnv.type,
@@ -109,5 +113,10 @@ export async function loader({ request, params }: LoaderFunctionArgs) {
     urls,
   };
 
-  return json(response);
+    return json(response);
+  } catch (error) {
+    if (error instanceof Response) throw error;
+    logger.error("Failed to load worker by tag", { error });
+    return json({ error: "Internal Server Error" }, { status: 500 });
+  }
 }
diff --git a/apps/webapp/app/routes/api.v1.projects.$projectRef.alertChannels.ts b/apps/webapp/app/routes/api.v1.projects.$projectRef.alertChannels.ts
index a2f2dcf417f..73597075615 100644
--- a/apps/webapp/app/routes/api.v1.projects.$projectRef.alertChannels.ts
+++ b/apps/webapp/app/routes/api.v1.projects.$projectRef.alertChannels.ts
@@ -15,81 +15,87 @@ const ParamsSchema = z.object({
 });
 
 export async function action({ request, params }: ActionFunctionArgs) {
-  const authenticationResult = await authenticateApiRequestWithPersonalAccessToken(request);
+  try {
+    const authenticationResult = await authenticateApiRequestWithPersonalAccessToken(request);
 
-  if (!authenticationResult) {
-    return json({ error: "Invalid or Missing Access Token" }, { status: 401 });
-  }
+    if (!authenticationResult) {
+      return json({ error: "Invalid or Missing Access Token" }, { status: 401 });
+    }
 
-  const parsedParams = ParamsSchema.safeParse(params);
+    const parsedParams = ParamsSchema.safeParse(params);
 
-  if (!parsedParams.success) {
-    return json({ error: "Invalid Params" }, { status: 400 });
-  }
-
-  const { projectRef } = parsedParams.data;
+    if (!parsedParams.success) {
+      return json({ error: "Invalid Params" }, { status: 400 });
+    }
 
-  const rawBody = await request.json();
+    const { projectRef } = parsedParams.data;
 
-  const body = ApiCreateAlertChannel.safeParse(rawBody);
+    const rawBody = await request.json();
 
-  if (!body.success) {
-    return json({ error: "Invalid request body", issues: body.error.issues }, { status: 400 });
-  }
+    const body = ApiCreateAlertChannel.safeParse(rawBody);
 
-  const service = new CreateAlertChannelService();
+    if (!body.success) {
+      return json({ error: "Invalid request body", issues: body.error.issues }, { status: 400 });
+    }
 
-  try {
-    if (body.data.channel === "email") {
-      if (!body.data.channelData.email) {
-        return json({ error: "Email is required" }, { status: 422 });
+    const service = new CreateAlertChannelService();
+
+    try {
+      if (body.data.channel === "email") {
+        if (!body.data.channelData.email) {
+          return json({ error: "Email is required" }, { status: 422 });
+        }
+
+        const alertChannel = await service.call(projectRef, authenticationResult.userId, {
+          name: body.data.name,
+          alertTypes: body.data.alertTypes.map((type) =>
+            ApiAlertChannelPresenter.alertTypeFromApi(type)
+          ),
+          channel: {
+            type: "EMAIL",
+            email: body.data.channelData.email,
+          },
+          deduplicationKey: body.data.deduplicationKey,
+          environmentTypes: body.data.environmentTypes,
+        });
+
+        return json(await ApiAlertChannelPresenter.alertChannelToApi(alertChannel));
       }
 
-      const alertChannel = await service.call(projectRef, authenticationResult.userId, {
-        name: body.data.name,
-        alertTypes: body.data.alertTypes.map((type) =>
-          ApiAlertChannelPresenter.alertTypeFromApi(type)
-        ),
-        channel: {
-          type: "EMAIL",
-          email: body.data.channelData.email,
-        },
-        deduplicationKey: body.data.deduplicationKey,
-        environmentTypes: body.data.environmentTypes,
-      });
-
-      return json(await ApiAlertChannelPresenter.alertChannelToApi(alertChannel));
-    }
+      if (body.data.channel === "webhook") {
+        if (!body.data.channelData.url) {
+          return json({ error: "webhook url is required" }, { status: 422 });
+        }
+
+        const alertChannel = await service.call(projectRef, authenticationResult.userId, {
+          name: body.data.name,
+          alertTypes: body.data.alertTypes.map((type) =>
+            ApiAlertChannelPresenter.alertTypeFromApi(type)
+          ),
+          channel: {
+            type: "WEBHOOK",
+            url: body.data.channelData.url,
+            secret: body.data.channelData.secret,
+          },
+          deduplicationKey: body.data.deduplicationKey,
+          environmentTypes: body.data.environmentTypes,
+        });
+
+        return json(await ApiAlertChannelPresenter.alertChannelToApi(alertChannel));
+      }
 
-    if (body.data.channel === "webhook") {
-      if (!body.data.channelData.url) {
-        return json({ error: "webhook url is required" }, { status: 422 });
+      return json({ error: "Invalid channel type" }, { status: 422 });
+    } catch (error) {
+      if (error instanceof ServiceValidationError) {
+        return json({ error: error.message }, { status: 422 });
       }
 
-      const alertChannel = await service.call(projectRef, authenticationResult.userId, {
-        name: body.data.name,
-        alertTypes: body.data.alertTypes.map((type) =>
-          ApiAlertChannelPresenter.alertTypeFromApi(type)
-        ),
-        channel: {
-          type: "WEBHOOK",
-          url: body.data.channelData.url,
-          secret: body.data.channelData.secret,
-        },
-        deduplicationKey: body.data.deduplicationKey,
-        environmentTypes: body.data.environmentTypes,
-      });
-
-      return json(await ApiAlertChannelPresenter.alertChannelToApi(alertChannel));
+      logger.error("Failed to create alert channel", { error });
+      return json({ error: "Something went wrong, please try again." }, { status: 500 });
     }
-
-    return json({ error: "Invalid channel type" }, { status: 422 });
   } catch (error) {
-    if (error instanceof ServiceValidationError) {
-      return json({ error: error.message }, { status: 422 });
-    }
-
-    logger.error("Failed to create alert channel", { error });
-    return json({ error: "Something went wrong, please try again." }, { status: 500 });
+    if (error instanceof Response) throw error;
+    logger.error("Failed to create alert channel (outer)", { error });
+    return json({ error: "Internal Server Error" }, { status: 500 });
   }
 }
diff --git a/apps/webapp/app/routes/api.v1.projects.$projectRef.background-workers.$envSlug.$version.ts b/apps/webapp/app/routes/api.v1.projects.$projectRef.background-workers.$envSlug.$version.ts
index 6b044c9e833..e09bc510d91 100644
--- a/apps/webapp/app/routes/api.v1.projects.$projectRef.background-workers.$envSlug.$version.ts
+++ b/apps/webapp/app/routes/api.v1.projects.$projectRef.background-workers.$envSlug.$version.ts
@@ -6,6 +6,7 @@ import {
   authenticatedEnvironmentForAuthentication,
   branchNameFromRequest,
 } from "~/services/apiAuth.server";
+import { logger } from "~/services/logger.server";
 import zlib from "node:zlib";
 
 const ParamsSchema = z.object({
@@ -21,44 +22,45 @@ export async function loader({ params, request }: LoaderFunctionArgs) {
     return json({ error: "Invalid params" }, { status: 400 });
   }
 
-  const authenticationResult = await authenticateRequest(request);
+  try {
+    const authenticationResult = await authenticateRequest(request);
 
-  if (!authenticationResult) {
-    return json({ error: "Invalid or Missing API key" }, { status: 401 });
-  }
+    if (!authenticationResult) {
+      return json({ error: "Invalid or Missing API key" }, { status: 401 });
+    }
 
-  const environment = await authenticatedEnvironmentForAuthentication(
-    authenticationResult,
-    parsedParams.data.projectRef,
-    parsedParams.data.envSlug,
-    branchNameFromRequest(request)
-  );
+    const environment = await authenticatedEnvironmentForAuthentication(
+      authenticationResult,
+      parsedParams.data.projectRef,
+      parsedParams.data.envSlug,
+      branchNameFromRequest(request)
+    );
 
-  // Find the background worker and tasks and files
-  const backgroundWorker = await prisma.backgroundWorker.findFirst({
-    where: {
-      runtimeEnvironmentId: environment.id,
-      version: parsedParams.data.version,
-    },
-    include: {
-      tasks: true,
-      files: {
-        include: {
-          tasks: {
-            select: {
-              slug: true,
+    // Find the background worker and tasks and files
+    const backgroundWorker = await prisma.backgroundWorker.findFirst({
+      where: {
+        runtimeEnvironmentId: environment.id,
+        version: parsedParams.data.version,
+      },
+      include: {
+        tasks: true,
+        files: {
+          include: {
+            tasks: {
+              select: {
+                slug: true,
+              },
             },
           },
         },
       },
-    },
-  });
+    });
 
-  if (!backgroundWorker) {
-    return json({ error: "Background worker not found" }, { status: 404 });
-  }
+    if (!backgroundWorker) {
+      return json({ error: "Background worker not found" }, { status: 404 });
+    }
 
-  return json({
+    return json({
     id: backgroundWorker.friendlyId,
     version: backgroundWorker.version,
     cliVersion: backgroundWorker.cliVersion,
@@ -74,14 +76,19 @@ export async function loader({ params, request }: LoaderFunctionArgs) {
       retryConfig: task.retryConfig,
       queueConfig: task.queueConfig,
     })),
-    files: backgroundWorker.files.map((file) => ({
-      id: file.friendlyId,
-      filePath: file.filePath,
-      contentHash: file.contentHash,
-      contents: decompressContent(file.contents),
-      tasks: Array.from(new Set(file.tasks.map((task) => task.slug))),
-    })),
-  });
+      files: backgroundWorker.files.map((file) => ({
+        id: file.friendlyId,
+        filePath: file.filePath,
+        contentHash: file.contentHash,
+        contents: decompressContent(file.contents),
+        tasks: Array.from(new Set(file.tasks.map((task) => task.slug))),
+      })),
+    });
+  } catch (error) {
+    if (error instanceof Response) throw error;
+    logger.error("Failed to load background worker", { error });
+    return json({ error: "Internal Server Error" }, { status: 500 });
+  }
 }
 
 function decompressContent(compressedBuffer: Uint8Array): string {
diff --git a/apps/webapp/app/routes/api.v1.projects.$projectRef.background-workers.ts b/apps/webapp/app/routes/api.v1.projects.$projectRef.background-workers.ts
index bc9842f0afa..12e5eb24dde 100644
--- a/apps/webapp/app/routes/api.v1.projects.$projectRef.background-workers.ts
+++ b/apps/webapp/app/routes/api.v1.projects.$projectRef.background-workers.ts
@@ -25,53 +25,59 @@ export async function action({ request, params }: ActionFunctionArgs) {
     return json({ error: "Invalid params" }, { status: 400 });
   }
 
-  // Next authenticate the request
-  const authenticationResult = await authenticateApiRequest(request);
+  try {
+    // Next authenticate the request
+    const authenticationResult = await authenticateApiRequest(request);
 
-  if (!authenticationResult) {
-    logger.info("Invalid or missing api key", { url: request.url });
-    return json({ error: "Invalid or Missing API key" }, { status: 401 });
-  }
+    if (!authenticationResult) {
+      logger.info("Invalid or missing api key", { url: request.url });
+      return json({ error: "Invalid or Missing API key" }, { status: 401 });
+    }
 
-  const authenticatedEnv = authenticationResult.environment;
+    const authenticatedEnv = authenticationResult.environment;
 
-  const { projectRef } = parsedParams.data;
+    const { projectRef } = parsedParams.data;
 
-  const rawBody = await request.json();
-  const body = CreateBackgroundWorkerRequestBody.safeParse(rawBody);
+    const rawBody = await request.json();
+    const body = CreateBackgroundWorkerRequestBody.safeParse(rawBody);
 
-  if (!body.success) {
-    return json({ error: "Invalid body", issues: body.error.issues }, { status: 400 });
-  }
+    if (!body.success) {
+      return json({ error: "Invalid body", issues: body.error.issues }, { status: 400 });
+    }
 
-  const service = new CreateBackgroundWorkerService();
+    const service = new CreateBackgroundWorkerService();
 
-  try {
-    const backgroundWorker = await service.call(projectRef, authenticatedEnv, body.data);
+    try {
+      const backgroundWorker = await service.call(projectRef, authenticatedEnv, body.data);
 
-    return json(
-      {
-        id: backgroundWorker.friendlyId,
-        version: backgroundWorker.version,
-        contentHash: backgroundWorker.contentHash,
-      },
-      { status: 200 }
-    );
-  } catch (e) {
-    // Customer-facing validation failures (invalid task config, customer cron
-    // expression, etc.). The handler returns 4xx with the message; system
-    // handles it gracefully, no alert needed.
-    if (e instanceof ServiceValidationError) {
-      logger.warn("Failed to create background worker", { error: e.message });
-      return json({ error: e.message }, { status: 400 });
-    }
-    if (e instanceof CreateDeclarativeScheduleError) {
-      logger.warn("Failed to create background worker", { error: e.message });
-      return json({ error: e.message }, { status: 400 });
-    }
+      return json(
+        {
+          id: backgroundWorker.friendlyId,
+          version: backgroundWorker.version,
+          contentHash: backgroundWorker.contentHash,
+        },
+        { status: 200 }
+      );
+    } catch (e) {
+      // Customer-facing validation failures (invalid task config, customer cron
+      // expression, etc.). The handler returns 4xx with the message; system
+      // handles it gracefully, no alert needed.
+      if (e instanceof ServiceValidationError) {
+        logger.warn("Failed to create background worker", { error: e.message });
+        return json({ error: e.message }, { status: 400 });
+      }
+      if (e instanceof CreateDeclarativeScheduleError) {
+        logger.warn("Failed to create background worker", { error: e.message });
+        return json({ error: e.message }, { status: 400 });
+      }
 
-    logger.error("Failed to create background worker", { error: e });
+      logger.error("Failed to create background worker", { error: e });
 
-    return json({ error: "Failed to create background worker" }, { status: 500 });
+      return json({ error: "Failed to create background worker" }, { status: 500 });
+    }
+  } catch (error) {
+    if (error instanceof Response) throw error;
+    logger.error("Failed to create project background worker", { error });
+    return json({ error: "Internal Server Error" }, { status: 500 });
   }
 }
diff --git a/apps/webapp/app/routes/api.v1.projects.$projectRef.dev-status.ts b/apps/webapp/app/routes/api.v1.projects.$projectRef.dev-status.ts
index f5f632a8223..7d536042fe6 100644
--- a/apps/webapp/app/routes/api.v1.projects.$projectRef.dev-status.ts
+++ b/apps/webapp/app/routes/api.v1.projects.$projectRef.dev-status.ts
@@ -5,37 +5,44 @@ import {
   authenticatedEnvironmentForAuthentication,
   authenticateRequest,
 } from "~/services/apiAuth.server";
+import { logger } from "~/services/logger.server";
 
 const ParamsSchema = z.object({
   projectRef: z.string(),
 });
 
 export async function loader({ request, params }: LoaderFunctionArgs) {
-  const authenticationResult = await authenticateRequest(request, {
-    personalAccessToken: true,
-    organizationAccessToken: true,
-    apiKey: false,
-  });
-
-  if (!authenticationResult) {
-    return json({ error: "Invalid or Missing Access Token" }, { status: 401 });
+  try { // Responds with { isConnected } as reported by devPresence for the project's "dev" environment.
+    const authenticationResult = await authenticateRequest(request, {
+      personalAccessToken: true,
+      organizationAccessToken: true,
+      apiKey: false, // API-key auth is switched off for this route
+    });
+
+    if (!authenticationResult) {
+      return json({ error: "Invalid or Missing Access Token" }, { status: 401 });
+    }
+
+    const parsedParams = ParamsSchema.safeParse(params);
+
+    if (!parsedParams.success) {
+      return json({ error: "Invalid Params" }, { status: 400 });
+    }
+
+    const { projectRef } = parsedParams.data;
+
+    const runtimeEnv = await authenticatedEnvironmentForAuthentication(
+      authenticationResult,
+      projectRef,
+      "dev"
+    );
+
+    const isConnected = await devPresence.isConnected(runtimeEnv.id);
+
+    return json({ isConnected });
+  } catch (error) {
+    if (error instanceof Response) throw error; // thrown Response objects pass through untouched
+    logger.error("Failed to load dev status", { error });
+    return json({ error: "Internal Server Error" }, { status: 500 }); // generic body; the cause is only logged
   }
-
-  const parsedParams = ParamsSchema.safeParse(params);
-
-  if (!parsedParams.success) {
-    return json({ error: "Invalid Params" }, { status: 400 });
-  }
-
-  const { projectRef } = parsedParams.data;
-
-  const runtimeEnv = await authenticatedEnvironmentForAuthentication(
-    authenticationResult,
-    projectRef,
-    "dev"
-  );
-
-  const isConnected = await devPresence.isConnected(runtimeEnv.id);
-
-  return json({ isConnected });
 }
diff --git a/apps/webapp/app/routes/api.v1.projects.$projectRef.envvars.$slug.$name.ts b/apps/webapp/app/routes/api.v1.projects.$projectRef.envvars.$slug.$name.ts
index e3081b090e3..00e155622ce 100644
--- a/apps/webapp/app/routes/api.v1.projects.$projectRef.envvars.$slug.$name.ts
+++ b/apps/webapp/app/routes/api.v1.projects.$projectRef.envvars.$slug.$name.ts
@@ -7,6 +7,7 @@ import {
   authenticatedEnvironmentForAuthentication,
   branchNameFromRequest,
 } from "~/services/apiAuth.server";
+import { logger } from "~/services/logger.server";
 import { EnvironmentVariablesRepository } from "~/v3/environmentVariables/environmentVariablesRepository.server";
 
 const ParamsSchema = z.object({
@@ -22,73 +23,89 @@ export async function action({ params, request }: ActionFunctionArgs) {
     return json({ error: "Invalid params" }, { status: 400 });
   }
 
-  const authenticationResult = await authenticateRequest(request);
+  try {
+    // DELETE removes this environment's value for the named variable and
+    // PUT/POST edit it; every other HTTP method is answered with a 405.
+    const authenticationResult = await authenticateRequest(request);
 
-  if (!authenticationResult) {
-    return json({ error: "Invalid or Missing API key" }, { status: 401 });
-  }
-
-  const environment = await authenticatedEnvironmentForAuthentication(
-    authenticationResult,
-    parsedParams.data.projectRef,
-    parsedParams.data.slug,
-    branchNameFromRequest(request)
-  );
-
-  // Find the environment variable
-  const variable = await prisma.environmentVariable.findFirst({
-    where: {
-      key: parsedParams.data.name,
-      projectId: environment.project.id,
-    },
-  });
-
-  if (!variable) {
-    return json({ error: "Environment variable not found" }, { status: 404 });
-  }
-
-  const repository = new EnvironmentVariablesRepository();
-
-  switch (request.method.toUpperCase()) {
-    case "DELETE": {
-      const result = await repository.deleteValue(environment.project.id, {
-        id: variable.id,
-        environmentId: environment.id,
-      });
+    if (!authenticationResult) {
+      return json({ error: "Invalid or Missing API key" }, { status: 401 });
+    }
 
-      if (result.success) {
-        return json({ success: true });
-      } else {
-        return json({ error: result.error }, { status: 400 });
-      }
+    const environment = await authenticatedEnvironmentForAuthentication(
+      authenticationResult,
+      parsedParams.data.projectRef,
+      parsedParams.data.slug,
+      branchNameFromRequest(request)
+    );
+
+    // Find the environment variable
+    const variable = await prisma.environmentVariable.findFirst({
+      where: {
+        key: parsedParams.data.name,
+        projectId: environment.project.id,
+      },
+    });
+
+    if (!variable) {
+      return json({ error: "Environment variable not found" }, { status: 404 });
     }
-    case "PUT":
-    case "POST": {
-      const jsonBody = await request.json();
 
-      const body = UpdateEnvironmentVariableRequestBody.safeParse(jsonBody);
+    const repository = new EnvironmentVariablesRepository();
 
-      if (!body.success) {
-        return json({ error: "Invalid request body", issues: body.error.issues }, { status: 400 });
-      }
+    switch (request.method.toUpperCase()) {
+      case "DELETE": {
+        const result = await repository.deleteValue(environment.project.id, {
+          id: variable.id,
+          environmentId: environment.id,
+        });
 
-      const result = await repository.edit(environment.project.id, {
-        values: [
-          {
-            value: body.data.value,
-            environmentId: environment.id,
-          },
-        ],
-        id: variable.id,
-        keepEmptyValues: true,
-      });
-
-      if (result.success) {
-        return json({ success: true });
-      } else {
-        return json({ error: result.error }, { status: 400 });
+        if (result.success) {
+          return json({ success: true });
+        } else {
+          return json({ error: result.error }, { status: 400 });
+        }
+      }
+      case "PUT":
+      case "POST": {
+        const jsonBody = await request.json();
+
+        const body = UpdateEnvironmentVariableRequestBody.safeParse(jsonBody);
+
+        if (!body.success) {
+          return json(
+            { error: "Invalid request body", issues: body.error.issues },
+            { status: 400 }
+          );
+        }
+
+        const result = await repository.edit(environment.project.id, {
+          values: [
+            {
+              value: body.data.value,
+              environmentId: environment.id,
+            },
+          ],
+          id: variable.id,
+          keepEmptyValues: true,
+        });
+
+        if (result.success) {
+          return json({ success: true });
+        } else {
+          return json({ error: result.error }, { status: 400 });
+        }
       }
+      // Without a default, any other method falls out of the switch and the
+      // action resolves to undefined instead of a Response.
+      default: {
+        return json({ error: "Method not allowed" }, { status: 405 });
+      }
     }
+  } catch (error) {
+    if (error instanceof Response) throw error; // thrown Response objects pass through untouched
+    logger.error("Failed to update environment variable", { error });
+    return json({ error: "Internal Server Error" }, { status: 500 });
   }
 }
 
@@ -99,48 +109,54 @@ export async function loader({ params, request }: LoaderFunctionArgs) {
     return json({ error: "Invalid params" }, { status: 400 });
   }
 
-  const authenticationResult = await authenticateRequest(request);
+  try { // Responds with { name, value, isSecret } for one variable of the resolved environment.
+    const authenticationResult = await authenticateRequest(request);
 
-  if (!authenticationResult) {
-    return json({ error: "Invalid or Missing API key" }, { status: 401 });
-  }
+    if (!authenticationResult) {
+      return json({ error: "Invalid or Missing API key" }, { status: 401 });
+    }
 
-  const environment = await authenticatedEnvironmentForAuthentication(
-    authenticationResult,
-    parsedParams.data.projectRef,
-    parsedParams.data.slug,
-    branchNameFromRequest(request)
-  );
-
-  // Find the environment variable
-  const variable = await prisma.environmentVariable.findFirst({
-    where: {
-      key: parsedParams.data.name,
-      projectId: environment.project.id,
-    },
-  });
-
-  if (!variable) {
-    return json({ error: "Environment variable not found" }, { status: 404 });
-  }
+    const environment = await authenticatedEnvironmentForAuthentication(
+      authenticationResult,
+      parsedParams.data.projectRef,
+      parsedParams.data.slug,
+      branchNameFromRequest(request)
+    );
+
+    // Existence check only: is this key defined anywhere in the project?
+    const variable = await prisma.environmentVariable.findFirst({
+      where: {
+        key: parsedParams.data.name,
+        projectId: environment.project.id,
+      },
+    });
+
+    if (!variable) {
+      return json({ error: "Environment variable not found" }, { status: 404 });
+    }
 
-  const repository = new EnvironmentVariablesRepository();
+    const repository = new EnvironmentVariablesRepository();
 
-  const variables = await repository.getEnvironmentWithRedactedSecrets(
-    environment.project.id,
-    environment.id,
-    environment.parentEnvironmentId ?? undefined
-  );
+    const variables = await repository.getEnvironmentWithRedactedSecrets( // presumably masks secret values — verify in the repository
+      environment.project.id,
+      environment.id,
+      environment.parentEnvironmentId ?? undefined // a null parent id is passed as undefined
+    );
 
-  const environmentVariable = variables.find((v) => v.key === parsedParams.data.name);
+    const environmentVariable = variables.find((v) => v.key === parsedParams.data.name);
 
-  if (!environmentVariable) {
-    return json({ error: "Environment variable not found" }, { status: 404 });
-  }
+    if (!environmentVariable) { // key exists in the project but is absent from this environment's list
+      return json({ error: "Environment variable not found" }, { status: 404 });
+    }
 
-  return json({
-    name: environmentVariable.key,
-    value: environmentVariable.value,
-    isSecret: environmentVariable.isSecret,
-  });
+    return json({
+      name: environmentVariable.key,
+      value: environmentVariable.value,
+      isSecret: environmentVariable.isSecret,
+    });
+  } catch (error) {
+    if (error instanceof Response) throw error; // thrown Response objects pass through untouched
+    logger.error("Failed to get environment variable", { error });
+    return json({ error: "Internal Server Error" }, { status: 500 }); // generic body; the cause is only logged
+  }
 }
diff --git a/apps/webapp/app/routes/api.v1.projects.ts b/apps/webapp/app/routes/api.v1.projects.ts
index 3a12417dce0..372a8108f41 100644
--- a/apps/webapp/app/routes/api.v1.projects.ts
+++ b/apps/webapp/app/routes/api.v1.projects.ts
@@ -8,47 +8,53 @@ import { authenticateApiRequestWithPersonalAccessToken } from "~/services/person
 export async function loader({ request }: LoaderFunctionArgs) {
   logger.info("get projects", { url: request.url });
 
-  const authenticationResult = await authenticateApiRequestWithPersonalAccessToken(request);
+  try {
+    // Lists the non-deleted V3 projects of every non-deleted organization
+    // that the personal access token's user is a member of.
+    const authenticationResult = await authenticateApiRequestWithPersonalAccessToken(request);
 
-  if (!authenticationResult) {
-    return json({ error: "Invalid or Missing Access Token" }, { status: 401 });
-  }
+    if (!authenticationResult) {
+      return json({ error: "Invalid or Missing Access Token" }, { status: 401 });
+    }
 
-  const projects = await prisma.project.findMany({
-    where: {
-      organization: {
-        deletedAt: null,
-        members: {
-          some: {
-            userId: authenticationResult.userId,
+    const projects = await prisma.project.findMany({
+      where: {
+        organization: {
+          deletedAt: null,
+          members: {
+            some: {
+              userId: authenticationResult.userId,
+            },
           },
         },
+        version: "V3",
+        deletedAt: null,
+      },
+      include: {
+        organization: true,
       },
-      version: "V3",
-      deletedAt: null,
-    },
-    include: {
-      organization: true,
-    },
-  });
+    });
 
-  if (!projects) {
-    return json({ error: "Projects not found" }, { status: 404 });
-  }
-
-  const result: GetProjectsResponseBody = projects.map((project) => ({
-    id: project.id,
-    externalRef: project.externalRef,
-    name: project.name,
-    slug: project.slug,
-    createdAt: project.createdAt,
-    organization: {
-      id: project.organization.id,
-      title: project.organization.title,
-      slug: project.organization.slug,
-      createdAt: project.organization.createdAt,
-    },
-  }));
+    // findMany resolves to an array (empty when nothing matches), never null,
+    // so no not-found branch is needed: an empty list is returned as [].
+    const result: GetProjectsResponseBody = projects.map((project) => ({
+      id: project.id,
+      externalRef: project.externalRef,
+      name: project.name,
+      slug: project.slug,
+      createdAt: project.createdAt,
+      organization: {
+        id: project.organization.id,
+        title: project.organization.title,
+        slug: project.organization.slug,
+        createdAt: project.organization.createdAt,
+      },
+    }));
 
-  return json(result);
+    return json(result);
+  } catch (error) {
+    if (error instanceof Response) throw error; // thrown Response objects pass through untouched
+    logger.error("Failed to list projects", { error });
+    return json({ error: "Internal Server Error" }, { status: 500 });
+  }
 }
diff --git a/apps/webapp/app/routes/api.v1.sessions.$sessionId.snapshot-url.ts b/apps/webapp/app/routes/api.v1.sessions.$sessionId.snapshot-url.ts
new file mode 100644
index 00000000000..537845d8b41
--- /dev/null
+++ b/apps/webapp/app/routes/api.v1.sessions.$sessionId.snapshot-url.ts
@@ -0,0 +1,69 @@
+import { json } from "@remix-run/server-runtime";
+import { z } from "zod";
+import { $replica } from "~/db.server";
+import { chatSnapshotStoragePathForSession } from "~/services/realtime/chatSnapshot.server";
+import { resolveSessionByIdOrExternalId } from "~/services/realtime/sessions.server";
+import {
+  createActionApiRoute,
+  createLoaderApiRoute,
+} from "~/services/routeBuilders/apiBuilder.server";
+import { generatePresignedUrl } from "~/v3/objectStore.server";
+
+const ParamsSchema = z.object({
+  sessionId: z.string(),
+});
+
+// `chatSnapshotStoragePath` is stamped on every new Session at row creation
+// (see api.v1.sessions.ts). The fallback handles sessions created before
+// the column existed — read against the currently-configured default
+// protocol and compute the same path the SDK uploaded under.
+function snapshotKey(session: { friendlyId: string; chatSnapshotStoragePath: string | null }) {
+  return session.chatSnapshotStoragePath ?? chatSnapshotStoragePathForSession(session.friendlyId);
+}
+
+const routeConfig = {
+  params: ParamsSchema,
+  allowJWT: true,
+  corsStrategy: "all" as const,
+  findResource: async (params: z.infer<typeof ParamsSchema>, auth: { environment: { id: string } }) =>
+    resolveSessionByIdOrExternalId($replica, auth.environment.id, params.sessionId),
+};
+
+// Shared body of both handlers: answers 404 when the session lookup found
+// nothing, otherwise presigns the snapshot object for `method` and returns the
+// URL (or a 500 carrying the signer's error when presigning fails).
+async function presignSnapshotUrl(
+  environment: { slug: string; project: { externalRef: string } },
+  session: Parameters<typeof snapshotKey>[0] | null | undefined,
+  method: "GET" | "PUT"
+) {
+  if (!session) {
+    return json({ error: "Session not found" }, { status: 404 });
+  }
+
+  const signed = await generatePresignedUrl(
+    environment.project.externalRef,
+    environment.slug,
+    snapshotKey(session),
+    method
+  );
+  if (!signed.success) {
+    return json({ error: `Failed to generate presigned URL: ${signed.error}` }, { status: 500 });
+  }
+
+  return json({ presignedUrl: signed.url });
+}
+
+// PUT: presigned upload URL for the session's chat snapshot.
+export const { action } = createActionApiRoute(
+  { ...routeConfig, method: "PUT" },
+  async ({ authentication, resource: session }) =>
+    presignSnapshotUrl(authentication.environment, session, "PUT")
+);
+
+// GET: presigned download URL for the same snapshot object.
+export const loader = createLoaderApiRoute(
+  routeConfig,
+  async ({ authentication, resource: session }) =>
+    presignSnapshotUrl(authentication.environment, session, "GET")
+);
diff --git a/apps/webapp/app/routes/api.v1.sessions.ts b/apps/webapp/app/routes/api.v1.sessions.ts
index 591a9fe5319..9b67c714127 100644
--- a/apps/webapp/app/routes/api.v1.sessions.ts
+++ b/apps/webapp/app/routes/api.v1.sessions.ts
@@ -17,6 +17,7 @@ import {
   ensureRunForSession,
   type SessionTriggerConfig,
 } from "~/services/realtime/sessionRunManager.server";
+import { chatSnapshotStoragePathForSession } from "~/services/realtime/chatSnapshot.server";
 import { serializeSession } from "~/services/realtime/sessions.server";
 import { SessionsRepository } from "~/services/sessionsRepository/sessionsRepository.server";
 import {
@@ -181,6 +182,7 @@ const { action } = createActionApiRoute(
             environmentType: authentication.environment.type,
             organizationId: authentication.environment.organizationId,
             streamBasinName: authentication.environment.organization.streamBasinName,
+            chatSnapshotStoragePath: chatSnapshotStoragePathForSession(friendlyId),
           },
           update: { triggerConfig: triggerConfigJson },
         });
@@ -201,6 +203,7 @@ const { action } = createActionApiRoute(
             environmentType: authentication.environment.type,
             organizationId: authentication.environment.organizationId,
             streamBasinName: authentication.environment.organization.streamBasinName,
+            chatSnapshotStoragePath: chatSnapshotStoragePathForSession(friendlyId),
           },
         });
       }
diff --git a/apps/webapp/app/routes/api.v1.whoami.ts b/apps/webapp/app/routes/api.v1.whoami.ts
index 0ebb70b4491..f0dea9a7a57 100644
--- a/apps/webapp/app/routes/api.v1.whoami.ts
+++ b/apps/webapp/app/routes/api.v1.whoami.ts
@@ -2,32 +2,39 @@ import type { LoaderFunctionArgs } from "@remix-run/server-runtime";
 import { json } from "@remix-run/server-runtime";
 import { prisma } from "~/db.server";
 import { authenticateApiRequest } from "~/services/apiAuth.server";
+import { logger } from "~/services/logger.server";
 
 export async function loader({ request }: LoaderFunctionArgs) {
-  // Next authenticate the request
-  const authenticationResult = await authenticateApiRequest(request);
+  try { // Responds with the authenticated environment plus the userId of its org member, if any.
+    // Authenticate the request
+    const authenticationResult = await authenticateApiRequest(request);
 
-  if (!authenticationResult) {
-    return json({ error: "Invalid or Missing API key" }, { status: 401 });
-  }
+    if (!authenticationResult) {
+      return json({ error: "Invalid or Missing API key" }, { status: 401 });
+    }
 
-  const environmentWithUser = await prisma.runtimeEnvironment.findUnique({
-    select: {
-      orgMember: {
-        select: {
-          userId: true,
+    const environmentWithUser = await prisma.runtimeEnvironment.findUnique({
+      select: {
+        orgMember: {
+          select: {
+            userId: true, // the only column read from the org member
+          },
         },
       },
-    },
-    where: {
-      id: authenticationResult.environment.id,
-    },
-  });
+      where: {
+        id: authenticationResult.environment.id,
+      },
+    });
 
-  const result = {
-    ...authenticationResult.environment,
-    userId: environmentWithUser?.orgMember?.userId,
-  };
+    const result = {
+      ...authenticationResult.environment, // NOTE(review): the whole environment object is serialized — confirm it carries nothing sensitive
+      userId: environmentWithUser?.orgMember?.userId, // undefined when the row or its org member is missing
+    };
 
-  return json(result);
+    return json(result);
+  } catch (error) {
+    if (error instanceof Response) throw error; // thrown Response objects pass through untouched
+    logger.error("Failed to load whoami", { error });
+    return json({ error: "Internal Server Error" }, { status: 500 }); // generic body; the cause is only logged
+  }
 }
diff --git a/apps/webapp/app/routes/login.magic/route.tsx b/apps/webapp/app/routes/login.magic/route.tsx
index 3ddbd47a4d0..06523b3d8c5 100644
--- a/apps/webapp/app/routes/login.magic/route.tsx
+++ b/apps/webapp/app/routes/login.magic/route.tsx
@@ -101,17 +101,32 @@ export async function action({ request }: ActionFunctionArgs) {
 
   const payload = Object.fromEntries(await clonedRequest.formData());
 
-  const data = z
+  const result = z
     .discriminatedUnion("action", [
       z.object({
         action: z.literal("send"),
-        email: z.string().trim().toLowerCase(),
+        email: z.string().trim().toLowerCase().email(),
       }),
       z.object({
         action: z.literal("reset"),
       }),
     ])
-    .parse(payload);
+    .safeParse(payload);
+
+  if (!result.success) {
+    const session = await getUserSession(request);
+    session.set("auth:error", {
+      message: "Please enter a valid email address.",
+    });
+
+    return redirect("/login/magic", {
+      headers: {
+        "Set-Cookie": await commitSession(session),
+      },
+    });
+  }
+
+  const data = result.data;
 
   switch (data.action) {
     case "send": {
diff --git a/apps/webapp/app/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.vercel.tsx b/apps/webapp/app/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.vercel.tsx
index a37d85b0a56..fdc6dfd8242 100644
--- a/apps/webapp/app/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.vercel.tsx
+++ b/apps/webapp/app/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.vercel.tsx
@@ -61,7 +61,7 @@ import {
   getAvailableEnvSlugsForBuildSettings,
 } from "~/v3/vercel/vercelProjectIntegrationSchema";
 import { Result, fromPromise } from "neverthrow";
-import { useEffect, useState } from "react";
+import { useEffect, useRef, useState } from "react";
 
 export type ConnectedVercelProject = {
   id: string;
@@ -92,6 +92,12 @@ function parseVercelStagingEnvironment(
   );
 }
 
+// Sentinel values for the clearTriggerVersion hidden input. Used by the schema transform,
+// the input's defaultValue, and the modal's submit helper — keep all three reading the same
+// constants so they cannot drift.
+const CLEAR_TRIGGER_VERSION_YES = "true";
+const CLEAR_TRIGGER_VERSION_NO = "false";
+
 const UpdateVercelConfigFormSchema = z.object({
   action: z.literal("update-config"),
   atomicBuilds: envSlugArrayField,
@@ -99,6 +105,10 @@ const UpdateVercelConfigFormSchema = z.object({
   discoverEnvVars: envSlugArrayField,
   vercelStagingEnvironment: z.string().nullable().optional(),
   autoPromote: z.string().optional().transform((val) => val !== "false"),
+  clearTriggerVersion: z
+    .string()
+    .optional()
+    .transform((val) => val === CLEAR_TRIGGER_VERSION_YES),
 });
 
 const DisconnectVercelFormSchema = z.object({
@@ -243,6 +253,7 @@ export async function action({ request, params }: ActionFunctionArgs) {
         discoverEnvVars,
         vercelStagingEnvironment,
         autoPromote,
+        clearTriggerVersion,
       } = submission.value;
 
       const parsedStagingEnv = parseVercelStagingEnvironment(vercelStagingEnvironment);
@@ -271,6 +282,21 @@ export async function action({ request, params }: ActionFunctionArgs) {
           );
         }
 
+        // When atomic deployments are being disabled and the user confirmed clearing the pin,
+        // remove TRIGGER_VERSION from Vercel production so future deploys don't stay pinned.
+        // If the Vercel API call fails we still consider the settings save itself successful,
+        // but tell the user so they can clear the env var manually from the Vercel dashboard.
+        if (clearTriggerVersion && !atomicBuilds?.includes("prod")) {
+          const cleared = await vercelService.clearTriggerVersionFromVercelProduction(project.id);
+          if (!cleared) {
+            return redirectWithErrorMessage(
+              settingsPath,
+              request,
+              "Vercel settings saved, but failed to clear TRIGGER_VERSION on Vercel — please remove it manually from your Vercel project settings."
+            );
+          }
+        }
+
         return redirectWithSuccessMessage(settingsPath, request, "Vercel settings updated successfully");
       }
 
@@ -573,6 +599,8 @@ function ConnectedVercelProjectForm({
   hasPreviewEnvironment,
   customEnvironments,
   autoAssignCustomDomains,
+  currentTriggerVersion,
+  currentTriggerVersionFetchFailed,
   organizationSlug,
   projectSlug,
   environmentSlug,
@@ -582,6 +610,8 @@ function ConnectedVercelProjectForm({
   hasPreviewEnvironment: boolean;
   customEnvironments: Array<{ id: string; slug: string }>;
   autoAssignCustomDomains: boolean | null;
+  currentTriggerVersion: string | null;
+  currentTriggerVersionFetchFailed: boolean;
   organizationSlug: string;
   projectSlug: string;
   environmentSlug: string;
@@ -645,6 +675,34 @@ function ConnectedVercelProjectForm({
     },
   });
 
+  const saveButtonRef = useRef<HTMLButtonElement>(null);
+  const clearTriggerVersionInputRef = useRef<HTMLInputElement>(null);
+  const [showClearDialog, setShowClearDialog] = useState(false);
+
+  // Modal trigger uses the page-load state of atomicBuilds, not whatever changed in-session,
+  // because clearing TRIGGER_VERSION only makes sense when atomic was actually on at load time.
+  // If the Vercel lookup failed we still prompt — we don't know whether a pin exists, so the
+  // user needs to make the call explicitly rather than silently leaving prod pinned.
+  const wasAtomicEnabledAtLoad = originalAtomicBuilds.includes("prod");
+  const isAtomicNowDisabled = !configValues.atomicBuilds.includes("prod");
+  const shouldPromptClearOnSave =
+    wasAtomicEnabledAtLoad &&
+    isAtomicNowDisabled &&
+    (Boolean(currentTriggerVersion) || currentTriggerVersionFetchFailed);
+
+  const submitWithClearChoice = (clear: boolean) => {
+    if (clearTriggerVersionInputRef.current) {
+      clearTriggerVersionInputRef.current.value = clear
+        ? CLEAR_TRIGGER_VERSION_YES
+        : CLEAR_TRIGGER_VERSION_NO;
+    }
+    setShowClearDialog(false);
+    // Conform owns the form's React ref via {...configForm.props}, so look it up by id
+    // (set via useForm({ id: "update-vercel-config" })) rather than fighting for the ref.
+    const form = document.getElementById("update-vercel-config") as HTMLFormElement | null;
+    form?.requestSubmit(saveButtonRef.current ?? undefined);
+  };
+
   const isConfigLoading =
     navigation.formData?.get("action") === "update-config" &&
     (navigation.state === "submitting" || navigation.state === "loading");
@@ -742,6 +800,13 @@ function ConnectedVercelProjectForm({
           name="autoPromote"
           value={String(configValues.autoPromote)}
         />
+        {/* Flipped to CLEAR_TRIGGER_VERSION_YES by the clear-pinned-version modal on submit. */}
+        <input
+          type="hidden"
+          name="clearTriggerVersion"
+          defaultValue={CLEAR_TRIGGER_VERSION_NO}
+          ref={clearTriggerVersionInputRef}
+        />
 
         <Fieldset>
           <InputGroup fullWidth>
@@ -819,6 +884,8 @@ function ConnectedVercelProjectForm({
                 onAutoPromoteChange={(value) =>
                   setConfigValues((prev) => ({ ...prev, autoPromote: value }))
                 }
+                currentTriggerVersion={currentTriggerVersion}
+                currentTriggerVersionFetchFailed={currentTriggerVersionFetchFailed}
                 hideSectionToggles
               />
 
@@ -862,12 +929,19 @@ function ConnectedVercelProjectForm({
           <FormButtons
             confirmButton={
               <Button
+                ref={saveButtonRef}
                 type="submit"
                 name="action"
                 value="update-config"
                 variant="secondary/small"
                 disabled={isConfigLoading || !hasConfigChanges}
                 LeadingIcon={isConfigLoading ? SpinnerWhite : undefined}
+                onClick={(event) => {
+                  if (shouldPromptClearOnSave) {
+                    event.preventDefault();
+                    setShowClearDialog(true);
+                  }
+                }}
               >
                 Save
               </Button>
@@ -875,6 +949,58 @@ function ConnectedVercelProjectForm({
           />
         </Fieldset>
       </Form>
+
+      <Dialog open={showClearDialog} onOpenChange={setShowClearDialog}>
+        <DialogContent className="max-w-md">
+          <DialogHeader>Clear TRIGGER_VERSION from Vercel?</DialogHeader>
+          <div className="flex flex-col gap-3 pt-3">
+            {currentTriggerVersion ? (
+              <Paragraph className="mb-1">
+                Atomic deployments are being turned off. The{" "}
+                <span className="font-mono text-text-bright">TRIGGER_VERSION</span> env var on
+                your Vercel production environment is currently set to{" "}
+                <span className="font-mono text-text-bright">{currentTriggerVersion}</span>.
+              </Paragraph>
+            ) : (
+              <Paragraph className="mb-1">
+                Atomic deployments are being turned off. We couldn't reach Vercel to confirm
+                whether{" "}
+                <span className="font-mono text-text-bright">TRIGGER_VERSION</span> is currently
+                set on your Vercel production environment, so please verify in the Vercel
+                dashboard.
+              </Paragraph>
+            )}
+            <Paragraph className="mb-1">
+              If you leave it, your Vercel project will stay pinned to this version. Since atomic
+              deployments will be off, Trigger.dev will no longer update this variable, and future
+              Vercel deploys will continue using this pinned version. We recommend clearing it.
+            </Paragraph>
+            <FormButtons
+              confirmButton={
+                <div className="flex gap-2">
+                  <Button
+                    variant="secondary/medium"
+                    onClick={() => submitWithClearChoice(false)}
+                  >
+                    Keep pinned
+                  </Button>
+                  <Button
+                    variant="primary/medium"
+                    onClick={() => submitWithClearChoice(true)}
+                  >
+                    Clear and disable
+                  </Button>
+                </div>
+              }
+              cancelButton={
+                <DialogClose asChild>
+                  <Button variant="tertiary/medium">Cancel</Button>
+                </DialogClose>
+              }
+            />
+          </div>
+        </DialogContent>
+      </Dialog>
     </>
   );
 }
@@ -948,6 +1074,8 @@ function VercelSettingsPanel({
           hasPreviewEnvironment={data.hasPreviewEnvironment}
           customEnvironments={data.customEnvironments}
           autoAssignCustomDomains={data.autoAssignCustomDomains ?? null}
+          currentTriggerVersion={data.currentTriggerVersion ?? null}
+          currentTriggerVersionFetchFailed={data.currentTriggerVersionFetchFailed ?? false}
           organizationSlug={organizationSlug}
           projectSlug={projectSlug}
           environmentSlug={environmentSlug}
diff --git a/apps/webapp/app/routes/resources.platform-changelogs.tsx b/apps/webapp/app/routes/resources.platform-changelogs.tsx
index 8eeabd6b047..ed62de3c1df 100644
--- a/apps/webapp/app/routes/resources.platform-changelogs.tsx
+++ b/apps/webapp/app/routes/resources.platform-changelogs.tsx
@@ -2,6 +2,7 @@ import { json } from "@remix-run/node";
 import type { LoaderFunctionArgs } from "@remix-run/node";
 import { useFetcher, type ShouldRevalidateFunction } from "@remix-run/react";
 import { useEffect, useRef } from "react";
+import { logger } from "~/services/logger.server";
 import { requireUserId } from "~/services/session.server";
 import { getRecentChangelogs, verifyOrgMembership } from "~/services/platformNotifications.server";
 
@@ -12,20 +13,29 @@ export type PlatformChangelogsLoaderData = {
 };
 
 export async function loader({ request }: LoaderFunctionArgs) {
-  const userId = await requireUserId(request);
-  const url = new URL(request.url);
-  const rawOrganizationId = url.searchParams.get("organizationId") ?? undefined;
-  const rawProjectId = url.searchParams.get("projectId") ?? undefined;
+  try {
+    const userId = await requireUserId(request);
+    const url = new URL(request.url);
+    const rawOrganizationId = url.searchParams.get("organizationId") ?? undefined;
+    const rawProjectId = url.searchParams.get("projectId") ?? undefined;
 
-  const { organizationId, projectId } = await verifyOrgMembership({
-    userId,
-    organizationId: rawOrganizationId,
-    projectId: rawProjectId,
-  });
+    const { organizationId, projectId } = await verifyOrgMembership({
+      userId,
+      organizationId: rawOrganizationId,
+      projectId: rawProjectId,
+    });
 
-  const changelogs = await getRecentChangelogs({ userId, organizationId, projectId });
+    const changelogs = await getRecentChangelogs({ userId, organizationId, projectId });
 
-  return json<PlatformChangelogsLoaderData>({ changelogs });
+    return json<PlatformChangelogsLoaderData>({ changelogs });
+  } catch (error) {
+    if (error instanceof Response) throw error;
+    logger.error("Failed to load platform changelogs", { error });
+    // Polling widget — degrade silently so a transient DB blip doesn't paint
+    // the dashboard with errors every 60s. Empty payload keeps the consumer's
+    // fetcher.data shape stable; the fault is recorded server-side.
+    return json<PlatformChangelogsLoaderData>({ changelogs: [] });
+  }
 }
 
 const POLL_INTERVAL_MS = 60_000;
diff --git a/apps/webapp/app/runEngine/services/triggerTask.server.ts b/apps/webapp/app/runEngine/services/triggerTask.server.ts
index bbfdc3956c2..2d9eeec0943 100644
--- a/apps/webapp/app/runEngine/services/triggerTask.server.ts
+++ b/apps/webapp/app/runEngine/services/triggerTask.server.ts
@@ -40,6 +40,18 @@ import type {
   TriggerTaskRequest,
   TriggerTaskValidator,
 } from "../types";
+import { env } from "~/env.server";
+import {
+  evaluateGate as defaultEvaluateGate,
+  type GateOutcome,
+  type MollifierEvaluateGate,
+} from "~/v3/mollifier/mollifierGate.server";
+import {
+  getMollifierBuffer as defaultGetMollifierBuffer,
+  type MollifierGetBuffer,
+} from "~/v3/mollifier/mollifierBuffer.server";
+import { buildBufferedTriggerPayload } from "~/v3/mollifier/bufferedTriggerPayload.server";
+import { serialiseSnapshot } from "@trigger.dev/redis-worker";
 import { QueueSizeLimitExceededError, ServiceValidationError } from "~/v3/services/common.server";
 
 class NoopTriggerRacepointSystem implements TriggerRacepointSystem {
@@ -59,6 +71,14 @@ export class RunEngineTriggerTaskService {
   private readonly traceEventConcern: TraceEventConcern;
   private readonly triggerRacepointSystem: TriggerRacepointSystem;
   private readonly metadataMaximumSize: number;
+  // Mollifier hooks are DI'd so tests can drive the call-site's mollify branch
+  // deterministically (stub the gate to return mollify, inject a real or fake
+  // buffer, force the global-enabled predicate to true so the call site
+  // doesn't short-circuit on an unset env). In production all three default
+  // to the live module-level singletons + env read.
+  private readonly evaluateGate: MollifierEvaluateGate;
+  private readonly getMollifierBuffer: MollifierGetBuffer;
+  private readonly isMollifierGloballyEnabled: () => boolean;
 
   constructor(opts: {
     prisma: PrismaClientOrTransaction;
@@ -71,6 +91,9 @@ export class RunEngineTriggerTaskService {
     tracer: Tracer;
     metadataMaximumSize: number;
     triggerRacepointSystem?: TriggerRacepointSystem;
+    evaluateGate?: MollifierEvaluateGate;
+    getMollifierBuffer?: MollifierGetBuffer;
+    isMollifierGloballyEnabled?: () => boolean;
   }) {
     this.prisma = opts.prisma;
     this.engine = opts.engine;
@@ -82,6 +105,10 @@ export class RunEngineTriggerTaskService {
     this.traceEventConcern = opts.traceEventConcern;
     this.metadataMaximumSize = opts.metadataMaximumSize;
     this.triggerRacepointSystem = opts.triggerRacepointSystem ?? new NoopTriggerRacepointSystem();
+    this.evaluateGate = opts.evaluateGate ?? defaultEvaluateGate;
+    this.getMollifierBuffer = opts.getMollifierBuffer ?? defaultGetMollifierBuffer;
+    this.isMollifierGloballyEnabled =
+      opts.isMollifierGloballyEnabled ?? (() => env.TRIGGER_MOLLIFIER_ENABLED === "1");
   }
 
   public async call({
@@ -316,6 +343,25 @@ export class RunEngineTriggerTaskService {
         taskKind: taskKind ?? "STANDARD",
       };
 
+      // Short-circuit before the gate when mollifier is globally off (the
+      // default for every deployment that hasn't opted in). Avoids the
+      // GateInputs allocation, the deps spread inside `evaluateGate`, and
+      // the `mollifier.decisions{outcome=pass_through}` OTel increment on
+      // every trigger — `triggerTask` is the highest-throughput code path
+      // in the system. The check goes through a DI'd predicate so unit
+      // tests that inject a custom `evaluateGate` can also override the
+      // gate-on check (the default reads `env.TRIGGER_MOLLIFIER_ENABLED`,
+      // which is "0" in CI where no .env file is present).
+      const mollifierOutcome: GateOutcome | null = this.isMollifierGloballyEnabled()
+        ? await this.evaluateGate({
+            envId: environment.id,
+            orgId: environment.organizationId,
+            taskId,
+            orgFeatureFlags:
+              (environment.organization.featureFlags as Record<string, unknown> | null) ?? null,
+          })
+        : null;
+
       try {
         return await this.traceEventConcern.traceRun(
           triggerRequest,
@@ -328,6 +374,74 @@ export class RunEngineTriggerTaskService {
 
             const payloadPacket = await this.payloadProcessor.process(triggerRequest);
 
+            // Phase 1 dual-write: if the org has the mollifier feature flag
+            // enabled and the per-env trip evaluator says divert, write the
+            // canonical replay payload to the buffer AND continue through
+            // engine.trigger as normal. The buffer entry is an audit/preview
+            // copy; the drainer's no-op handler consumes it to prove the
+            // dequeue mechanism works. Phase 2 will replace engine.trigger
+            // (below) with a synthesised 200 response and rely on the
+            // drainer to perform the Postgres write via replay.
+            if (mollifierOutcome?.action === "mollify") {
+              const buffer = this.getMollifierBuffer();
+              if (buffer) {
+                const canonicalPayload = buildBufferedTriggerPayload({
+                  runFriendlyId,
+                  taskId,
+                  envId: environment.id,
+                  envType: environment.type,
+                  envSlug: environment.slug,
+                  orgId: environment.organizationId,
+                  orgSlug: environment.organization.slug,
+                  projectId: environment.projectId,
+                  projectRef: environment.project.externalRef,
+                  body,
+                  idempotencyKey: idempotencyKey ?? null,
+                  idempotencyKeyExpiresAt: idempotencyKey
+                    ? idempotencyKeyExpiresAt ?? null
+                    : null,
+                  tags,
+                  parentRunFriendlyId: parentRun?.friendlyId ?? null,
+                  traceContext: event.traceContext,
+                  triggerSource,
+                  triggerAction,
+                  serviceOptions: options,
+                  createdAt: new Date(),
+                });
+
+                try {
+                  const serialisedPayload = serialiseSnapshot(canonicalPayload);
+                  await buffer.accept({
+                    runId: runFriendlyId,
+                    envId: environment.id,
+                    orgId: environment.organizationId,
+                    payload: serialisedPayload,
+                  });
+                  // Light log on the hot path — keep this synchronous work
+                  // O(1) per trigger. The drainer computes the payload hash
+                  // off-path; operators correlate `mollifier.buffered` →
+                  // `mollifier.drained` by runId.
+                  logger.debug("mollifier.buffered", {
+                    runId: runFriendlyId,
+                    envId: environment.id,
+                    orgId: environment.organizationId,
+                    taskId,
+                    payloadBytes: serialisedPayload.length,
+                  });
+                } catch (err) {
+                  // Fail-open: buffer write must never block the customer's
+                  // trigger. engine.trigger below is the primary write path
+                  // in Phase 1 — the customer still gets a valid run.
+                  logger.error("mollifier.buffer_accept_failed", {
+                    runId: runFriendlyId,
+                    envId: environment.id,
+                    taskId,
+                    err: err instanceof Error ? err.message : String(err),
+                  });
+                }
+              }
+            }
+
             const taskRun = await this.engine.trigger(
               {
                 friendlyId: runFriendlyId,
diff --git a/apps/webapp/app/services/apiRateLimit.server.ts b/apps/webapp/app/services/apiRateLimit.server.ts
index 3618806fce7..4621146bd66 100644
--- a/apps/webapp/app/services/apiRateLimit.server.ts
+++ b/apps/webapp/app/services/apiRateLimit.server.ts
@@ -70,6 +70,7 @@ export const apiRateLimiter = authorizationRateLimitMiddleware({
     // customer-facing surface so customer rate limits shouldn't apply.
     /^\/api\/v1\/packets\//,
     /^\/api\/v2\/packets\//,
+    /^\/api\/v1\/sessions\/[^\/]+\/snapshot-url$/,
   ],
   log: {
     rejections: env.API_RATE_LIMIT_REJECTION_LOGS_ENABLED === "1",
diff --git a/apps/webapp/app/services/realtime/chatSnapshot.server.ts b/apps/webapp/app/services/realtime/chatSnapshot.server.ts
new file mode 100644
index 00000000000..83db0d94197
--- /dev/null
+++ b/apps/webapp/app/services/realtime/chatSnapshot.server.ts
@@ -0,0 +1,13 @@
+import { env } from "~/env.server";
+
+/**
+ * Canonical storage URI for a session's chat.agent snapshot. Stamped on
+ * `Session.chatSnapshotStoragePath` at row creation so PUT/GET presigns
+ * resolve to the same store even if `OBJECT_STORE_DEFAULT_PROTOCOL`
+ * changes later.
+ */
+export function chatSnapshotStoragePathForSession(friendlyId: string): string {
+  const path = `sessions/${friendlyId}/snapshot.json`;
+  const protocol = env.OBJECT_STORE_DEFAULT_PROTOCOL;
+  return protocol ? `${protocol}://${path}` : path;
+}
diff --git a/apps/webapp/app/services/realtime/s2realtimeStreams.server.ts b/apps/webapp/app/services/realtime/s2realtimeStreams.server.ts
index 0553ef77f9b..07061071446 100644
--- a/apps/webapp/app/services/realtime/s2realtimeStreams.server.ts
+++ b/apps/webapp/app/services/realtime/s2realtimeStreams.server.ts
@@ -33,6 +33,16 @@ export type S2RealtimeStreamsOptions = {
   }>;
 };
 
+// Ops the issued S2 access token is scoped to. `trim` is a distinct op
+// from `append` even though trim records are appended like any other —
+// without it, `AppendRecord.trim()` 403s with "Operation not permitted".
+// `chat.agent`'s per-turn trim chain depends on it.
+//
+// The fingerprint folds the ops list into the cache key, so any future
+// scope change auto-invalidates pre-deploy cached tokens.
+const S2_TOKEN_OPS = ["append", "create-stream", "trim"] as const;
+const S2_TOKEN_OPS_FINGERPRINT = [...S2_TOKEN_OPS].sort().join(",");
+
 type S2IssueAccessTokenResponse = { access_token: string };
 type S2AppendInput = { records: { body: string }[] };
 type S2AppendAck = {
@@ -564,8 +574,10 @@ export class S2RealtimeStreams implements StreamResponder, StreamIngestor {
     }
 
     // Cache key includes basin so per-org basins never collide on
-    // cached tokens. `${basin}:${prefix}` is unique per (org-basin, env).
-    const cacheKey = `${this.basin}:${this.streamPrefix}`;
+    // cached tokens, and the ops fingerprint so a scope change in code
+    // (e.g. adding `trim` in #3644) auto-invalidates pre-deploy entries
+    // instead of returning stale tokens for up to 24h.
+    const cacheKey = `${this.basin}:${this.streamPrefix}:${S2_TOKEN_OPS_FINGERPRINT}`;
     const result = await this.cache.accessToken.swr(cacheKey, async () => {
       return this.s2IssueAccessToken(id);
     });
@@ -591,12 +603,7 @@ export class S2RealtimeStreams implements StreamResponder, StreamIngestor {
           basins: {
             exact: this.basin,
           },
-          // S2 treats `trim` as a separate op from `append` even though
-          // trim records are appended like any other record. Verified
-          // empirically: without `"trim"` here, `AppendRecord.trim()`
-          // writes 403 with "Operation not permitted". `chat.agent`'s
-          // per-turn trim chain depends on this.
-          ops: ["append", "create-stream", "trim"],
+          ops: [...S2_TOKEN_OPS],
           streams: {
             prefix: this.streamPrefix,
           },
diff --git a/apps/webapp/app/services/runsReplicationService.server.ts b/apps/webapp/app/services/runsReplicationService.server.ts
index 167564572eb..6b2437cf3db 100644
--- a/apps/webapp/app/services/runsReplicationService.server.ts
+++ b/apps/webapp/app/services/runsReplicationService.server.ts
@@ -875,8 +875,8 @@ export class RunsReplicationService {
       organizationId, // organization_id
       run.projectId, // project_id
       run.id, // run_id
-      run.updatedAt.getTime(), // updated_at
-      run.createdAt.getTime(), // created_at
+      run.updatedAt?.getTime() ?? Date.now(), // updated_at
+      run.createdAt?.getTime() ?? Date.now(), // created_at
       run.status, // status
       environmentType, // environment_type
       run.friendlyId, // friendly_id
@@ -936,7 +936,7 @@ export class RunsReplicationService {
     // Return array matching PAYLOAD_COLUMNS order
     return [
       run.id, // run_id
-      run.createdAt.getTime(), // created_at
+      run.createdAt?.getTime() ?? Date.now(), // created_at
       payload, // payload
     ];
   }
diff --git a/apps/webapp/app/services/sessionsReplicationService.server.ts b/apps/webapp/app/services/sessionsReplicationService.server.ts
index f7f384faffc..1dde83c01b9 100644
--- a/apps/webapp/app/services/sessionsReplicationService.server.ts
+++ b/apps/webapp/app/services/sessionsReplicationService.server.ts
@@ -750,8 +750,8 @@ function toSessionInsertArray(
     session.closedAt ? session.closedAt.getTime() : null,
     session.closedReason ?? "",
     session.expiresAt ? session.expiresAt.getTime() : null,
-    session.createdAt.getTime(),
-    session.updatedAt.getTime(),
+    session.createdAt?.getTime() ?? Date.now(),
+    session.updatedAt?.getTime() ?? Date.now(),
     version.toString(),
     isDeleted ? 1 : 0,
   ];
diff --git a/apps/webapp/app/services/vercelIntegration.server.ts b/apps/webapp/app/services/vercelIntegration.server.ts
index cf9e634afbb..286e9974054 100644
--- a/apps/webapp/app/services/vercelIntegration.server.ts
+++ b/apps/webapp/app/services/vercelIntegration.server.ts
@@ -409,7 +409,7 @@ export class VercelIntegrationService {
         key: "TRIGGER_SECRET_KEY",
         value: stagingEnv.apiKey,
         customEnvironmentId: newCustomEnvironmentId,
-        type: "encrypted",
+        type: "sensitive",
       });
 
       if (upsertResult.isErr()) {
@@ -714,6 +714,102 @@ export class VercelIntegrationService {
     });
   }
 
+  /**
+   * Returns true when TRIGGER_VERSION is no longer pinned on Vercel production after the call
+   * (either we cleared it or it wasn't set to begin with). Returns false when we failed to
+   * verify or perform the delete — callers should surface that to the user so they can clear
+   * it manually.
+   */
+  async clearTriggerVersionFromVercelProduction(projectId: string): Promise<boolean> {
+    const orgIntegration =
+      await VercelIntegrationRepository.findVercelOrgIntegrationForProject(projectId);
+    if (!orgIntegration) {
+      return false;
+    }
+
+    const clientResult = await VercelIntegrationRepository.getVercelClient(orgIntegration);
+    if (clientResult.isErr()) {
+      logger.error("Failed to get Vercel client for TRIGGER_VERSION clear", {
+        projectId,
+        error: clientResult.error.message,
+      });
+      return false;
+    }
+    const client = clientResult.value;
+    const teamId = await VercelIntegrationRepository.getTeamIdFromIntegration(orgIntegration);
+
+    const projectIntegration = await this.#prismaClient.organizationProjectIntegration.findFirst({
+      where: {
+        projectId,
+        organizationIntegrationId: orgIntegration.id,
+        deletedAt: null,
+      },
+      select: {
+        externalEntityId: true,
+      },
+    });
+
+    if (!projectIntegration) {
+      return false;
+    }
+
+    const vercelProjectId = projectIntegration.externalEntityId;
+
+    const envVarsResult = await VercelIntegrationRepository.getVercelEnvironmentVariables(
+      client,
+      vercelProjectId,
+      teamId
+    );
+
+    if (envVarsResult.isErr()) {
+      logger.warn("Failed to fetch Vercel env vars for TRIGGER_VERSION clear", {
+        projectId,
+        vercelProjectId,
+        error: envVarsResult.error.message,
+      });
+      return false;
+    }
+
+    const existingTriggerVersion = envVarsResult.value.find(
+      (env) => env.key === "TRIGGER_VERSION" && env.target.includes("production")
+    );
+
+    if (!existingTriggerVersion) {
+      logger.info("TRIGGER_VERSION not present on Vercel production — nothing to clear", {
+        projectId,
+        vercelProjectId,
+      });
+      return true;
+    }
+
+    const removeResult = await ResultAsync.fromPromise(
+      client.projects.batchRemoveProjectEnv({
+        idOrName: vercelProjectId,
+        ...(teamId && { teamId }),
+        requestBody: { ids: [existingTriggerVersion.id] },
+      }),
+      (error) => error
+    );
+
+    if (removeResult.isErr()) {
+      logger.error("Failed to clear TRIGGER_VERSION from Vercel production", {
+        projectId,
+        vercelProjectId,
+        error:
+          removeResult.error instanceof Error
+            ? removeResult.error.message
+            : String(removeResult.error),
+      });
+      return false;
+    }
+
+    logger.info("Cleared TRIGGER_VERSION from Vercel production", {
+      projectId,
+      vercelProjectId,
+    });
+    return true;
+  }
+
   async disconnectVercelProject(projectId: string): Promise<boolean> {
     const existing = await this.getVercelProjectIntegration(projectId);
     if (!existing) {
diff --git a/apps/webapp/app/services/worker.server.ts b/apps/webapp/app/services/worker.server.ts
index 902d752ed0a..7de2c7cb2e7 100644
--- a/apps/webapp/app/services/worker.server.ts
+++ b/apps/webapp/app/services/worker.server.ts
@@ -1,3 +1,24 @@
+/**
+ * ⚠️ LEGACY — Graphile-worker / ZodWorker setup. Do not touch.
+ *
+ * This file wires the original background-job system the webapp was
+ * built on (`@internal/zod-worker` → graphile-worker → Postgres). It is
+ * now in deprecation mode: every task in `workerCatalog` below is
+ * annotated with `@deprecated, moved to <new home>` and the live jobs
+ * for new features all run on `@trigger.dev/redis-worker` instead.
+ *
+ * Where to put new things:
+ *   - Background jobs / queues → use redis-worker, alongside
+ *     `~/v3/commonWorker.server.ts`, `~/v3/alertsWorker.server.ts`, or
+ *     `~/v3/batchTriggerWorker.server.ts`.
+ *   - Run lifecycle → `@internal/run-engine` via `~/v3/runEngine.server`.
+ *   - Custom polling loops with their own Redis connection → keep them
+ *     in their own lifecycle module (e.g. `~/v3/mollifierDrainerWorker.server.ts`)
+ *     and wire the bootstrap from `entry.server.tsx`. Don't reach into
+ *     `init()` below.
+ *
+ * Edit only when removing legacy paths.
+ */
 import { ZodWorker } from "@internal/zod-worker";
 import { DeliverEmailSchema } from "emails";
 import { z } from "zod";
diff --git a/apps/webapp/app/utils/detectBadJsonStrings.ts b/apps/webapp/app/utils/detectBadJsonStrings.ts
index 4a000b54293..99ffec4b9b0 100644
--- a/apps/webapp/app/utils/detectBadJsonStrings.ts
+++ b/apps/webapp/app/utils/detectBadJsonStrings.ts
@@ -1,3 +1,17 @@
+/**
+ * Detects unpaired UTF-16 surrogate escape sequences in JSON-encoded text.
+ *
+ * Returns true if the input contains a `\uD8XX`/`\uD9XX`/`\uDAXX`/`\uDBXX`
+ * high-surrogate escape not immediately followed by a `\uDC..`–`\uDF..` low
+ * surrogate, or a `\uDC..`–`\uDF..` low surrogate not immediately preceded by
+ * a high surrogate. Strict JSON parsers (e.g. ClickHouse `JSONEachRow`)
+ * reject input containing such sequences.
+ *
+ * Surrogate hex ranges (shown uppercase; the matcher only recognises lowercase
+ * hex digits, which is what `JSON.stringify` emits):
+ *   - High surrogate (U+D800–U+DBFF):  `\uD[8-B][0-9A-F][0-9A-F]`
+ *   - Low surrogate  (U+DC00–U+DFFF):  `\uD[C-F][0-9A-F][0-9A-F]`
+ */
 export function detectBadJsonStrings(jsonString: string): boolean {
   // Fast path: skip everything if no \u
   let idx = jsonString.indexOf("\\u");
@@ -13,7 +27,7 @@ export function detectBadJsonStrings(jsonString: string): boolean {
     if (jsonString[idx + 1] === "u" && jsonString[idx + 2] === "d") {
       const third = jsonString[idx + 3];
 
-      // High surrogate check
+      // High surrogate check — third nibble is 8, 9, a, or b (U+D800–U+DBFF)
       if (
         /[89ab]/.test(third) &&
         /[0-9a-f]/.test(jsonString[idx + 4]) &&
@@ -28,7 +42,7 @@ export function detectBadJsonStrings(jsonString: string): boolean {
           jsonString[idx + 6] !== "\\" ||
           jsonString[idx + 7] !== "u" ||
           jsonString[idx + 8] !== "d" ||
-          !/[cd]/.test(jsonString[idx + 9]) ||
+          !/[c-f]/.test(jsonString[idx + 9]) ||
           !/[0-9a-f]/.test(jsonString[idx + 10]) ||
           !/[0-9a-f]/.test(jsonString[idx + 11])
         ) {
@@ -36,9 +50,9 @@ export function detectBadJsonStrings(jsonString: string): boolean {
         }
       }
 
-      // Low surrogate check
+      // Low surrogate check — third nibble is c, d, e, or f (U+DC00–U+DFFF)
       if (
-        (third === "c" || third === "d") &&
+        /[c-f]/.test(third) &&
         /[0-9a-f]/.test(jsonString[idx + 4]) &&
         /[0-9a-f]/.test(jsonString[idx + 5])
       ) {
diff --git a/apps/webapp/app/utils/httpErrors.ts b/apps/webapp/app/utils/httpErrors.ts
index 5131730e3bb..2e41aa67eff 100644
--- a/apps/webapp/app/utils/httpErrors.ts
+++ b/apps/webapp/app/utils/httpErrors.ts
@@ -1,3 +1,7 @@
+export function throwNotFound(statusText: string): never {
+  throw new Response(undefined, { status: 404, statusText });
+}
+
 export function friendlyErrorDisplay(statusCode: number, statusText?: string) {
   switch (statusCode) {
     case 400:
diff --git a/apps/webapp/app/v3/environmentVariables/environmentVariablesRepository.server.ts b/apps/webapp/app/v3/environmentVariables/environmentVariablesRepository.server.ts
index f2ca46d4d3a..9cc41995664 100644
--- a/apps/webapp/app/v3/environmentVariables/environmentVariablesRepository.server.ts
+++ b/apps/webapp/app/v3/environmentVariables/environmentVariablesRepository.server.ts
@@ -2,7 +2,7 @@ import { Prisma, type PrismaClient, type RuntimeEnvironmentType } from "@trigger
 import type { AuthenticatedEnvironment } from "@trigger.dev/core/v3/auth/environment";
 import { z } from "zod";
 import { environmentFullTitle } from "~/components/environments/EnvironmentLabel";
-import { $transaction, prisma } from "~/db.server";
+import { $replica, $transaction, prisma, type PrismaReplicaClient } from "~/db.server";
 import { env } from "~/env.server";
 import { getSecretStore } from "~/services/secrets/secretStore.server";
 import { generateFriendlyId } from "../friendlyIdentifiers";
@@ -47,7 +47,10 @@ function parseSecretKey(key: string) {
 const SecretValue = z.object({ secret: z.string() });
 
 export class EnvironmentVariablesRepository implements Repository {
-  constructor(private prismaClient: PrismaClient = prisma) {}
+  constructor(
+    private prismaClient: PrismaClient = prisma,
+    private replicaClient: PrismaReplicaClient = $replica
+  ) {}
 
   async create(projectId: string, options: CreateEnvironmentVariables): Promise<CreateResult> {
     const project = await this.prismaClient.project.findFirst({
@@ -582,7 +585,7 @@ export class EnvironmentVariablesRepository implements Repository {
     const variables = await this.getEnvironment(projectId, environmentId, parentEnvironmentId);
 
     // Get the keys of all secret variables
-    const secretValues = await this.prismaClient.environmentVariableValue.findMany({
+    const secretValues = await this.replicaClient.environmentVariableValue.findMany({
       where: {
         environmentId: parentEnvironmentId
           ? { in: [environmentId, parentEnvironmentId] }
diff --git a/apps/webapp/app/v3/eventRepository/clickhouseEventRepository.server.ts b/apps/webapp/app/v3/eventRepository/clickhouseEventRepository.server.ts
index 27b96ed7d37..67efea6847c 100644
--- a/apps/webapp/app/v3/eventRepository/clickhouseEventRepository.server.ts
+++ b/apps/webapp/app/v3/eventRepository/clickhouseEventRepository.server.ts
@@ -45,6 +45,11 @@ import {
   removePrivateProperties,
   isEmptyObject,
 } from "./common.server";
+import {
+  isClickHouseJsonParseError,
+  parseRowNumberFromError,
+  sanitizeRows,
+} from "./sanitizeRowsOnParseError.server";
 import type {
   CompleteableTaskRun,
   CreateEventInput,
@@ -104,6 +109,13 @@ export class ClickhouseEventRepository implements IEventRepository {
   private readonly _llmMetricsFlushScheduler: DynamicFlushScheduler<LlmMetricsV1Input>;
   private _tracer: Tracer;
   private _version: "v1" | "v2";
+  /**
+   * Counts batches dropped after a ClickHouse JSON parse failure: either the
+   * sanitizer found nothing to fix, or the error survived one sanitize-retry.
+   * The scheduler is told the flush "succeeded" so its queue counter doesn't
+   * leak; the drop count is tracked for observability.
+   */
+  private _permanentlyDroppedBatches = 0;
 
   constructor(config: ClickhouseEventRepositoryConfig) {
     this._clickhouse = config.clickhouse;
@@ -147,6 +159,11 @@ export class ClickhouseEventRepository implements IEventRepository {
     return this._config.maximumLiveReloadingSetting ?? 1000;
   }
 
+  /** Exposed for tests and metrics — total batches lost to unrecoverable parse errors. */
+  get permanentlyDroppedBatches() {
+    return this._permanentlyDroppedBatches;
+  }
+
   /**
    * Clamps a start time (in nanoseconds) to now if it's too far in the past.
    * Returns the clamped value as a bigint.
@@ -215,19 +232,32 @@ export class ClickhouseEventRepository implements IEventRepository {
           ? this._clickhouse.taskEventsV2.insert
           : this._clickhouse.taskEvents.insert;
 
-      const [insertError, insertResult] = await insertFn(events, {
-        params: {
-          clickhouse_settings: this.#getClickhouseInsertSettings(),
-        },
-      });
+      const doInsert = async () => {
+        const [insertError, insertResult] = await insertFn(events, {
+          params: {
+            clickhouse_settings: this.#getClickhouseInsertSettings(),
+          },
+        });
+        if (insertError) throw insertError;
+        return insertResult;
+      };
+
+      const outcome = await this.#insertWithJsonParseRecovery(
+        flushId,
+        events,
+        doInsert,
+        `task_events_${this._version}`
+      );
 
-      if (insertError) {
-        throw insertError;
+      if (outcome.kind === "dropped") {
+        // Loud log already emitted; nothing landed in ClickHouse — don't publish to Redis.
+        return;
       }
 
       logger.info("ClickhouseEventRepository.flushBatch Inserted batch into clickhouse", {
         events: events.length,
-        insertResult,
+        insertResult: outcome.insertResult,
+        sanitized: outcome.kind === "sanitized",
         version: this._version,
       });
 
@@ -236,22 +266,134 @@ export class ClickhouseEventRepository implements IEventRepository {
   }
 
   async #flushLlmMetricsBatch(flushId: string, rows: LlmMetricsV1Input[]) {
+    const doInsert = async () => {
+      const [insertError, insertResult] = await this._clickhouse.llmMetrics.insert(rows, {
+        params: {
+          clickhouse_settings: this.#getClickhouseInsertSettings(),
+        },
+      });
+      if (insertError) throw insertError;
+      return insertResult;
+    };
 
-    const [insertError] = await this._clickhouse.llmMetrics.insert(rows, {
-      params: {
-        clickhouse_settings: this.#getClickhouseInsertSettings(),
-      },
-    });
+    const outcome = await this.#insertWithJsonParseRecovery(
+      flushId,
+      rows,
+      doInsert,
+      "llm_metrics_v1"
+    );
 
-    if (insertError) {
-      throw insertError;
+    if (outcome.kind === "dropped") {
+      return;
     }
 
     logger.info("ClickhouseEventRepository.flushLlmMetricsBatch Inserted LLM metrics batch", {
       rows: rows.length,
+      sanitized: outcome.kind === "sanitized",
     });
   }
 
+  /**
+   * Wraps a ClickHouse insert callable with reactive UTF-16 sanitization.
+   *
+   * On a `Cannot parse JSON object` failure:
+   *   1. Sanitize the whole batch — the `at row N` hint is logged but not
+   *      used to slice. If nothing was sanitized, drop without retrying.
+   *   2. Retry the insert once with the sanitized batch.
+   *   3. If the retry still fails with the same error class, log loudly,
+   *      increment `permanentlyDroppedBatches`, and return without
+   *      throwing — the scheduler's transient-retry path would just repeat
+   *      the same deterministic failure.
+   *
+   * Non-parse errors propagate unchanged so the scheduler's existing
+   * backoff/retry behaviour still handles transient network or CH issues.
+   */
+  async #insertWithJsonParseRecovery<T extends object>(
+    flushId: string,
+    rows: T[],
+    doInsert: () => Promise<unknown>,
+    contextLabel: string
+  ): Promise<
+    | { kind: "inserted"; insertResult: unknown }
+    | { kind: "sanitized"; insertResult: unknown }
+    | { kind: "dropped" }
+  > {
+    try {
+      return { kind: "inserted", insertResult: await doInsert() };
+    } catch (firstError) {
+      if (!isClickHouseJsonParseError(firstError)) throw firstError;
+
+      const firstMessage =
+        typeof firstError === "object" && firstError !== null && "message" in firstError
+          ? String((firstError as { message?: unknown }).message ?? "")
+          : String(firstError);
+
+      // Sanitize the whole batch. ClickHouse's `at row N` index is logged
+      // for observability but not used to slice — its semantics under
+      // parallel parsing are not stable enough to safely skip rows.
+      const rowHint = parseRowNumberFromError(firstMessage);
+      const { rowsTouched, fieldsSanitized } = sanitizeRows(rows);
+
+      // Sanitizer found nothing to fix → retrying the exact same batch is
+      // guaranteed to hit the same deterministic parse failure. Skip the
+      // wasted ClickHouse round-trip and drop loudly. Throwing instead would
+      // hand the failure back to the scheduler's 3× transient-retry loop —
+      // exactly the retry storm this wrapper is designed to avoid.
+      if (fieldsSanitized === 0) {
+        this._permanentlyDroppedBatches += 1;
+        logger.error(
+          "Dropped batch — ClickHouse JSON parse error but sanitizer found nothing to fix",
+          {
+            flushId,
+            contextLabel,
+            batchSize: rows.length,
+            clickhouseRowHint: rowHint,
+            permanentlyDroppedBatches: this._permanentlyDroppedBatches,
+            sampleRow: JSON.stringify(rows[0] ?? null).slice(0, 1024),
+            clickhouseError: firstMessage.split("\n")[0],
+          }
+        );
+        return { kind: "dropped" };
+      }
+
+      logger.warn("Sanitizing batch after ClickHouse JSON parse error", {
+        flushId,
+        contextLabel,
+        batchSize: rows.length,
+        clickhouseRowHint: rowHint,
+        rowsTouched,
+        fieldsSanitized,
+        clickhouseError: firstMessage.split("\n")[0],
+      });
+
+      try {
+        return { kind: "sanitized", insertResult: await doInsert() };
+      } catch (retryError) {
+        if (!isClickHouseJsonParseError(retryError)) throw retryError;
+
+        this._permanentlyDroppedBatches += 1;
+        const retryMessage =
+          typeof retryError === "object" && retryError !== null && "message" in retryError
+            ? String((retryError as { message?: unknown }).message ?? "")
+            : String(retryError);
+        logger.error(
+          "Dropped batch after sanitize-retry still hit ClickHouse JSON parse error",
+          {
+            flushId,
+            contextLabel,
+            batchSize: rows.length,
+            permanentlyDroppedBatches: this._permanentlyDroppedBatches,
+            sampleRow: JSON.stringify(rows[0] ?? null).slice(0, 1024),
+            firstError: firstMessage.split("\n")[0],
+            retryError: retryMessage.split("\n")[0],
+          }
+        );
+
+        return { kind: "dropped" };
+      }
+    }
+  }
+
   #createLlmMetricsInput(event: CreateEventInput): LlmMetricsV1Input {
     const llmMetrics = event._llmMetrics!;
 
diff --git a/apps/webapp/app/v3/eventRepository/sanitizeRowsOnParseError.server.ts b/apps/webapp/app/v3/eventRepository/sanitizeRowsOnParseError.server.ts
new file mode 100644
index 00000000000..f04f9c02023
--- /dev/null
+++ b/apps/webapp/app/v3/eventRepository/sanitizeRowsOnParseError.server.ts
@@ -0,0 +1,118 @@
+import { detectBadJsonStrings } from "~/utils/detectBadJsonStrings";
+
+/**
+ * Replacement string we substitute for any attribute value that contains
+ * a lone UTF-16 surrogate. JSON-safe, distinctly recognisable in logs and
+ * the dashboard so operators can spot affected rows.
+ */
+export const INVALID_UTF16_SENTINEL = "[invalid-utf16]";
+
+export type SanitizeResult = {
+  /** How many rows had at least one string field replaced. */
+  rowsTouched: number;
+  /** Total count of string fields replaced across all sanitized rows. */
+  fieldsSanitized: number;
+};
+
+/**
+ * Recognises ClickHouse's "Cannot parse JSON object" rejection — the
+ * deterministic-failure class our sanitizer is designed for. Bubbles up
+ * from `@clickhouse/client` as an `InsertError` whose `.message` retains
+ * the original ClickHouse error text.
+ */
+export function isClickHouseJsonParseError(err: unknown): boolean {
+  if (!err) return false; // null, undefined, "" etc. cannot carry the marker
+  const hasMessage = typeof err === "object" && err !== null && "message" in err;
+  const text = hasMessage
+    ? String((err as { message?: unknown }).message ?? "")
+    : String(err);
+  return text.indexOf("Cannot parse JSON object") !== -1;
+}
+
+/**
+ * Extracts the row index ClickHouse reported as the first to fail
+ * (`(at row N)`). Returns `null` if the message doesn't include one —
+ * caller should treat that as "sanitize from row 0".
+ */
+export function parseRowNumberFromError(errorMessage: string): number | null {
+  const hit = /at row (\d+)/.exec(errorMessage);
+  return hit === null ? null : Number.parseInt(hit[1], 10);
+}
+
+/**
+ * Walks `value` recursively and replaces any string leaf that contains a
+ * lone UTF-16 surrogate with `INVALID_UTF16_SENTINEL`. Mutates objects
+ * and arrays in place; primitives are returned unchanged.
+ *
+ * Caller passes anything: a row object, a single field, an unknown JSON
+ * payload. The walker doesn't depend on the row's schema — it sanitizes
+ * every string in the structure, which is exactly what ClickHouse cares
+ * about when parsing the row's JSON form.
+ */
+export function sanitizeUnknownInPlace(value: unknown): { value: unknown; fixed: number } {
+  if (typeof value === "string") {
+    // `detectBadJsonStrings` works on JSON-escaped text — feed it the
+    // serialized form so any lone UTF-16 surrogate in the JS string is
+    // emitted as a `\uXXXX` escape it can spot. Valid surrogate pairs
+    // (e.g. emoji) are emitted as raw characters by JSON.stringify and
+    // exit at the function's fast path.
+    if (detectBadJsonStrings(JSON.stringify(value))) {
+      return { value: INVALID_UTF16_SENTINEL, fixed: 1 };
+    }
+    return { value, fixed: 0 };
+  }
+
+  if (Array.isArray(value)) {
+    let fixed = 0;
+    for (let i = 0; i < value.length; i++) {
+      const result = sanitizeUnknownInPlace(value[i]);
+      value[i] = result.value;
+      fixed += result.fixed;
+    }
+    return { value, fixed };
+  }
+
+  if (value !== null && typeof value === "object") {
+    let fixed = 0;
+    const obj = value as Record<string, unknown>;
+    for (const k of Object.keys(obj)) {
+      const result = sanitizeUnknownInPlace(obj[k]);
+      obj[k] = result.value;
+      fixed += result.fixed;
+    }
+    return { value, fixed };
+  }
+
+  return { value, fixed: 0 };
+}
+
+/**
+ * Sanitizes every row in `rows`, mutating each in place so callers can
+ * hand the same array to the retry insert.
+ *
+ * Rationale for scanning the whole batch (instead of starting from the
+ * row index ClickHouse reports): `at row N` semantics under
+ * `input_format_parallel_parsing` aren't well-defined — N can be
+ * chunk-relative rather than batch-global, and 0-vs-1 indexing differs
+ * between formats. Whole-batch scanning is robust to those quirks and
+ * also catches multiple bad rows in one pass (so a single retry covers
+ * the entire failure even if more than one row is poisoned).
+ *
+ * The cost is bounded: this only runs on the rare ClickHouse-rejection
+ * path, and `detectBadJsonStrings` exits early for clean strings
+ * (the fast `indexOf("\\u")` check), so healthy attributes cost only a
+ * cheap linear scan each even when included in the walk.
+ */
+export function sanitizeRows<T extends object>(rows: T[]): SanitizeResult {
+  let rowsTouched = 0;
+  let fieldsSanitized = 0;
+
+  for (const row of rows) {
+    const { fixed } = sanitizeUnknownInPlace(row);
+    if (fixed === 0) continue;
+    rowsTouched += 1;
+    fieldsSanitized += fixed;
+  }
+
+  return { rowsTouched, fieldsSanitized };
+}
diff --git a/apps/webapp/app/v3/featureFlags.ts b/apps/webapp/app/v3/featureFlags.ts
index b40a83c3a35..67033a74f8f 100644
--- a/apps/webapp/app/v3/featureFlags.ts
+++ b/apps/webapp/app/v3/featureFlags.ts
@@ -8,6 +8,7 @@ export const FEATURE_FLAG = {
   hasAiAccess: "hasAiAccess",
   hasComputeAccess: "hasComputeAccess",
   hasPrivateConnections: "hasPrivateConnections",
+  mollifierEnabled: "mollifierEnabled",
 } as const;
 
 export const FeatureFlagCatalog = {
@@ -18,6 +19,7 @@ export const FeatureFlagCatalog = {
   [FEATURE_FLAG.hasAiAccess]: z.coerce.boolean(),
   [FEATURE_FLAG.hasComputeAccess]: z.coerce.boolean(),
   [FEATURE_FLAG.hasPrivateConnections]: z.coerce.boolean(),
+  [FEATURE_FLAG.mollifierEnabled]: z.coerce.boolean(),
 };
 
 export type FeatureFlagKey = keyof typeof FeatureFlagCatalog;
diff --git a/apps/webapp/app/v3/mollifier/bufferedTriggerPayload.server.ts b/apps/webapp/app/v3/mollifier/bufferedTriggerPayload.server.ts
new file mode 100644
index 00000000000..d251e9f98e8
--- /dev/null
+++ b/apps/webapp/app/v3/mollifier/bufferedTriggerPayload.server.ts
@@ -0,0 +1,107 @@
+import type { TriggerTaskRequestBody } from "@trigger.dev/core/v3";
+import type { TriggerTaskServiceOptions } from "~/v3/services/triggerTask.server";
+
+// Canonical payload shape written to the mollifier buffer when the gate
+// decides to mollify a trigger. Phase 1 ALSO calls engine.trigger directly
+// (dual-write) so this is currently an audit/preview record. Phase 2 will
+// make the buffer the primary write path: the drainer's handler will read
+// this payload and replay it through engine.trigger to create the run in
+// Postgres, and read-fallback endpoints will synthesise a Run view from it
+// while it is still QUEUED.
+//
+// CONTRACT: this shape must contain everything needed for Phase 2's
+// drainer-replay to reconstruct an equivalent engine.trigger call. Phase 1
+// emits it to logs; Phase 2 will serialise it into Redis and rebuild it on
+// the drain side. Keep it serialisable — no functions, no class instances.
+export type BufferedTriggerPayload = {
+  runFriendlyId: string;
+
+  // Routing identifiers — let the drainer re-fetch full AuthenticatedEnvironment
+  // at replay time rather than embedding it in the payload.
+  envId: string;
+  envType: string;
+  envSlug: string;
+  orgId: string;
+  orgSlug: string;
+  projectId: string;
+  projectRef: string;
+
+  // Task identifier — looked up against the locked BackgroundWorkerTask
+  // at replay time to recover task-defaults.
+  taskId: string;
+
+  // Customer-supplied trigger body (payload, options, context).
+  body: TriggerTaskRequestBody;
+
+  // Resolved values from upstream concerns. The drainer should NOT re-resolve
+  // these — that would create a second idempotency-key check, etc.
+  idempotencyKey: string | null;
+  idempotencyKeyExpiresAt: string | null;
+  tags: string[];
+
+  // Parent/root linkage for nested triggers.
+  parentRunFriendlyId: string | null;
+
+  // Trace context — propagates the original triggering span across the
+  // buffer→drain boundary so the run's lifecycle stays under one trace.
+  traceContext: Record<string, unknown>;
+
+  // Annotations + service options that influence routing/replay.
+  triggerSource: string;
+  triggerAction: string;
+  serviceOptions: TriggerTaskServiceOptions;
+
+  // Wall-clock instants relevant to the run.
+  createdAt: string;
+};
+
+// Assemble the canonical payload from the inputs available at the point
+// `evaluateGate` returns "mollify" in `RunEngineTriggerTaskService.call`.
+// All fields must be derivable from data already in scope at that call site;
+// nothing should require an extra DB lookup.
+export function buildBufferedTriggerPayload(input: {
+  runFriendlyId: string;
+  taskId: string;
+  envId: string;
+  envType: string;
+  envSlug: string;
+  orgId: string;
+  orgSlug: string;
+  projectId: string;
+  projectRef: string;
+  body: TriggerTaskRequestBody;
+  idempotencyKey: string | null;
+  idempotencyKeyExpiresAt: Date | null;
+  tags: string[];
+  parentRunFriendlyId: string | null;
+  traceContext: Record<string, unknown>;
+  triggerSource: string;
+  triggerAction: string;
+  serviceOptions: TriggerTaskServiceOptions;
+  createdAt: Date;
+}): BufferedTriggerPayload {
+  return {
+    runFriendlyId: input.runFriendlyId,
+    envId: input.envId,
+    envType: input.envType,
+    envSlug: input.envSlug,
+    orgId: input.orgId,
+    orgSlug: input.orgSlug,
+    projectId: input.projectId,
+    projectRef: input.projectRef,
+    taskId: input.taskId,
+    body: input.body,
+    idempotencyKey: input.idempotencyKey,
+    idempotencyKeyExpiresAt:
+      input.idempotencyKey && input.idempotencyKeyExpiresAt
+        ? input.idempotencyKeyExpiresAt.toISOString()
+        : null,
+    tags: input.tags,
+    parentRunFriendlyId: input.parentRunFriendlyId,
+    traceContext: input.traceContext,
+    triggerSource: input.triggerSource,
+    triggerAction: input.triggerAction,
+    serviceOptions: input.serviceOptions,
+    createdAt: input.createdAt.toISOString(),
+  };
+}
diff --git a/apps/webapp/app/v3/mollifier/mollifierBuffer.server.ts b/apps/webapp/app/v3/mollifier/mollifierBuffer.server.ts
new file mode 100644
index 00000000000..9c8917623e4
--- /dev/null
+++ b/apps/webapp/app/v3/mollifier/mollifierBuffer.server.ts
@@ -0,0 +1,32 @@
+import { MollifierBuffer } from "@trigger.dev/redis-worker";
+import { env } from "~/env.server";
+import { logger } from "~/services/logger.server";
+import { singleton } from "~/utils/singleton";
+
+// DI seam type for consumers (e.g. triggerTask.server.ts) that need a
+// nullable buffer accessor at construction time.
+export type MollifierGetBuffer = () => MollifierBuffer | null;
+
+function initializeMollifierBuffer(): MollifierBuffer {
+  logger.debug("Initializing mollifier buffer", {
+    host: env.TRIGGER_MOLLIFIER_REDIS_HOST,
+  });
+
+  return new MollifierBuffer({
+    redisOptions: {
+      keyPrefix: "",
+      host: env.TRIGGER_MOLLIFIER_REDIS_HOST,
+      port: env.TRIGGER_MOLLIFIER_REDIS_PORT,
+      username: env.TRIGGER_MOLLIFIER_REDIS_USERNAME,
+      password: env.TRIGGER_MOLLIFIER_REDIS_PASSWORD,
+      enableAutoPipelining: true,
+      ...(env.TRIGGER_MOLLIFIER_REDIS_TLS_DISABLED === "true" ? {} : { tls: {} }),
+    },
+    entryTtlSeconds: env.TRIGGER_MOLLIFIER_ENTRY_TTL_S,
+  });
+}
+
+export function getMollifierBuffer(): MollifierBuffer | null {
+  if (env.TRIGGER_MOLLIFIER_ENABLED !== "1") return null;
+  return singleton("mollifierBuffer", initializeMollifierBuffer);
+}
diff --git a/apps/webapp/app/v3/mollifier/mollifierDrainer.server.ts b/apps/webapp/app/v3/mollifier/mollifierDrainer.server.ts
new file mode 100644
index 00000000000..139aeaf9a6e
--- /dev/null
+++ b/apps/webapp/app/v3/mollifier/mollifierDrainer.server.ts
@@ -0,0 +1,120 @@
+import { createHash } from "node:crypto";
+import { MollifierDrainer, serialiseSnapshot } from "@trigger.dev/redis-worker";
+import { env } from "~/env.server";
+import { logger } from "~/services/logger.server";
+import { singleton } from "~/utils/singleton";
+import { getMollifierBuffer } from "./mollifierBuffer.server";
+import type { BufferedTriggerPayload } from "./bufferedTriggerPayload.server";
+
+// Distinct error class for the deterministic "fail loud at boot" throws
+// below. The bootstrap in `mollifierDrainerWorker.server.ts` catches
+// transient/init errors and logs them so an unrelated Redis blip doesn't
+// crash the webapp, but it RETHROWS this class — a misconfigured
+// shutdown timeout or missing buffer is a deploy-time mistake that
+// should fail health checks and roll back, not silently disable a
+// half-rolled-out feature.
+//
+// The `name` property is set explicitly so cross-realm `instanceof` checks
+// (e.g. when Remix dev hot-reloads the module and the consumer keeps a
+// reference to the old class) can fall back to `error.name === ...` and
+// still recognise the marker.
+export class MollifierConfigurationError extends Error {
+  constructor(message: string) {
+    super(message);
+    this.name = "MollifierConfigurationError";
+  }
+}
+
+function initializeMollifierDrainer(): MollifierDrainer<BufferedTriggerPayload> {
+  const buffer = getMollifierBuffer();
+  if (!buffer) {
+    // Unreachable in normal config: getMollifierDrainer() gates on the
+    // same env flag as getMollifierBuffer(). If we hit this, fail loud
+    // — the operator has set TRIGGER_MOLLIFIER_ENABLED=1 on a worker pod but
+    // the buffer can't initialise (e.g. TRIGGER_MOLLIFIER_REDIS_HOST resolves
+    // to nothing). Crashing surfaces the misconfig immediately rather
+    // than silently leaving entries un-drained.
+    throw new MollifierConfigurationError(
+      "MollifierDrainer initialised without a buffer — env vars inconsistent",
+    );
+  }
+
+  // Validate BEFORE start() so a misconfigured shutdown timeout fails
+  // loud at module-load time and the singleton is never cached. If start()
+  // ran first and the throw propagated out, the loop would already be
+  // polling with no SIGTERM handler registered by the caller — exactly
+  // the failure mode the validation is supposed to prevent.
+  //
+  // The SIGTERM handler in mollifierDrainerWorker.server.ts is sync fire-and-forget:
+  // `drainer.stop({ timeoutMs })` returns a promise that keeps the event
+  // loop alive, but in cluster mode the primary runs its own
+  // GRACEFUL_SHUTDOWN_TIMEOUT and will call `process.exit(0)`
+  // independently. If the drainer's deadline exceeds the primary's, the
+  // drainer is cut off mid-wait — "log a warning on timeout" turns into
+  // "hard exit with no log". 1s margin gives the primary room to finish
+  // its own teardown after the drainer settles.
+  const shutdownMarginMs = 1_000;
+  if (
+    env.TRIGGER_MOLLIFIER_DRAIN_SHUTDOWN_TIMEOUT_MS >=
+    env.GRACEFUL_SHUTDOWN_TIMEOUT - shutdownMarginMs
+  ) {
+    throw new MollifierConfigurationError(
+      `TRIGGER_MOLLIFIER_DRAIN_SHUTDOWN_TIMEOUT_MS (${env.TRIGGER_MOLLIFIER_DRAIN_SHUTDOWN_TIMEOUT_MS}) must be at least ${shutdownMarginMs}ms below GRACEFUL_SHUTDOWN_TIMEOUT (${env.GRACEFUL_SHUTDOWN_TIMEOUT}); otherwise the primary's hard exit shadows the drainer's deadline.`,
+    );
+  }
+
+  logger.debug("Initializing mollifier drainer", {
+    concurrency: env.TRIGGER_MOLLIFIER_DRAIN_CONCURRENCY,
+    maxAttempts: env.TRIGGER_MOLLIFIER_DRAIN_MAX_ATTEMPTS,
+  });
+
+  // Phase 1 handler: no-op ack. The trigger has ALREADY been written to
+  // Postgres via engine.trigger (dual-write at the call site). Popping +
+  // acking here proves the dequeue mechanism works end-to-end without
+  // duplicating the work. Phase 2 will replace this with an engine.trigger
+  // replay that performs the actual Postgres write.
+  const drainer = new MollifierDrainer<BufferedTriggerPayload>({
+    buffer,
+    handler: async (input) => {
+      // Hash the (re-serialised, canonical) payload on the drain side rather
+      // than on the trigger hot path. Burst-time CPU stays with engine.trigger;
+      // the drainer is the natural place for the audit-equivalence checksum.
+      // Re-serialisation is identity for the BufferedTriggerPayload shape
+      // (only strings/numbers/plain objects), so this hash matches what the
+      // call site wrote into Redis.
+      const reserialised = serialiseSnapshot(input.payload);
+      const payloadHash = createHash("sha256").update(reserialised).digest("hex");
+      logger.info("mollifier.drained", {
+        runId: input.runId,
+        envId: input.envId,
+        orgId: input.orgId,
+        taskId: input.payload.taskId,
+        attempts: input.attempts,
+        ageMs: Date.now() - input.createdAt.getTime(),
+        payloadBytes: Buffer.byteLength(reserialised, "utf8"), // bytes, not UTF-16 units
+        payloadHash,
+      });
+    },
+    concurrency: env.TRIGGER_MOLLIFIER_DRAIN_CONCURRENCY,
+    maxAttempts: env.TRIGGER_MOLLIFIER_DRAIN_MAX_ATTEMPTS,
+    maxOrgsPerTick: env.TRIGGER_MOLLIFIER_DRAIN_MAX_ORGS_PER_TICK,
+    // A no-op handler shouldn't throw, but if something does (e.g. an
+    // unexpected deserialise failure), don't loop — let it FAIL terminally
+    // so the entry is observable in metrics.
+    isRetryable: () => false,
+  });
+
+  return drainer;
+}
+
+// Returns a configured-but-stopped drainer. Callers MUST register their
+// SIGTERM / SIGINT shutdown handlers before invoking `drainer.start()` —
+// see `apps/webapp/app/v3/mollifierDrainerWorker.server.ts`. Starting
+// inside the singleton factory would put the polling loop ahead of
+// handler registration, leaving a narrow window where a SIGTERM landing
+// between `start()` and `process.once("SIGTERM", ...)` would skip the
+// graceful stop. The split is intentional.
+export function getMollifierDrainer(): MollifierDrainer<BufferedTriggerPayload> | null {
+  if (env.TRIGGER_MOLLIFIER_ENABLED !== "1") return null;
+  return singleton("mollifierDrainer", initializeMollifierDrainer);
+}
diff --git a/apps/webapp/app/v3/mollifier/mollifierGate.server.ts b/apps/webapp/app/v3/mollifier/mollifierGate.server.ts
new file mode 100644
index 00000000000..28b0a7f88cf
--- /dev/null
+++ b/apps/webapp/app/v3/mollifier/mollifierGate.server.ts
@@ -0,0 +1,209 @@
+import { env } from "~/env.server";
+import { logger } from "~/services/logger.server";
+import { FEATURE_FLAG, FeatureFlagCatalog } from "~/v3/featureFlags";
+import { getMollifierBuffer } from "./mollifierBuffer.server";
+import { createRealTripEvaluator } from "./mollifierTripEvaluator.server";
+import {
+  recordDecision,
+  type DecisionOutcome,
+  type DecisionReason,
+} from "./mollifierTelemetry.server";
+
+// `count` is the fleet-wide fixed-window counter for the env (INCR with a
+// PEXPIRE armed on the first tick of each window — see
+// `mollifierEvaluateTrip` in `packages/redis-worker/src/mollifier/buffer.ts`).
+// All webapp replicas pointing at the same Redis share the key
+// `mollifier:rate:${envId}`, so the threshold is the fleet-wide ceiling
+// rather than a per-instance one. At a window boundary an env can briefly
+// admit up to ~2x threshold across the fleet before tripping (fixed-window
+// not sliding-window). The tripped marker is refreshed on every overage
+// call, so a sustained burst holds the divert state until the rate falls
+// below threshold within a window.
+export type TripDecision =
+  | { divert: false }
+  | {
+      divert: true;
+      reason: "per_env_rate";
+      count: number;
+      threshold: number;
+      windowMs: number;
+      holdMs: number;
+    };
+
+export type GateOutcome =
+  | { action: "pass_through" }
+  | { action: "mollify"; decision: Extract<TripDecision, { divert: true }> }
+  | { action: "shadow_log"; decision: Extract<TripDecision, { divert: true }> };
+
+export type GateInputs = {
+  envId: string;
+  orgId: string;
+  taskId: string;
+  // Org-scoped flag overrides — taken from `Organization.featureFlags` on the
+  // AuthenticatedEnvironment at the call site. The repo-wide `flag()` helper
+  // queries the global `FeatureFlag` table; passing per-org overrides lets the
+  // mollifier opt in a single org without touching the global row, matching
+  // the pattern used by `canAccessAi`, `canAccessPrivateConnections`, and the
+  // compute-template beta gate.
+  orgFeatureFlags: Record<string, unknown> | null;
+};
+
+export type TripEvaluator = (inputs: GateInputs) => Promise<TripDecision>;
+
+// DI seam type for consumers (e.g. triggerTask.server.ts) that inject the
+// gate at construction time. Deliberately narrower than `evaluateGate`'s
+// real signature — no `deps` param — because consumers only call it with
+// inputs and rely on the module-level defaults.
+export type MollifierEvaluateGate = (inputs: GateInputs) => Promise<GateOutcome>;
+
+export type GateDependencies = {
+  isMollifierEnabled: () => boolean;
+  isShadowModeOn: () => boolean;
+  resolveOrgFlag: (inputs: GateInputs) => Promise<boolean>;
+  evaluator: TripEvaluator;
+  logShadow: (
+    inputs: GateInputs,
+    decision: Extract<TripDecision, { divert: true }>,
+  ) => void;
+  logMollified: (
+    inputs: GateInputs,
+    decision: Extract<TripDecision, { divert: true }>,
+  ) => void;
+  recordDecision: (outcome: DecisionOutcome, reason?: DecisionReason) => void;
+};
+
+// `options` is a thunk so env reads happen per-evaluation, not at module load.
+// Don't "simplify" to a plain object — Phase 2 dynamic config relies on the
+// gate observing whichever env values are live at trigger time.
+const defaultEvaluator = createRealTripEvaluator({
+  getBuffer: () => getMollifierBuffer(),
+  options: () => ({
+    windowMs: env.TRIGGER_MOLLIFIER_TRIP_WINDOW_MS,
+    threshold: env.TRIGGER_MOLLIFIER_TRIP_THRESHOLD,
+    holdMs: env.TRIGGER_MOLLIFIER_HOLD_MS,
+  }),
+});
+
+function logDivertDecision(
+  message: "mollifier.would_mollify" | "mollifier.mollified",
+  inputs: GateInputs,
+  decision: Extract<TripDecision, { divert: true }>,
+): void {
+  logger.debug(message, {
+    envId: inputs.envId,
+    orgId: inputs.orgId,
+    taskId: inputs.taskId,
+    reason: decision.reason,
+    count: decision.count,
+    threshold: decision.threshold,
+    windowMs: decision.windowMs,
+    holdMs: decision.holdMs,
+  });
+}
+
+// Resolve the per-org mollifier flag purely from the in-memory
+// `Organization.featureFlags` JSON. No DB query — `triggerTask` is the
+// trigger hot path and the webapp CLAUDE.md forbids adding Prisma calls
+// there. The fleet-wide kill switch lives in `TRIGGER_MOLLIFIER_ENABLED`; rollout
+// is per-org via the JSON, matching the pattern used by `canAccessAi`,
+// `hasComputeAccess`, etc. There is no global `FeatureFlag` table read
+// in this path by design.
+export function makeResolveMollifierFlag(): (inputs: GateInputs) => Promise<boolean> {
+  return (inputs) => {
+    const override = inputs.orgFeatureFlags?.[FEATURE_FLAG.mollifierEnabled];
+    if (override !== undefined) {
+      const parsed = FeatureFlagCatalog[FEATURE_FLAG.mollifierEnabled].safeParse(override);
+      if (parsed.success) {
+        return Promise.resolve(parsed.data);
+      }
+    }
+    return Promise.resolve(false);
+  };
+}
+
+const resolveMollifierFlag = makeResolveMollifierFlag();
+
+export const defaultGateDependencies: GateDependencies = {
+  isMollifierEnabled: () => env.TRIGGER_MOLLIFIER_ENABLED === "1",
+  isShadowModeOn: () => env.TRIGGER_MOLLIFIER_SHADOW_MODE === "1",
+  resolveOrgFlag: resolveMollifierFlag,
+  evaluator: defaultEvaluator,
+  logShadow: (inputs, decision) =>
+    logDivertDecision("mollifier.would_mollify", inputs, decision),
+  logMollified: (inputs, decision) =>
+    logDivertDecision("mollifier.mollified", inputs, decision),
+  recordDecision,
+};
+
+export async function evaluateGate(
+  inputs: GateInputs,
+  deps: Partial<GateDependencies> = {},
+): Promise<GateOutcome> {
+  const d = { ...defaultGateDependencies, ...deps };
+
+  if (!d.isMollifierEnabled()) {
+    d.recordDecision("pass_through");
+    return { action: "pass_through" };
+  }
+
+  // Fail open: an error thrown while resolving the per-org flag (the default
+  // resolver is in-memory, but injected ones may do I/O) must not block
+  // triggers. Mirrors the fail-open posture in `mollifierTripEvaluator.server.ts`.
+  let orgFlagEnabled: boolean;
+  try {
+    orgFlagEnabled = await d.resolveOrgFlag(inputs);
+  } catch (error) {
+    logger.warn("mollifier.resolve_org_flag_failed", {
+      envId: inputs.envId,
+      orgId: inputs.orgId,
+      taskId: inputs.taskId,
+      error: error instanceof Error ? error.message : String(error),
+    });
+    orgFlagEnabled = false;
+  }
+  const shadowOn = d.isShadowModeOn();
+
+  if (!orgFlagEnabled && !shadowOn) {
+    d.recordDecision("pass_through");
+    return { action: "pass_through" };
+  }
+
+  // Fail open on evaluator errors too. The default `createRealTripEvaluator`
+  // catches its own errors and returns `{ divert: false }`, but injected or
+  // future evaluators may not — keep the contract symmetric with the org
+  // flag resolution above so the trigger hot path can never be broken by a
+  // gate-internal failure.
+  //
+  // Note: the evaluator INCRs the per-env Redis counter (`mollifier:rate:${envId}`)
+  // in *both* shadow-only and flag-on modes — shadow mode is observation-only at
+  // the user-visible level (no diversion), but not Redis-passive. It has to write
+  // because the threshold is computed from a counter, and a counter that doesn't
+  // increment isn't a counter. There's no cross-org bleed: `RuntimeEnvironment`
+  // is 1:1 with `Organization`, so the per-env counter is effectively per-org.
+  let decision: TripDecision;
+  try {
+    decision = await d.evaluator(inputs);
+  } catch (error) {
+    logger.warn("mollifier.evaluator_failed", {
+      envId: inputs.envId,
+      orgId: inputs.orgId,
+      taskId: inputs.taskId,
+      error: error instanceof Error ? error.message : String(error),
+    });
+    decision = { divert: false };
+  }
+  if (!decision.divert) {
+    d.recordDecision("pass_through");
+    return { action: "pass_through" };
+  }
+
+  if (orgFlagEnabled) {
+    d.logMollified(inputs, decision);
+    d.recordDecision("mollify", decision.reason);
+    return { action: "mollify", decision };
+  }
+
+  d.logShadow(inputs, decision);
+  d.recordDecision("shadow_log", decision.reason);
+  return { action: "shadow_log", decision };
+}
diff --git a/apps/webapp/app/v3/mollifier/mollifierTelemetry.server.ts b/apps/webapp/app/v3/mollifier/mollifierTelemetry.server.ts
new file mode 100644
index 00000000000..0fe302584ce
--- /dev/null
+++ b/apps/webapp/app/v3/mollifier/mollifierTelemetry.server.ts
@@ -0,0 +1,17 @@
+import { getMeter } from "@internal/tracing";
+
+const meter = getMeter("mollifier");
+
+export const mollifierDecisionsCounter = meter.createCounter("mollifier.decisions", {
+  description: "Count of mollifier gate decisions by outcome",
+});
+
+export type DecisionOutcome = "pass_through" | "shadow_log" | "mollify";
+export type DecisionReason = "per_env_rate";
+
+export function recordDecision(outcome: DecisionOutcome, reason?: DecisionReason): void {
+  mollifierDecisionsCounter.add(1, {
+    outcome,
+    ...(reason ? { reason } : {}),
+  });
+}
diff --git a/apps/webapp/app/v3/mollifier/mollifierTripEvaluator.server.ts b/apps/webapp/app/v3/mollifier/mollifierTripEvaluator.server.ts
new file mode 100644
index 00000000000..4bd9a34d412
--- /dev/null
+++ b/apps/webapp/app/v3/mollifier/mollifierTripEvaluator.server.ts
@@ -0,0 +1,47 @@
+import type { MollifierBuffer } from "@trigger.dev/redis-worker";
+import { logger } from "~/services/logger.server";
+import type { GateInputs, TripDecision, TripEvaluator } from "./mollifierGate.server";
+
+export type TripEvaluatorOptions = {
+  windowMs: number;
+  threshold: number;
+  holdMs: number;
+};
+
+export type CreateRealTripEvaluatorDeps = {
+  getBuffer: () => MollifierBuffer | null;
+  options: () => TripEvaluatorOptions;
+};
+
+export function createRealTripEvaluator(deps: CreateRealTripEvaluatorDeps): TripEvaluator {
+  return async (inputs: GateInputs): Promise<TripDecision> => {
+    const buffer = deps.getBuffer();
+    if (!buffer) return { divert: false };
+
+    const opts = deps.options();
+
+    try {
+      const { tripped, count } = await buffer.evaluateTrip(inputs.envId, opts);
+      if (!tripped) return { divert: false };
+
+      return {
+        divert: true,
+        reason: "per_env_rate",
+        count,
+        threshold: opts.threshold,
+        windowMs: opts.windowMs,
+        holdMs: opts.holdMs,
+      };
+    } catch (err) {
+      // Deliberate: no error counter here. Shadow mode means a silent miss is
+      // harmless — fail-open is the safe direction. The error log + Sentry
+      // capture is sufficient operability for Phase 1. Revisit in Phase 2
+      // when buffer writes are the primary path and a missed evaluation has cost.
+      logger.error("mollifier trip evaluator: fail-open on error", {
+        envId: inputs.envId,
+        err: err instanceof Error ? err.message : String(err),
+      });
+      return { divert: false };
+    }
+  };
+}
diff --git a/apps/webapp/app/v3/mollifier/readFallback.server.ts b/apps/webapp/app/v3/mollifier/readFallback.server.ts
new file mode 100644
index 00000000000..34a8b48f970
--- /dev/null
+++ b/apps/webapp/app/v3/mollifier/readFallback.server.ts
@@ -0,0 +1,16 @@
+import { logger } from "~/services/logger.server";
+
+export type ReadFallbackInput = {
+  runId: string;
+  environmentId: string;
+  organizationId: string;
+};
+
+export async function findRunByIdWithMollifierFallback(
+  input: ReadFallbackInput,
+): Promise<null> {
+  logger.debug("mollifier read-fallback called (phase 1 stub)", {
+    runId: input.runId,
+  });
+  return null;
+}
diff --git a/apps/webapp/app/v3/mollifierDrainerWorker.server.ts b/apps/webapp/app/v3/mollifierDrainerWorker.server.ts
new file mode 100644
index 00000000000..313e9af6719
--- /dev/null
+++ b/apps/webapp/app/v3/mollifierDrainerWorker.server.ts
@@ -0,0 +1,123 @@
+import { env } from "~/env.server";
+import { logger } from "~/services/logger.server";
+import { signalsEmitter } from "~/services/signals.server";
+import {
+  getMollifierDrainer,
+  MollifierConfigurationError,
+} from "./mollifier/mollifierDrainer.server";
+
+declare global {
+  // eslint-disable-next-line no-var
+  var __mollifierShutdownRegistered__: boolean | undefined; // set to true once the drainer's SIGTERM/SIGINT handlers are registered
+}
+
+/**
+ * Bootstraps the mollifier drainer.
+ *
+ * Two-step lifecycle:
+ *   1. Construct the drainer via the gated singleton in
+ *      `mollifierDrainer.server.ts`. That factory validates the
+ *      shutdown-timeout reconciliation against `GRACEFUL_SHUTDOWN_TIMEOUT`
+ *      and throws BEFORE returning if it's misconfigured; the returned
+ *      drainer is configured-but-stopped.
+ *   2. Register SIGTERM/SIGINT shutdown handlers, then call
+ *      `drainer.start()`. Doing this in the bootstrap (and not in the
+ *      factory) guarantees a signal landing during boot can never find
+ *      the polling loop running without a graceful-stop path.
+ *
+ * The drainer is intentionally NOT wired through `~/services/worker.server`
+ * — that file is the legacy ZodWorker / graphile-worker setup. The
+ * mollifier drainer is a custom polling loop over `MollifierBuffer`, not
+ * a graphile-worker job, so it gets its own lifecycle file alongside the
+ * redis-worker workers (`commonWorker`, `alertsWorker`, `batchTriggerWorker`).
+ *
+ * Gating order:
+ *   - `TRIGGER_MOLLIFIER_DRAINER_ENABLED !== "1"`  → early return. Unset defaults
+ *     to `TRIGGER_MOLLIFIER_ENABLED`, so single-container self-hosters still get
+ *     the drainer for free with one flag. In multi-replica deployments,
+ *     set this to "0" explicitly on every replica except the dedicated
+ *     drainer service so the polling loop doesn't race across replicas.
+ *   - `TRIGGER_MOLLIFIER_ENABLED !== "1"`  → `getMollifierDrainer()` returns null and
+ *     the bootstrap is a no-op. `TRIGGER_MOLLIFIER_ENABLED` remains the master kill
+ *     switch; the new flag only controls WHICH replicas run the drainer when the system is on.
+ *
+ * @throws `MollifierConfigurationError` is logged and rethrown; other errors are logged and swallowed.
+ */
+export function initMollifierDrainerWorker(
+  opts: {
+    // Test seams. Production callers pass nothing; the defaults read the
+    // live env and resolve the live singleton. Tests inject overrides so
+    // the misconfig-rethrow / transient-swallow branches can be driven
+    // without manipulating module-level env state.
+    isEnabled?: () => boolean;
+    getDrainer?: typeof getMollifierDrainer;
+  } = {},
+): void {
+  const isEnabled = opts.isEnabled ?? (() => env.TRIGGER_MOLLIFIER_DRAINER_ENABLED === "1");
+  const getDrainer = opts.getDrainer ?? getMollifierDrainer;
+
+  if (!isEnabled()) {
+    return;
+  }
+
+  try {
+    const drainer = getDrainer();
+    if (drainer && !global.__mollifierShutdownRegistered__) {
+      // `__mollifierShutdownRegistered__` guards against double-register
+      // on dev hot-reloads (this bootstrap is called from
+      // entry.server.tsx, which Remix dev re-evaluates on every change).
+      // Same guard owns both the handler registration and the start()
+      // call so the two never get out of sync.
+      //
+      // Registers through `signalsEmitter` (the webapp-wide singleton in
+      // `~/services/signals.server`) rather than `process.once` directly:
+      //  - matches the codebase convention (runsReplicationInstance,
+      //    llmPricingRegistry, dynamicFlushScheduler etc. all listen on
+      //    the same emitter);
+      //  - `.on` (not `.once`) means a second SIGTERM still reaches us if
+      //    the orchestrator delivers more than one signal before SIGKILL;
+      //  - if SIGTERM lands in the gap between this listener attaching
+      //    and `drainer.start()` below, the first invocation no-ops
+      //    (stop() returns early because the drainer isn't running yet)
+      //    but the listener stays attached for a subsequent signal,
+      //    rather than being consumed by `once`.
+      const stopDrainer = () => {
+        drainer
+          .stop({ timeoutMs: env.TRIGGER_MOLLIFIER_DRAIN_SHUTDOWN_TIMEOUT_MS })
+          .catch((error) => {
+            logger.error("Failed to stop mollifier drainer", { error });
+          });
+      };
+      signalsEmitter.on("SIGTERM", stopDrainer);
+      signalsEmitter.on("SIGINT", stopDrainer);
+      global.__mollifierShutdownRegistered__ = true;
+      drainer.start();
+    }
+  } catch (error) {
+    // Deterministic misconfig (shutdown-timeout vs GRACEFUL_SHUTDOWN_TIMEOUT,
+    // missing buffer client) is a deploy-time mistake the operator must
+    // see immediately — rethrow so the process crashes, health checks
+    // fail, and the orchestrator rolls the deploy back. Phase 1 is
+    // monitoring-only and the silent-fallback was tempting, but Phase 2/3
+    // make the drainer the source of truth for diverted triggers, where a
+    // silently-disabled drainer means data loss. Better to fail loud now
+    // than retrofit later.
+    //
+    // We accept both `instanceof` and `error.name === ...` so Remix dev
+    // hot-reload (where the consumer can hold a stale class reference)
+    // still recognises the marker.
+    if (
+      error instanceof MollifierConfigurationError ||
+      (error instanceof Error && error.name === "MollifierConfigurationError")
+    ) {
+      logger.error("Mollifier drainer misconfiguration — failing loud", {
+        error: error.message,
+      });
+      throw error;
+    }
+    // Anything else (transient Redis blip, unexpected runtime error) is
+    // logged but kept non-fatal — the rest of the webapp shouldn't go
+    // down because the buffer's Redis cluster is briefly unreachable.
+    logger.error("Failed to initialise mollifier drainer", { error });
+  }
+}
diff --git a/apps/webapp/app/v3/otlpExporter.server.ts b/apps/webapp/app/v3/otlpExporter.server.ts
index 7505693e3ab..22dba93f22e 100644
--- a/apps/webapp/app/v3/otlpExporter.server.ts
+++ b/apps/webapp/app/v3/otlpExporter.server.ts
@@ -39,7 +39,6 @@ import { startSpan } from "./tracing.server";
 import { enrichCreatableEvents } from "./utils/enrichCreatableEvents.server";
 import { waitForLlmPricingReady } from "./llmPricingRegistry.server";
 import { env } from "~/env.server";
-import { detectBadJsonStrings } from "~/utils/detectBadJsonStrings";
 import { singleton } from "~/utils/singleton";
 
 class OTLPExporter {
diff --git a/apps/webapp/app/v3/querySchemas.ts b/apps/webapp/app/v3/querySchemas.ts
index 1d2c5467742..947e6e8e468 100644
--- a/apps/webapp/app/v3/querySchemas.ts
+++ b/apps/webapp/app/v3/querySchemas.ts
@@ -176,11 +176,19 @@ export const runsSchema: TableSchema = {
     idempotency_key: {
       name: "idempotency_key",
       clickhouseName: "idempotency_key_user",
-      ...column("String", { description: "Idempotency key (available from 4.3.3)", example: "user-123-action-456" }),
+      ...column("String", {
+        description: "Idempotency key (available from 4.3.3)",
+        example: "user-123-action-456",
+      }),
     },
     idempotency_key_scope: {
       name: "idempotency_key_scope",
-      ...column("String", { description: "The idempotency key scope determines whether a task should be considered unique within a parent run, a specific attempt, or globally. An empty value means there's no idempotency key set (available from 4.3.3).", example: "run", allowedValues: ["global", "run", "attempt"], }),
+      ...column("String", {
+        description:
+          "The idempotency key scope determines whether a task should be considered unique within a parent run, a specific attempt, or globally. An empty value means there's no idempotency key set (available from 4.3.3).",
+        example: "run",
+        allowedValues: ["global", "run", "attempt"],
+      }),
     },
     region: {
       name: "region",
@@ -404,6 +412,13 @@ export const runsSchema: TableSchema = {
       ...column("UInt8", { description: "Whether this is a test run (0 or 1)", example: "0" }),
       expression: "if(is_test > 0, true, false)",
     },
+    is_warm_start: {
+      name: "is_warm_start",
+      ...column("Nullable(UInt8)", {
+        description: "Whether this run used a warm start vs a cold start.",
+        example: "1",
+      }),
+    },
     concurrency_key: {
       name: "concurrency_key",
       ...column("String", {
diff --git a/apps/webapp/test/bufferedTriggerPayload.test.ts b/apps/webapp/test/bufferedTriggerPayload.test.ts
new file mode 100644
index 00000000000..6280acd4c63
--- /dev/null
+++ b/apps/webapp/test/bufferedTriggerPayload.test.ts
@@ -0,0 +1,96 @@
+import { describe, expect, it } from "vitest";
+import { buildBufferedTriggerPayload } from "~/v3/mollifier/bufferedTriggerPayload.server";
+
+describe("buildBufferedTriggerPayload", () => {
+  const baseInput = { // shared fixture: no idempotency key, no parent run
+    runFriendlyId: "run_abc",
+    taskId: "my-task",
+    envId: "env_1",
+    envType: "DEVELOPMENT",
+    envSlug: "dev",
+    orgId: "org_1",
+    orgSlug: "acme",
+    projectId: "proj_db_id",
+    projectRef: "proj_xyz",
+    body: { payload: { hello: "world" }, options: { tags: ["t1"] } } as any,
+    idempotencyKey: null,
+    idempotencyKeyExpiresAt: null,
+    tags: ["t1"],
+    parentRunFriendlyId: null,
+    traceContext: { traceparent: "00-abc-def-01" },
+    triggerSource: "api" as const,
+    triggerAction: "trigger" as const,
+    serviceOptions: {} as any,
+    createdAt: new Date("2026-05-13T09:00:00.000Z"),
+  };
+
+  it("captures all routing identifiers without losing data", () => {
+    const payload = buildBufferedTriggerPayload(baseInput);
+
+    expect(payload.runFriendlyId).toBe("run_abc");
+    expect(payload.envId).toBe("env_1");
+    expect(payload.envType).toBe("DEVELOPMENT");
+    expect(payload.envSlug).toBe("dev");
+    expect(payload.orgId).toBe("org_1");
+    expect(payload.orgSlug).toBe("acme");
+    expect(payload.projectId).toBe("proj_db_id");
+    expect(payload.projectRef).toBe("proj_xyz");
+    expect(payload.taskId).toBe("my-task");
+  });
+
+  it("serialises idempotencyKeyExpiresAt to ISO string only when key is present", () => {
+    const withKey = buildBufferedTriggerPayload({
+      ...baseInput,
+      idempotencyKey: "ik_1",
+      idempotencyKeyExpiresAt: new Date("2026-05-13T10:00:00.000Z"),
+    });
+    expect(withKey.idempotencyKey).toBe("ik_1");
+    expect(withKey.idempotencyKeyExpiresAt).toBe("2026-05-13T10:00:00.000Z");
+
+    const noKey = buildBufferedTriggerPayload(baseInput);
+    expect(noKey.idempotencyKey).toBeNull();
+    expect(noKey.idempotencyKeyExpiresAt).toBeNull();
+
+    // Defensive: an expiresAt with no accompanying key is treated as an
+    // impossible idempotency state — the expiresAt is dropped, not serialised.
+    const orphanExpiry = buildBufferedTriggerPayload({
+      ...baseInput,
+      idempotencyKey: null,
+      idempotencyKeyExpiresAt: new Date("2026-05-13T10:00:00.000Z"),
+    });
+    expect(orphanExpiry.idempotencyKey).toBeNull();
+    expect(orphanExpiry.idempotencyKeyExpiresAt).toBeNull();
+  });
+
+  it("preserves customer body byte-equivalent (drainer replay must match Postgres)", () => {
+    const body = {
+      payload: { quotes: 'a"b', newline: "x\ny", unicode: "🚀", nested: { n: 1 } },
+      options: { tags: ["a"], maxAttempts: 3, machine: "small-1x" },
+    } as any;
+    const payload = buildBufferedTriggerPayload({ ...baseInput, body });
+    expect(payload.body).toEqual(body); // structural equality (toEqual), not a literal byte comparison
+
+    // JSON round-trip is the storage path; verify no information loss.
+    const roundtripped = JSON.parse(JSON.stringify(payload.body));
+    expect(roundtripped).toEqual(body);
+  });
+
+  it("createdAt is serialised to ISO 8601", () => {
+    const payload = buildBufferedTriggerPayload(baseInput);
+    expect(payload.createdAt).toBe("2026-05-13T09:00:00.000Z");
+  });
+
+  it("preserves traceContext (OTel continuity across buffer→drain boundary)", () => {
+    const traceContext = { traceparent: "00-x-y-01", tracestate: "vendor=foo" };
+    const payload = buildBufferedTriggerPayload({ ...baseInput, traceContext });
+    expect(payload.traceContext).toEqual(traceContext);
+  });
+
+  it("nullable parentRunFriendlyId — present and absent", () => {
+    expect(buildBufferedTriggerPayload(baseInput).parentRunFriendlyId).toBeNull();
+    expect(
+      buildBufferedTriggerPayload({ ...baseInput, parentRunFriendlyId: "run_parent" })
+        .parentRunFriendlyId,
+    ).toBe("run_parent");
+  });
+});
diff --git a/apps/webapp/test/chat-snapshot-integration.test.ts b/apps/webapp/test/chat-snapshot-integration.test.ts
index 1d500e16b90..c2a5dcce98d 100644
--- a/apps/webapp/test/chat-snapshot-integration.test.ts
+++ b/apps/webapp/test/chat-snapshot-integration.test.ts
@@ -26,6 +26,7 @@ import {
 import type { UIMessage } from "ai";
 import { afterEach, describe, expect, vi } from "vitest";
 import { env } from "~/env.server";
+import { chatSnapshotStoragePathForSession } from "~/services/realtime/chatSnapshot.server";
 import { generatePresignedUrl } from "~/v3/objectStore.server";
 
 vi.setConfig({ testTimeout: 60_000 });
@@ -54,22 +55,21 @@ function makeSnapshot(opts: { messages?: UIMessage[]; lastOutEventId?: string }
 
 /**
  * Stub `apiClientManager.clientOrThrow()` so the SDK helpers see a fake
- * api client whose `getPayloadUrl` / `createUploadPayloadUrl` return
- * presigned URLs minted by the webapp's real `generatePresignedUrl`
- * (which signs against MinIO).
- *
- * The SDK helpers internally do `fetch(presignedUrl, ...)` to read/write
- * the blob, so MinIO ends up holding the actual bytes.
+ * api client. Mirrors the snapshot-url route: derive the canonical
+ * `sessions/{id}/snapshot.json` key (with optional default-protocol
+ * prefix) and sign it via `generatePresignedUrl` against MinIO.
  */
 function stubApiClient(opts: { projectRef: string; envSlug: string }) {
   vi.spyOn(apiClientManager, "clientOrThrow").mockReturnValue({
-    async getPayloadUrl(filename: string) {
-      const result = await generatePresignedUrl(opts.projectRef, opts.envSlug, filename, "GET");
+    async getChatSnapshotUrl(sessionId: string) {
+      const key = chatSnapshotStoragePathForSession(sessionId);
+      const result = await generatePresignedUrl(opts.projectRef, opts.envSlug, key, "GET");
       if (!result.success) throw new Error(result.error);
       return { presignedUrl: result.url };
     },
-    async createUploadPayloadUrl(filename: string) {
-      const result = await generatePresignedUrl(opts.projectRef, opts.envSlug, filename, "PUT");
+    async createChatSnapshotUploadUrl(sessionId: string) {
+      const key = chatSnapshotStoragePathForSession(sessionId);
+      const result = await generatePresignedUrl(opts.projectRef, opts.envSlug, key, "PUT");
       if (!result.success) throw new Error(result.error);
       return { presignedUrl: result.url };
     },
diff --git a/apps/webapp/test/detectbadJsonStrings.test.ts b/apps/webapp/test/detectbadJsonStrings.test.ts
index 7d14bf4aee8..f3d10037c47 100644
--- a/apps/webapp/test/detectbadJsonStrings.test.ts
+++ b/apps/webapp/test/detectbadJsonStrings.test.ts
@@ -180,6 +180,72 @@ describe("detectBadJsonStrings", () => {
     // The difference should be reasonable (not more than 5x)
     expect(noUnicodeTime / withUnicodeTime).toBeLessThan(5);
   });
+
+  describe("full UTF-16 low-surrogate range coverage (U+DC00–U+DFFF)", () => {
+    // Regression guard: a previous version of this scanner used `[cd]` to
+    // match the low-surrogate nibble, missing the entire U+DE00–U+DFFF
+    // half of the range. Valid surrogate pairs with low surrogates in that
+    // upper half (which includes most common emoji) were falsely flagged,
+    // and lone surrogates in the upper half were falsely passed.
+
+    it("does NOT flag a valid pair with low surrogate in the c range (U+DC00–U+DCFF)", () => {
+      // 🐍 SNAKE = U+1F40D (surrogate pair D83D DC0D)
+      expect(detectBadJsonStrings(`{"s":"\\ud83d\\udc0d"}`)).toBe(false);
+    });
+
+    it("does NOT flag a valid pair with low surrogate in the d range (U+DD00–U+DDFF)", () => {
+      // 🕀 = U+1F540 (surrogate pair D83D DD40)
+      expect(detectBadJsonStrings(`{"s":"\\ud83d\\udd40"}`)).toBe(false);
+    });
+
+    it("does NOT flag a valid pair with low surrogate in the e range (U+DE00–U+DEFF)", () => {
+      // 😀 GRINNING FACE = U+1F600 (surrogate pair D83D DE00) — previously false-flagged
+      expect(detectBadJsonStrings(`{"s":"\\ud83d\\ude00"}`)).toBe(false);
+    });
+
+    it("does NOT flag a valid pair with low surrogate in the f range (U+DF00–U+DFFF)", () => {
+      // 🜀 = U+1F700 (surrogate pair D83D DF00) — previously false-flagged
+      expect(detectBadJsonStrings(`{"s":"\\ud83d\\udf00"}`)).toBe(false);
+    });
+
+    it("flags a lone low surrogate in the e range (\\uDE00)", () => {
+      // Previously this was NOT flagged because the forward scan only
+      // recognised low surrogates with third nibble === "c" || "d".
+      expect(detectBadJsonStrings(`{"s":"prefix \\ude00 suffix"}`)).toBe(true);
+    });
+
+    it("flags a lone low surrogate in the f range (\\uDFFF)", () => {
+      expect(detectBadJsonStrings(`{"s":"prefix \\udfff suffix"}`)).toBe(true);
+    });
+
+    it("flags a high surrogate followed by something that looks like a low surrogate but is in the e range with a missing prefix", () => {
+      // The previous high-surrogate-then-pair check used `[cd]` for the
+      // matching low-surrogate nibble, so a high surrogate followed by
+      // \uDExx or \uDFxx was falsely flagged as unpaired. Check that a lone
+      // high surrogate is still flagged while the valid e-range pair is not.
+      expect(detectBadJsonStrings(`{"s":"\\ud800X"}`)).toBe(true); // truly broken
+      expect(detectBadJsonStrings(`{"s":"\\ud83d\\ude00"}`)).toBe(false); // valid, but used to flag
+    });
+  });
+
+  describe("integration with JSON.stringify", () => {
+    it("does NOT flag JSON.stringify of a valid emoji 😀", () => {
+      // V8 emits the raw character for valid surrogate pairs, so the
+      // fast-path returns false without exercising the regex.
+      expect(detectBadJsonStrings(JSON.stringify("😀"))).toBe(false);
+    });
+
+    it("flags JSON.stringify of a lone high surrogate", () => {
+      expect(detectBadJsonStrings(JSON.stringify("\uD800"))).toBe(true); // well-formed JSON.stringify emits the escape \ud800 for a lone surrogate
+    });
+
+    it("flags JSON.stringify of a lone low surrogate in each of c/d/e/f ranges", () => {
+      expect(detectBadJsonStrings(JSON.stringify("\uDC00"))).toBe(true);
+      expect(detectBadJsonStrings(JSON.stringify("\uDD00"))).toBe(true);
+      expect(detectBadJsonStrings(JSON.stringify("\uDE00"))).toBe(true);
+      expect(detectBadJsonStrings(JSON.stringify("\uDFFF"))).toBe(true);
+    });
+  });
 });
 
 function processPacket(data: string): { data?: string; dataType?: string } {
diff --git a/apps/webapp/test/engine/triggerTask.test.ts b/apps/webapp/test/engine/triggerTask.test.ts
index 798e39e0601..d07909d2907 100644
--- a/apps/webapp/test/engine/triggerTask.test.ts
+++ b/apps/webapp/test/engine/triggerTask.test.ts
@@ -1174,6 +1174,576 @@ describe("RunEngineTriggerTaskService", () => {
       await engine.quit();
     }
   );
+
+  // ─── Mollifier integration ──────────────────────────────────────────────────
+  //
+  // These tests pin the call-site behaviour of the mollifier hooks inside
+  // RunEngineTriggerTaskService.call. They use the optional DI ports
+  // (`evaluateGate`, `getMollifierBuffer`) added on the service constructor —
+  // production wiring is unchanged (defaults to the live module-level imports).
+  // Each test's regression intent lives in its own setup comment.
+
+  class CapturingMollifierBuffer { // test double: records accept() inputs; every other method is an inert stub
+    public accepted: Array<{ runId: string; envId: string; orgId: string; payload: string }> = [];
+    async accept(input: { runId: string; envId: string; orgId: string; payload: string }) {
+      this.accepted.push(input);
+      return true;
+    }
+    async pop() { return null; }
+    async ack() {}
+    async requeue() {}
+    async fail() { return false; }
+    async getEntry() { return null; }
+    async listEnvs(): Promise<string[]> { return []; }
+    async getEntryTtlSeconds(): Promise<number> { return -1; }
+    async evaluateTrip() { return { tripped: false, count: 0 }; }
+    async close() {}
+  }
+
+  containerTest(
+    "mollifier · validation throws before the gate is consulted; no buffer write",
+    async ({ prisma, redisOptions }) => {
+      const engine = new RunEngine({
+        prisma,
+        worker: { redis: redisOptions, workers: 1, tasksPerWorker: 10, pollIntervalMs: 100 },
+        queue: { redis: redisOptions },
+        runLock: { redis: redisOptions },
+        machines: {
+          defaultMachine: "small-1x",
+          machines: { "small-1x": { name: "small-1x" as const, cpu: 0.5, memory: 0.5, centsPerMs: 0.0001 } },
+          baseCostInCents: 0.0005,
+        },
+        tracer: trace.getTracer("test", "0.0.0"),
+      });
+
+      const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION");
+      const taskIdentifier = "test-task";
+      await setupBackgroundWorker(engine, authenticatedEnvironment, taskIdentifier);
+
+      // Validator that always fails the maxAttempts check. Any validation
+      // failure must abort the call BEFORE the gate runs — otherwise the gate
+      // (stubbed below to answer "mollify") could leak a buffer write.
+      class FailingMaxAttemptsValidator extends MockTriggerTaskValidator {
+        validateMaxAttempts(): ValidationResult {
+          return { ok: false, error: new Error("synthetic max-attempts failure") };
+        }
+      }
+
+      const buffer = new CapturingMollifierBuffer();
+      const evaluateGateSpy = vi.fn(async () => ({ action: "mollify" as const, decision: {
+        divert: true as const, reason: "per_env_rate" as const, count: 99, threshold: 1, windowMs: 200, holdMs: 500,
+      } }));
+
+      const triggerTaskService = new RunEngineTriggerTaskService({
+        engine,
+        prisma,
+        payloadProcessor: new MockPayloadProcessor(),
+        queueConcern: new DefaultQueueManager(prisma, engine),
+        idempotencyKeyConcern: new IdempotencyKeyConcern(prisma, engine, new MockTraceEventConcern()),
+        validator: new FailingMaxAttemptsValidator(),
+        traceEventConcern: new MockTraceEventConcern(),
+        tracer: trace.getTracer("test", "0.0.0"),
+        metadataMaximumSize: 1024 * 1024,
+        evaluateGate: evaluateGateSpy,
+        getMollifierBuffer: () => buffer as never,
+        isMollifierGloballyEnabled: () => true,
+      });
+
+      await expect(
+        triggerTaskService.call({
+          taskId: taskIdentifier,
+          environment: authenticatedEnvironment,
+          body: { payload: { test: "x" } },
+        }),
+      ).rejects.toThrow(/synthetic max-attempts failure/);
+
+      // Critical: the gate must NEVER be consulted when validation fails.
+      // If this assertion fires, validation has been re-ordered after the
+      // mollifier gate — a regression that would let invalid triggers land
+      // in the buffer.
+      expect(evaluateGateSpy).not.toHaveBeenCalled();
+      expect(buffer.accepted).toHaveLength(0);
+
+      await engine.quit();
+    },
+  );
+
+  containerTest(
+    "mollifier · mollify action triggers dual-write (buffer.accept + engine.trigger)",
+    async ({ prisma, redisOptions }) => {
+      const engine = new RunEngine({
+        prisma,
+        worker: { redis: redisOptions, workers: 1, tasksPerWorker: 10, pollIntervalMs: 100 },
+        queue: { redis: redisOptions },
+        runLock: { redis: redisOptions },
+        machines: {
+          defaultMachine: "small-1x",
+          machines: { "small-1x": { name: "small-1x" as const, cpu: 0.5, memory: 0.5, centsPerMs: 0.0001 } },
+          baseCostInCents: 0.0005,
+        },
+        tracer: trace.getTracer("test", "0.0.0"),
+      });
+
+      const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION");
+      const taskIdentifier = "test-task";
+      await setupBackgroundWorker(engine, authenticatedEnvironment, taskIdentifier);
+
+      const buffer = new CapturingMollifierBuffer();
+      const trippedDecision = {
+        divert: true as const,
+        reason: "per_env_rate" as const,
+        count: 150,
+        threshold: 100,
+        windowMs: 200,
+        holdMs: 500,
+      };
+
+      const triggerTaskService = new RunEngineTriggerTaskService({
+        engine,
+        prisma,
+        payloadProcessor: new MockPayloadProcessor(),
+        queueConcern: new DefaultQueueManager(prisma, engine),
+        idempotencyKeyConcern: new IdempotencyKeyConcern(prisma, engine, new MockTraceEventConcern()),
+        validator: new MockTriggerTaskValidator(),
+        traceEventConcern: new MockTraceEventConcern(),
+        tracer: trace.getTracer("test", "0.0.0"),
+        metadataMaximumSize: 1024 * 1024,
+        evaluateGate: async () => ({ action: "mollify", decision: trippedDecision }),
+        getMollifierBuffer: () => buffer as never,
+        isMollifierGloballyEnabled: () => true,
+      });
+
+      const result = await triggerTaskService.call({
+        taskId: taskIdentifier,
+        environment: authenticatedEnvironment,
+        body: { payload: { hello: "world" } },
+      });
+
+      // engine.trigger ran — Postgres has the run
+      expect(result).toBeDefined();
+      expect(result?.run.friendlyId).toBeDefined();
+      const pgRun = await prisma.taskRun.findFirst({ where: { id: result!.run.id } });
+      expect(pgRun).not.toBeNull();
+      expect(pgRun!.friendlyId).toBe(result!.run.friendlyId);
+
+      // buffer.accept ran — the capturing buffer (standing in for Redis) holds the audit copy under the same friendlyId
+      expect(buffer.accepted).toHaveLength(1);
+      expect(buffer.accepted[0]!.runId).toBe(result!.run.friendlyId);
+      expect(buffer.accepted[0]!.envId).toBe(authenticatedEnvironment.id);
+      expect(buffer.accepted[0]!.orgId).toBe(authenticatedEnvironment.organizationId);
+
+      // payload is the canonical replay shape
+      const payload = JSON.parse(buffer.accepted[0]!.payload);
+      expect(payload.runFriendlyId).toBe(result!.run.friendlyId);
+      expect(payload.taskId).toBe(taskIdentifier);
+      expect(payload.envId).toBe(authenticatedEnvironment.id);
+      expect(payload.body).toEqual({ payload: { hello: "world" } });
+
+      await engine.quit();
+    },
+  );
+
+  containerTest(
+    "mollifier · pass_through action does NOT call buffer.accept",
+    async ({ prisma, redisOptions }) => {
+      const engine = new RunEngine({
+        prisma,
+        worker: { redis: redisOptions, workers: 1, tasksPerWorker: 10, pollIntervalMs: 100 },
+        queue: { redis: redisOptions },
+        runLock: { redis: redisOptions },
+        machines: {
+          defaultMachine: "small-1x",
+          machines: { "small-1x": { name: "small-1x" as const, cpu: 0.5, memory: 0.5, centsPerMs: 0.0001 } },
+          baseCostInCents: 0.0005,
+        },
+        tracer: trace.getTracer("test", "0.0.0"),
+      });
+
+      const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION");
+      const taskIdentifier = "test-task";
+      await setupBackgroundWorker(engine, authenticatedEnvironment, taskIdentifier);
+
+      const buffer = new CapturingMollifierBuffer();
+      const getBufferSpy = vi.fn(() => buffer as never);
+
+      const triggerTaskService = new RunEngineTriggerTaskService({
+        engine,
+        prisma,
+        payloadProcessor: new MockPayloadProcessor(),
+        queueConcern: new DefaultQueueManager(prisma, engine),
+        idempotencyKeyConcern: new IdempotencyKeyConcern(prisma, engine, new MockTraceEventConcern()),
+        validator: new MockTriggerTaskValidator(),
+        traceEventConcern: new MockTraceEventConcern(),
+        tracer: trace.getTracer("test", "0.0.0"),
+        metadataMaximumSize: 1024 * 1024,
+        evaluateGate: async () => ({ action: "pass_through" }),
+        getMollifierBuffer: getBufferSpy,
+        isMollifierGloballyEnabled: () => true,
+      });
+
+      const result = await triggerTaskService.call({
+        taskId: taskIdentifier,
+        environment: authenticatedEnvironment,
+        body: { payload: { test: "x" } },
+      });
+
+      expect(result).toBeDefined();
+      // The trigger returned a result and left no buffer side-effects
+      expect(buffer.accepted).toHaveLength(0);
+      // getMollifierBuffer must not be called either — the call site short-circuits
+      // before touching the singleton when the gate says pass_through.
+      expect(getBufferSpy).not.toHaveBeenCalled();
+
+      await engine.quit();
+    },
+  );
+
+  containerTest(
+    "mollifier · engine.trigger throwing AFTER buffer.accept leaves an orphan entry (documented behaviour)",
+    async ({ prisma, redisOptions }) => {
+      // SCENARIO: dual-write where buffer.accept succeeds but engine.trigger
+      // throws. The throw propagates to the caller (correct: customer sees
+      // the same 4xx as today), and the buffer entry remains as an "orphan"
+      // — Phase 1's no-op drainer will pop+ack it on its next poll, so the
+      // orphan is bounded (~drainer pollIntervalMs) but observable in the
+      // audit trail (mollifier.buffered with no matching TaskRun).
+      //
+      // Why engine.trigger can throw post-buffer:
+      //   - RunDuplicateIdempotencyKeyError (Prisma P2002 on idempotencyKey):
+      //     a concurrent non-mollified trigger with the same idempotencyKey
+      //     wins the DB UNIQUE constraint between IdempotencyKeyConcern's
+      //     pre-check and engine.trigger's INSERT.
+      //   - RunOneTimeUseTokenError (Prisma P2002 on oneTimeUseToken).
+      //   - Transient Prisma errors (FK constraint, connection drop, etc.).
+      //
+      // Why we don't "fix" this race in Phase 1:
+      //   The customer correctly gets the error. State eventually converges
+      //   (drainer pops the orphan). The audit-trail explicitly surfaces
+      //   "buffered without TaskRun" entries to operators. A real fix is
+      //   Phase 2's responsibility once the buffer becomes the primary write
+      //   — at that point we add the mollifier-specific idempotency index.
+      //
+      // This test pins the current ordering: buffer.accept fires synchronously
+      // BEFORE engine.trigger, and engine.trigger failure does NOT roll back
+      // the buffer write. Any future change that reverses the order or adds
+      // a silent rollback will fail this assertion and force a design
+      // decision rather than a silent behaviour change.
+
+      const engine = new RunEngine({
+        prisma,
+        worker: { redis: redisOptions, workers: 1, tasksPerWorker: 10, pollIntervalMs: 100 },
+        queue: { redis: redisOptions },
+        runLock: { redis: redisOptions },
+        machines: {
+          defaultMachine: "small-1x",
+          machines: { "small-1x": { name: "small-1x" as const, cpu: 0.5, memory: 0.5, centsPerMs: 0.0001 } },
+          baseCostInCents: 0.0005,
+        },
+        tracer: trace.getTracer("test", "0.0.0"),
+      });
+
+      const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION");
+      const taskIdentifier = "test-task";
+      await setupBackgroundWorker(engine, authenticatedEnvironment, taskIdentifier);
+
+      const buffer = new CapturingMollifierBuffer();
+
+      // Force engine.trigger to reject on the next call only
+      // (mockRejectedValueOnce). The spy is installed AFTER
+      // setupBackgroundWorker so that worker bootstrap runs against the
+      // unmocked engine and cannot consume the one-shot rejection before
+      // triggerTaskService.call does.
+      const simulatedFailure = new Error("simulated engine.trigger failure post-buffer");
+      vi.spyOn(engine, "trigger").mockRejectedValueOnce(simulatedFailure);
+
+      const triggerTaskService = new RunEngineTriggerTaskService({
+        engine,
+        prisma,
+        payloadProcessor: new MockPayloadProcessor(),
+        queueConcern: new DefaultQueueManager(prisma, engine),
+        idempotencyKeyConcern: new IdempotencyKeyConcern(prisma, engine, new MockTraceEventConcern()),
+        validator: new MockTriggerTaskValidator(),
+        traceEventConcern: new MockTraceEventConcern(),
+        tracer: trace.getTracer("test", "0.0.0"),
+        metadataMaximumSize: 1024 * 1024,
+        evaluateGate: async () => ({
+          action: "mollify",
+          decision: {
+            divert: true,
+            reason: "per_env_rate",
+            count: 150,
+            threshold: 100,
+            windowMs: 200,
+            holdMs: 500,
+          },
+        }),
+        getMollifierBuffer: () => buffer as never,
+        isMollifierGloballyEnabled: () => true,
+      });
+
+      await expect(
+        triggerTaskService.call({
+          taskId: taskIdentifier,
+          environment: authenticatedEnvironment,
+          body: { payload: { test: "x" } },
+        }),
+      ).rejects.toThrow(/simulated engine.trigger failure post-buffer/);
+
+      // The buffer write happened BEFORE engine.trigger threw. The orphan
+      // remains; the audit-trail will surface it (mollifier.buffered with
+      // no matching TaskRun row). Phase 1's no-op drainer cleans it up.
+      expect(buffer.accepted).toHaveLength(1);
+      const orphanPayload = JSON.parse(buffer.accepted[0]!.payload);
+      expect(orphanPayload.taskId).toBe(taskIdentifier);
+
+      await engine.quit();
+    },
+  );
+
+  containerTest(
+    "mollifier · idempotency-key match short-circuits BEFORE the gate is consulted",
+    async ({ prisma, redisOptions }) => {
+      // SCENARIO: a trigger arrives with an idempotency key matching an
+      // already-created run. `IdempotencyKeyConcern.handleTriggerRequest`
+      // (line 236 of triggerTask.server.ts) detects the match BEFORE the
+      // mollifier gate runs and returns `{ isCached: true, run }`. The
+      // service early-returns. The gate is never consulted, buffer.accept
+      // never fires, no orphan entry is created.
+      //
+      // Regression intent: if IdempotencyKeyConcern were re-ordered to run
+      // AFTER evaluateGate, every idempotent retry on a flagged org would
+      // produce an orphan buffer entry — the audit-trail invariant ("every
+      // buffered runId has a matching TaskRun") would silently start failing
+      // for retries. This test pins the current order.
+
+      const engine = new RunEngine({
+        prisma,
+        worker: { redis: redisOptions, workers: 1, tasksPerWorker: 10, pollIntervalMs: 100 },
+        queue: { redis: redisOptions },
+        runLock: { redis: redisOptions },
+        machines: {
+          defaultMachine: "small-1x",
+          machines: { "small-1x": { name: "small-1x" as const, cpu: 0.5, memory: 0.5, centsPerMs: 0.0001 } },
+          baseCostInCents: 0.0005,
+        },
+        tracer: trace.getTracer("test", "0.0.0"),
+      });
+
+      const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION");
+      const taskIdentifier = "test-task";
+      await setupBackgroundWorker(engine, authenticatedEnvironment, taskIdentifier);
+
+      const idempotencyKeyConcern = new IdempotencyKeyConcern(
+        prisma,
+        engine,
+        new MockTraceEventConcern(),
+      );
+
+      // Setup: normal trigger to create the cached run (no mollifier).
+      const baseline = new RunEngineTriggerTaskService({
+        engine,
+        prisma,
+        payloadProcessor: new MockPayloadProcessor(),
+        queueConcern: new DefaultQueueManager(prisma, engine),
+        idempotencyKeyConcern,
+        validator: new MockTriggerTaskValidator(),
+        traceEventConcern: new MockTraceEventConcern(),
+        tracer: trace.getTracer("test", "0.0.0"),
+        metadataMaximumSize: 1024 * 1024,
+      });
+      const first = await baseline.call({
+        taskId: taskIdentifier,
+        environment: authenticatedEnvironment,
+        body: { payload: { test: "x" }, options: { idempotencyKey: "regression-key-5" } },
+      });
+      expect(first?.isCached).toBe(false);
+
+      // Action: same idempotency key, with a mollify-stub gate that WOULD
+      // create an orphan if reached. The concern must short-circuit first.
+      const buffer = new CapturingMollifierBuffer();
+      const evaluateGateSpy = vi.fn(async () => ({
+        action: "mollify" as const,
+        decision: {
+          divert: true as const,
+          reason: "per_env_rate" as const,
+          count: 150,
+          threshold: 100,
+          windowMs: 200,
+          holdMs: 500,
+        },
+      }));
+
+      const mollifierService = new RunEngineTriggerTaskService({
+        engine,
+        prisma,
+        payloadProcessor: new MockPayloadProcessor(),
+        queueConcern: new DefaultQueueManager(prisma, engine),
+        idempotencyKeyConcern,
+        validator: new MockTriggerTaskValidator(),
+        traceEventConcern: new MockTraceEventConcern(),
+        tracer: trace.getTracer("test", "0.0.0"),
+        metadataMaximumSize: 1024 * 1024,
+        evaluateGate: evaluateGateSpy,
+        getMollifierBuffer: () => buffer as never,
+        isMollifierGloballyEnabled: () => true,
+      });
+
+      const cached = await mollifierService.call({
+        taskId: taskIdentifier,
+        environment: authenticatedEnvironment,
+        body: { payload: { test: "x" }, options: { idempotencyKey: "regression-key-5" } },
+      });
+
+      // Customer sees the cached run, isCached=true
+      expect(cached).toBeDefined();
+      expect(cached?.isCached).toBe(true);
+      expect(cached?.run.friendlyId).toBe(first?.run.friendlyId);
+
+      // Critical: the gate must NEVER be consulted on a cached-idempotency replay.
+      expect(evaluateGateSpy).not.toHaveBeenCalled();
+      expect(buffer.accepted).toHaveLength(0);
+
+      await engine.quit();
+    },
+  );
+
+  containerTest(
+    "mollifier · debounce match produces an orphan buffer entry (documented behaviour)",
+    async ({ prisma, redisOptions }) => {
+      // SCENARIO: a trigger with a debounce key arrives while a matching
+      // debounced run already exists. `debounceSystem.handleDebounce` runs
+      // INSIDE `engine.trigger` (line ~514 of run-engine/src/engine/index.ts),
+      // AFTER buffer.accept has already written the new friendlyId. The
+      // service correctly returns the existing run id to the customer, but
+      // the buffer is left with an orphan entry for the new friendlyId.
+      //
+      // Why this is acceptable in Phase 1:
+      //   - Customer-facing behaviour is unchanged from today: they receive
+      //     the existing run id, same as the non-mollified path.
+      //   - The orphan is bounded — the drainer's no-op-ack handler pops
+      //     and acks it on its next poll.
+      //   - The audit-trail surfaces it: a `mollifier.buffered` log line
+      //     with `runId` that has no matching TaskRun in Postgres.
+      //
+      // Why Phase 2 cares:
+      //   - When the buffer becomes the primary write path, debounce can
+      //     no longer be allowed to run AFTER buffer.accept. The drainer's
+      //     engine.trigger replay would observe "existing" and skip the
+      //     persist — the customer's synthesised 200 (with the new
+      //     friendlyId) would never get a TaskRun, and the audit-trail
+      //     divergence becomes a real data-loss bug.
+      //   - Phase 2 must lift `handleDebounce` into the call site BEFORE
+      //     buffer.accept:
+      //       1. handleDebounce → if existing, return existing run; do NOT
+      //          touch the buffer.
+      //       2. Otherwise, accept with `claimId` threaded into the
+      //          canonical payload so the drainer's replay can
+      //          `registerDebouncedRun` after persisting.
+      //
+      // This test pins the current ordering. A future change that "fixes"
+      // it by lifting handleDebounce upfront will fail the orphan
+      // assertion below and force an explicit choice (update the test,
+      // remove this scenario, or stage the lift behind a flag).
+
+      const engine = new RunEngine({
+        prisma,
+        worker: { redis: redisOptions, workers: 1, tasksPerWorker: 10, pollIntervalMs: 100 },
+        queue: { redis: redisOptions },
+        runLock: { redis: redisOptions },
+        machines: {
+          defaultMachine: "small-1x",
+          machines: { "small-1x": { name: "small-1x" as const, cpu: 0.5, memory: 0.5, centsPerMs: 0.0001 } },
+          baseCostInCents: 0.0005,
+        },
+        tracer: trace.getTracer("test", "0.0.0"),
+      });
+
+      const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION");
+      const taskIdentifier = "test-task";
+      await setupBackgroundWorker(engine, authenticatedEnvironment, taskIdentifier);
+
+      const idempotencyKeyConcern = new IdempotencyKeyConcern(
+        prisma,
+        engine,
+        new MockTraceEventConcern(),
+      );
+
+      // Setup: trigger with debounce — creates the existing run + Redis claim.
+      const baseline = new RunEngineTriggerTaskService({
+        engine,
+        prisma,
+        payloadProcessor: new MockPayloadProcessor(),
+        queueConcern: new DefaultQueueManager(prisma, engine),
+        idempotencyKeyConcern,
+        validator: new MockTriggerTaskValidator(),
+        traceEventConcern: new MockTraceEventConcern(),
+        tracer: trace.getTracer("test", "0.0.0"),
+        metadataMaximumSize: 1024 * 1024,
+      });
+      const first = await baseline.call({
+        taskId: taskIdentifier,
+        environment: authenticatedEnvironment,
+        body: {
+          payload: { test: "x" },
+          options: { debounce: { key: "regression-debounce-6", delay: "30s" } },
+        },
+      });
+      expect(first?.run.friendlyId).toBeDefined();
+
+      // Action: same debounce key, mollify-stub gate.
+      const buffer = new CapturingMollifierBuffer();
+      const mollifierService = new RunEngineTriggerTaskService({
+        engine,
+        prisma,
+        payloadProcessor: new MockPayloadProcessor(),
+        queueConcern: new DefaultQueueManager(prisma, engine),
+        idempotencyKeyConcern,
+        validator: new MockTriggerTaskValidator(),
+        traceEventConcern: new MockTraceEventConcern(),
+        tracer: trace.getTracer("test", "0.0.0"),
+        metadataMaximumSize: 1024 * 1024,
+        evaluateGate: async () => ({
+          action: "mollify",
+          decision: {
+            divert: true,
+            reason: "per_env_rate",
+            count: 150,
+            threshold: 100,
+            windowMs: 200,
+            holdMs: 500,
+          },
+        }),
+        getMollifierBuffer: () => buffer as never,
+        isMollifierGloballyEnabled: () => true,
+      });
+
+      const debounced = await mollifierService.call({
+        taskId: taskIdentifier,
+        environment: authenticatedEnvironment,
+        body: {
+          payload: { test: "x" },
+          options: { debounce: { key: "regression-debounce-6", delay: "30s" } },
+        },
+      });
+
+      // Customer-facing behaviour: the existing run is returned (correct).
+      expect(debounced).toBeDefined();
+      expect(debounced?.run.friendlyId).toBe(first?.run.friendlyId);
+
+      // Orphan: buffer.accept fired with the new friendlyId we generated
+      // upfront, and that friendlyId has no matching TaskRun in Postgres
+      // because engine.trigger returned the existing run via debounce.
+      expect(buffer.accepted).toHaveLength(1);
+      expect(buffer.accepted[0]!.runId).not.toBe(first?.run.friendlyId);
+      const orphanFriendlyId = buffer.accepted[0]!.runId;
+      const orphanRow = await prisma.taskRun.findFirst({
+        where: { friendlyId: orphanFriendlyId },
+      });
+      expect(orphanRow).toBeNull();
+
+      await engine.quit();
+    },
+  );
 });
 
 describe("DefaultQueueManager task metadata cache", () => {
diff --git a/apps/webapp/test/httpErrors.test.ts b/apps/webapp/test/httpErrors.test.ts
new file mode 100644
index 00000000000..dab90bdb024
--- /dev/null
+++ b/apps/webapp/test/httpErrors.test.ts
@@ -0,0 +1,28 @@
+import { describe, expect, it } from "vitest";
+import { throwNotFound } from "~/utils/httpErrors";
+
+describe("throwNotFound", () => {
+  it("throws a Response with status 404 and the provided statusText", () => {
+    // The thrown value is expected to be a Response, not an Error: capture it by hand.
+    let caught: unknown;
+    try {
+      throwNotFound("Environment not found");
+    } catch (error) {
+      caught = error;
+    }
+    expect(caught).toBeInstanceOf(Response);
+    const response = caught as Response;
+    expect(response.status).toBe(404);
+    expect(response.statusText).toBe("Environment not found");
+  });
+
+  it("passes through whatever statusText the caller provides", () => {
+    let caught: unknown;
+    try {
+      throwNotFound("Project not found");
+    } catch (error) {
+      caught = error;
+    }
+
+    expect((caught as Response).statusText).toBe("Project not found");
+  });
+});
diff --git a/apps/webapp/test/mollifierDrainerWorker.test.ts b/apps/webapp/test/mollifierDrainerWorker.test.ts
new file mode 100644
index 00000000000..e5f38229d8f
--- /dev/null
+++ b/apps/webapp/test/mollifierDrainerWorker.test.ts
@@ -0,0 +1,72 @@
+import { describe, expect, it } from "vitest";
+import { MollifierConfigurationError } from "~/v3/mollifier/mollifierDrainer.server";
+import { initMollifierDrainerWorker } from "~/v3/mollifierDrainerWorker.server";
+
+// Pins the error-classification policy inside the bootstrap's catch:
+// deterministic misconfig errors propagate (so a deploy fails loud
+// rather than silently disabling the drainer), and anything else is
+// logged-and-swallowed (so a transient Redis blip during boot doesn't
+// take the whole webapp down). The corresponding production-path
+// integration is the call at `entry.server.tsx`: a sync throw out of
+// `initMollifierDrainerWorker` propagates to the module top level
+// BEFORE `process.on("uncaughtException", ...)` is registered, so Node
+// crashes with a stack trace and exit code 1 — which is exactly what we
+// want from the orchestrator's health-check perspective.
+describe("initMollifierDrainerWorker error classification", () => {
+  it("rethrows MollifierConfigurationError so the process can crash on misconfig", () => {
+    const misconfig = new MollifierConfigurationError(
+      "TRIGGER_MOLLIFIER_DRAIN_SHUTDOWN_TIMEOUT_MS must be at least 1000ms below GRACEFUL_SHUTDOWN_TIMEOUT",
+    );
+
+    expect(() =>
+      initMollifierDrainerWorker({
+        isEnabled: () => true,
+        getDrainer: () => {
+          throw misconfig;
+        },
+      }),
+    ).toThrow(MollifierConfigurationError);
+  });
+
+  it("rethrows when the error carries the marker name even if instanceof fails (dev-realm hot-reload fallback)", () => {
+    // Simulate the cross-realm case where the consumer's `instanceof`
+    // check runs against a different copy of the class than the one the
+    // throw site used. The bootstrap's `.name === "MollifierConfigurationError"`
+    // fallback must catch this so dev hot-reload doesn't silently
+    // suppress misconfig errors.
+    const cousin = new Error("buffer not initialised");
+    cousin.name = "MollifierConfigurationError";
+
+    expect(() =>
+      initMollifierDrainerWorker({
+        isEnabled: () => true,
+        getDrainer: () => {
+          throw cousin;
+        },
+      }),
+    ).toThrow(cousin);
+  });
+
+  it("swallows non-configuration errors so transient init failures don't take the webapp down", () => {
+    expect(() =>
+      initMollifierDrainerWorker({
+        isEnabled: () => true,
+        getDrainer: () => {
+          throw new Error("transient redis blip during buffer init");
+        },
+      }),
+    ).not.toThrow();
+  });
+
+  it("is a no-op when the drainer is disabled for this replica", () => {
+    let factoryCalled = false;
+    initMollifierDrainerWorker({
+      isEnabled: () => false,
+      getDrainer: () => {
+        factoryCalled = true;
+        return null;
+      },
+    });
+    expect(factoryCalled).toBe(false);
+  });
+});
diff --git a/apps/webapp/test/mollifierGate.test.ts b/apps/webapp/test/mollifierGate.test.ts
new file mode 100644
index 00000000000..b81df7f0c5b
--- /dev/null
+++ b/apps/webapp/test/mollifierGate.test.ts
@@ -0,0 +1,434 @@
+import { describe, expect, it, vi } from "vitest";
+
+// Stub `~/db.server` before importing anything that transitively imports it.
+// The real module eagerly calls `prisma.$connect()` at singleton construction
+// (db.server.ts), so loading it under vitest tries to reach localhost:5432
+// and surfaces as an unhandled rejection that fails the whole shard — even
+// though no test in this file actually uses the default prisma client.
+vi.mock("~/db.server", () => ({
+  prisma: {},
+  $replica: {},
+}));
+
+import {
+  evaluateGate,
+  makeResolveMollifierFlag,
+  type GateDependencies,
+  type GateInputs,
+  type TripDecision,
+} from "~/v3/mollifier/mollifierGate.server";
+import type { DecisionOutcome, DecisionReason } from "~/v3/mollifier/mollifierTelemetry.server";
+
+// We deliberately don't use vi.fn here. Per repo policy tests shouldn't lean on
+// mock frameworks for behaviours that are pure functions of the inputs — the
+// gate is pure decision logic, so a hand-rolled "deps + spy log" wired with
+// plain closures gives exactly the assertions we need without the indirection.
+type Spies = {
+  evaluatorCalls: number;
+  logShadowCalls: Array<{ inputs: GateInputs; decision: Extract<TripDecision, { divert: true }> }>;
+  logMollifiedCalls: Array<{ inputs: GateInputs; decision: Extract<TripDecision, { divert: true }> }>;
+  recordDecisionCalls: Array<{ outcome: DecisionOutcome; reason?: DecisionReason }>;
+};
+
+type Toggles = {
+  enabled: boolean;
+  shadow: boolean;
+  flag: boolean;
+  decision: TripDecision;
+};
+
+// Builds a GateDependencies whose answers come from `toggles`, together with
+// the spy log that its callbacks record into.
+function makeDeps(toggles: Toggles): { deps: GateDependencies; spies: Spies } {
+  // Fresh spy log per call so tests never share recorded state.
+  const spies: Spies = {
+    evaluatorCalls: 0,
+    logShadowCalls: [],
+    logMollifiedCalls: [],
+    recordDecisionCalls: [],
+  };
+
+  // `toggles` is read on every call rather than copied when the deps are built.
+  const deps: GateDependencies = {
+    isMollifierEnabled: () => toggles.enabled,
+    isShadowModeOn: () => toggles.shadow,
+    resolveOrgFlag: async () => toggles.flag,
+    evaluator: async () => {
+      spies.evaluatorCalls++;
+      return toggles.decision;
+    },
+    logShadow: (gateInputs, decision) => void spies.logShadowCalls.push({ inputs: gateInputs, decision }),
+    logMollified: (gateInputs, decision) => void spies.logMollifiedCalls.push({ inputs: gateInputs, decision }),
+    recordDecision: (outcome, reason) => void spies.recordDecisionCalls.push({ outcome, reason }),
+  };
+
+  return { deps, spies };
+}
+
+const trippedDecision = {
+  divert: true as const,
+  reason: "per_env_rate" as const,
+  count: 150,
+  threshold: 100,
+  windowMs: 200,
+  holdMs: 500,
+};
+
+const passDecision: TripDecision = { divert: false };
+
+const inputs: GateInputs = {
+  envId: "e1",
+  orgId: "o1",
+  taskId: "t1",
+  orgFeatureFlags: null,
+};
+
+// Cascade truth table. Every combination of (enabled, shadow, flag, divert) is
+// enumerated. `evaluatorCalls` is the expected count, not arbitrary: the gate
+// short-circuits before the evaluator if `!enabled` or (`!flag && !shadow`).
+// `expectedReason` is the optional second arg to `recordDecision` — only
+// divert-true paths attach a reason.
+type Row = {
+  id: number;
+  enabled: boolean;
+  shadow: boolean;
+  flag: boolean;
+  divert: boolean;
+  expected: {
+    action: "pass_through" | "shadow_log" | "mollify";
+    evaluatorCalls: 0 | 1;
+    logShadowCalls: 0 | 1;
+    logMollifiedCalls: 0 | 1;
+    recordedOutcome: "pass_through" | "shadow_log" | "mollify";
+    expectedReason: "per_env_rate" | undefined;
+  };
+};
+
+// 16 rows = 2^4 input combinations. The comment above each group of rows
+// names the gate branch that group exercises so reviewers can map row → code.
+const cascade: Row[] = [
+  // enabled=F → kill-switch wins; evaluator+flag never consulted (rows 1-8)
+  { id: 1, enabled: false, shadow: false, flag: false, divert: false, expected: { action: "pass_through", evaluatorCalls: 0, logShadowCalls: 0, logMollifiedCalls: 0, recordedOutcome: "pass_through", expectedReason: undefined } },
+  { id: 2, enabled: false, shadow: false, flag: false, divert: true,  expected: { action: "pass_through", evaluatorCalls: 0, logShadowCalls: 0, logMollifiedCalls: 0, recordedOutcome: "pass_through", expectedReason: undefined } },
+  { id: 3, enabled: false, shadow: false, flag: true,  divert: false, expected: { action: "pass_through", evaluatorCalls: 0, logShadowCalls: 0, logMollifiedCalls: 0, recordedOutcome: "pass_through", expectedReason: undefined } },
+  { id: 4, enabled: false, shadow: false, flag: true,  divert: true,  expected: { action: "pass_through", evaluatorCalls: 0, logShadowCalls: 0, logMollifiedCalls: 0, recordedOutcome: "pass_through", expectedReason: undefined } },
+  { id: 5, enabled: false, shadow: true,  flag: false, divert: false, expected: { action: "pass_through", evaluatorCalls: 0, logShadowCalls: 0, logMollifiedCalls: 0, recordedOutcome: "pass_through", expectedReason: undefined } },
+  { id: 6, enabled: false, shadow: true,  flag: false, divert: true,  expected: { action: "pass_through", evaluatorCalls: 0, logShadowCalls: 0, logMollifiedCalls: 0, recordedOutcome: "pass_through", expectedReason: undefined } },
+  { id: 7, enabled: false, shadow: true,  flag: true,  divert: false, expected: { action: "pass_through", evaluatorCalls: 0, logShadowCalls: 0, logMollifiedCalls: 0, recordedOutcome: "pass_through", expectedReason: undefined } },
+  { id: 8, enabled: false, shadow: true,  flag: true,  divert: true,  expected: { action: "pass_through", evaluatorCalls: 0, logShadowCalls: 0, logMollifiedCalls: 0, recordedOutcome: "pass_through", expectedReason: undefined } },
+  // enabled=T, flag=F, shadow=F → both opt-ins off; evaluator never called (rows 9-10)
+  { id: 9, enabled: true,  shadow: false, flag: false, divert: false, expected: { action: "pass_through", evaluatorCalls: 0, logShadowCalls: 0, logMollifiedCalls: 0, recordedOutcome: "pass_through", expectedReason: undefined } },
+  { id: 10, enabled: true, shadow: false, flag: false, divert: true,  expected: { action: "pass_through", evaluatorCalls: 0, logShadowCalls: 0, logMollifiedCalls: 0, recordedOutcome: "pass_through", expectedReason: undefined } },
+  // enabled=T, flag=F, shadow=T → shadow path; divert routes outcome (rows 11-12)
+  { id: 11, enabled: true, shadow: true,  flag: false, divert: false, expected: { action: "pass_through", evaluatorCalls: 1, logShadowCalls: 0, logMollifiedCalls: 0, recordedOutcome: "pass_through", expectedReason: undefined } },
+  { id: 12, enabled: true, shadow: true,  flag: false, divert: true,  expected: { action: "shadow_log",   evaluatorCalls: 1, logShadowCalls: 1, logMollifiedCalls: 0, recordedOutcome: "shadow_log",   expectedReason: "per_env_rate" } },
+  // enabled=T, flag=T, shadow=F → mollify path (rows 13-14)
+  { id: 13, enabled: true, shadow: false, flag: true,  divert: false, expected: { action: "pass_through", evaluatorCalls: 1, logShadowCalls: 0, logMollifiedCalls: 0, recordedOutcome: "pass_through", expectedReason: undefined } },
+  { id: 14, enabled: true, shadow: false, flag: true,  divert: true,  expected: { action: "mollify",      evaluatorCalls: 1, logShadowCalls: 0, logMollifiedCalls: 1, recordedOutcome: "mollify",      expectedReason: "per_env_rate" } },
+  // enabled=T, flag=T, shadow=T → flag wins over shadow (rows 15-16)
+  { id: 15, enabled: true, shadow: true,  flag: true,  divert: false, expected: { action: "pass_through", evaluatorCalls: 1, logShadowCalls: 0, logMollifiedCalls: 0, recordedOutcome: "pass_through", expectedReason: undefined } },
+  { id: 16, enabled: true, shadow: true,  flag: true,  divert: true,  expected: { action: "mollify",      evaluatorCalls: 1, logShadowCalls: 0, logMollifiedCalls: 1, recordedOutcome: "mollify",      expectedReason: "per_env_rate" } },
+];
+
+describe("evaluateGate cascade — exhaustive truth table", () => {
+  it.each(cascade)(
+    "row $id: enabled=$enabled shadow=$shadow flag=$flag divert=$divert → action=$expected.action",
+    async (row) => {
+      const { deps, spies } = makeDeps({
+        enabled: row.enabled,
+        shadow: row.shadow,
+        flag: row.flag,
+        decision: row.divert ? trippedDecision : passDecision,
+      });
+
+      const outcome = await evaluateGate(inputs, deps);
+
+      expect(outcome.action).toBe(row.expected.action);
+      expect(spies.evaluatorCalls).toBe(row.expected.evaluatorCalls);
+      expect(spies.logShadowCalls).toHaveLength(row.expected.logShadowCalls);
+      expect(spies.logMollifiedCalls).toHaveLength(row.expected.logMollifiedCalls);
+
+      // Every evaluation records exactly one decision.
+      expect(spies.recordDecisionCalls).toHaveLength(1);
+      expect(spies.recordDecisionCalls[0].outcome).toBe(row.expected.recordedOutcome);
+      expect(spies.recordDecisionCalls[0].reason).toBe(row.expected.expectedReason);
+    },
+  );
+
+  it("shadow log carries the full decision (envId, orgId, taskId, reason, count, threshold, windowMs, holdMs)", async () => {
+    // Shadow path: kill-switch on, org flag off, shadow mode on, evaluator trips.
+    const { deps, spies } = makeDeps({
+      enabled: true,
+      shadow: true,
+      flag: false,
+      decision: trippedDecision,
+    });
+    await evaluateGate(inputs, deps);
+
+    expect(spies.logShadowCalls).toEqual([{ inputs, decision: trippedDecision }]);
+  });
+
+  it("mollify log carries the full decision (mirrors shadow log)", async () => {
+    const { deps, spies } = makeDeps({
+      enabled: true,
+      shadow: false,
+      flag: true,
+      decision: trippedDecision,
+    });
+
+    await evaluateGate(inputs, deps);
+
+    expect(spies.logMollifiedCalls).toEqual([{ inputs, decision: trippedDecision }]);
+  });
+});
+
+// Hot-path guard: `triggerTask.server.ts` calls `evaluateGate` on every
+// trigger when `TRIGGER_MOLLIFIER_ENABLED=1`. The per-org override path must resolve
+// without a Prisma round-trip — otherwise the gate adds a DB query to the
+// highest-throughput code path in the system (see apps/webapp/CLAUDE.md).
+describe("resolveMollifierFlag — hot path", () => {
+  it("returns the per-org override when it's set", async () => {
+    const resolve = makeResolveMollifierFlag();
+
+    const enabled = await resolve({
+      envId: "e",
+      orgId: "o",
+      taskId: "t",
+      orgFeatureFlags: { mollifierEnabled: true },
+    });
+    const disabled = await resolve({
+      envId: "e",
+      orgId: "o",
+      taskId: "t",
+      orgFeatureFlags: { mollifierEnabled: false },
+    });
+
+    expect(enabled).toBe(true);
+    expect(disabled).toBe(false);
+  });
+
+  it("returns false when the org has no override for the key — no DB query, ever", async () => {
+    // Regression intent: the resolver MUST NOT call `flag()` (which would
+    // query `FeatureFlag` via Prisma) on the trigger hot path. Per-org
+    // rollout via `Organization.featureFlags` JSON is the only enable
+    // path; the fleet-wide kill switch is `TRIGGER_MOLLIFIER_ENABLED`.
+    const resolve = makeResolveMollifierFlag();
+
+    const fromNull = await resolve({
+      envId: "e",
+      orgId: "o",
+      taskId: "t",
+      orgFeatureFlags: null,
+    });
+    const fromUnrelatedKeys = await resolve({
+      envId: "e",
+      orgId: "o",
+      taskId: "t",
+      orgFeatureFlags: { hasAiAccess: true },
+    });
+
+    expect(fromNull).toBe(false);
+    expect(fromUnrelatedKeys).toBe(false);
+  });
+});
+
+describe("evaluateGate — fail open on evaluator error", () => {
+  it("treats a throwing evaluator as no-divert (pass_through), and never blocks the trigger", async () => {
+    const spies: Spies = {
+      evaluatorCalls: 0,
+      logShadowCalls: [],
+      logMollifiedCalls: [],
+      recordDecisionCalls: [],
+    };
+    const deps: Partial<GateDependencies> = {
+      isMollifierEnabled: () => true,
+      isShadowModeOn: () => false,
+      resolveOrgFlag: async () => true,
+      evaluator: async () => {
+        spies.evaluatorCalls += 1;
+        throw new Error("simulated evaluator failure");
+      },
+      logShadow: (inputs, decision) => {
+        spies.logShadowCalls.push({ inputs, decision });
+      },
+      logMollified: (inputs, decision) => {
+        spies.logMollifiedCalls.push({ inputs, decision });
+      },
+      recordDecision: (outcome, reason) => {
+        spies.recordDecisionCalls.push({ outcome, reason });
+      },
+    };
+
+    const outcome = await evaluateGate(inputs, deps);
+
+    expect(outcome.action).toBe("pass_through");
+    expect(spies.evaluatorCalls).toBe(1);
+    expect(spies.logMollifiedCalls).toHaveLength(0);
+    expect(spies.logShadowCalls).toHaveLength(0);
+    expect(spies.recordDecisionCalls).toEqual([{ outcome: "pass_through", reason: undefined }]);
+  });
+});
+
+describe("evaluateGate — fail open on resolveOrgFlag error", () => {
+  it("treats org flag as false when resolveOrgFlag throws, and does not block triggers", async () => {
+    const spies: Spies = {
+      evaluatorCalls: 0,
+      logShadowCalls: [],
+      logMollifiedCalls: [],
+      recordDecisionCalls: [],
+    };
+    const deps: Partial<GateDependencies> = {
+      isMollifierEnabled: () => true,
+      isShadowModeOn: () => false,
+      resolveOrgFlag: async () => {
+        throw new Error("simulated prisma timeout");
+      },
+      evaluator: async () => {
+        spies.evaluatorCalls += 1;
+        return trippedDecision;
+      },
+      logShadow: (inputs, decision) => {
+        spies.logShadowCalls.push({ inputs, decision });
+      },
+      logMollified: (inputs, decision) => {
+        spies.logMollifiedCalls.push({ inputs, decision });
+      },
+      recordDecision: (outcome, reason) => {
+        spies.recordDecisionCalls.push({ outcome, reason });
+      },
+    };
+
+    const outcome = await evaluateGate(inputs, deps);
+
+    expect(outcome.action).toBe("pass_through");
+    expect(spies.evaluatorCalls).toBe(0);
+    expect(spies.recordDecisionCalls).toEqual([{ outcome: "pass_through", reason: undefined }]);
+  });
+});
+
+describe("evaluateGate — per-org isolation via Organization.featureFlags", () => {
+  function makeIsolationDeps(
+    resolveOrgFlag: GateDependencies["resolveOrgFlag"],
+  ): { deps: Partial<GateDependencies>; spies: Spies } {
+    const spies: Spies = {
+      evaluatorCalls: 0,
+      logShadowCalls: [],
+      logMollifiedCalls: [],
+      recordDecisionCalls: [],
+    };
+    // Override lifecycle bits and inject the production resolveOrgFlag.
+    // Evaluator returns a fixed tripped decision so the outcome is purely a
+    // function of the flag resolution (which is what we're isolating on).
+    const deps: Partial<GateDependencies> = {
+      isMollifierEnabled: () => true,
+      isShadowModeOn: () => false,
+      resolveOrgFlag,
+      evaluator: async () => {
+        spies.evaluatorCalls += 1;
+        return trippedDecision;
+      },
+      logShadow: (inputs, decision) => {
+        spies.logShadowCalls.push({ inputs, decision });
+      },
+      logMollified: (inputs, decision) => {
+        spies.logMollifiedCalls.push({ inputs, decision });
+      },
+      recordDecision: (outcome, reason) => {
+        spies.recordDecisionCalls.push({ outcome, reason });
+      },
+    };
+    return { deps, spies };
+  }
+
+  // The production resolver — purely in-memory, no Prisma. Mirrors
+  // `defaultGateDependencies.resolveOrgFlag` exactly.
+  const resolve = makeResolveMollifierFlag();
+
+  it("opts in only the org whose featureFlags has mollifierEnabled=true", async () => {
+    const orgA = { ...inputs, orgId: "org_a", orgFeatureFlags: { mollifierEnabled: true } };
+    const orgB = { ...inputs, orgId: "org_b", orgFeatureFlags: { mollifierEnabled: false } };
+    const orgC = { ...inputs, orgId: "org_c", orgFeatureFlags: null };
+
+    const a = makeIsolationDeps(resolve);
+    const b = makeIsolationDeps(resolve);
+    const c = makeIsolationDeps(resolve);
+
+    const [outcomeA, outcomeB, outcomeC] = await Promise.all([
+      evaluateGate(orgA, a.deps),
+      evaluateGate(orgB, b.deps),
+      evaluateGate(orgC, c.deps),
+    ]);
+
+    // Only org A's flag is on → only org A mollifies. Orgs B and C never
+    // reach the evaluator because both flag and shadow-mode are off.
+    expect(outcomeA.action).toBe("mollify");
+    expect(outcomeB.action).toBe("pass_through");
+    expect(outcomeC.action).toBe("pass_through");
+
+    expect(a.spies.evaluatorCalls).toBe(1);
+    expect(b.spies.evaluatorCalls).toBe(0);
+    expect(c.spies.evaluatorCalls).toBe(0);
+
+    expect(a.spies.logMollifiedCalls).toHaveLength(1);
+    expect(b.spies.logMollifiedCalls).toHaveLength(0);
+    expect(c.spies.logMollifiedCalls).toHaveLength(0);
+  });
+
+  it("another org's beta flags must not opt them into mollifier", async () => {
+    // Org A has mollifier on (plus an unrelated beta).
+    const orgA = {
+      ...inputs,
+      orgId: "org_a",
+      orgFeatureFlags: { mollifierEnabled: true, hasComputeAccess: true },
+    };
+    // Org B has *other* betas on but mollifier remains off — keys that gate
+    // compute/AI/query must not bleed across into the mollifier decision.
+    const orgB = {
+      ...inputs,
+      orgId: "org_b",
+      orgFeatureFlags: { hasComputeAccess: true, hasAiAccess: true },
+    };
+
+    const a = makeIsolationDeps(resolve);
+    const b = makeIsolationDeps(resolve);
+
+    const outcomeA = await evaluateGate(orgA, a.deps);
+    const outcomeB = await evaluateGate(orgB, b.deps);
+
+    expect(outcomeA.action).toBe("mollify");
+    expect(outcomeB.action).toBe("pass_through");
+  });
+
+  it("orgs without an explicit override stay off — no global FeatureFlag fallback", async () => {
+    // Regression intent: the resolver MUST NOT consult the global
+    // `FeatureFlag` table on the hot path. An org with `orgFeatureFlags`
+    // unset (the default for almost every org during rollout) gets
+    // pass_through, period. The fleet-wide kill switch lives in
+    // `TRIGGER_MOLLIFIER_ENABLED`, not the FeatureFlag table.
+    const orgInherits = { ...inputs, orgId: "org_inherits", orgFeatureFlags: null };
+    const orgEmpty = { ...inputs, orgId: "org_empty", orgFeatureFlags: {} };
+    const orgUnrelated = {
+      ...inputs,
+      orgId: "org_unrelated",
+      orgFeatureFlags: { hasAiAccess: true },
+    };
+
+    const inheritsDeps = makeIsolationDeps(resolve);
+    const emptyDeps = makeIsolationDeps(resolve);
+    const unrelatedDeps = makeIsolationDeps(resolve);
+
+    const [outInherits, outEmpty, outUnrelated] = await Promise.all([
+      evaluateGate(orgInherits, inheritsDeps.deps),
+      evaluateGate(orgEmpty, emptyDeps.deps),
+      evaluateGate(orgUnrelated, unrelatedDeps.deps),
+    ]);
+
+    expect(outInherits.action).toBe("pass_through");
+    expect(outEmpty.action).toBe("pass_through");
+    expect(outUnrelated.action).toBe("pass_through");
+    // None of these reached the evaluator (flag off, shadow off).
+    expect(inheritsDeps.spies.evaluatorCalls).toBe(0);
+    expect(emptyDeps.spies.evaluatorCalls).toBe(0);
+    expect(unrelatedDeps.spies.evaluatorCalls).toBe(0);
+  });
+});
diff --git a/apps/webapp/test/mollifierTripEvaluator.test.ts b/apps/webapp/test/mollifierTripEvaluator.test.ts
new file mode 100644
index 00000000000..b9a9bf8c94a
--- /dev/null
+++ b/apps/webapp/test/mollifierTripEvaluator.test.ts
@@ -0,0 +1,90 @@
+import { redisTest } from "@internal/testcontainers";
+import { MollifierBuffer } from "@trigger.dev/redis-worker";
+import { describe, expect, vi } from "vitest";
+import { createRealTripEvaluator } from "~/v3/mollifier/mollifierTripEvaluator.server";
+
+vi.setConfig({ testTimeout: 30_000 });
+
+// Use a real MollifierBuffer backed by a Redis testcontainer — repo policy
+// is no mocks for Redis. Per-test envIds keep keys disjoint without explicit
+// cleanup. We close() the buffer in a finally to release the client.
+const inputs = { envId: "env_a", orgId: "org_1", taskId: "t1" } as const;
+
+describe("createRealTripEvaluator", () => {
+  redisTest(
+    "returns divert=false when the sliding window stays under threshold",
+    async ({ redisOptions }) => {
+      const buffer = new MollifierBuffer({ redisOptions, entryTtlSeconds: 600 });
+      try {
+        const evaluator = createRealTripEvaluator({
+          getBuffer: () => buffer,
+          options: () => ({ windowMs: 1000, threshold: 100, holdMs: 500 }),
+        });
+
+        const decision = await evaluator({ ...inputs, envId: "env_under" });
+        expect(decision).toEqual({ divert: false });
+      } finally {
+        await buffer.close();
+      }
+    },
+  );
+
+  redisTest(
+    "returns divert=true with reason per_env_rate once the window trips",
+    async ({ redisOptions }) => {
+      const buffer = new MollifierBuffer({ redisOptions, entryTtlSeconds: 600 });
+      try {
+        // threshold=2 → the 3rd call within windowMs is the first that trips.
+        const options = { windowMs: 5000, threshold: 2, holdMs: 5000 } as const;
+        const evaluator = createRealTripEvaluator({
+          getBuffer: () => buffer,
+          options: () => options,
+        });
+
+        const envId = "env_trip";
+        await evaluator({ ...inputs, envId });
+        await evaluator({ ...inputs, envId });
+        const decision = await evaluator({ ...inputs, envId });
+
+        expect(decision.divert).toBe(true);
+        if (decision.divert) {
+          expect(decision.reason).toBe("per_env_rate");
+          expect(decision.threshold).toBe(options.threshold);
+          expect(decision.windowMs).toBe(options.windowMs);
+          expect(decision.holdMs).toBe(options.holdMs);
+          expect(decision.count).toBeGreaterThan(options.threshold);
+        }
+      } finally {
+        await buffer.close();
+      }
+    },
+  );
+
+  redisTest("returns divert=false when getBuffer returns null (fail-open)", async () => {
+    const evaluator = createRealTripEvaluator({
+      getBuffer: () => null,
+      options: () => ({ windowMs: 200, threshold: 100, holdMs: 500 }),
+    });
+
+    const decision = await evaluator(inputs);
+    expect(decision).toEqual({ divert: false });
+  });
+
+  redisTest(
+    "returns divert=false when buffer throws (fail-open)",
+    async ({ redisOptions }) => {
+      const buffer = new MollifierBuffer({ redisOptions, entryTtlSeconds: 600 });
+      // Closing the client up front means evaluateTrip will throw on the first
+      // Redis command — a real failure mode, not a stub.
+      await buffer.close();
+
+      const evaluator = createRealTripEvaluator({
+        getBuffer: () => buffer,
+        options: () => ({ windowMs: 200, threshold: 100, holdMs: 500 }),
+      });
+
+      const decision = await evaluator(inputs);
+      expect(decision).toEqual({ divert: false });
+    },
+  );
+});
diff --git a/apps/webapp/test/otlpUtf16Sanitization.integration.test.ts b/apps/webapp/test/otlpUtf16Sanitization.integration.test.ts
new file mode 100644
index 00000000000..7897ecf73fd
--- /dev/null
+++ b/apps/webapp/test/otlpUtf16Sanitization.integration.test.ts
@@ -0,0 +1,197 @@
+import { clickhouseTest } from "@internal/testcontainers";
+import { describe, expect } from "vitest";
+import {
+  INVALID_UTF16_SENTINEL,
+  isClickHouseJsonParseError,
+  parseRowNumberFromError,
+  sanitizeRows,
+} from "~/v3/eventRepository/sanitizeRowsOnParseError.server";
+
+/**
+ * Integration test that proves the reactive sanitize-and-retry flow works
+ * against a real ClickHouse instance. Boots a CH container (via testcontainers)
+ * and reproduces the prod failure path end-to-end.
+ *
+ * Three contracts are verified:
+ *
+ * 1. **Happy retry path** — insert a row with a lone UTF-16 surrogate, observe
+ *    the parse error, confirm `parseRowNumberFromError` finds a row hint,
+ *    sanitize the whole batch with `sanitizeRows`, retry once, and confirm the
+ *    row lands with the sentinel substituted.
+ *
+ * 2. **Real CH error shape** — confirm `isClickHouseJsonParseError` correctly
+ *    recognises the error string we get back from a real CH (not just synthetic
+ *    test fixtures) and that `parseRowNumberFromError` extracts the right
+ *    integer from the same string.
+ *
+ * 3. **Non-parse errors don't get swallowed** — insert into a table that does
+ *    not exist and confirm the resulting `UNKNOWN_TABLE`-style rejection is NOT
+ *    misclassified as a JSON parse error by our predicate.
+ */
+
+const HIGH_SURROGATE = "\uD800";
+const LOW_SURROGATE = "\uDC00";
+
+// ClickHouse container boot + image pull on first run can take well past
+// vitest's 5 s default. Match what `internal-packages/clickhouse/vitest.config.ts`
+// uses for its own clickhouseTest specs.
+const INTEGRATION_TIMEOUT_MS = 60_000;
+
+describe("OTel attribute UTF-16 sanitization → ClickHouse insert", () => {
+  clickhouseTest(
+    "lone surrogate is rejected by CH, then sanitized and retried successfully",
+    async ({ clickhouseClient }) => {
+      const table = "trigger_dev_test.utf16_repro";
+
+      await clickhouseClient.command({
+        query: "CREATE DATABASE IF NOT EXISTS trigger_dev_test",
+      });
+      await clickhouseClient.command({ query: `DROP TABLE IF EXISTS ${table}` });
+      await clickhouseClient.command({
+        query: `
+          CREATE TABLE ${table} (
+            id String,
+            attributes JSON
+          ) ENGINE = MergeTree() ORDER BY id
+          SETTINGS allow_experimental_json_type = 1
+        `,
+      });
+
+      const rows = [
+        {
+          id: "row-clean-prefix",
+          attributes: { ai: { prompt: { messages: "valid prompt 1" } } },
+        },
+        {
+          id: "row-poisoned",
+          attributes: {
+            ai: { prompt: { messages: `valid prefix ${HIGH_SURROGATE} broken tail` } },
+          },
+        },
+        {
+          id: "row-clean-suffix",
+          attributes: { ai: { prompt: { messages: "valid prompt 3" } } },
+        },
+      ];
+
+      // --- Contract 1: real CH rejects the raw insert with our recognisable error ---
+      const firstError = await clickhouseClient
+        .insert({
+          table,
+          values: rows,
+          format: "JSONEachRow",
+          clickhouse_settings: { async_insert: 0, input_format_parallel_parsing: 1 },
+        })
+        .then(
+          () => null,
+          (e: unknown) => e as Error
+        );
+
+      expect(firstError, "first insert must be rejected").not.toBeNull();
+      expect(
+        isClickHouseJsonParseError(firstError),
+        "our predicate must recognise the real CH parse error"
+      ).toBe(true);
+      const rowN = parseRowNumberFromError(firstError!.message);
+      expect(rowN, "real CH error must include `at row N`").not.toBeNull();
+      expect(rowN! >= 0).toBe(true);
+
+      // --- Recovery: sanitize the whole batch, retry ---
+      // We don't slice on `rowN` even though we logged it — `at row N`
+      // semantics under parallel parsing aren't stable enough to skip rows.
+      const { rowsTouched, fieldsSanitized } = sanitizeRows(rows);
+      expect(fieldsSanitized, "exactly one field should have been replaced").toBe(1);
+      expect(rowsTouched).toBe(1);
+
+      // Confirm the targeted row was sanitized and the clean ones were left alone.
+      expect(rows[1].attributes.ai.prompt.messages).toBe(INVALID_UTF16_SENTINEL);
+      expect(rows[0].attributes.ai.prompt.messages).toBe("valid prompt 1");
+      expect(rows[2].attributes.ai.prompt.messages).toBe("valid prompt 3");
+
+      // --- Contract 1 (cont'd): retry now lands cleanly ---
+      await clickhouseClient.insert({
+        table,
+        values: rows,
+        format: "JSONEachRow",
+        clickhouse_settings: { async_insert: 0, input_format_parallel_parsing: 1 },
+      });
+
+      const result = await clickhouseClient
+        .query({
+          query: `
+            SELECT id, toJSONString(attributes) AS attributes_text
+            FROM ${table}
+            ORDER BY id
+          `,
+          format: "JSONEachRow",
+        })
+        .then((r) => r.json<{ id: string; attributes_text: string }>());
+
+      expect(result).toHaveLength(3);
+      const byId = Object.fromEntries(result.map((r) => [r.id, r]));
+      expect(byId["row-clean-prefix"].attributes_text).toContain("valid prompt 1");
+      expect(byId["row-clean-suffix"].attributes_text).toContain("valid prompt 3");
+      expect(byId["row-poisoned"].attributes_text).toContain(INVALID_UTF16_SENTINEL);
+
+      // --- Contract 2: lone LOW surrogate also recognised + recoverable ---
+      const lowSurrogateRow = {
+        id: "row-low-surrogate",
+        attributes: {
+          ai: { prompt: { messages: `valid prefix ${LOW_SURROGATE} broken tail` } },
+        },
+      };
+      const lowSurrogateError = await clickhouseClient
+        .insert({
+          table,
+          values: [lowSurrogateRow],
+          format: "JSONEachRow",
+          clickhouse_settings: { async_insert: 0, input_format_parallel_parsing: 1 },
+        })
+        .then(
+          () => null,
+          (e: unknown) => e as Error
+        );
+      expect(lowSurrogateError).not.toBeNull();
+      expect(isClickHouseJsonParseError(lowSurrogateError)).toBe(true);
+
+      sanitizeRows([lowSurrogateRow]);
+      expect(lowSurrogateRow.attributes.ai.prompt.messages).toBe(INVALID_UTF16_SENTINEL);
+
+      await clickhouseClient.insert({
+        table,
+        values: [lowSurrogateRow],
+        format: "JSONEachRow",
+        clickhouse_settings: { async_insert: 0, input_format_parallel_parsing: 1 },
+      });
+    },
+    INTEGRATION_TIMEOUT_MS
+  );
+
+  clickhouseTest(
+    "non-parse-error rejections (e.g. missing table) are NOT misclassified as JSON parse errors",
+    async ({ clickhouseClient }) => {
+      // Pick an error class that is unambiguously NOT a JSON parse failure —
+      // inserting into a table that doesn't exist. CH returns
+      // `Table doesn't exist` (UNKNOWN_TABLE). If our predicate ever started
+      // matching it we'd wastefully sanitize-and-retry an unrelated failure.
+      const error = await clickhouseClient
+        .insert({
+          table: "trigger_dev_test_nonexistent.utf16_does_not_exist",
+          values: [{ id: "1", attributes: { ok: "yes" } }],
+          format: "JSONEachRow",
+          clickhouse_settings: { async_insert: 0 },
+        })
+        .then(
+          () => null,
+          (e: unknown) => e as Error
+        );
+
+      expect(error, "missing-table insert should be rejected").not.toBeNull();
+      expect(
+        isClickHouseJsonParseError(error),
+        "non-parse error must not be misclassified as JSON parse error"
+      ).toBe(false);
+    },
+    INTEGRATION_TIMEOUT_MS
+  );
+});
diff --git a/apps/webapp/test/replay-after-crash.test.ts b/apps/webapp/test/replay-after-crash.test.ts
index 576ced2ab2a..fdd5274b5e7 100644
--- a/apps/webapp/test/replay-after-crash.test.ts
+++ b/apps/webapp/test/replay-after-crash.test.ts
@@ -33,6 +33,7 @@ import {
 import type { UIMessageChunk } from "ai";
 import { afterEach, describe, expect, vi } from "vitest";
 import { env } from "~/env.server";
+import { chatSnapshotStoragePathForSession } from "~/services/realtime/chatSnapshot.server";
 import { generatePresignedUrl } from "~/v3/objectStore.server";
 
 vi.setConfig({ testTimeout: 60_000 });
@@ -55,8 +56,11 @@ function textTurn(id: string, text: string): UIMessageChunk[] {
  *     via the webapp's real `generatePresignedUrl` (so snapshot reads
  *     hit a real S3-compatible backend).
  *   - `readSessionStreamRecords` returns the canonical
- *     `{ records: [{ data, id, seqNum }] }` shape — `data` is the
- *     JSON-encoded chunk body, mirroring the webapp's S2 record shape.
+ *     `{ records: [{ data, id, seqNum }] }` shape. `data` is the parsed
+ *     chunk OBJECT — the SDK writer puts the chunk object directly into
+ *     the record envelope and the webapp route forwards it as-is, so
+ *     the schema now declares `data: z.unknown()` and consumers use it
+ *     without an extra `JSON.parse` step.
  */
 function stubApiClient(opts: {
   projectRef: string;
@@ -64,7 +68,7 @@ function stubApiClient(opts: {
   sessionOutChunks: unknown[];
 }) {
   const records = opts.sessionOutChunks.map((chunk, i) => ({
-    data: typeof chunk === "string" ? chunk : JSON.stringify(chunk),
+    data: chunk,
     id: `evt-${i + 1}`,
     seqNum: i + 1,
   }));
@@ -74,13 +78,15 @@ function stubApiClient(opts: {
     })
   );
   vi.spyOn(apiClientManager, "clientOrThrow").mockReturnValue({
-    async getPayloadUrl(filename: string) {
-      const result = await generatePresignedUrl(opts.projectRef, opts.envSlug, filename, "GET");
+    async getChatSnapshotUrl(sessionId: string) {
+      const key = chatSnapshotStoragePathForSession(sessionId);
+      const result = await generatePresignedUrl(opts.projectRef, opts.envSlug, key, "GET");
       if (!result.success) throw new Error(result.error);
       return { presignedUrl: result.url };
     },
-    async createUploadPayloadUrl(filename: string) {
-      const result = await generatePresignedUrl(opts.projectRef, opts.envSlug, filename, "PUT");
+    async createChatSnapshotUploadUrl(sessionId: string) {
+      const key = chatSnapshotStoragePathForSession(sessionId);
+      const result = await generatePresignedUrl(opts.projectRef, opts.envSlug, key, "PUT");
       if (!result.success) throw new Error(result.error);
       return { presignedUrl: result.url };
     },
diff --git a/apps/webapp/test/sanitizeRowsOnParseError.test.ts b/apps/webapp/test/sanitizeRowsOnParseError.test.ts
new file mode 100644
index 00000000000..fafa6ca4790
--- /dev/null
+++ b/apps/webapp/test/sanitizeRowsOnParseError.test.ts
@@ -0,0 +1,161 @@
+import { describe, it, expect } from "vitest";
+import {
+  INVALID_UTF16_SENTINEL,
+  isClickHouseJsonParseError,
+  parseRowNumberFromError,
+  sanitizeRows,
+  sanitizeUnknownInPlace,
+} from "~/v3/eventRepository/sanitizeRowsOnParseError.server";
+
+const HIGH_SURROGATE = "\uD800";
+const LOW_SURROGATE = "\uDC00";
+
+describe("isClickHouseJsonParseError", () => {
+  it("recognises ClickHouse's parse-error string", () => {
+    const err = new Error(
+      "Cannot parse JSON object here: {...}: (while reading the value of key attributes): (at row 15)\n: While executing ParallelParsingBlockInputFormat. "
+    );
+    expect(isClickHouseJsonParseError(err)).toBe(true);
+  });
+
+  it("returns false for unrelated errors", () => {
+    expect(isClickHouseJsonParseError(new Error("Connection refused"))).toBe(false);
+    expect(
+      isClickHouseJsonParseError(
+        new Error("Size of JSON object at position 999 is extremely large.")
+      )
+    ).toBe(false);
+  });
+
+  it("returns false for null / undefined but matches a bare message string", () => {
+    expect(isClickHouseJsonParseError(null)).toBe(false);
+    expect(isClickHouseJsonParseError(undefined)).toBe(false);
+    expect(isClickHouseJsonParseError("Cannot parse JSON object")).toBe(true); // raw string input is matched too
+  });
+});
+
+describe("parseRowNumberFromError", () => {
+  it("extracts the row index from a typical ClickHouse error message", () => {
+    expect(
+      parseRowNumberFromError(
+        "Cannot parse JSON object here: { ... }: (while reading the value of key attributes): (at row 1942)\n: While executing ParallelParsingBlockInputFormat."
+      )
+    ).toBe(1942);
+  });
+
+  it("returns null when no row index is present", () => {
+    expect(parseRowNumberFromError("Some other error without a row hint")).toBeNull();
+  });
+
+  it("returns the first match when multiple `at row N` substrings exist", () => {
+    expect(parseRowNumberFromError("at row 1, oops also at row 2")).toBe(1);
+  });
+});
+
+describe("sanitizeUnknownInPlace", () => {
+  it("returns the string unchanged when it has no surrogates", () => {
+    const result = sanitizeUnknownInPlace("hello world");
+    expect(result).toEqual({ value: "hello world", fixed: 0 });
+  });
+
+  it("replaces a lone-surrogate string with the sentinel", () => {
+    const result = sanitizeUnknownInPlace(`prefix ${HIGH_SURROGATE} suffix`);
+    expect(result.value).toBe(INVALID_UTF16_SENTINEL);
+    expect(result.fixed).toBe(1);
+  });
+
+  it("leaves valid surrogate pairs (emoji) intact", () => {
+    const result = sanitizeUnknownInPlace("hello 😀 world");
+    expect(result.value).toBe("hello 😀 world");
+    expect(result.fixed).toBe(0);
+  });
+
+  it("walks nested objects and mutates string leaves in place", () => {
+    const row = {
+      id: "row-1",
+      attributes: {
+        ai: {
+          prompt: { messages: `bad ${HIGH_SURROGATE} string` },
+          usage: { input_tokens: 42 },
+        },
+        clean: "untouched",
+      },
+    };
+    const result = sanitizeUnknownInPlace(row);
+    expect(result.fixed).toBe(1);
+    expect((row.attributes.ai.prompt as any).messages).toBe(INVALID_UTF16_SENTINEL);
+    expect(row.attributes.clean).toBe("untouched");
+    expect((row.attributes.ai.usage as any).input_tokens).toBe(42);
+    expect(row.id).toBe("row-1");
+  });
+
+  it("walks arrays recursively", () => {
+    const value = ["ok", `bad ${LOW_SURROGATE} value`, "also ok", { nested: `also bad ${HIGH_SURROGATE}` }];
+    const result = sanitizeUnknownInPlace(value);
+    expect(result.fixed).toBe(2);
+    expect(value[1]).toBe(INVALID_UTF16_SENTINEL);
+    expect((value[3] as any).nested).toBe(INVALID_UTF16_SENTINEL);
+    expect(value[0]).toBe("ok");
+    expect(value[2]).toBe("also ok");
+  });
+
+  it("leaves non-string primitives untouched", () => {
+    expect(sanitizeUnknownInPlace(42)).toEqual({ value: 42, fixed: 0 });
+    expect(sanitizeUnknownInPlace(true)).toEqual({ value: true, fixed: 0 });
+    expect(sanitizeUnknownInPlace(null)).toEqual({ value: null, fixed: 0 });
+    expect(sanitizeUnknownInPlace(undefined)).toEqual({ value: undefined, fixed: 0 });
+  });
+});
+
+describe("sanitizeRows", () => {
+  function makeRow(suffix: string, badField?: string) {
+    return {
+      id: `row-${suffix}`,
+      attributes: { foo: badField ?? "clean" },
+    };
+  }
+
+  it("sanitizes every row that has bad strings", () => {
+    const rows = [
+      makeRow("0", `bad-0-${HIGH_SURROGATE}`),
+      makeRow("1", `bad-1-${HIGH_SURROGATE}`),
+      makeRow("2", "clean"),
+      makeRow("3", `bad-3-${HIGH_SURROGATE}`),
+    ];
+
+    const result = sanitizeRows(rows);
+
+    expect(rows[0].attributes.foo).toBe(INVALID_UTF16_SENTINEL);
+    expect(rows[1].attributes.foo).toBe(INVALID_UTF16_SENTINEL);
+    expect(rows[2].attributes.foo).toBe("clean");
+    expect(rows[3].attributes.foo).toBe(INVALID_UTF16_SENTINEL);
+    expect(result.rowsTouched).toBe(3);
+    expect(result.fieldsSanitized).toBe(3);
+  });
+
+  it("returns zero counts when no row has bad strings", () => {
+    const rows = [makeRow("0"), makeRow("1"), makeRow("2")];
+    const result = sanitizeRows(rows);
+    expect(result).toEqual({ rowsTouched: 0, fieldsSanitized: 0 });
+  });
+
+  it("returns zero counts for an empty batch", () => {
+    expect(sanitizeRows([])).toEqual({ rowsTouched: 0, fieldsSanitized: 0 });
+  });
+
+  it("counts multiple sanitized fields on the same row as one rowTouched but multiple fields", () => {
+    const rows = [
+      {
+        id: "r0",
+        attributes: {
+          a: `bad ${HIGH_SURROGATE}`,
+          b: `also bad ${LOW_SURROGATE}`,
+          c: "fine",
+        },
+      },
+    ];
+    const result = sanitizeRows(rows);
+    expect(result.rowsTouched).toBe(1);
+    expect(result.fieldsSanitized).toBe(2);
+  });
+});
diff --git a/apps/webapp/test/setup.ts b/apps/webapp/test/setup.ts
new file mode 100644
index 00000000000..607ad78f3a9
--- /dev/null
+++ b/apps/webapp/test/setup.ts
@@ -0,0 +1,6 @@
+// Load apps/webapp/.env into process.env so env.server's top-level
+// EnvironmentSchema.parse(process.env) succeeds in vitest workers.
+import { config } from "dotenv";
+import path from "node:path";
+
+config({ path: path.resolve(__dirname, "../.env") });
diff --git a/apps/webapp/vitest.config.ts b/apps/webapp/vitest.config.ts
index 66f697706a5..6a6b550fc64 100644
--- a/apps/webapp/vitest.config.ts
+++ b/apps/webapp/vitest.config.ts
@@ -10,6 +10,7 @@ export default defineConfig({
     exclude: ["test/**/*.e2e.test.ts", "test/**/*.e2e.full.test.ts"],
     globals: true,
     pool: "forks",
+    setupFiles: ["./test/setup.ts"], // load apps/webapp/.env
   },
   // @ts-ignore
   plugins: [tsconfigPaths({ projects: ["./tsconfig.json"] })],
diff --git a/docs/ai-chat/actions.mdx b/docs/ai-chat/actions.mdx
new file mode 100644
index 00000000000..e6b894a3059
--- /dev/null
+++ b/docs/ai-chat/actions.mdx
@@ -0,0 +1,115 @@
+---
+title: "Actions"
+sidebarTitle: "Actions"
+description: "Custom commands sent from the frontend that mutate chat state without consuming a turn — undo, rollback, edit, regenerate."
+---
+
+import RcBanner from "/snippets/ai-chat-rc-banner.mdx";
+
+<RcBanner />
+
+## Overview
+
+Custom actions let the frontend send structured commands (undo, rollback, edit, regenerate) that modify the conversation state. **Actions are not turns**: they fire `hydrateMessages` (if set) and `onAction` only. No turn lifecycle hooks (`onTurnStart` / `prepareMessages` / `onBeforeTurnComplete` / `onTurnComplete`), no `run()`, no turn-counter increment. The trace span is named `chat action`.
+
+Actions wake the agent from suspension the same way a new message does, run their handler against the latest accumulator state, and emit a `trigger:turn-complete` chunk so the frontend's `useChat` knows the action has been applied.
+
+## Defining an action handler
+
+Define an `actionSchema` for validation and an `onAction` handler that uses [`chat.history`](/ai-chat/backend#chat-history) to modify state:
+
+```ts
+import { z } from "zod";
+
+export const myChat = chat.agent({
+  id: "my-chat",
+  actionSchema: z.discriminatedUnion("type", [
+    z.object({ type: z.literal("undo") }),
+    z.object({ type: z.literal("rollback"), targetMessageId: z.string() }),
+    z.object({ type: z.literal("edit"), messageId: z.string(), text: z.string() }),
+  ]),
+
+  onAction: async ({ action }) => {
+    switch (action.type) {
+      case "undo":
+        chat.history.slice(0, -2); // Remove last user + assistant exchange
+        break;
+      case "rollback":
+        chat.history.rollbackTo(action.targetMessageId);
+        break;
+      case "edit":
+        chat.history.replace(action.messageId, {
+          id: action.messageId,
+          role: "user",
+          parts: [{ type: "text", text: action.text }],
+        });
+        break;
+    }
+    // returning void → side-effect-only, no model call
+  },
+
+  run: async ({ messages, signal }) => {
+    return streamText({ model: anthropic("claude-sonnet-4-5"), messages, abortSignal: signal });
+  },
+});
+```
+
+**Lifecycle flow:** Wake → parse action against `actionSchema` → `hydrateMessages` (if set) → **`onAction`** → apply `chat.history` mutations → emit `trigger:turn-complete` → wait for next message.
+
+## Returning a model response from an action
+
+`onAction` can return a `StreamTextResult`, `string`, or `UIMessage` to produce a response. The returned stream is auto-piped to the frontend just like a normal turn, but the rest of the turn machinery (`onTurnStart`, `onTurnComplete`, etc.) still does not fire.
+
+```ts
+onAction: async ({ action, messages }) => {
+  if (action.type === "regenerate") {
+    chat.history.slice(0, -1); // drop the last assistant message
+    return streamText({
+      model: anthropic("claude-sonnet-4-5"),
+      messages,
+      stopWhen: stepCountIs(15),
+    });
+  }
+  // other actions return void → side-effect only
+}
+```
+
+This is useful for actions that both mutate state and want a fresh model response (regenerate-from-here, retry-with-different-style). Persistence is your responsibility inside `onAction` itself; you have access to the streamed response object.
+
+## Gating actions on HITL state
+
+If you have a [human-in-the-loop](/ai-chat/patterns/human-in-the-loop) tool waiting on `addToolOutput`, you usually want to refuse competing actions like `regenerate` until the answer arrives. [`chat.history.getPendingToolCalls()`](/ai-chat/backend#chat-history) gives you exactly that signal:
+
+```ts
+onAction: async ({ action, messages, signal }) => {
+  if (action.type === "regenerate") {
+    if (chat.history.getPendingToolCalls().length > 0) return; // gated
+    chat.history.slice(0, -1);
+    return streamText({ model: anthropic("claude-sonnet-4-5"), messages, abortSignal: signal });
+  }
+},
+```
+
+## Sending actions from the frontend
+
+```ts
+// Browser — TriggerChatTransport
+const stream = await transport.sendAction(chatId, { type: "undo" });
+
+// Server — AgentChat
+const stream = await agentChat.sendAction({ type: "rollback", targetMessageId: "msg-3" });
+```
+
+The action payload is validated against `actionSchema` on the backend; invalid actions throw and surface as a stream error. The `action` parameter in `onAction` is fully typed from the schema.
+
+<Note>
+  For silent state changes that should never appear as a turn (e.g. injecting background context), use [`chat.inject()`](/ai-chat/background-injection) instead. Actions are explicit user-driven mutations; injections are agent-side context updates.
+</Note>
+
+## See also
+
+- [`chat.history`](/ai-chat/backend#chat-history) — the imperative API actions use to mutate state
+- [Sending actions from the frontend](/ai-chat/frontend#sending-actions) — `transport.sendAction` ergonomics
+- [`hydrateMessages`](/ai-chat/lifecycle-hooks#hydratemessages) — fires before `onAction` when set
+- [Branching conversations](/ai-chat/patterns/branching-conversations) — pairs action handlers with backend-controlled history
+- [Human-in-the-loop](/ai-chat/patterns/human-in-the-loop) — gating fresh actions while a tool is waiting
diff --git a/docs/ai-chat/backend.mdx b/docs/ai-chat/backend.mdx
new file mode 100644
index 00000000000..67c5b920f9a
--- /dev/null
+++ b/docs/ai-chat/backend.mdx
@@ -0,0 +1,1018 @@
+---
+title: "Backend"
+sidebarTitle: "Backend"
+description: "Three approaches to building your chat backend — chat.agent(), session iterator, or raw task primitives."
+---
+
+import RcBanner from "/snippets/ai-chat-rc-banner.mdx";
+
+<RcBanner />
+
+## chat.agent()
+
+The highest-level approach. Handles message accumulation, stop signals, turn lifecycle, and auto-piping for you.
+
+<Tip>
+  To pin a **custom** `UIMessage` subtype or a typed client data schema, use the [ChatBuilder](/ai-chat/types#chatbuilder) via `chat.withUIMessage<...>()` and/or `chat.withClientData({ schema })`. Builder-level hooks can also be chained before `.agent()`. See [Types](/ai-chat/types).
+</Tip>
+
+<Info>
+  Every `chat.agent` conversation is backed by a durable Session — `externalId` is your `chatId`, `type` is `"chat.agent"`, `taskIdentifier` is the agent's task ID. The session is the run manager: it owns the chat's runs, persists across run lifecycles, and orchestrates handoffs (idle continuation, `chat.requestUpgrade`). You rarely need to touch the session directly (`chat.stream`, `chat.messages`, `chat.stopSignal` wrap everything), but `payload.sessionId` is available if you want to reach in — e.g. `sessions.open(payload.sessionId)` to write from a sub-agent or from outside the turn loop.
+</Info>
+
+<Warning>
+  **Always spread `chat.toStreamTextOptions()` into every `streamText` call.** It wires up the `prepareStep` callback that drives [compaction](/ai-chat/compaction), [steering](/ai-chat/pending-messages), and [background injection](/ai-chat/background-injection) — features that silently no-op if the spread is missing. It also injects the system prompt set via `chat.prompt()`, the resolved model (when a registry is provided), and telemetry metadata.
+
+  Spread it **first** in the options object so any explicit overrides win:
+
+  ```ts
+  streamText({
+    ...chat.toStreamTextOptions(),     // or: chat.toStreamTextOptions({ registry, tools }) — see below
+    messages,
+    abortSignal: signal,
+    // any explicit overrides go here
+    stopWhen: stepCountIs(15),
+  });
+  ```
+
+  Examples in this doc keep the spread implicit for brevity, but you should include it in real code.
+</Warning>
+
+### Simple: return a StreamTextResult
+
+Return the `streamText` result from `run` and it's automatically piped to the frontend:
+
+```ts
+import { chat } from "@trigger.dev/sdk/ai";
+import { streamText, stepCountIs } from "ai";
+import { anthropic } from "@ai-sdk/anthropic";
+
+export const simpleChat = chat.agent({
+  id: "simple-chat",
+  run: async ({ messages, signal }) => {
+    return streamText({
+      ...chat.toStreamTextOptions(), // prepareStep, system, telemetry — see callout above
+      model: anthropic("claude-sonnet-4-5"),
+      system: "You are a helpful assistant.",
+      messages,
+      abortSignal: signal,
+      stopWhen: stepCountIs(15),
+    });
+  },
+});
+```
+
+### Using chat.pipe() for complex flows
+
+For complex agent flows where `streamText` is called deep inside your code, use `chat.pipe()`. It works from **anywhere inside a task** — even nested function calls.
+
+```ts trigger/agent-chat.ts
+import { chat } from "@trigger.dev/sdk/ai";
+import { streamText, stepCountIs } from "ai";
+import { anthropic } from "@ai-sdk/anthropic";
+import type { ModelMessage } from "ai";
+
+export const agentChat = chat.agent({
+  id: "agent-chat",
+  run: async ({ messages }) => {
+    // Don't return anything — chat.pipe is called inside
+    await runAgentLoop(messages);
+  },
+});
+
+async function runAgentLoop(messages: ModelMessage[]) {
+  // ... agent logic, tool calls, etc.
+
+  const result = streamText({
+    model: anthropic("claude-sonnet-4-5"),
+    messages,
+    stopWhen: stepCountIs(15),
+  });
+
+  // Pipe from anywhere — no need to return it
+  await chat.pipe(result);
+}
+```
+
+### Custom data parts
+
+Add custom `data-*` parts to the assistant's response message via `chat.response.write()` (from `run()`) or the `writer` parameter in lifecycle hooks. Non-transient `data-*` chunks are automatically added to `responseMessage.parts` and surface in `onTurnComplete` for persistence:
+
+```ts
+export const myChat = chat.agent({
+  id: "my-chat",
+  onBeforeTurnComplete: async ({ writer, turn }) => {
+    // This data part will be in responseMessage.parts in onTurnComplete
+    writer.write({
+      type: "data-metadata",
+      data: { turn, model: "gpt-4o", timestamp: Date.now() },
+    });
+  },
+  onTurnComplete: async ({ responseMessage }) => {
+    // responseMessage.parts includes the data-metadata part
+    await db.messages.save(responseMessage);
+  },
+  run: async ({ messages, signal }) => {
+    // Also works from run() via chat.response
+    chat.response.write({
+      type: "data-context",
+      data: { searchResults: results },
+    });
+
+    return streamText({ model: anthropic("claude-sonnet-4-5"), messages, abortSignal: signal });
+  },
+});
+```
+
+Add `transient: true` to data chunks that should stream to the frontend but NOT persist in the response message. Use this for progress indicators, loading states, and other temporary UI:
+
+```ts
+// Transient — frontend sees it, but NOT in onTurnComplete's responseMessage
+writer.write({
+  type: "data-progress",
+  id: "search",
+  data: { percent: 50 },
+  transient: true,
+});
+```
+
+<Info>
+  This matches the AI SDK's semantics: `data-*` chunks persist to `message.parts` by default. Only `transient: true` chunks are ephemeral. Non-data chunks (`text-delta`, `tool-*`, etc.) are handled by `streamText` and captured via `onFinish` — they don't need `chat.response`.
+</Info>
+
+<Note>
+  `chat.response` and the `writer` accumulation behavior work with `chat.agent` and `chat.createSession`. If you're using [`chat.customAgent`](#raw-task-with-primitives), you own the accumulator — see the raw-task example for the manual pattern.
+</Note>
+
+### Raw streaming with `chat.stream`
+
+For low-level stream access (piping from subtasks, reading streams by run ID), use `chat.stream`. Chunks written via `chat.stream` go directly to the realtime output — they are **NOT** accumulated into the response message regardless of the `transient` flag.
+
+```ts
+// Raw stream — always ephemeral, never in responseMessage
+const { waitUntilComplete } = chat.stream.writer({
+  execute: ({ write }) => {
+    write({ type: "data-status", data: { message: "Processing..." } });
+  },
+});
+await waitUntilComplete();
+```
+
+<Tip>
+  Use `data-*` chunk types (e.g. `data-status`, `data-progress`) for custom data. The AI SDK processes these into `DataUIPart` objects in `message.parts` on the frontend. Writing the same `type` + `id` again updates the existing part instead of creating a new one — useful for live progress.
+</Tip>
+
+`chat.stream` exposes the full stream API:
+
+| Method | Description |
+|--------|-------------|
+| `chat.stream.writer(options)` | Write individual chunks via a callback |
+| `chat.stream.pipe(stream, options?)` | Pipe a `ReadableStream` or `AsyncIterable` |
+| `chat.stream.append(value, options?)` | Append raw data |
+| `chat.stream.read(runId, options?)` | Read the stream by run ID |
+
+For piping streams from subtasks to the parent chat (via `target: "root"`), see the [Sub-agents pattern](/ai-chat/patterns/sub-agents).
+
+### Lifecycle hooks
+
+`chat.agent({ ... })` accepts hooks that fire in a fixed order around each turn, plus dedicated suspend/resume hooks. The full reference lives on its own page:
+
+- [Lifecycle hooks](/ai-chat/lifecycle-hooks) — `onPreload`, `onChatStart`, `onValidateMessages`, `hydrateMessages`, `onTurnStart`, `onBeforeTurnComplete`, `onTurnComplete`, `onChatSuspend` / `onChatResume`, `exitAfterPreloadIdle`, plus how `ctx` plumbs through every callback.
+
+**Per-turn order:** `onValidateMessages` → `hydrateMessages` → `onChatStart` (chat's first message only) → `onTurnStart` → `run()` → `onBeforeTurnComplete` → `onTurnComplete`.
+
+### Using prompts
+
+Use [AI Prompts](/ai/prompts) to manage your system prompt as versioned, overridable config. Store the resolved prompt in a lifecycle hook with `chat.prompt.set()`, then spread `chat.toStreamTextOptions()` into `streamText` — it includes the system prompt, model, config, and telemetry automatically.
+
+```ts
+import { chat } from "@trigger.dev/sdk/ai";
+import { prompts } from "@trigger.dev/sdk";
+import { streamText, stepCountIs, createProviderRegistry } from "ai";
+import { anthropic } from "@ai-sdk/anthropic";
+import { z } from "zod";
+
+const registry = createProviderRegistry({ anthropic });
+
+const systemPrompt = prompts.define({
+  id: "my-chat-system",
+  model: "anthropic:claude-sonnet-4-5",
+  config: { temperature: 0.7 },
+  variables: z.object({ name: z.string() }),
+  content: `You are a helpful assistant for {{name}}.`,
+});
+
+export const myChat = chat.agent({
+  id: "my-chat",
+  clientDataSchema: z.object({ userId: z.string() }),
+  onChatStart: async ({ clientData }) => {
+    const user = await db.user.findUnique({ where: { id: clientData.userId } });
+    const resolved = await systemPrompt.resolve({ name: user.name });
+    chat.prompt.set(resolved);
+  },
+  run: async ({ messages, signal }) => {
+    return streamText({
+      ...chat.toStreamTextOptions({ registry }), // system, model, config, telemetry
+      messages,
+      abortSignal: signal,
+      stopWhen: stepCountIs(15),
+    });
+  },
+});
+```
+
+`chat.toStreamTextOptions()` returns an object with `system`, `model` (resolved via the registry), `temperature`, and `experimental_telemetry` — all from the stored prompt. Properties you set after the spread (like a client-selected model) take precedence.
+
+**Which form to call:**
+
+| Form | Use when |
+|---|---|
+| `chat.toStreamTextOptions()` | Default. Wires up `prepareStep` (compaction, steering, background injection), the stored prompt's `system` / `model` / `config`, and telemetry metadata. |
+| `chat.toStreamTextOptions({ registry })` | You're using [Prompts](/ai/prompts) with a provider-prefixed model string (e.g. `"anthropic:claude-sonnet-4-5"`). The registry resolves the prefix to a real model instance via `createProviderRegistry({ anthropic, openai, ... })`. |
+| `chat.toStreamTextOptions({ tools })` | You want HITL tool approvals — pass the same `tools` object you give to `streamText`. The SDK then knows which tool calls need to pause on `needsApproval: true`. |
+| `chat.toStreamTextOptions({ registry, tools })` | Both of the above. |
+
+<Tip>
+  See [Prompts](/ai/prompts) for the full guide — defining templates, variable schemas, dashboard
+  overrides, and the management SDK.
+</Tip>
+
+### Stop generation
+
+#### How stop works
+
+Calling `stop()` from `useChat` sends a stop signal to the running task via input streams. The task's `streamText` call aborts (if you passed `signal` or `stopSignal`), but the **run stays alive** and waits for the next message. The partial response is captured and accumulated normally.
+
+#### Abort signals
+
+The `run` function receives three abort signals:
+
+| Signal         | Fires when                                  | Use for                                                                |
+| -------------- | ------------------------------------------- | ---------------------------------------------------------------------- |
+| `signal`       | Stop **or** cancel                          | Pass to `streamText` — handles both cases. **Use this in most cases.** |
+| `stopSignal`   | Stop only (per-turn, reset each turn)       | Custom logic that should only run on user stop, not cancellation       |
+| `cancelSignal` | Run cancel, expire, or maxDuration exceeded | Cleanup that should only happen on full cancellation                   |
+
+```ts
+export const myChat = chat.agent({
+  id: "my-chat",
+  run: async ({ messages, signal, stopSignal, cancelSignal }) => {
+    return streamText({
+      model: anthropic("claude-sonnet-4-5"),
+      messages,
+      abortSignal: signal, // Handles both stop and cancel
+      stopWhen: stepCountIs(15),
+    });
+  },
+});
+```
+
+<Tip>
+  Use `signal` (the combined signal) in most cases. The separate `stopSignal` and `cancelSignal` are
+  only needed if you want different behavior for stop vs cancel.
+</Tip>
+
+#### Detecting stop in callbacks
+
+The `onTurnComplete` event includes a `stopped` boolean that indicates whether the user stopped generation during that turn:
+
+```ts
+export const myChat = chat.agent({
+  id: "my-chat",
+  onTurnComplete: async ({ chatId, uiMessages, stopped }) => {
+    await db.chat.update({
+      where: { id: chatId },
+      data: { messages: uiMessages, lastStoppedAt: stopped ? new Date() : undefined },
+    });
+  },
+  run: async ({ messages, signal }) => {
+    return streamText({ model: anthropic("claude-sonnet-4-5"), messages, abortSignal: signal });
+  },
+});
+```
+
+You can also check stop status from **anywhere** during a turn using `chat.isStopped()`. This is useful inside `streamText`'s `onFinish` callback where the AI SDK's `isAborted` flag can be unreliable (e.g. when using `createUIMessageStream` + `writer.merge()`):
+
+```ts
+import { chat } from "@trigger.dev/sdk/ai";
+import { streamText, stepCountIs } from "ai";
+import { anthropic } from "@ai-sdk/anthropic";
+
+export const myChat = chat.agent({
+  id: "my-chat",
+  run: async ({ messages, signal }) => {
+    return streamText({
+      model: anthropic("claude-sonnet-4-5"),
+      messages,
+      abortSignal: signal,
+      onFinish: ({ isAborted }) => {
+        // isAborted may be false even after stop when using createUIMessageStream
+        const wasStopped = isAborted || chat.isStopped();
+        if (wasStopped) {
+          // handle stop — e.g. log analytics
+        }
+      },
+      stopWhen: stepCountIs(15),
+    });
+  },
+});
+```
+
+#### Cleaning up aborted messages
+
+When stop happens mid-stream, the captured response message can contain parts in an incomplete state — tool calls stuck in `partial-call`, reasoning blocks still marked as `streaming`, etc. These can cause UI issues like permanent spinners.
+
+When a stop is detected, `chat.agent` automatically cleans up the `responseMessage` before passing it to `onTurnComplete`. If you use `chat.pipe()` manually and capture response messages yourself, use `chat.cleanupAbortedParts()`:
+
+```ts
+const cleaned = chat.cleanupAbortedParts(rawResponseMessage);
+```
+
+This removes tool invocation parts stuck in `partial-call` state and marks any `streaming` text or reasoning parts as `done`.
+
+<Note>
+  Stop signal delivery is best-effort. There is a small race window where the model may finish
+  before the stop signal arrives, in which case the turn completes normally with `stopped: false`.
+  This is expected and does not require special handling.
+</Note>
+
+### Tool approvals
+
+Tools with `needsApproval: true` pause execution until the user approves or denies via the frontend. Define the tool as normal and pass it to `streamText` — `chat.agent` handles the rest:
+
+```ts
+const sendEmail = tool({
+  description: "Send an email. Requires human approval.",
+  inputSchema: z.object({ to: z.string(), subject: z.string(), body: z.string() }),
+  needsApproval: true,
+  execute: async ({ to, subject, body }) => {
+    await emailService.send({ to, subject, body });
+    return { sent: true };
+  },
+});
+
+export const myChat = chat.agent({
+  id: "my-chat",
+  run: async ({ messages, signal }) => {
+    return streamText({
+      model: anthropic("claude-sonnet-4-5"),
+      messages,
+      tools: { sendEmail },
+      abortSignal: signal,
+      stopWhen: stepCountIs(15),
+    });
+  },
+});
+```
+
+When the model calls an approval-required tool, the turn completes with the tool in `approval-requested` state. After the user approves on the frontend, the updated message is sent back and `chat.agent` replaces it in the conversation accumulator by matching the message ID. `streamText` then executes the approved tool and continues.
+
+See [Tool approvals](/ai-chat/frontend#tool-approvals) in the frontend docs for the UI setup.
+
+### Persistence
+
+To build a chat app that survives page refreshes you persist two things, both server-side from inside the agent:
+
+1. **Conversation state.** Full `UIMessage[]` keyed by `chatId`. Written from `onTurnStart` (so the user message is durable before streaming begins) and `onTurnComplete` (so the assistant reply lands).
+2. **Session state.** The transport's reconnect metadata: `publicAccessToken` and `lastEventId`. Written alongside the messages from the same hooks.
+
+<Note>
+  Sessions let the transport reconnect to an existing run after a page refresh. Without them, every page load would start a new run, losing the conversation context that was accumulated in the previous run.
+</Note>
+
+For the full per-hook breakdown, race-condition warnings (atomic `lastEventId` writes, why not to use `chat.defer` in `onTurnStart`), token renewal via the `accessToken` callback, and an end-to-end three-file example, see [Database persistence](/ai-chat/patterns/database-persistence).
+
+### Pending messages (steering)
+
+Users can send messages while the agent is executing tool calls. With `pendingMessages`, these messages are injected between tool-call steps, steering the agent mid-execution:
+
+```ts
+export const myChat = chat.agent({
+  id: "my-chat",
+  pendingMessages: {
+    shouldInject: ({ steps }) => steps.length > 0,
+  },
+  run: async ({ messages, signal }) => {
+    return streamText({
+      ...chat.toStreamTextOptions({ registry }),
+      messages,
+      tools: {
+        /* ... */
+      },
+      abortSignal: signal,
+      stopWhen: stepCountIs(15),
+    });
+  },
+});
+```
+
+On the frontend, the `usePendingMessages` hook handles sending, tracking, and rendering injection points.
+
+<Tip>
+  See [Pending Messages](/ai-chat/pending-messages) for the full guide — backend configuration,
+  frontend hook, queuing vs steering, and how injection works with all three chat variants.
+</Tip>
+
+### Background injection
+
+Inject context from background work into the conversation using `chat.inject()`. Combine with `chat.defer()` to run analysis between turns and inject results before the next response — self-review, RAG augmentation, safety checks, etc.
+
+```ts
+export const myChat = chat.agent({
+  id: "my-chat",
+  onTurnComplete: async ({ messages }) => {
+    chat.defer(
+      (async () => {
+        const review = await generateObject({
+          /* ... */
+        });
+        if (review.object.needsImprovement) {
+          chat.inject([
+            {
+              role: "system",
+              content: `[Self-review]\n${review.object.suggestions.join("\n")}`,
+            },
+          ]);
+        }
+      })()
+    );
+  },
+  run: async ({ messages, signal }) => {
+    return streamText({ ...chat.toStreamTextOptions({ registry }), messages, abortSignal: signal });
+  },
+});
+```
+
+<Tip>
+  See [Background Injection](/ai-chat/background-injection) for the full guide — timing, self-review
+  example, and how it differs from pending messages.
+</Tip>
+
+### Actions
+
+Custom actions let the frontend send structured commands (undo, rollback, edit, regenerate) that modify the conversation state. **Actions are not turns**: they fire `hydrateMessages` (if set) and `onAction` only. The full surface (defining `actionSchema`, returning a model response from `onAction`, gating against pending HITL tool calls, and sending actions from the frontend) lives on its own page.
+
+See [Actions](/ai-chat/actions).
+
+### Chat history
+
+Imperative API for reading and modifying the accumulated message history. Works from any hook (`onAction`, `onTurnStart`, `onBeforeTurnComplete`, `onTurnComplete`, `hydrateMessages`) or from `run()` and AI SDK tools.
+
+<Note>
+  The agent's accumulator — not `session.out` — is the source of truth for the full conversation. The `.out` stream is a bounded sliding window (roughly one turn at steady state, see [Records on `session.out`](/ai-chat/client-protocol#records-on-session-out)); the durable history lives in the agent's accumulator and is persisted to S3 between turns for fast next-run boots. `chat.history` reads and mutates that accumulator directly.
+</Note>
+
+**Reads.** Synchronous against the current accumulator state.
+
+| Method | Description |
+|--------|-------------|
+| `chat.history.all()` | Returns a copy of the current accumulated UI messages. |
+| `chat.history.getChain()` | Same as `all()`. Use whichever name reads better in context. |
+| `chat.history.findMessage(messageId)` | Returns the message with that id, or `undefined`. |
+| `chat.history.getPendingToolCalls()` | Tool calls on the most recent assistant message that are still in `input-available` state (waiting on `addToolOutput`). |
+| `chat.history.getResolvedToolCalls()` | All tool calls in the chain in `output-available` or `output-error` state. |
+| `chat.history.extractNewToolResults(message)` | Tool results in `message` whose `toolCallId` is not already resolved in the chain. Most useful in `hydrateMessages` against an incoming wire message, before the runtime merges it. |
+
+Each pending and resolved entry is shaped `{ toolCallId, toolName, messageId }`. Each new-result entry is `{ toolCallId, toolName, output, errorText? }`, where `errorText` is set only for `output-error` parts.
+
+**Mutations.** Applied at lifecycle checkpoints (after hooks return). Multiple mutations in the same hook compose correctly.
+
+| Method | Description |
+|--------|-------------|
+| `chat.history.set(messages)` | Replace all messages. Same as `chat.setMessages()`. |
+| `chat.history.remove(messageId)` | Remove a specific message by ID. |
+| `chat.history.rollbackTo(messageId)` | Keep messages up to and including the given ID (undo). |
+| `chat.history.replace(messageId, message)` | Replace a specific message by ID (edit). |
+| `chat.history.slice(start, end?)` | Keep only messages in the given range. |
+
+```ts
+// Undo the last exchange in onAction
+onAction: async ({ action }) => {
+  if (action.type === "undo") {
+    chat.history.slice(0, -2);
+  }
+},
+
+// Trim history in onTurnComplete
+onTurnComplete: async ({ uiMessages }) => {
+  if (uiMessages.length > 50) {
+    chat.history.slice(-20);
+  }
+},
+```
+
+The HITL reads let an action or hook decide what to do without walking the accumulator manually:
+
+```ts
+// Refuse a regenerate while a tool call is still awaiting an answer
+onAction: async ({ action }) => {
+  if (action.type === "regenerate") {
+    if (chat.history.getPendingToolCalls().length > 0) return;
+    chat.history.slice(0, -1);
+  }
+},
+
+// Side-effect once per net-new tool result when wire messages come in
+hydrateMessages: async ({ incomingMessages }) => {
+  for (const msg of incomingMessages) {
+    for (const r of chat.history.extractNewToolResults(msg)) {
+      await onToolResolved({ id: r.toolCallId, output: r.output, errorText: r.errorText });
+    }
+  }
+  return incomingMessages;
+},
+```
+
+`extractNewToolResults` compares against the *current* chain. Inside `onTurnComplete`, the chain already contains the just-finished `responseMessage`, so it returns `[]`. Use it where the message is from outside the accumulator: `hydrateMessages` (incoming wire), `onAction` if the action carries a message, or any custom pre-merge code path.
+
+### prepareMessages
+
+Transform model messages before they're used anywhere — in `run()`, in compaction rebuilds, and in compaction results. Define once, applied everywhere.
+
+Use this for Anthropic cache breakpoints, injecting system context, stripping PII, etc.
+
+```ts
+export const myChat = chat.agent({
+  id: "my-chat",
+  prepareMessages: ({ messages, reason }) => {
+    // Add Anthropic cache breaks to the last message
+    if (messages.length === 0) return messages;
+    const last = messages[messages.length - 1];
+    return [
+      ...messages.slice(0, -1),
+      {
+        ...last,
+        providerOptions: {
+          ...last.providerOptions,
+          anthropic: { cacheControl: { type: "ephemeral" } },
+        },
+      },
+    ];
+  },
+  run: async ({ messages, signal }) => {
+    return streamText({ model: anthropic("claude-sonnet-4-5"), messages, abortSignal: signal });
+  },
+});
+```
+
+The `reason` field tells you why messages are being prepared:
+
+| Reason                 | Description                                       |
+| ---------------------- | ------------------------------------------------- |
+| `"run"`                | Messages being passed to `run()` for `streamText` |
+| `"compaction-rebuild"` | Rebuilding from a previous compaction summary     |
+| `"compaction-result"`  | Fresh compaction just produced these messages     |
+
+### Version upgrades
+
+Chat agent runs are pinned to the worker version they started on. When you deploy a new version, suspended runs resume on the old code. Call `chat.requestUpgrade()` in `onTurnStart` to skip `run()` and exit immediately — the transport re-triggers the same message on the latest version. See the [Version Upgrades pattern](/ai-chat/patterns/version-upgrades) for the full guide.
+
+### Ending a run on your terms
+
+By default, a chat agent stays idle after each turn waiting for the next user message. Call `chat.endRun()` from `run()`, `chat.defer()`, `onBeforeTurnComplete`, or `onTurnComplete` to exit the loop once the current turn finishes — no upgrade signal, no idle wait.
+
+```ts
+chat.agent({
+  id: "one-shot",
+  run: async ({ messages, signal }) => {
+    // Single-response agent — exit after this turn.
+    chat.endRun();
+    return streamText({ model: anthropic("claude-sonnet-4-5"), messages, abortSignal: signal });
+  },
+});
+```
+
+The current turn streams through normally, `onBeforeTurnComplete` / `onTurnComplete` fire, the turn-complete chunk is written, and the run exits instead of suspending. The next user message on the same `chatId` starts a fresh run via the standard continuation flow.
+
+Use this when the agent knows its work is done (budget exhausted, goal achieved, one-shot response) rather than relying on the idle timeout. Unlike `chat.requestUpgrade()`, no `upgrade-required` signal is sent to the client, so there are no version-migration semantics.
+
+### Runtime configuration
+
+#### chat.setTurnTimeout()
+
+Override how long the run stays suspended waiting for the next message. Call from inside `run()`:
+
+```ts
+run: async ({ messages, signal }) => {
+  chat.setTurnTimeout("2h"); // Wait longer for this conversation
+  return streamText({ model: anthropic("claude-sonnet-4-5"), messages, abortSignal: signal });
+},
+```
+
+#### chat.setIdleTimeoutInSeconds()
+
+Override how long the run stays idle (active, using compute) after each turn:
+
+```ts
+run: async ({ messages, signal }) => {
+  chat.setIdleTimeoutInSeconds(60); // Stay idle for 1 minute
+  return streamText({ model: anthropic("claude-sonnet-4-5"), messages, abortSignal: signal });
+},
+```
+
+<Info>
+  Longer idle timeout means faster responses but more compute usage. Set to `0` to suspend
+  immediately after each turn (minimum compute cost, slight delay on next message).
+</Info>
+
+#### Stream options
+
+Control how `streamText` results are converted to the frontend stream via `toUIMessageStream()`. Set static defaults on the task, or override per-turn.
+
+##### Error handling with onError
+
+When `streamText` encounters an error mid-stream (rate limits, API failures, network errors), the `onError` callback converts it to a string that's sent to the frontend as an `{ type: "error", errorText }` chunk. The AI SDK's `useChat` receives this via its `onError` callback.
+
+By default, the raw error message is sent to the frontend. Use `onError` to sanitize errors and avoid leaking internal details:
+
+```ts
+export const myChat = chat.agent({
+  id: "my-chat",
+  uiMessageStreamOptions: {
+    onError: (error) => {
+      // Log the full error server-side for debugging
+      console.error("Stream error:", error);
+      // Return a sanitized message — this is what the frontend sees
+      if (error instanceof Error && error.message.includes("rate limit")) {
+        return "Rate limited — please wait a moment and try again.";
+      }
+      return "Something went wrong. Please try again.";
+    },
+  },
+  run: async ({ messages, signal }) => {
+    return streamText({ model: anthropic("claude-sonnet-4-5"), messages, abortSignal: signal });
+  },
+});
+```
+
+`onError` is also called for tool execution errors, so a single handler covers both LLM errors and tool failures.
+
+On the frontend, handle the error in `useChat`:
+
+```tsx
+const { messages, sendMessage } = useChat({
+  transport,
+  onError: (error) => {
+    // error.message contains the string returned by your onError handler
+    toast.error(error.message);
+  },
+});
+```
+
+##### Reasoning and sources
+
+Control which AI SDK features are forwarded to the frontend:
+
+```ts
+export const myChat = chat.agent({
+  id: "my-chat",
+  uiMessageStreamOptions: {
+    sendReasoning: true, // Forward model reasoning (default: true)
+    sendSources: true, // Forward source citations (default: false)
+  },
+  run: async ({ messages, signal }) => {
+    return streamText({ model: anthropic("claude-sonnet-4-5"), messages, abortSignal: signal });
+  },
+});
+```
+
+##### Custom message IDs
+
+By default, response message IDs are generated using the AI SDK's built-in `generateId`. Pass a custom `generateMessageId` function to use your own ID format (e.g. UUID-v7):
+
+```ts
+import { v7 as uuidv7 } from "uuid";
+
+export const myChat = chat.agent({
+  id: "my-chat",
+  uiMessageStreamOptions: {
+    generateMessageId: () => uuidv7(),
+  },
+  run: async ({ messages, signal }) => {
+    return streamText({ model: anthropic("claude-sonnet-4-5"), messages, abortSignal: signal });
+  },
+});
+```
+
+With the `.withUIMessage()` builder, set it under `streamOptions`:
+
+```ts
+import { v7 as uuidv7 } from "uuid";
+
+export const myChat = chat
+  .withUIMessage<MyChatUIMessage>({
+    streamOptions: {
+      generateMessageId: () => uuidv7(),
+      sendReasoning: true,
+    },
+  })
+  .agent({
+    id: "my-chat",
+    run: async ({ messages, signal }) => {
+      return streamText({ model: anthropic("claude-sonnet-4-5"), messages, abortSignal: signal });
+    },
+  });
+```
+
+<Info>
+  The generated ID is sent to the frontend in the stream's `start` chunk, so frontend and backend
+  always reference the same ID for each message. This is important for features like tool
+  approvals, where the frontend resends an assistant message and the backend needs to match it
+  by ID in the conversation accumulator.
+</Info>
+
+##### Per-turn overrides
+
+Override per-turn with `chat.setUIMessageStreamOptions()` — per-turn values merge with the static config (per-turn wins on conflicts). The override is cleared automatically after each turn.
+
+```ts
+run: async ({ messages, clientData, signal }) => {
+  // Enable reasoning only for certain models
+  if (clientData.model?.includes("claude")) {
+    chat.setUIMessageStreamOptions({ sendReasoning: true });
+  }
+  return streamText({ model: openai(clientData.model ?? "gpt-4o"), messages, abortSignal: signal });
+},
+```
+
+`chat.setUIMessageStreamOptions()` works across all abstraction levels — `chat.agent()`, `chat.createSession()` / `turn.complete()`, and `chat.pipeAndCapture()`.
+
+See [ChatUIMessageStreamOptions](/ai-chat/reference#chatuimessagestreamoptions) for the full reference.
+
+<Note>
+  `onFinish` is managed internally for response capture and cannot be overridden here. Use
+  `streamText`'s `onFinish` callback for custom finish handling, or use [raw task
+  mode](#raw-task-with-primitives) for full control over `toUIMessageStream()`.
+</Note>
+
+### Manual mode with task()
+
+If you need full control over task options, use the standard `task()` with `ChatTaskPayload` and `chat.pipe()`:
+
+```ts
+import { task } from "@trigger.dev/sdk";
+import { chat, type ChatTaskPayload } from "@trigger.dev/sdk/ai";
+import { streamText, stepCountIs } from "ai";
+import { anthropic } from "@ai-sdk/anthropic";
+
+export const manualChat = task({
+  id: "manual-chat",
+  retry: { maxAttempts: 3 },
+  queue: { concurrencyLimit: 10 },
+  run: async (payload: ChatTaskPayload) => {
+    const result = streamText({
+      model: anthropic("claude-sonnet-4-5"),
+      messages: payload.messages,
+      stopWhen: stepCountIs(15),
+    });
+
+    await chat.pipe(result);
+  },
+});
+```
+
+<Warning>
+  Manual mode does not get automatic message accumulation or the `onTurnComplete`/`onChatStart`
+  lifecycle hooks. The `responseMessage` field in `onTurnComplete` will be `undefined` when using
+  `chat.pipe()` directly. Use `chat.agent()` for the full multi-turn experience.
+</Warning>
+
+---
+
+## chat.createSession()
+
+A middle ground between `chat.agent()` and raw primitives. You get an async iterator that yields `ChatTurn` objects — each turn handles stop signals, message accumulation, and turn-complete signaling automatically. You control initialization, model/tool selection, persistence, and any custom per-turn logic.
+
+Use `chat.createSession()` inside a standard `task()`:
+
+```ts
+import { task } from "@trigger.dev/sdk";
+import { chat, type ChatTaskWirePayload } from "@trigger.dev/sdk/ai";
+import { streamText, stepCountIs } from "ai";
+import { anthropic } from "@ai-sdk/anthropic";
+
+export const myChat = task({
+  id: "my-chat",
+  run: async (payload: ChatTaskWirePayload, { signal }) => {
+    // One-time initialization — just code, no hooks
+    const clientData = payload.metadata as { userId: string };
+    await db.chat.create({ data: { id: payload.chatId, userId: clientData.userId } });
+
+    const session = chat.createSession(payload, {
+      signal,
+      idleTimeoutInSeconds: 60,
+      timeout: "1h",
+    });
+
+    for await (const turn of session) {
+      const result = streamText({
+        model: anthropic("claude-sonnet-4-5"),
+        messages: turn.messages,
+        abortSignal: turn.signal,
+        stopWhen: stepCountIs(15),
+      });
+
+      // Pipe, capture, accumulate, and signal turn-complete — all in one call
+      await turn.complete(result);
+
+      // Persist after each turn
+      await db.chat.update({
+        where: { id: turn.chatId },
+        data: { messages: turn.uiMessages },
+      });
+    }
+  },
+});
+```
+
+### ChatSessionOptions
+
+| Option                 | Type          | Default  | Description                                 |
+| ---------------------- | ------------- | -------- | ------------------------------------------- |
+| `signal`               | `AbortSignal` | required | Run-level cancel signal (from task context) |
+| `idleTimeoutInSeconds` | `number`      | `30`     | Seconds to stay idle between turns          |
+| `timeout`              | `string`      | `"1h"`   | Duration string for suspend timeout         |
+| `maxTurns`             | `number`      | `100`    | Max turns before ending                     |
+
+### ChatTurn
+
+Each turn yielded by the iterator provides:
+
+| Field          | Type             | Description                                            |
+| -------------- | ---------------- | ------------------------------------------------------ |
+| `number`       | `number`         | Turn number (0-indexed)                                |
+| `chatId`       | `string`         | Chat session ID                                        |
+| `trigger`      | `string`         | What triggered this turn                               |
+| `clientData`   | `unknown`        | Client data from the transport                         |
+| `messages`     | `ModelMessage[]` | Full accumulated model messages — pass to `streamText` |
+| `uiMessages`   | `UIMessage[]`    | Full accumulated UI messages — use for persistence     |
+| `signal`       | `AbortSignal`    | Combined stop+cancel signal (fresh each turn)          |
+| `stopped`      | `boolean`        | Whether the user stopped generation this turn          |
+| `continuation` | `boolean`        | Whether this is a continuation run                     |
+
+| Method                       | Description                                                         |
+| ---------------------------- | ------------------------------------------------------------------- |
+| `turn.complete(source)`      | Pipe stream, capture response, accumulate, and signal turn-complete |
+| `turn.done()`                | Just signal turn-complete (when you've piped manually)              |
+| `turn.addResponse(response)` | Add a response to the accumulator manually                          |
+
+### turn.complete() vs manual control
+
+`turn.complete(result)` is the easy path — it handles piping, capturing the response, accumulating messages, cleaning up aborted parts, and writing the turn-complete chunk.
+
+For more control, you can do each step manually:
+
+```ts
+for await (const turn of session) {
+  const result = streamText({
+    model: anthropic("claude-sonnet-4-5"),
+    messages: turn.messages,
+    abortSignal: turn.signal,
+    stopWhen: stepCountIs(15),
+  });
+
+  // Manual: pipe and capture separately
+  const response = await chat.pipeAndCapture(result, { signal: turn.signal });
+
+  if (response) {
+    // Custom processing before accumulating
+    await turn.addResponse(response);
+  }
+
+  // Custom persistence, analytics, etc.
+  await db.chat.update({ ... });
+
+  // Must call done() when not using complete()
+  await turn.done();
+}
+```
+
+---
+
+## Raw task with primitives
+
+For full control, use a standard `task()` with the composable primitives from the `chat` namespace. You manage everything: the turn loop, stop signals, message accumulation, and turn-complete signaling.
+
+Raw task mode also lets you call `.toUIMessageStream()` yourself with any options — including `onFinish` and `originalMessages`. This is the right choice when you need complete control over the stream conversion beyond what `chat.setUIMessageStreamOptions()` provides.
+
+### Primitives
+
+| Primitive                       | Description                                                                                 |
+| ------------------------------- | ------------------------------------------------------------------------------------------- |
+| `chat.messages`                 | Input stream for incoming messages — use `.waitWithIdleTimeout()` to wait for the next turn |
+| `chat.createStopSignal()`       | Create a managed stop signal wired to the stop input stream                                 |
+| `chat.pipeAndCapture(result)`   | Pipe a `StreamTextResult` to the chat stream and capture the response                       |
+| `chat.writeTurnComplete()`      | Signal the frontend that the current turn is complete                                       |
+| `chat.MessageAccumulator`       | Accumulates conversation messages across turns                                              |
+| `chat.pipe(stream)`             | Pipe a stream to the frontend (no response capture)                                         |
+| `chat.cleanupAbortedParts(msg)` | Clean up incomplete parts from a stopped response                                           |
+
+### Example
+
+```ts
+import { task } from "@trigger.dev/sdk";
+import { chat, type ChatTaskWirePayload } from "@trigger.dev/sdk/ai";
+import { streamText, stepCountIs } from "ai";
+import { anthropic } from "@ai-sdk/anthropic";
+
+export const myChat = task({
+  id: "my-chat-raw",
+  run: async (payload: ChatTaskWirePayload, { signal: runSignal }) => {
+    let currentPayload = payload;
+
+    // Handle preload — wait for the first real message
+    if (currentPayload.trigger === "preload") {
+      const result = await chat.messages.waitWithIdleTimeout({
+        idleTimeoutInSeconds: 60,
+        timeout: "1h",
+        spanName: "waiting for first message",
+      });
+      if (!result.ok) return;
+      currentPayload = result.output;
+    }
+
+    const stop = chat.createStopSignal();
+    const conversation = new chat.MessageAccumulator();
+
+    for (let turn = 0; turn < 100; turn++) {
+      stop.reset();
+
+      const messages = await conversation.addIncoming(
+        currentPayload.messages,
+        currentPayload.trigger,
+        turn
+      );
+
+      const combinedSignal = AbortSignal.any([runSignal, stop.signal]);
+
+      const result = streamText({
+        model: anthropic("claude-sonnet-4-5"),
+        messages,
+        abortSignal: combinedSignal,
+        stopWhen: stepCountIs(15),
+      });
+
+      let response;
+      try {
+        response = await chat.pipeAndCapture(result, { signal: combinedSignal });
+      } catch (error) {
+        if (error instanceof Error && error.name === "AbortError") {
+          if (runSignal.aborted) break;
+          // Stop — fall through to accumulate partial
+        } else {
+          throw error;
+        }
+      }
+
+      if (response) {
+        const cleaned =
+          stop.signal.aborted && !runSignal.aborted ? chat.cleanupAbortedParts(response) : response;
+        await conversation.addResponse(cleaned);
+      }
+
+      if (runSignal.aborted) break;
+
+      // Persist, analytics, etc.
+      await db.chat.update({
+        where: { id: currentPayload.chatId },
+        data: { messages: conversation.uiMessages },
+      });
+
+      await chat.writeTurnComplete();
+
+      // Wait for the next message
+      const next = await chat.messages.waitWithIdleTimeout({
+        idleTimeoutInSeconds: 60,
+        timeout: "1h",
+        spanName: "waiting for next message",
+      });
+      if (!next.ok) break;
+      currentPayload = next.output;
+    }
+
+    stop.cleanup();
+  },
+});
+```
+
+### MessageAccumulator
+
+The `MessageAccumulator` handles the transport protocol automatically:
+
+- Turn 0: replaces messages (full history from frontend)
+- Subsequent turns: appends new messages (frontend only sends the new user message)
+- Regenerate: replaces messages (full history minus last assistant message)
+
+```ts
+const conversation = new chat.MessageAccumulator();
+
+// Returns full accumulated ModelMessage[] for streamText
+const messages = await conversation.addIncoming(payload.messages, payload.trigger, turn);
+
+// After piping, add the response
+const response = await chat.pipeAndCapture(result);
+if (response) await conversation.addResponse(response);
+
+// Access accumulated messages for persistence
+conversation.uiMessages; // UIMessage[]
+conversation.modelMessages; // ModelMessage[]
+```
diff --git a/docs/ai-chat/background-injection.mdx b/docs/ai-chat/background-injection.mdx
new file mode 100644
index 00000000000..567da627f16
--- /dev/null
+++ b/docs/ai-chat/background-injection.mdx
@@ -0,0 +1,221 @@
+---
+title: "Background injection"
+sidebarTitle: "Background injection"
+description: "Inject context from background work into the agent's conversation — self-review, RAG augmentation, or any async analysis."
+---
+
+import RcBanner from "/snippets/ai-chat-rc-banner.mdx";
+
+<RcBanner />
+
+## Overview
+
+`chat.inject()` queues model messages for injection into the conversation. Messages are picked up at the start of the next turn or at the next `prepareStep` boundary (between tool-call steps).
+
+This is the backend counterpart to [pending messages](/ai-chat/pending-messages) — pending messages come from the user via the frontend, while `chat.inject()` comes from your task code.
+
+## Basic usage
+
+```ts
+import { chat } from "@trigger.dev/sdk/ai";
+
+// Queue a system message for injection
+chat.inject([
+  {
+    role: "system",
+    content: "The user's account was just upgraded to Pro.",
+  },
+]);
+```
+
+Messages are appended to the model messages before the next LLM inference call. The LLM sees them as part of the conversation context.
+
+## Common pattern: defer + inject
+
+The most powerful pattern combines `chat.defer()` (background work) with `chat.inject()` (inject results). Background work runs in parallel with the idle wait between turns, and results are injected before the next response.
+
+```ts
+export const myChat = chat.agent({
+  id: "my-chat",
+  onTurnComplete: async ({ messages }) => {
+    // Kick off background analysis — doesn't block the turn
+    chat.defer(
+      (async () => {
+        const analysis = await analyzeConversation(messages);
+        chat.inject([
+          {
+            role: "system",
+            content: `[Analysis of conversation so far]\n\n${analysis}`,
+          },
+        ]);
+      })()
+    );
+  },
+  run: async ({ messages, signal }) => {
+    return streamText({
+      ...chat.toStreamTextOptions({ registry }),
+      messages,
+      abortSignal: signal,
+      stopWhen: stepCountIs(15),
+    });
+  },
+});
+```
+
+### Timing
+
+1. Turn completes, `onTurnComplete` fires
+2. `chat.defer()` registers the background work
+3. The run immediately starts waiting for the next message (no blocking)
+4. Background work completes, `chat.inject()` queues the messages
+5. User sends next message, turn starts
+6. Injected messages are appended before `run()` executes
+7. The LLM sees the injected context alongside the new user message
+
+If the background work finishes *during* a tool-call loop (not between turns), the messages are picked up at the next `prepareStep` boundary instead.
+
+## Example: self-review
+
+A cheap model reviews the agent's response after each turn and injects coaching for the next one. Uses [Prompts](/ai/prompts) for the review prompt and `generateObject` for structured output.
+
+```ts
+import { chat } from "@trigger.dev/sdk/ai";
+import { prompts } from "@trigger.dev/sdk";
+import { streamText, generateObject, createProviderRegistry, stepCountIs } from "ai";
+import { anthropic } from "@ai-sdk/anthropic";
+import { z } from "zod";
+
+const registry = createProviderRegistry({ anthropic });
+
+const selfReviewPrompt = prompts.define({
+  id: "self-review",
+  model: "anthropic:claude-haiku-4-5",
+  content: `You are a conversation quality reviewer. Analyze the assistant's most recent response.
+
+Focus on:
+- Whether the response answered the user's question
+- Missed opportunities to use tools or provide more detail
+- Tone mismatches
+
+Be concise. Only flag issues worth fixing.`,
+});
+
+export const myChat = chat.agent({
+  id: "my-chat",
+  onTurnComplete: async ({ messages }) => {
+    chat.defer(
+      (async () => {
+        const resolved = await selfReviewPrompt.resolve({});
+
+        const review = await generateObject({
+          model: registry.languageModel(resolved.model ?? "anthropic:claude-haiku-4-5"),
+          ...resolved.toAISDKTelemetry(),
+          system: resolved.text,
+          prompt: messages
+            .filter((m) => m.role === "user" || m.role === "assistant")
+            .map((m) => {
+              const text =
+                typeof m.content === "string"
+                  ? m.content
+                  : Array.isArray(m.content)
+                    ? m.content
+                        .filter((p: any) => p.type === "text")
+                        .map((p: any) => p.text)
+                        .join("")
+                    : "";
+              return `${m.role}: ${text}`;
+            })
+            .join("\n\n"),
+          schema: z.object({
+            needsImprovement: z.boolean(),
+            suggestions: z.array(z.string()),
+          }),
+        });
+
+        if (review.object.needsImprovement) {
+          chat.inject([
+            {
+              role: "system",
+              content: `[Self-review]\n\n${review.object.suggestions.map((s) => `- ${s}`).join("\n")}\n\nApply these naturally.`,
+            },
+          ]);
+        }
+      })()
+    );
+  },
+  run: async ({ messages, signal }) => {
+    return streamText({
+      ...chat.toStreamTextOptions({ registry }),
+      messages,
+      abortSignal: signal,
+      stopWhen: stepCountIs(15),
+    });
+  },
+});
+```
+
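+The self-review runs on `claude-haiku-4-5` (fast, cheap) in the background. If it finishes while the run is idle, the coaching stays queued — messages queued by `chat.inject()` persist across the idle wait — and is applied at the start of the next turn. If the user sends another message before the review completes, the coaching is still injected: it is picked up at the next `prepareStep` boundary or at the start of the following turn.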
+
+## Other use cases
+
+- **RAG augmentation**: After each turn, fetch relevant documents and inject them as context for the next response
+- **Safety checks**: Run a moderation model on the response, inject warnings if issues are detected
+- **Fact-checking**: Verify claims in the response using search tools, inject corrections
+- **Context enrichment**: Look up user/account data based on what was discussed, inject it as system context
+
+## `chat.defer` standalone
+
+`chat.defer()` is also useful on its own, without `chat.inject()`. Any work whose timing has no resume implication — analytics, audit logs, search-index writes, cache warming — can run in parallel with streaming instead of in the critical path. All deferred promises are awaited (with a 5s timeout) before `onTurnComplete` fires.
+
+```ts
+export const myChat = chat.agent({
+  id: "my-chat",
+  onTurnStart: async ({ chatId, runId }) => {
+    // Analytics — fire-and-forget, irrelevant to resume.
+    chat.defer(analytics.track("turn_started", { chatId, runId }));
+  },
+  run: async ({ messages, signal }) => {
+    return streamText({ model: anthropic("claude-sonnet-4-5"), messages, abortSignal: signal });
+  },
+});
+```
+
+`chat.defer()` can be called from anywhere during a turn — hooks, `run()`, or nested helpers. All deferred promises are collected and awaited together before `onTurnComplete`.
+
+<Warning>
+**Don't use `chat.defer()` for the message-history write in `onTurnStart`.** That write must land *before* the model starts streaming, otherwise a mid-stream page refresh will read `[]` from your DB and lose the user's message from the rendered conversation. See [Database persistence — `onTurnStart`](/ai-chat/patterns/database-persistence#onturnstart). Reserve `chat.defer` for writes whose timing has no resume implication.
+</Warning>
+
+## How it differs from pending messages
+
+| | `chat.inject()` | [Pending messages](/ai-chat/pending-messages) |
+|---|---|---|
+| **Source** | Backend task code | Frontend user input |
+| **Triggered by** | Your code (e.g. `onTurnComplete` + `chat.defer()`) | User sending a message during streaming |
+| **Injection point** | Start of next turn, or next `prepareStep` boundary | Next `prepareStep` boundary only |
+| **Message role** | Any (`system`, `user`, `assistant`) | Typically `user` |
+| **Frontend visibility** | Not visible unless you write custom `data-*` chunks | Visible via `usePendingMessages` hook |
+
+## API reference
+
+### chat.inject()
+
+```ts
+chat.inject(messages: ModelMessage[]): void
+```
+
+Queue model messages for injection at the next opportunity. Messages persist across the idle wait between turns — they are not reset when a new turn starts.
+
+**Parameters:**
+
+| Parameter | Type | Description |
+|-----------|------|-------------|
+| `messages` | `ModelMessage[]` | Model messages to inject (from the `ai` package) |
+
+Messages are drained (consumed) when:
+1. A new turn starts — before `run()` executes
+2. A `prepareStep` boundary is reached — between tool-call steps during streaming
+
+<Note>
+  `chat.inject()` writes to an in-memory queue in the current process. It works from any code running in the same task — lifecycle hooks, deferred work, tool execute functions, etc. It does not work from subtasks or other runs.
+</Note>
diff --git a/docs/ai-chat/changelog.mdx b/docs/ai-chat/changelog.mdx
new file mode 100644
index 00000000000..a972ca368ac
--- /dev/null
+++ b/docs/ai-chat/changelog.mdx
@@ -0,0 +1,690 @@
+---
+title: "Changelog"
+sidebarTitle: "Changelog"
+description: "Pre-release updates for AI chat agents."
+---
+
+<Update label="May 21, 2026" description="4.5.0-rc.1" tags={["SDK", "Bug fix"]}>
+
+## v4.5.0-rc.1 — two bug fixes
+
+Patch release on top of `4.5.0-rc.0`. Upgrade with:
+
+```sh
+npx trigger.dev@4.5.0-rc.1 update              # npm
+pnpm dlx trigger.dev@4.5.0-rc.1 update         # pnpm
+yarn dlx trigger.dev@4.5.0-rc.1 update         # yarn
+bunx trigger.dev@4.5.0-rc.1 update             # bun
+```
+
+### Fixes
+
+- **Agent Skills silently missing in `trigger dev`** for projects whose task files read `process.env` at module top level (e.g. a third-party SDK client initialized at import). [Skill folders](/ai-chat/patterns/skills) now bundle into `.trigger/skills/` reliably regardless of which env vars are set when the CLI launches. ([#3690](https://github.com/triggerdotdev/trigger.dev/pull/3690))
+- **`COULD_NOT_FIND_EXECUTOR`** when a task's definition is loaded via `await import(...)` from inside another task's `run()` — common when lazy-loading sub-agent tasks. Runtime workers now register such tasks with a sentinel file context, and the catalog logs a one-time warning per task id. ([#3688](https://github.com/triggerdotdev/trigger.dev/pull/3688))
+
+</Update>
+
+<Update label="May 21, 2026" description="4.5.0-rc.0" tags={["SDK", "Release"]}>
+
+## v4.5.0-rc.0 — AI Agents graduate from chat-prerelease
+
+First release candidate of v4.5. Everything covered by the `0.0.0-chat-prerelease-*` entries below now ships under a stable semver tag. Install:
+
+```bash
+pnpm add @trigger.dev/sdk@rc
+```
+
+(Or pin `4.5.0-rc.0` explicitly.)
+
+### What's in the box
+
+- **`chat.agent`** — multi-turn AI chat backends as durable Trigger.dev tasks. Lifecycle hooks, recovery from cancel/crash/OOM, version upgrades, all in. See [Overview](/ai-chat/overview) and [Quick Start](/ai-chat/quick-start).
+- **Sessions** — the durable bi-directional stream primitive that backs `chat.agent`. Use it directly for any pattern that needs durable bi-directional streaming across runs. See [Sessions](/ai-chat/sessions).
+- **`useTriggerChatTransport`** — a custom AI SDK `ChatTransport` for `useChat`. No API routes. See [Frontend](/ai-chat/frontend).
+- **Head Start** — opt-in route handler that runs the first `streamText` step in your warm server while the agent boots in parallel. Cuts cold-start TTFC roughly in half. See [Fast starts](/ai-chat/fast-starts#head-start).
+- **AI Prompts** — code-defined, deploy-versioned templates with dashboard overrides for text + model. Integrates with `chat.agent` via `chat.prompt.set()` + `chat.toStreamTextOptions()`. See [Prompts](/ai/prompts).
+- **`ai.toolExecute`** — wire any Trigger subtask in as the `execute` of an AI SDK `tool()`. See [Sub-agents](/ai-chat/patterns/sub-agents).
+
+### Compatibility
+
+`@trigger.dev/sdk@4.5.0-rc.0` requires `ai` `^5.0.0 || ^6.0.0` (Vercel AI SDK), React `^18.0 || ^19.0` (for the `chat/react` subpath), and Node.js `>=18.20.0`. Full matrix on the [API Reference](/ai-chat/reference#compatibility).
+
+### Docs
+
+This release ships with a refreshed AI Agents documentation set covering [Backend](/ai-chat/backend), [Frontend](/ai-chat/frontend), [Sessions](/ai-chat/sessions), [Lifecycle hooks](/ai-chat/lifecycle-hooks), [`chat.local`](/ai-chat/chat-local), the [Patterns](/ai-chat/patterns/sub-agents) library, [Testing](/ai-chat/testing), and a full [API Reference](/ai-chat/reference).
+
+</Update>
+
+<Update label="May 19, 2026" description="0.0.0-chat-prerelease-20260520150857" tags={["SDK"]}>
+
+## Recovery boot — context-preserving continuation after cancel / crash / OOM
+
+When a `chat.agent` run dies mid-stream (the user cancels, the worker OOMs, an unhandled exception kills the process), the next continuation run now reconstructs the conversation context automatically. Follow-ups like "keep going" continue the partial response; fresh follow-ups like "scrap that, what's 7+8?" abandon it and answer the new question. No customer code required.
+
+Under the hood: the boot now reads BOTH stream tails — `session.out` for any partial assistant the dead run was streaming, `session.in` for any user messages it never acknowledged — and splices `[firstInFlightUser, partialAssistant]` onto the chain when both are present. The model sees full prior context plus the latest user message.
+
+For policies different from "preserve context" — drop the partial entirely, synthesize tool results for an interrupted tool call, emit a recovery banner to the UI — register the new `onRecoveryBoot` hook:
+
+```ts
+import { chat } from "@trigger.dev/sdk/ai";
+
+export const myChat = chat.agent({
+  id: "my-chat",
+  onRecoveryBoot: async ({ partialAssistant, inFlightUsers, writer, cause, previousRunId }) => {
+    writer.write({
+      type: "data-chat-recovery",
+      data: { cause, previousRunId, partialPresent: partialAssistant !== undefined },
+      transient: true,
+    });
+    // return nothing → smart default applies
+  },
+  run: async ({ messages, signal }) => streamText({ model, messages, abortSignal: signal }),
+});
+```
+
+The hook receives `settledMessages`, `inFlightUsers`, `partialAssistant`, `pendingToolCalls`, `previousRunId`, `cause`, and a lazy `writer`. Return any of `chain`, `recoveredTurns`, or `beforeBoot` to override the default. Agents using `hydrateMessages` skip the hook — customer-owned persistence is the source of truth.
+
+Also retracts the OOM resilience caveat: model context on retry is no longer "incomplete" without `hydrateMessages`. The smart default reconstructs full context from `session.out` replay.
+
+See [Recovery boot](/ai-chat/patterns/recovery-boot) for the full guide.
+
+</Update>
+
+<Update label="May 16, 2026" description="0.0.0-chat-prerelease-20260519091352" tags={["SDK", "Breaking"]}>
+
+## `session.out` is now bounded — header-form control records + per-turn trim
+
+Long-lived chats were accumulating `session.out` records forever (every turn appends; nothing trimmed). The Sessions dashboard re-streamed the entire history from `seq_num=0` on every page load, and OOM-retry boot scanned the whole stream to find the last turn-complete.
+
+After this release `session.out` stays roughly **one turn long forever** at steady state. After each `turn-complete`, the agent appends an S2 `trim` command record pointing back to the previous turn-complete's seq_num. Full conversation history continues to live in the durable S3 snapshot, not on the stream. Resume across a single turn boundary still works (the previous `turn-complete` is still on the stream and S2's eventually-consistent trim window gives 10-60s of grace); resume across multiple turns of inactivity falls back to the snapshot.
+
+### What changed on the wire
+
+`trigger:turn-complete` and `trigger:upgrade-required` are no longer JSON data chunks on `session.out`. They're now **header-form control records** under a uniform `trigger-control` namespace:
+
+```
+headers:
+  ["trigger-control", "turn-complete"]
+  ["public-access-token", "eyJ..."]   // optional, refreshed JWT on turn-complete
+body: ""
+```
+
+```
+headers:
+  ["trigger-control", "upgrade-required"]
+body: ""
+```
+
+The control event names ("turn-complete", "upgrade-required") are unchanged conceptually — they just moved from `chunk.type` into a `trigger-control` header value. Body is always empty; metadata that previously rode in the chunk (e.g. `publicAccessToken`) now rides on sibling headers.
+
+`turn-complete` also picks up a new optional sibling header — `["session-in-event-id", "<seq>"]` — carrying the agent's committed-consume cursor on `.in` as of this turn. It's an agent-internal contract that lets the next worker boot seed its `.in` SSE subscription past already-processed user messages, without relying on a wall-clock-derived dedup cutoff. Custom transports should ignore the header; it has no client-side meaning.
+
+### Custom transport implementers
+
+Built-in SDK transports (`TriggerChatTransport`, `AgentChat`) handle this transparently — `onTurnComplete` fires the same way with the same payload. Custom transports filtering on `chunk.type === "trigger:turn-complete"` need to switch to the header-based filter:
+
+```ts
+import { controlSubtype } from "@trigger.dev/core/v3";
+
+const control = controlSubtype(record.headers);
+if (control === "turn-complete") {
+  // refresh token from record.headers, end turn, etc.
+}
+```
+
+The full uniform filter rule (data records vs control records vs S2 command records like `trim`) is documented at [Records on `session.out`](/ai-chat/client-protocol#records-on-session-out).
+
+### Sessions dashboard snapshot read
+
+The Sessions detail page in the trigger.dev dashboard now reads the agent's S3 snapshot first via a presigned URL, then SSE-tails from `snapshot.lastOutEventId`. Bandwidth and time-to-first-render are O(unread turns) instead of O(session lifetime). Sessions that registered a `hydrateMessages` hook (which skips snapshot writes) show only the most recent turn — those customers typically have their own DB-backed dashboards.
+
+### Breaking surface
+
+- Custom transports parsing `chunk.type` for turn-complete / upgrade-required must switch to the `trigger-control` header check.
+- Snapshot consumers should import `ChatSnapshotV1` / `ChatSnapshotV1Schema` from `@trigger.dev/core/v3` (now an exported shape, not SDK-internal).
+
+Hard cutover — no compat shim. v4.5 is prerelease.
+
+### Docs
+
+- [Records on `session.out`](/ai-chat/client-protocol#records-on-session-out) — full filter rule for data / control / command records.
+- [Resuming a stream](/ai-chat/client-protocol#resuming-a-stream) — explicit single-turn vs multi-turn-away semantics.
+- [`turn-complete` control record](/ai-chat/client-protocol#turn-complete-control-record) and [`upgrade-required` control record](/ai-chat/client-protocol#upgrade-required-control-record) — replaced the old chunk-shape docs.
+
+</Update>
+
+<Update label="May 8, 2026" description="0.0.0-chat-prerelease-20260519091352" tags={["SDK", "Breaking"]}>
+
+## 512 KiB `/in/append` ceiling removed for long chats — slim wire + S3 snapshot
+
+`chat.agent` long-running chats with heavy tool results were hitting the realtime API's 512 KiB body cap on `/realtime/v1/sessions/{id}/in/append` once the accumulated `UIMessage[]` history (which the wire shipped in full on every send) crossed the limit. The 413 surfaced as a CORS error in browsers and stalled chats around turn 10–30 with tool use.
+
+The wire is now **delta-only**: each `.in/append` carries at most one new `UIMessage` (the new user turn or a tool-approval response) instead of the full history. The agent rebuilds prior history at run boot from a durable JSON snapshot in object storage plus a replay of the `session.out` tail. The 512 KiB ceiling stops being pressure — slim payloads are normally a few KB regardless of chat length.
+
+```ts
+// Before — full history shipped on every send
+{ messages: [u1, a1, u2, a2, /* ... 30 turns ... */, u31], chatId, trigger: "submit-message" }
+
+// After — only the new turn
+{ message: u31, chatId, trigger: "submit-message" }
+```
+
+### What changed
+
+- **`ChatTaskWirePayload`**: `messages: UIMessage[]` is removed. Replaced by `message?: UIMessage` (singular, optional) and a dedicated `headStartMessages?: UIMessage[]` field used only by `chat.headStart` first-turn handover.
+- **Run boot**: when `hydrateMessages` is not registered, the runtime reads `packets/{projectRef}/{envSlug}/sessions/{sessionId}/snapshot.json` from object storage and replays any `session.out` chunks landed since the snapshot's cursor. Snapshot writes happen after every `onTurnComplete`, awaited so they survive an idle suspend.
+- **`hydrateMessages` short-circuit**: registering the hook skips snapshot read/write and replay entirely. The customer remains the source of truth for history, same as before this change.
+- **`hydrateMessages.incomingMessages`**: now consistently 0-or-1-length across every trigger type. Previously `regenerate-message` and continuations occasionally shipped full history; they now ship none.
+- **`onChatStart` is now once-per-chat**: fires only on the chat's very first user message; does NOT fire on continuation runs (post-`endRun`, post-waitpoint-timeout, post-`chat.requestUpgrade`) or on OOM-retry attempts. The `continuation` and `previousRunId` fields on `ChatStartEvent` are now `@deprecated` (always `false` / `undefined` when the hook fires). Drop any `if (continuation) return;` gates from `onChatStart` — they're now unreachable. For per-turn setup that runs on continuations too, move to `onTurnStart`.
+- **Continuation boot payload**: the server now strips `message` / `messages` / `trigger` from the cached `basePayload` on continuation runs, and the SDK enters a new continuation-wait branch that waits silently on `session.in` for the next user message. Fixes a phantom-turn bug where stale boot-payload fields were replayed on every resume.
+- **OOM-retry boot**: uses the snapshot's `lastOutTimestamp` as the `session.in` cutoff, saving one stream subscription per retry.
+- **Built-in transports**: `TriggerChatTransport`, `AgentChat`, mid-stream pending-message handling, and `chat.headStart` route handler all updated to the slim shape. Existing customer code calling `transport.sendMessage(...)` / `agentChat.sendMessage(...)` is unaffected — the change is below those surfaces.
+
+### Object store configuration
+
+Snapshot read/write reuses Trigger.dev's existing object-store infrastructure — the same presigned-URL routes used for large payloads. Set `OBJECT_STORE_*` env vars on your webapp deployment if you haven't already; MinIO works locally via `OBJECT_STORE_DEFAULT_PROTOCOL`.
+
+If no object store is configured **and** no `hydrateMessages` hook is registered, conversations don't survive run boundaries (the runtime logs a warning at registration time). Either configure an object store or register `hydrateMessages`.
+
+### Breaking surface
+
+- **Custom transports**: any code constructing `ChatTaskWirePayload` directly must drop `messages` and use `message`. See the rewritten [Client Protocol](/ai-chat/client-protocol).
+- **Client-side `setMessages` no longer round-trips**: full-history mutations on the client never reached the agent before this release either, but the slim wire makes that explicit. Use server-side [`chat.history.set()`](/ai-chat/backend#chat-history) inside `onTurnStart` for compaction.
+- **Custom server-to-server senders**: code calling `apiClient.appendToSessionInput(sessionId, ...)` or hitting `/realtime/v1/sessions/{id}/in/append` directly must switch to the slim shape.
+
+Hard cutover — there is no compat shim. v4.5 is prerelease.
+
+### Docs
+
+- Rewritten [Client Protocol](/ai-chat/client-protocol) — slim payload, new `headStartMessages` field, new "How history is rebuilt" and "Head-start protocol caveat" sections.
+- New [Persistence and replay](/ai-chat/patterns/persistence-and-replay) — end-to-end walkthrough of the snapshot model, OOM-retry interaction, crash semantics, `hydrateMessages` short-circuit.
+- New [Tool result auditing](/ai-chat/patterns/tool-result-auditing) — the `extractNewToolResults` + `onTurnComplete` / `hydrateMessages` pattern for HITL audit logging.
+- [v4.5 section of the upgrade guide](/ai-chat/upgrade-guide#v45-wire-format-change) — migration steps for custom transports and `hydrateMessages` consumers.
+- [`hydrateMessages`](/ai-chat/lifecycle-hooks#hydratemessages), [`onChatStart`](/ai-chat/lifecycle-hooks#onchatstart) — clarifications on the new `incomingMessages` and `messages` shapes.
+
+</Update>
+
+<Update label="May 7, 2026" description="0.0.0-chat-prerelease-20260507131256" tags={["SDK"]}>
+
+## `chat.history` read primitives for HITL flows
+
+Customers building human-in-the-loop tools were re-implementing the same accumulator-walking logic to figure out which tool calls were pending, which were resolved, and which results in an incoming wire message were actually new. That logic is now lifted into the SDK as five new methods on `chat.history`:
+
+| Method                                        | Description                                                                                                                                                                         |
+| --------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `chat.history.getPendingToolCalls()`          | Tool calls on the most recent assistant message in `input-available` state — gates fresh user turns during HITL.                                                                    |
+| `chat.history.getResolvedToolCalls()`         | All tool calls in the chain in `output-available` or `output-error` state.                                                                                                          |
+| `chat.history.extractNewToolResults(message)` | Tool results in `message` whose `toolCallId` is not already resolved on the chain. Most useful in `hydrateMessages` against an incoming wire message, before the runtime merges it. |
+| `chat.history.getChain()`                     | Same as `chat.history.all()` — alias that reads better alongside parent-aware APIs.                                                                                                 |
+| `chat.history.findMessage(messageId)`         | Direct lookup; `undefined` if absent.                                                                                                                                               |
+
+```ts
+// Refuse a regenerate while a tool call is awaiting an answer
+onAction: async ({ action }) => {
+  if (action.type === "regenerate") {
+    if (chat.history.getPendingToolCalls().length > 0) return;
+    chat.history.slice(0, -1);
+  }
+},
+
+// Side-effect once per net-new tool result on incoming wire messages
+hydrateMessages: async ({ incomingMessages }) => {
+  for (const msg of incomingMessages) {
+    for (const r of chat.history.extractNewToolResults(msg)) {
+      await auditLog.record({ id: r.toolCallId, output: r.output, errorText: r.errorText });
+    }
+  }
+  return incomingMessages;
+},
+```
+
+See [`chat.history`](/ai-chat/backend#chat-history) and [Human-in-the-loop](/ai-chat/patterns/human-in-the-loop).
+
+## Fix: HITL `addToolOutput` resume preserves the assistant message id
+
+In some HITL flows the AI SDK regenerated the assistant message id when the user's `addToolOutput` answer round-tripped back to the agent. The fresh id slipped past the runtime's id-based merge, leaving the resolved tool answer attached to a sibling assistant message instead of the head, which broke downstream dedup and rendered the tool answer twice.
+
+The runtime now records `toolCallId → head messageId` whenever an assistant with tool parts lands in the accumulator and rewrites the incoming id back via that map before the merge. Customers who had a content-match workaround for this can drop it.
+
+</Update>
+
+<Update label="May 6, 2026" description="0.0.0-chat-prerelease-20260506093419" tags={["SDK", "Breaking"]}>
+
+## `chat.agent` actions are no longer turns
+
+Submitting an action via `transport.sendAction()` previously fell through to the regular turn machinery, calling `onTurnStart`, `run()`, `onTurnComplete`, etc. — meaning every action fired an LLM call by default. The workaround was a `chat.local`-based `skipModelCall` flag read in `run()`.
+
+Actions now fire `hydrateMessages` and `onAction` only. No `onTurnStart` / `prepareMessages` / `onBeforeTurnComplete` / `onTurnComplete`, no `run()` invocation, no turn-counter increment. The trace span is named `chat action` instead of `chat turn N`.
+
+`onAction`'s return type widens: returning `void` is side-effect-only (default); returning a `StreamTextResult`, `string`, or `UIMessage` produces a model response that's auto-piped back to the frontend.
+
+### Migration
+
+If you had `run()` branching on `payload.trigger === "action"` for a model response, return your `streamText(...)` from `onAction` instead. If you persisted in `onTurnComplete`, do that work inside `onAction`. For state-only actions, just remove the skip-the-model workaround.
+
+```ts
+// before
+onAction: async ({ action }) => {
+  if (action.type === "regenerate") {
+    runState.skipModelCall = false;
+    chat.history.slice(0, -1);
+  }
+},
+run: async ({ messages, signal }) => {
+  if (runState.skipModelCall) return;
+  return streamText({ model, messages, abortSignal: signal });
+},
+
+// after
+onAction: async ({ action, messages, signal }) => {
+  if (action.type === "regenerate") {
+    chat.history.slice(0, -1);
+    return streamText({ model, messages, abortSignal: signal });
+  }
+},
+run: async ({ messages, signal }) =>
+  streamText({ model, messages, abortSignal: signal }),
+```
+
+Actions arriving when no `onAction` handler is configured now `console.warn` once and are ignored — previously they silently fell through to `run()` with an empty wire payload.
+
+</Update>
+
+<Update label="May 5, 2026" description="0.0.0-chat-prerelease-20260505140031" tags={["SDK"]}>
+
+## Fix: duplicate turn after `chat.agent` idle-suspends
+
+Every message sent to a `chat.agent` after the run idle-suspended produced two turns on the agent side instead of one — same user message, two LLM calls. Internal session-stream reconnect logic was racing the waitpoint and feeding the just-consumed message back into the next turn's input buffer. No public API change.
+
+</Update>
+
+<Update label="May 5, 2026" description="0.0.0-chat-prerelease-20260505084711" tags={["SDK"]}>
+
+## `chat.headStart` — fast first-turn for chat.agent
+
+A new opt-in flow that cuts first-turn TTFC roughly in half by running step 1's LLM call in your warm process while the chat.agent run boots in parallel. On the LLM's `tool-calls` boundary, ownership of the durable stream hands over to the agent for tool execution and step 2+. Pure-text first turns finish on the customer side with no LLM call from the trigger run at all.
+
+Measured on `claude-sonnet-4-6` (same model both sides): TTFT 2801ms → 1218ms (−57%), total turn 4180ms → 2345ms (−44%). With Head Start, first-text time is essentially the LLM TTFB floor.
+
+### Setup
+
+```ts app/api/chat/route.ts
+import { chat } from "@trigger.dev/sdk/chat-server";
+import { streamText } from "ai";
+import { anthropic } from "@ai-sdk/anthropic";
+import { headStartTools } from "@/lib/chat-tools/schemas";
+
+export const POST = chat.headStart({
+  agentId: "my-chat",
+  run: async ({ chat: helper }) =>
+    streamText({
+      ...helper.toStreamTextOptions({ tools: headStartTools }),
+      model: anthropic("claude-sonnet-4-6"),
+      system: "You are a helpful assistant.",
+    }),
+});
+```
+
+```tsx components/chat.tsx
+const transport = useTriggerChatTransport({
+  task: "my-chat",
+  accessToken: ({ chatId }) => mintChatAccessToken(chatId),
+  startSession: ({ chatId, taskId, clientData }) =>
+    startChatSession({ chatId, taskId, clientData }),
+  headStart: "/api/chat",
+});
+```
+
+### Bundle isolation
+
+Tool schemas (`description` + `inputSchema`) live in their own module that imports only `ai` and `zod`. The agent task imports those schemas and adds heavy `execute` fns. The route handler imports schemas only — keeping the warm-process bundle light is what makes the win possible. Runtime "strip executes" helpers don't solve this — bundlers resolve imports at build time. See [Fast starts → Head Start setup](/ai-chat/fast-starts#setup) for the full split.
+
+### Compared to Preload
+
+Preload eagerly triggers the run on page load (good when you're confident the user _will_ send a message — it trades idle compute for fast TTFC). Head Start gates the run on a real first message — no idle compute, and the customer's process runs step 1 directly. Pick one per chat.
+
+### Works on every runtime
+
+`chat.headStart` returns a standard Web Fetch handler — `(req: Request) => Promise<Response>` — so it slots into Next.js App Router, Hono, SvelteKit, Remix / React Router v7, TanStack Start, Astro, Nitro/Nuxt, Elysia, Cloudflare Workers, Bun, Deno, and any other runtime that speaks Web Fetch. Verified runtimes: Node 18+, Bun, Deno, Workers, Vercel (Node and Edge), Netlify (Functions and Edge).
+
+For Node-only frameworks (Express, Fastify, Koa, raw `node:http`), the SDK ships `chat.toNodeListener(handler)` — converts any Web Fetch handler into a Node `(req, res)` listener with proper streaming, header translation, and client-disconnect propagation.
+
+```ts
+import express from "express";
+import { chat } from "@trigger.dev/sdk/chat-server";
+
+const handler = chat.headStart({ agentId: "my-chat", run: ... });
+
+const app = express();
+app.post("/api/chat", chat.toNodeListener(handler));
+```
+
+## Docs
+
+- New [Head Start guide](/ai-chat/fast-starts#head-start) — bundle isolation, schema/execute split, route handler setup, transport option, lifecycle, limitations.
+- [Reference](/ai-chat/reference#triggerchattransport-options) — `headStart` transport option.
+
+</Update>
+
+<Update label="May 2, 2026" description="0.0.0-chat-prerelease-20260502065709" tags={["SDK"]}>
+
+## Resilient SSE reconnection
+
+The chat transport now retries indefinitely on network drops with bounded exponential backoff (100ms initial, 5s cap, 50% jitter) instead of giving up after 5 attempts. Reconnects are immediate on `online`, on tab refocus after a long background, and on Safari bfcache restore (`pageshow` with `event.persisted`).
+
+A 60s stall detector catches silent-dead-socket cases on mobile where the OS killed the TCP socket without the reader noticing. A 30s per-attempt fetch timeout prevents stuck connections from blocking the retry loop.
+
+Resume continues to use `Last-Event-ID`, so no chunks are lost when the connection comes back. No public API change — these are defaults on `TriggerChatTransport`. Customers who built `hasActiveStream` / `isStreaming` flag tracking on their side can drop it: the transport handles the silent-but-stale case internally now.
+
+`SSEStreamSubscription` (used by `TriggerChatTransport` and `AgentChat`) gained `retryNow()` and `forceReconnect()` for callers writing custom transports, plus options to tune `maxRetries` / `retryDelayMs` / `maxRetryDelayMs` / `retryJitter` / `fetchTimeoutMs` / `stallTimeoutMs` / `nonRetryableStatuses`. `404` and `410` short-circuit retry by default (stream gone / session closed).
+
+</Update>
+
+<Update label="April 24, 2026" description="0.0.0-chat-prerelease-20260501122331" tags={["SDK", "Platform"]}>
+
+## `chat.agent` now runs on Sessions
+
+Every chat is backed by a durable Session row that outlives any single run. `externalId` = your chat ID, `type` = `"chat.agent"`. Under the hood:
+
+- Output chunks stream on `session.out` (was a run-scoped `streams.writer("chat")`).
+- Client messages and stops land on `session.in` as a [`ChatInputChunk`](/ai-chat/reference#chatinputchunk) tagged union (was two run-scoped `streams.input` definitions).
+- Wire endpoints moved from `/realtime/v1/streams/{runId}/...` to `/realtime/v1/sessions/{sessionId}/...`. See the rewritten [Client Protocol](/ai-chat/client-protocol).
+
+Public surface (`chat.agent()`, `TriggerChatTransport`, `AgentChat`, `chat.stream` / `chat.messages` / `chat.stopSignal`) is unchanged — existing apps keep working. What's new is:
+
+- **Cross-run resume is free.** A chat you were in yesterday resumes against the same `sessionId` today, even if the original run long since exited. No more lost conversations when a run idle-times-out.
+- **Inbox views via `sessions.list({type: "chat.agent"})`.** Enumerate every chat in your environment, filter by tag or status.
+- **`TriggerChatTaskResult.sessionId`** + **`ChatTaskRunPayload.sessionId`** — you can reach into the raw session via `sessions.open(payload.sessionId)` for advanced cases (writing from a sub-agent, custom transport).
+- **Dashboard Agent tab** resolves via `sessionId` and stays in sync with the live stream across runs.
+
+The full wire-level protocol (session create, channel routes, JWT scopes) is documented in [Client Protocol](/ai-chat/client-protocol).
+
+## `X-Session-Settled` — fast reconnect on idle chats
+
+When a client reconnects to `session.out` and the tail record is a `trigger:turn-complete` marker (agent finished a turn, idle-waiting or exited), the server sets `X-Session-Settled: true` and uses `wait=0` on the underlying S2 read. The SSE drains any remaining records then closes in ~1s instead of long-polling for 60s.
+
+Practical impact: `TriggerChatTransport.reconnectToStream` no longer needs a client-side `isStreaming` flag. You can drop the field from your persisted `ChatSession` state entirely — the server decides. Existing callers that still persist `isStreaming` are unaffected; `reconnectToStream` keeps the fast-path short-circuit when it's `false`.
+
+## Migration
+
+See the [Sessions Upgrade Guide](/ai-chat/upgrade-guide) for the full step-by-step — auth callback split, persisted `ChatSession` shape, server-side helpers (`chat.createStartSessionAction`, `chat.createAccessToken` for renewal), and the `clientData` validation pivot.
+
+## Docs
+
+- Rewritten [Client Protocol](/ai-chat/client-protocol) — full wire format for the new `/realtime/v1/sessions/{sessionId}/...` endpoints, JWT scopes, S2 direct-write credentials, and `Last-Event-ID` resume.
+- [Database persistence pattern](/ai-chat/patterns/database-persistence) — new `chatId`-keyed `ChatSession` shape (no more `runId`) and a warning on the `onTurnComplete` race that requires a single atomic write of `messages` + `lastEventId`.
+- [Reference](/ai-chat/reference) — added `chat.createStartSessionAction`, `chat.createAccessToken`, `ChatInputChunk`, `TriggerChatTaskResult.sessionId`, `ChatTaskRunPayload.sessionId`. The old run-scoped stream-ID constants are gone.
+- Refreshed [Backend](/ai-chat/backend), [Frontend](/ai-chat/frontend), [Server Chat](/ai-chat/server-chat), [Quick start](/ai-chat/quick-start), [Overview](/ai-chat/overview), [Types](/ai-chat/types), [Error handling](/ai-chat/error-handling), and [Testing](/ai-chat/testing) for the session-based wiring.
+
+</Update>
+
+<Update label="April 19, 2026" description="0.0.0-chat-prerelease-20260419173457" tags={["SDK", "CLI"]}>
+
+## Agent Skills
+
+Ship reusable capabilities as folders — a `SKILL.md` plus optional scripts, references, and assets. The agent sees short descriptions in its system prompt, loads full instructions on demand via `loadSkill`, and invokes bundled scripts via `bash` — no manual wiring.
+
+`skills.define({ id, path })` registers the skill; the CLI bundles the folder into the deploy image. `chat.skills.set([...])` activates skills for the run; `chat.toStreamTextOptions()` auto-injects the preamble and tools.
+
+See the new [Agent Skills guide](/ai-chat/patterns/skills).
+
+</Update>
+
+<Update label="April 18, 2026" description="0.0.0-chat-prerelease-20260418174118" tags={["SDK"]}>
+
+## `chat.endRun()` — exit on your own terms
+
+New imperative API to exit the loop after the current turn completes, without the upgrade-required signal that `chat.requestUpgrade()` sends. Use for one-shot agents, budget-exhausted exits, or goal-reached completions.
+
+```ts
+chat.agent({
+  id: "one-shot",
+  run: async ({ messages, signal }) => {
+    chat.endRun();
+    return streamText({ model: openai("gpt-4o"), messages, abortSignal: signal });
+  },
+});
+```
+
+The current turn streams normally, `onBeforeTurnComplete` / `onTurnComplete` fire, the turn-complete chunk is written, and the run exits instead of suspending. Callable from `run()`, `chat.defer()`, `onBeforeTurnComplete`, or `onTurnComplete`. See [Ending a run on your terms](/ai-chat/backend#ending-a-run-on-your-terms).
+
+## `finishReason` on turn-complete events
+
+`TurnCompleteEvent` and `BeforeTurnCompleteEvent` now include the AI SDK's `finishReason` (`"stop" | "tool-calls" | "length" | "content-filter" | "error" | "other"`). Clean signal for distinguishing a normal turn end from one paused on a pending tool call (HITL flows like `ask_user`):
+
+```ts
+onTurnComplete: async ({ finishReason, responseMessage }) => {
+  if (finishReason === "tool-calls") {
+    // Paused — assistant message has a pending tool call waiting for user input
+    await persistCheckpoint(responseMessage);
+  } else {
+    await persistCompleted(responseMessage);
+  }
+};
+```
+
+Undefined for manual `chat.pipe()` flows or aborted streams. See the new [Human-in-the-loop pattern](/ai-chat/patterns/human-in-the-loop).
+
+## User-initiated compaction pattern
+
+The [Compaction guide](/ai-chat/compaction) now covers how to wire a "Summarize conversation" button or `/compact` slash command via `actionSchema` + `onAction`. The agent summarizes on demand, rewrites history with `chat.history.set()`, and short-circuits the LLM call for action turns.
+
+This needed a small type fix: `ChatTaskPayload.trigger` now correctly includes `"action"`, so `run()` handlers can short-circuit with `if (trigger === "action") return` when an action doesn't need a response.
+
+## Human-in-the-loop pattern page
+
+New [Human-in-the-loop](/ai-chat/patterns/human-in-the-loop) page walks through `ask_user`-style mid-turn user input end-to-end: defining a no-execute tool, rendering pending tool calls on the frontend with `addToolOutput` + `sendAutomaticallyWhen`, detecting paused turns via `finishReason`, and two persistence strategies (overwrite vs. checkpoint nodes).
+
+</Update>
+
+<Update label="April 18, 2026" description="0.0.0-chat-prerelease-20260418083610" tags={["SDK"]}>
+
+## Offline test harness for `chat.agent`
+
+`@trigger.dev/sdk/ai/test` now ships `mockChatAgent`, a harness that drives a `chat.agent` definition through real turns without network or task runtime. Send messages, actions, and stop signals; inspect emitted chunks; assert on hook order.
+
+```ts
+import { mockChatAgent } from "@trigger.dev/sdk/ai/test";
+import { MockLanguageModelV3 } from "ai/test";
+import { myAgent } from "./my-agent";
+
+const harness = mockChatAgent(myAgent, {
+  chatId: "test-1",
+  clientData: {
+    model: new MockLanguageModelV3({
+      /* ... */
+    }),
+  },
+});
+
+const turn = await harness.sendMessage({
+  id: "u1",
+  role: "user",
+  parts: [{ type: "text", text: "hi" }],
+});
+expect(turn.chunks).toContainEqual(expect.objectContaining({ type: "text-delta", delta: "hello" }));
+await harness.close();
+```
+
+### Dependency injection via locals
+
+`setupLocals` pre-seeds `locals` before `run()` starts — the pattern for injecting database clients, service stubs, and other server-side dependencies that shouldn't leak through untrusted `clientData`:
+
+```ts
+import { dbKey } from "./db";
+
+const harness = mockChatAgent(agent, {
+  chatId: "test-1",
+  setupLocals: ({ set }) => {
+    set(dbKey, testDb);
+  },
+});
+```
+
+Hooks then read the seeded value with `locals.get(dbKey)`, falling through to the production client in real runs.
+
+See [Testing](/ai-chat/testing).
+
+## `runInMockTaskContext` — lower-level test harness
+
+`@trigger.dev/core/v3/test` now exports `runInMockTaskContext` for unit-testing any task code offline (not just chat agents). Installs in-memory managers for `locals`, `lifecycleHooks`, `runtime`, `inputStreams`, and `realtimeStreams`, plus a mock `TaskContext`. Drivers let you push data into input streams and inspect chunks written to output streams.
+
+</Update>
+
+<Update label="April 17, 2026" description="0.0.0-chat-prerelease-20260417152143" tags={["SDK"]}>
+
+## Multi-tab coordination
+
+Prevent duplicate messages when the same chat is open in multiple browser tabs. Enable with `multiTab: true` on the transport.
+
+```tsx
+const transport = useTriggerChatTransport({ task: "my-chat", multiTab: true, accessToken });
+const { messages, setMessages } = useChat({ id: chatId, transport });
+const { isReadOnly } = useMultiTabChat(transport, chatId, messages, setMessages);
+```
+
+Only one tab can send at a time. Other tabs enter read-only mode with real-time message updates via `BroadcastChannel`. When the active tab's turn completes, any tab can send next. Crashed tabs are detected via heartbeat timeout (10s).
+
+See [Multi-tab coordination](/ai-chat/frontend#multi-tab-coordination) and [`useMultiTabChat`](/ai-chat/reference#usemultitabchat).
+
+## Error stack truncation
+
+Large error stacks no longer OOM the worker process. Stacks are capped at 50 frames (top 5 + bottom 45), individual lines at 1024 chars, messages at 1000 chars. Applied in `parseError`, `sanitizeError`, and OTel span recording.
+
+</Update>
+
+<Update label="April 15, 2026" description="0.0.0-chat-prerelease-20260415164455" tags={["SDK"]}>
+
+## Fix: `resume: true` hangs on completed turns
+
+When refreshing a page after a turn completed, `useChat` with `resume: true` would hang indefinitely — `reconnectToStream` opened an SSE connection that never received data.
+
+Added `isStreaming` to session state. The transport sets it to `true` when streaming starts and `false` on `trigger:turn-complete`. `reconnectToStream` returns `null` immediately when `isStreaming` is false, so `resume: initialMessages.length > 0` is now safe to pass unconditionally.
+
+The flag flows through `onSessionChange` and is restored from `sessions` — no extra persistence code needed.
+
+</Update>
+
+<Update label="April 15, 2026" description="0.0.0-chat-prerelease-20260415152704" tags={["SDK"]}>
+
+## `hydrateMessages` — backend-controlled message history
+
+Load message history from your database on every turn instead of trusting the frontend accumulator. The hook replaces the built-in linear accumulation entirely — the backend is the source of truth.
+
+```ts
+chat.agent({
+  id: "my-chat",
+  hydrateMessages: async ({ chatId, trigger, incomingMessages }) => {
+    const stored = await db.getMessages(chatId);
+    if (trigger === "submit-message" && incomingMessages.length > 0) {
+      stored.push(incomingMessages[incomingMessages.length - 1]!);
+      await db.persistMessages(chatId, stored);
+    }
+    return stored;
+  },
+});
+```
+
+Tool approval updates are auto-merged after hydration — no extra handling needed.
+
+See [hydrateMessages](/ai-chat/lifecycle-hooks#hydratemessages).
+
+## `chat.history` — imperative message mutations
+
+Modify the accumulated message history from any hook or `run()`:
+
+```ts
+chat.history.rollbackTo(messageId); // Undo — keep up to this message
+chat.history.remove(messageId); // Remove one message
+chat.history.replace(id, newMsg); // Edit a message
+chat.history.slice(0, -2); // Remove last 2 messages
+chat.history.all(); // Read current state
+```
+
+See [chat.history](/ai-chat/backend#chat-history).
+
+## Custom actions — `actionSchema` + `onAction`
+
+Send typed actions (undo, rollback, edit) from the frontend via `transport.sendAction()`. Actions wake the agent, fire `onAction`, then trigger a normal `run()` turn.
+
+```ts
+chat.agent({
+  id: "my-chat",
+  actionSchema: z.discriminatedUnion("type", [
+    z.object({ type: z.literal("undo") }),
+    z.object({ type: z.literal("rollback"), targetMessageId: z.string() }),
+  ]),
+  onAction: async ({ action }) => {
+    if (action.type === "undo") chat.history.slice(0, -2);
+    if (action.type === "rollback") chat.history.rollbackTo(action.targetMessageId);
+  },
+});
+```
+
+Frontend: `transport.sendAction(chatId, { type: "undo" })`
+Server: `agentChat.sendAction({ type: "undo" })`
+
+See [Actions](/ai-chat/actions) and [Sending actions](/ai-chat/frontend#sending-actions).
+
+</Update>
+
+<Update label="April 14, 2026" description="0.0.0-chat-prerelease-20260414181032" tags={["SDK"]}>
+
+## `chat.response` — persistent data parts
+
+Added `chat.response.write()` for writing data parts that both stream to the frontend AND persist in `onTurnComplete`'s `responseMessage` and `uiMessages`.
+
+```ts
+// Persists to responseMessage.parts — available in onTurnComplete
+chat.response.write({ type: "data-handover", data: { context: summary } });
+
+// Transient — streams to frontend only, not in responseMessage
+writer.write({ type: "data-progress", data: { percent: 50 }, transient: true });
+```
+
+Non-transient `data-*` chunks written via lifecycle hook `writer.write()` now automatically persist to the response message, matching the AI SDK's default semantics. Add `transient: true` for ephemeral chunks (progress indicators, status updates).
+
+See [Custom data parts](/ai-chat/backend#custom-data-parts).
+
+## Tool approvals
+
+Added support for AI SDK tool approvals (`needsApproval: true`). When the model calls a tool that needs approval, the turn completes and the frontend shows approve/deny buttons. After approval, the updated assistant message is sent back and matched by ID in the accumulator.
+
+```ts
+const sendEmail = tool({
+  description: "Send an email. Requires human approval.",
+  inputSchema: z.object({ to: z.string(), subject: z.string(), body: z.string() }),
+  needsApproval: true,
+  execute: async ({ to, subject, body }) => {
+    /* ... */
+  },
+});
+```
+
+Frontend setup requires `sendAutomaticallyWhen` and `addToolApprovalResponse` from `useChat`. See [Tool approvals](/ai-chat/frontend#tool-approvals).
+
+## `transport.stopGeneration(chatId)`
+
+Added `stopGeneration` method to `TriggerChatTransport` for reliable stop after page refresh / stream reconnect. Works regardless of whether the AI SDK passes `abortSignal` through `reconnectToStream`.
+
+```tsx
+const stop = useCallback(() => {
+  transport.stopGeneration(chatId);
+  aiStop(); // also update useChat state
+}, [transport, chatId, aiStop]);
+```
+
+See [Stop generation](/ai-chat/frontend#stop-generation).
+
+## `generateMessageId` support
+
+`generateMessageId` can now be passed via `uiMessageStreamOptions` to control response message ID generation (e.g. UUID-v7). The backend automatically passes `originalMessages` to `toUIMessageStream` so message IDs are consistent between frontend and backend.
+
+## Bug fixes
+
+- **`onTurnComplete` not called**: Fixed `turnCompleteResult?.lastEventId` TypeError that silently skipped `onTurnComplete` when `writeTurnCompleteChunk` returned undefined in dev.
+- **Stop during streaming**: Added 2s timeout on `onFinishPromise` so `onBeforeTurnComplete` and `onTurnComplete` fire even when the AI SDK's `onFinish` doesn't fire after abort.
+- **`toStreamTextOptions` without `chat.prompt.set()`**: `prepareStep` injection (compaction, steering, background context) now works even when the user passes `system` directly to `streamText` instead of using `chat.prompt.set()`.
+- **Background queue vs tool approvals**: Background context injection is now skipped when the last accumulated message is a `tool` message, preventing it from breaking `streamText`'s `collectToolApprovals`.
+
+</Update>
diff --git a/docs/ai-chat/chat-local.mdx b/docs/ai-chat/chat-local.mdx
new file mode 100644
index 00000000000..aa1f130582e
--- /dev/null
+++ b/docs/ai-chat/chat-local.mdx
@@ -0,0 +1,173 @@
+---
+title: "chat.local"
+sidebarTitle: "chat.local"
+description: "Typed, run-scoped data accessible from hooks, run(), tools, and subtasks. Survives across turns, auto-cleared between runs, auto-hydrated into subtasks."
+---
+
+import RcBanner from "/snippets/ai-chat-rc-banner.mdx";
+
+<RcBanner />
+
+Use `chat.local` to create typed, run-scoped data that persists across turns and is accessible from anywhere — the run function, tools, nested helpers. Each run gets its own isolated copy, and locals are automatically cleared between runs.
+
+Lifecycle hooks and **`run`** also receive **`ctx`** ([`TaskRunContext`](/ai-chat/reference#task-context-ctx)) — the same object as on a standard `task()` — for tags, metadata, and cleanup that needs the full run record.
+
+When a subtask is invoked via `ai.toolExecute()` (or the deprecated `ai.tool()`), initialized locals are automatically serialized into the subtask's metadata and hydrated on first access — no extra code needed. Subtask changes to hydrated locals are local to the subtask and don't propagate back to the parent.
+
+## Declaring and initializing
+
+Declare locals at module level with a unique `id`, then initialize them inside a lifecycle hook where you have context (chatId, clientData, etc.):
+
+```ts
+import { chat } from "@trigger.dev/sdk/ai";
+import { streamText, tool, stepCountIs } from "ai";
+import { anthropic } from "@ai-sdk/anthropic";
+import { z } from "zod";
+import { db } from "@/lib/db";
+
+// Declare at module level — each local needs a unique id
+const userContext = chat.local<{
+  userId: string;
+  name: string;
+  plan: "free" | "pro";
+  messageCount: number;
+}>({ id: "userContext" });
+
+export const myChat = chat.agent({
+  id: "my-chat",
+  clientDataSchema: z.object({ userId: z.string() }),
+  onBoot: async ({ clientData }) => {
+    // Initialize with real data from your database
+    const user = await db.user.findUnique({
+      where: { id: clientData.userId },
+    });
+    userContext.init({
+      userId: clientData.userId,
+      name: user.name,
+      plan: user.plan,
+      messageCount: user.messageCount,
+    });
+  },
+  run: async ({ messages, signal }) => {
+    userContext.messageCount++;
+
+    return streamText({
+      model: anthropic("claude-sonnet-4-5"),
+      system: `Helping ${userContext.name} (${userContext.plan} plan).`,
+      messages,
+      abortSignal: signal,
+      stopWhen: stepCountIs(15),
+    });
+  },
+});
+```
+
+<Warning>
+  Initialize `chat.local` in [`onBoot`](/ai-chat/lifecycle-hooks#onboot), not `onChatStart`. `onBoot` fires on every fresh worker — including continuation runs (post-cancel, crash, `endRun`, `requestUpgrade`, OOM retry) — whereas `onChatStart` only fires on the chat's very first message. Initializing in `onChatStart` means `run()` will crash on continuation runs with `chat.local can only be modified after initialization`.
+</Warning>
+
+## Accessing from tools
+
+Locals are accessible from anywhere during task execution — including AI SDK tools:
+
+```ts
+const userContext = chat.local<{ plan: "free" | "pro" }>({ id: "userContext" });
+
+const premiumTool = tool({
+  description: "Access premium features",
+  inputSchema: z.object({ feature: z.string() }),
+  execute: async ({ feature }) => {
+    if (userContext.plan !== "pro") {
+      return { error: "This feature requires a Pro plan." };
+    }
+    // ... premium logic
+  },
+});
+```
+
+## Accessing from subtasks
+
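+When you use `ai.toolExecute()` inside AI SDK `tool()` to expose a subtask, chat locals are automatically available inside the subtask as a hydrated copy — reads just work, and any writes stay local to the subtask (they don't propagate back to the parent):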
+
+```ts
+import { chat, ai } from "@trigger.dev/sdk/ai";
+import { schemaTask } from "@trigger.dev/sdk";
+import { streamText, tool, stepCountIs } from "ai";
+import { anthropic } from "@ai-sdk/anthropic";
+import { z } from "zod";
+
+const userContext = chat.local<{ name: string; plan: "free" | "pro" }>({ id: "userContext" });
+
+export const analyzeDataTask = schemaTask({
+  id: "analyze-data",
+  schema: z.object({ query: z.string() }),
+  run: async ({ query }) => {
+    // userContext.name just works — auto-hydrated from parent metadata
+    console.log(`Analyzing for ${userContext.name}`);
+    // Changes here are local to this subtask and don't propagate back
+  },
+});
+
+const analyzeData = tool({
+  description: analyzeDataTask.description ?? "",
+  inputSchema: analyzeDataTask.schema!,
+  execute: ai.toolExecute(analyzeDataTask),
+});
+
+export const myChat = chat.agent({
+  id: "my-chat",
+  onBoot: async ({ clientData }) => {
+    userContext.init({ name: "Alice", plan: "pro" });
+  },
+  run: async ({ messages, signal }) => {
+    return streamText({
+      model: anthropic("claude-sonnet-4-5"),
+      messages,
+      tools: { analyzeData },
+      abortSignal: signal,
+      stopWhen: stepCountIs(15),
+    });
+  },
+});
+```
+
+<Note>
+  Values must be JSON-serializable for subtask access. Non-serializable values (functions, class instances, etc.) will be lost during transfer.
+</Note>
+
+## Dirty tracking and persistence
+
+The `hasChanged()` method returns `true` if any property was set since the last check, then resets the flag. Use it in lifecycle hooks to only persist when data actually changed:
+
+```ts
+onTurnComplete: async ({ chatId }) => {
+  if (userContext.hasChanged()) {
+    await db.user.update({
+      where: { id: userContext.get().userId },
+      data: {
+        messageCount: userContext.messageCount,
+      },
+    });
+  }
+},
+```
+
+## API
+
+| Method | Description |
+|--------|-------------|
+| `chat.local<T>({ id })` | Create a typed local with a unique id (declare at module level) |
+| `local.init(value)` | Initialize with a value (call in a lifecycle hook — `onBoot` is the canonical site — or in `run`) |
+| `local.hasChanged()` | Returns `true` if modified since last check, resets flag |
+| `local.get()` | Returns a plain object copy (for serialization) |
+| `local.property` | Direct property access (read/write via Proxy) |
+
+<Note>
+  Locals use shallow proxying. Nested object mutations like `local.prefs.theme = "dark"` won't trigger the dirty flag. Instead, replace the whole property: `local.prefs = { ...local.prefs, theme: "dark" }`.
+</Note>
+
+## See also
+
+- [Lifecycle hooks](/ai-chat/lifecycle-hooks) — `onBoot` is the canonical init site for `chat.local`.
+- [Database persistence pattern](/ai-chat/patterns/database-persistence) — full per-hook breakdown using `chat.local` alongside DB rows.
+- [Code execution sandbox pattern](/ai-chat/patterns/code-sandbox) — example of using `chat.local` to hold a sandbox handle across turns.
diff --git a/docs/ai-chat/client-protocol.mdx b/docs/ai-chat/client-protocol.mdx
new file mode 100644
index 00000000000..548f428339e
--- /dev/null
+++ b/docs/ai-chat/client-protocol.mdx
@@ -0,0 +1,1079 @@
+---
+title: "Client Protocol"
+sidebarTitle: "Client Protocol"
+description: "The wire protocol for building custom chat transports — how clients communicate with chat agents over Sessions and SSE."
+---
+
+import RcBanner from "/snippets/ai-chat-rc-banner.mdx";
+
+<RcBanner />
+
+This page documents the protocol that chat clients use to communicate with `chat.agent()` tasks. Use this if you're building a custom transport (e.g., for a Slack bot, CLI tool, or native app) instead of using the built-in `TriggerChatTransport` or `AgentChat`.
+
+<Note>
+  Most users don't need this. Use [`TriggerChatTransport`](/ai-chat/frontend) for browser apps or [`AgentChat`](/ai-chat/server-chat) for server-side code. This page is for building your own from scratch.
+</Note>
+
+## Overview
+
+`chat.agent` is built on a durable Session row — the unit of state that owns the chat's runs across their full lifecycle. A conversation is one session; a session can host many runs over its lifetime.
+
+The protocol has three parts:
+
+1. **Create the session** — idempotent on your chat ID. Creates the row **and** triggers the first run in one call. Returns the `publicAccessToken` you'll use for everything else.
+2. **Subscribe to `.out`** — receive `UIMessageChunk` events via SSE.
+3. **Append to `.in`** — send subsequent user messages, stops, or actions.
+
+```mermaid
+sequenceDiagram
+  participant Client
+  participant API as Trigger.dev API
+  participant Agent as Chat Agent Run
+
+  Client->>API: POST /api/v1/sessions { type: "chat.agent", externalId, taskIdentifier, triggerConfig.basePayload }
+  API-->>Client: { id: sessionId, runId, publicAccessToken, ... }
+  Client->>API: GET /realtime/v1/sessions/{sessionId}/out (SSE subscribe)
+  Agent-->>Client: UIMessageChunk stream...
+  Agent-->>Client: turn-complete control record
+  Client->>API: POST /realtime/v1/sessions/{sessionId}/in/append { kind: "message", payload: { message, ... } }
+  Agent-->>Client: UIMessageChunk stream...
+  Agent-->>Client: turn-complete control record
+```
+
+<Note>
+  **Stream lifetime.** `session.out` is bounded. After each turn-complete control record, the agent appends an S2 `trim` command record that trims the stream back to the previous turn-complete's `seq_num`, so at steady state the stream stays roughly one turn long. Full conversation history lives in a durable S3 snapshot, not on the stream. The transport's `lastEventId` bookmark plus S2's eventually-consistent trim window (10-60s) keeps single-turn-boundary resume working; multi-turn-away resume falls back to the snapshot. See [Resuming a stream](#resuming-a-stream) and [How history is rebuilt](#how-history-is-rebuilt).
+</Note>
+
+<Note>
+  **Session create triggers a run.** Unlike `POST /api/v1/tasks/{taskId}/trigger`, `POST /api/v1/sessions` is the **only** entry point for chat-agent runs. The session row is task-bound and the first run is triggered atomically as part of the create call. Don't call `/tasks/{taskId}/trigger` directly for `chat.agent` tasks — the resulting run won't be bound to a session and `.in`/`.out` won't reach it.
+</Note>
+
+<Note>
+  **One message per record.** Each `.in/append` carries at most one new `UIMessage` — the new user turn or a tool-approval response. The agent rebuilds prior history at run boot from a durable object-store snapshot plus a replay of the `session.out` tail; clients never ship full conversation history on the wire. See [How history is rebuilt](#how-history-is-rebuilt).
+</Note>
+
+## End-to-end curl recipe
+
+A single-shell walk-through of the whole protocol — copy, fill in `BASE_URL` / `SECRET_KEY` / `TASK_ID`, and run. Drives a two-turn conversation (`pong` → `echo`) using only `curl` and `jq`.
+
+```bash
+BASE_URL="https://api.trigger.dev"   # or your local webapp
+SECRET_KEY="tr_dev_..."              # secret API key for the env
+TASK_ID="ai-chat"                    # your chat.agent task id
+CHAT_ID=$(uuidgen | tr '[:upper:]' '[:lower:]')
+
+# 1. Create session + trigger first run with the user's first message.
+RESP=$(curl -sS -X POST "$BASE_URL/api/v1/sessions" \
+  -H "Authorization: Bearer $SECRET_KEY" \
+  -H "Content-Type: application/json" \
+  -d @- <<JSON
+{
+  "type": "chat.agent",
+  "externalId": "$CHAT_ID",
+  "taskIdentifier": "$TASK_ID",
+  "triggerConfig": {
+    "basePayload": {
+      "chatId": "$CHAT_ID",
+      "trigger": "submit-message",
+      "message": {
+        "id": "u1",
+        "role": "user",
+        "parts": [{ "type": "text", "text": "Reply with the single word: pong." }]
+      },
+      "metadata": { "userId": "demo-user" }
+    }
+  }
+}
+JSON
+)
+SESSION_ID=$(echo "$RESP" | jq -r .id)
+PAT=$(echo "$RESP" | jq -r .publicAccessToken)
+echo "Session: $SESSION_ID  PAT: ${PAT:0:24}..."
+
+# 2. Subscribe to .out and read until the turn-complete control record.
+#    The doc's self-contained parser (Step 2) shows full SSE handling;
+#    this recipe just greps for `text-delta` data records and the
+#    `trigger-control` header on the `turn-complete` control record,
+#    then extracts the last seq_num so step 4 can resume from it.
+SSE=$(curl -sS --max-time 30 -N \
+  -H "Authorization: Bearer $PAT" \
+  -H "Accept: text/event-stream" \
+  -H "Timeout-Seconds: 20" \
+  "$BASE_URL/realtime/v1/sessions/$SESSION_ID/out")
+echo "$SSE" | grep -E 'text-delta|trigger-control' | head -2
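+#    Read seq_num from `records[]` only — each batch also carries a
+#    `tail.seq_num` (the stream tail), which is not a record you processed.
+LAST_SEQ=$(echo "$SSE" | sed -nE 's/^data: ?(\{.*)$/\1/p' | jq -r '.records[]?.seq_num' | tail -1)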
+echo "lastSeq: $LAST_SEQ"
+
+# 3. Send a follow-up via .in/append.
+curl -sS -X POST "$BASE_URL/realtime/v1/sessions/$SESSION_ID/in/append" \
+  -H "Authorization: Bearer $PAT" \
+  -H "Content-Type: application/json" \
+  -d @- <<JSON
+{
+  "kind": "message",
+  "payload": {
+    "chatId": "$CHAT_ID",
+    "trigger": "submit-message",
+    "message": {
+      "id": "u2",
+      "role": "user",
+      "parts": [{ "type": "text", "text": "Now reply with: echo." }]
+    },
+    "metadata": { "userId": "demo-user" }
+  }
+}
+JSON
+
+# 4. Re-subscribe with Last-Event-ID so we resume past turn 1's records
+#    and read turn 2 only.
+curl -sS --max-time 30 -N \
+  -H "Authorization: Bearer $PAT" \
+  -H "Accept: text/event-stream" \
+  -H "Timeout-Seconds: 20" \
+  -H "Last-Event-ID: $LAST_SEQ" \
+  "$BASE_URL/realtime/v1/sessions/$SESSION_ID/out" \
+  | grep -m 2 -E 'text-delta|trigger-control'
+```
+
+The rest of this page details each step.
+
+## Step 1: Create the session and trigger the first run
+
+`POST /api/v1/sessions` does two things atomically: it creates (or returns) a session row, and triggers the first run for that session. Use your stable chat ID as `externalId` to make creation idempotent — two concurrent clients for the same chat converge on the same row, and a repeat call returns the existing session without triggering a duplicate run.
+
+The `trigger` value inside `basePayload` decides what the first run does:
+
+<CodeGroup>
+```bash trigger: "preload" — warm the agent, no message yet
+POST /api/v1/sessions
+Authorization: Bearer <secret-key-or-jwt>
+Content-Type: application/json
+
+{
+  "type": "chat.agent",
+  "externalId": "conversation-123",
+  "taskIdentifier": "ai-chat",
+  "triggerConfig": {
+    "basePayload": {
+      "chatId": "conversation-123",
+      "trigger": "preload",
+      "metadata": { "userId": "user-456" }
+    }
+  },
+  "tags": ["chat:conversation-123"]
+}
+```
+
+```bash trigger: "submit-message" — process first user message immediately
+POST /api/v1/sessions
+Authorization: Bearer <secret-key-or-jwt>
+Content-Type: application/json
+
+{
+  "type": "chat.agent",
+  "externalId": "conversation-123",
+  "taskIdentifier": "ai-chat",
+  "triggerConfig": {
+    "basePayload": {
+      "chatId": "conversation-123",
+      "trigger": "submit-message",
+      "message": {
+        "id": "msg-1",
+        "role": "user",
+        "parts": [{ "type": "text", "text": "Hello!" }]
+      },
+      "metadata": { "userId": "user-456" }
+    }
+  },
+  "tags": ["chat:conversation-123"]
+}
+```
+</CodeGroup>
+
+Pick `"preload"` when the UI has rendered but the user hasn't typed (warms the agent so the first response is fast); pick `"submit-message"` when you already have the first message and want it processed in the same call.
+
+### Required fields
+
+| Field | Type | Description |
+| --- | --- | --- |
+| `type` | `string` | Discriminator. Use `"chat.agent"`. |
+| `taskIdentifier` | `string` | The `id` you passed to `chat.agent({ id: ... })` — e.g. `"ai-chat"`. |
+| `triggerConfig.basePayload` | `object` | The wire payload sent to the **first run** created by this call. Same shape as [`ChatTaskWirePayload`](#chattaskwirepayload) in Step 3. Durable fields (`chatId`, `metadata`, `idleTimeoutInSeconds`, `sessionId`) flow through to continuation runs too; first-turn-only fields (`message`, `trigger`) are stripped on continuations — those are session-create concerns and don't replay. See [What goes in `basePayload`](#what-goes-in-basepayload) below. |
+
+### Optional fields
+
+| Field | Type | Description |
+| --- | --- | --- |
+| `externalId` | `string` | Your stable chat ID. Strongly recommended — without it, repeat calls create new sessions. Cannot start with `session_`. |
+| `tags` | `string[]` | Up to 10 dashboard tags. |
+| `metadata` | `object` | Arbitrary JSON metadata stored on the session row (separate from `basePayload.metadata`, which goes to the agent). |
+| `expiresAt` | `string` (ISO date) | Retention cap. |
+| `triggerConfig.machine` | `string` | Machine preset (`micro`, `small-1x`, …) for every run. |
+| `triggerConfig.queue` | `string` | Queue name. |
+| `triggerConfig.tags` | `string[]` | Tags applied to every run (in addition to session-level `tags`). |
+| `triggerConfig.maxAttempts` | `number` | Per-run retry cap (1–10). |
+| `triggerConfig.maxDuration` | `number` | Per-run wall-clock cap, seconds. |
+| `triggerConfig.lockToVersion` | `string` | Pin every run to a specific worker version. |
+| `triggerConfig.region` | `string` | Region preference. |
+| `triggerConfig.idleTimeoutInSeconds` | `number` | Surfaced to the agent through the wire payload (1–3600). |
+
+### What goes in `basePayload`
+
+`basePayload` is the [`ChatTaskWirePayload`](#chattaskwirepayload) sent to the agent at run boot — the same shape used for every subsequent `.in/append` (Step 3). Two fields you must always include:
+
+- `chatId` — should equal your `externalId`. The agent uses this as its conversation identity (e.g. as a DB key in `hydrateMessages`); the `externalId` is what the URL routes resolve. Setting them to the same value is the standard pattern and the only way the built-in clients work.
+- `trigger` — see the two examples above. `"preload"` and `"submit-message"` are the only valid choices for the first run; the others (`"regenerate-message"`, `"action"`, `"close"`, `"handover-prepare"`) are for subsequent `.in/append` calls.
+
+The agent's typed `clientData` (declared via `chat.withClientData({ schema: ... })`) is read from `basePayload.metadata`. If your agent declares `clientData: { userId: string }`, then `metadata.userId` is required on every run — including the first one in `basePayload`.
+
+### Response
+
+```http
+HTTP/1.1 201 Created
+content-type: application/json; charset=utf-8
+x-trigger-jwt: eyJhbGciOi...
+x-trigger-jwt-claims: {"sub":"...","scopes":["read:runs:run_abc123","write:inputStreams:run_abc123"]}
+
+{
+  "id": "session_cm4z2plfh000abcd1efgh",
+  "externalId": "conversation-123",
+  "type": "chat.agent",
+  "taskIdentifier": "ai-chat",
+  "triggerConfig": { "basePayload": { /* echoed back */ } },
+  "currentRunId": "run_abc123",
+  "tags": ["chat:conversation-123"],
+  "metadata": null,
+  "closedAt": null,
+  "closedReason": null,
+  "expiresAt": null,
+  "createdAt": "2026-04-24T09:00:00.000Z",
+  "updatedAt": "2026-04-24T09:00:00.000Z",
+  "runId": "run_abc123",
+  "publicAccessToken": "eyJhbGciOi...",
+  "isCached": false
+}
+```
+
+| Field | Description |
+| --- | --- |
+| `id` | The `session_*` friendly ID. Stable for the life of the conversation. |
+| `runId` / `currentRunId` | Friendly ID of the first run. Identical on a fresh create; will diverge over the conversation (see [Continuations](#continuations)). |
+| `publicAccessToken` | Session-scoped JWT carrying `read:sessions:{externalId}` + `write:sessions:{externalId}`. **This is the token you use for every subsequent `.in`/`.out` call.** Persist it. Lifetime is 60 minutes — see [Refreshing the token](#refreshing-the-token). |
+| `isCached` | `true` if the session existed already (idempotent re-create). HTTP status is 200 in that case, 201 on a fresh create. |
+
+<Warning>
+  **Use `publicAccessToken` from the body, not the `x-trigger-jwt` response header.** The header is included by the underlying run-trigger machinery and carries **run-scoped** scopes (`read:runs:{runId}` + `write:inputStreams:{runId}`) — it cannot subscribe to `.out` or append to `.in`. The body's `publicAccessToken` is the only token with the correct session-level scopes.
+</Warning>
+
+### Idempotency
+
+Re-calling `POST /api/v1/sessions` with the same `(taskIdentifier, externalId)` pair is **idempotent for the lifetime of the session**:
+
+- If the session is still alive: returns the existing row with `isCached: true`, `runId` unchanged, and a **fresh** 60-minute `publicAccessToken`. No duplicate run is triggered. (Idle/exited runs are different — see [Continuations](#continuations).)
+- If the session has been closed (`POST /api/v1/sessions/{id}/close`): returns **HTTP 409**. Closed is one-way; reuse a different `externalId` to start a new conversation.
+- Any tags / metadata / expiresAt / triggerConfig fields you send on the cached path are written through to the row, so you can update e.g. `triggerConfig.basePayload.metadata` mid-conversation. The new fields apply to **future** runs (continuations); the currently-live run keeps its original config.
+
+<Warning>
+  **A cached re-POST does not deliver a new `basePayload.message`.** `basePayload` is run-trigger config, not a message channel — the existing run keeps streaming and your message is silently dropped. To send a follow-up message, use `POST /realtime/v1/sessions/{sessionId}/in/append` (Step 3).
+</Warning>
+
+### Refreshing the token
+
+The `publicAccessToken` returned by `POST /api/v1/sessions` is valid for 60 minutes. Two ways to keep going past that:
+
+1. **Take refreshed tokens from the stream.** Every `turn-complete` control record on `.out` carries a `public-access-token` header with a refreshed JWT (see [`turn-complete` control record](#turn-complete-control-record)). For active conversations this just rolls — replace your stored token whenever the header is present.
+2. **Re-call `POST /api/v1/sessions`.** Idempotent, returns `isCached: true` and a brand-new 60-minute token. Use this if a chat goes idle long enough that the SSE stream has closed and you need to resume.
+
+<Note>
+  The built-in SDK clients (`TriggerChatTransport` from `@trigger.dev/sdk`, `AgentChat` from `@trigger.dev/sdk/chat`) call this endpoint and persist the refreshed `publicAccessToken` automatically, refreshing on every `turn-complete` control record.
+</Note>
+
+## Step 2: Subscribe to `.out`
+
+Subscribe to the agent's response via SSE on the session's `.out` channel:
+
+```
+GET /realtime/v1/sessions/{sessionId}/out
+Authorization: Bearer <publicAccessToken>
+Accept: text/event-stream
+```
+
+`Accept: text/event-stream` is required — without it the request is rejected as a non-SSE caller.
+
+The URL accepts either form for `{sessionId}`: the friendly `session_*` ID, or your `externalId` (the chat ID you created the session with). The `publicAccessToken` from session-create authorizes both forms. Pick whichever your client already has on hand.
+
+A session's `.out` stays the same across runs, so the client doesn't need to re-subscribe when a new run starts on the same chat. `seq_num` is **monotonically increasing across the entire session**, not just within one run — turn 1 might emit seq 0–9, turn 2 picks up at seq 10+, a continuation run on the same session continues numbering from there. This is why a single `Last-Event-ID` cursor is sufficient to resume across turns and across runs.
+
+### Stream timeout
+
+The SSE long-polls until either a record arrives or the timeout expires. The default is **60 seconds**; cap it explicitly via the `Timeout-Seconds` request header (1–600):
+
+```
+GET /realtime/v1/sessions/{sessionId}/out
+Authorization: Bearer <publicAccessToken>
+Accept: text/event-stream
+Timeout-Seconds: 30
+```
+
+If nothing arrives by the deadline, the server sends `data: [DONE]` and closes. Reconnect with `Last-Event-ID` to continue (see [Resuming a stream](#resuming-a-stream)).
+
+### Stream format (S2)
+
+The output stream uses [S2](https://s2.dev) under the hood and follows the standard SSE wire format ([WHATWG spec](https://html.spec.whatwg.org/multipage/server-sent-events.html#parsing-an-event-stream)). Three event types arrive on the wire:
+
+| Event | Meaning |
+| --- | --- |
+| `batch` | One or more records. The records you actually care about. |
+| `ping` | Keepalive (~every 5s on idle). Body is `{"timestamp": <ms>}`. Ignore it. |
+| _(no `event:`, just `data: [DONE]`)_ | Stream is closing — server sends this once before EOF. |
+
+A `batch` event in raw SSE format looks like this — note the `data` is a single line of JSON, no embedded newlines (per the SSE spec):
+
+```
+id: 0,1,106
+event: batch
+data: {"records":[{"seq_num":0,"timestamp":1712150400000,"body":"{\"data\":{\"type\":\"text-delta\",\"id\":\"msg_1\",\"delta\":\"pong\"},\"id\":\"abc\"}"}],"tail":{"seq_num":10,"timestamp":1712150400500}}
+
+```
+
+The `id:` line on the wire is a comma-separated triple internal to S2 (`startSeq,endSeq,byteOffset`) — **don't try to parse it**. Use `record.seq_num` from inside the `data` body instead (see [Resuming a stream](#resuming-a-stream)).
+
+Decoded `data` payload:
+
+```json
+{
+  "records": [
+    {
+      "seq_num": 0,
+      "timestamp": 1712150400000,
+      "body": "{\"data\":{\"type\":\"text-delta\",\"id\":\"msg_1\",\"delta\":\"pong\"},\"id\":\"abc\"}"
+    }
+  ],
+  "tail": {
+    "seq_num": 10,
+    "timestamp": 1712150400500
+  }
+}
+```
+
+| Field | Description |
+| --- | --- |
+| `records[]` | One or more records delivered in this batch, in arrival order. |
+| `records[].seq_num` | Monotonic per-record cursor. Use the **last** one you successfully processed as your `Last-Event-ID` on resume. |
+| `records[].timestamp` | Unix ms when the record was written to S2. |
+| `records[].body` | For data records: a JSON-encoded **string** wrapping `{ data: UIMessageChunk, id: string }`. For control records: an empty string (semantics live in `headers`). For S2 command records: opaque bytes. See [Records on session.out](#records-on-session-out). |
+| `records[].headers` | Optional `[name, value]` pairs. Empty for data records; a `trigger-control` entry for control records; a single empty-name `["", "<op>"]` entry for S2 command records. |
+| `tail.seq_num` | Latest known tail of the S2 stream — useful for detecting how far behind the live edge you are. Skip if you don't need it. |
+| `tail.timestamp` | Timestamp of `tail.seq_num`. |
+
+### Records on `session.out`
+
+Three kinds of records can arrive on the wire. They all share the `batch` envelope above; you tell them apart by `headers`.
+
+| Kind | `headers[0][0]` | `headers` carries | `body` |
+| --- | --- | --- | --- |
+| **Data record** | _none (`headers` empty or absent), or any non-empty name other than `trigger-control`_ | (currently none from the agent) | JSON envelope `{"data": UIMessageChunk, "id": <partId>}` |
+| **Trigger control record** | `"trigger-control"` | `["trigger-control", <subtype>]` plus subtype-specific siblings (e.g. `["public-access-token", <jwt>]` and `["session-in-event-id", <seq>]` on `turn-complete`) | empty string |
+| **S2 command record** | `""` (empty name) | `["", "<op>"]` (currently `"trim"`) | opaque bytes — S2-interpreted |
+
+**Uniform filter rule for custom readers:**
+
+```ts
+// Always advance the resume cursor — even for records you skip.
+lastEventId = String(record.seq_num);
+
+// S2 command record: bump cursor, don't dispatch.
+if (record.headers?.[0]?.[0] === "") continue;
+
+// Trigger control record: route by `trigger-control` value, don't
+// dispatch as a UIMessageChunk.
+const controlValue = record.headers?.find(([name]) => name === "trigger-control")?.[1];
+if (controlValue === "turn-complete") {
+  const token = record.headers?.find(([name]) => name === "public-access-token")?.[1];
+  // ...fire your turn-complete handler with the optional refreshed token...
+  continue;
+}
+if (controlValue === "upgrade-required") {
+  // ...your upgrade flow, if any. The server has already swapped the run
+  // by the time this arrives — subsequent chunks are from the new run...
+  continue;
+}
+
+// Otherwise: data record. Parse the body, dispatch the UIMessageChunk.
+const { data: chunk } = JSON.parse(record.body);
+```
+
+Built-in SDK transports (`TriggerChatTransport`, `AgentChat`) handle all of this for you — control records surface via `onTurnComplete({ chatId, lastEventId, publicAccessToken })` and the upgrade flow. Custom transports need the routing above.
+
+<Note>
+  **Prior wire shape.** Earlier SDK versions emitted `trigger:turn-complete` and `trigger:upgrade-required` as `UIMessageChunk`-shaped data records with `chunk.type === "trigger:turn-complete"`. Current versions use the header-form control records described above. Built-in SDK transports handle the new shape transparently; custom transports filtering on `chunk.type` need to switch to the `trigger-control` header check.
+</Note>
+
+### Built-in parser (recommended for SDK users)
+
+If you're working in TypeScript and depending on `@trigger.dev/core/v3` is acceptable, use `SSEStreamSubscription` — it handles batch decoding, deduplication, command-record filtering, and `Last-Event-ID` tracking for you:
+
+```ts
+import { SSEStreamSubscription, controlSubtype } from "@trigger.dev/core/v3";
+
+const subscription = new SSEStreamSubscription(
+  `${baseUrl}/realtime/v1/sessions/${sessionId}/out`,
+  {
+    headers: { Authorization: `Bearer ${publicAccessToken}` },
+    timeoutInSeconds: 120,
+    lastEventId,
+  }
+);
+
+const stream = await subscription.subscribe();
+const reader = stream.getReader();
+
+while (true) {
+  const { done, value } = await reader.read();
+  if (done) break;
+
+  // value is { id, chunk, timestamp, headers }. S2 command records are
+  // filtered out of this stream entirely (cursor still advances). Trigger
+  // control records pass through with `chunk === undefined` and a
+  // `trigger-control` header.
+  const control = controlSubtype(value.headers);
+  if (control === "turn-complete") break;
+  if (control === "upgrade-required") continue;
+
+  const chunk = value.chunk as { type?: string; delta?: string } | undefined;
+  if (chunk?.type === "text-delta") process.stdout.write(chunk.delta ?? "");
+}
+```
+
+### Self-contained parser (for custom transports)
+
+If you're building a transport in another language or don't want the dependency, here's a complete reader. It handles the SSE framing (deliberately ignoring the comma-separated `id:` line), batch unwrapping, command- and control-record routing, the inner `body` string, and `ping` / `[DONE]` events:
+
+```ts
+async function* readSessionOut(
+  url: string,
+  publicAccessToken: string,
+  opts: { lastEventId?: string; timeoutSeconds?: number } = {}
+) {
+  const headers: Record<string, string> = {
+    Authorization: `Bearer ${publicAccessToken}`,
+    Accept: "text/event-stream",
+  };
+  if (opts.lastEventId) headers["Last-Event-ID"] = opts.lastEventId;
+  if (opts.timeoutSeconds) headers["Timeout-Seconds"] = String(opts.timeoutSeconds);
+
+  const res = await fetch(url, { headers });
+  if (!res.ok || !res.body) throw new Error(`SSE failed: ${res.status}`);
+
+  const decoder = new TextDecoder();
+  const reader = res.body.getReader();
+  let buf = "";
+
+  while (true) {
+    const { done, value } = await reader.read();
+    if (done) return;
+    buf += decoder.decode(value, { stream: true });
+
+    // SSE events are separated by blank lines (CRLF or LF).
+    const events = buf.split(/\r?\n\r?\n/);
+    buf = events.pop() ?? ""; // last chunk is incomplete
+
+    for (const raw of events) {
+      let eventType = "message"; // SSE default
+      const dataLines: string[] = [];
+      for (const line of raw.split(/\r?\n/)) {
+        if (line.startsWith("event:")) eventType = line.slice(6).trim();
+        else if (line.startsWith("data:")) dataLines.push(line.slice(5).trimStart());
+        // We deliberately ignore `id:` — use record.seq_num for resume cursors.
+      }
+      const data = dataLines.join("\n");
+      if (!data) continue;
+
+      if (eventType === "ping") continue;
+      if (data === "[DONE]") return;
+
+      if (eventType === "batch") {
+        const batch = JSON.parse(data) as {
+          records: Array<{
+            seq_num: number;
+            timestamp: number;
+            body: string;
+            headers?: Array<[string, string]>;
+          }>;
+        };
+        for (const record of batch.records) {
+          const firstHeaderName = record.headers?.[0]?.[0];
+
+          // S2 command record (trim/fence) — bump cursor, skip dispatch.
+          if (firstHeaderName === "") {
+            yield { seqNum: record.seq_num, timestamp: record.timestamp, kind: "command" };
+            continue;
+          }
+
+          // Trigger control record (turn-complete, upgrade-required) —
+          // semantics live in headers, body is empty. Route by header.
+          const controlValue = record.headers?.find(([n]) => n === "trigger-control")?.[1];
+          if (controlValue) {
+            const token = record.headers?.find(([n]) => n === "public-access-token")?.[1];
+            yield {
+              seqNum: record.seq_num,
+              timestamp: record.timestamp,
+              kind: "control",
+              subtype: controlValue,
+              publicAccessToken: token,
+            };
+            continue;
+          }
+
+          // Data record — UIMessageChunk wrapped in `{ data, id }`.
+          const inner = JSON.parse(record.body) as { data: unknown; id: string };
+          yield {
+            seqNum: record.seq_num, // use this for Last-Event-ID on resume
+            timestamp: record.timestamp,
+            kind: "data",
+            chunk: inner.data, // the actual UIMessageChunk
+          };
+        }
+      }
+    }
+  }
+}
+```
+
+Driving it:
+
+```ts
+let lastSeq: string | undefined;
+for await (const ev of readSessionOut(sseUrl, publicAccessToken)) {
+  lastSeq = String(ev.seqNum);                       // always advance the cursor
+
+  if (ev.kind === "command") continue;               // S2 trim/fence — skip
+  if (ev.kind === "control") {
+    if (ev.subtype === "turn-complete") break;       // turn done
+    if (ev.subtype === "upgrade-required") continue; // run swap handled server-side
+    continue;
+  }
+
+  // ev.kind === "data" — the UIMessageChunk
+  const chunk = ev.chunk as { type: string; delta?: string };
+  if (chunk.type === "text-delta") process.stdout.write(chunk.delta ?? "");
+}
+// On reconnect, pass `lastEventId: lastSeq` to resume from the next record.
+```
+
+### Chunk types
+
+Data records on the stream carry a `UIMessageChunk` from the [AI SDK](https://ai-sdk.dev/docs/ai-sdk-ui/ui-message-stream). Two Trigger.dev-specific control events ride alongside as **header-form control records** (see [Records on session.out](#records-on-session-out)).
+
+Within a single assistant turn the AI SDK chunk types you'll typically see, in order:
+
+| Chunk type | Shape | Notes |
+| --- | --- | --- |
+| `start` | `{ type: "start", messageId: string }` | First chunk of a new assistant message. **Persist `messageId`** — you'll need it to send tool-approval responses (see [Tool approval responses](#tool-approval-responses)). |
+| `start-step` | `{ type: "start-step" }` | New `prepareStep` boundary. |
+| `text-start` / `text-delta` / `text-end` | `{ type: ..., id: string, delta?: string }` | Streaming text. Concatenate `delta`s for the visible reply. |
+| `tool-input-start` / `tool-input-delta` / `tool-input-available` | tool-call argument streaming | The tool the model is calling. |
+| `tool-output-available` | tool result | After the agent runs the tool. |
+| `data-*` | `{ type: "data-<name>", data: ... }` | Custom data parts written by the agent's hooks. |
+| `finish-step` / `finish` | end markers for the assistant message | Followed by the `turn-complete` control record. |
+
+Refer to the AI SDK docs linked above for the full union — only the two control records below are Trigger.dev-specific.
+
+### `turn-complete` control record
+
+Signals that the agent's turn is finished — stop reading and wait for user input.
+
+```
+headers:
+  ["trigger-control", "turn-complete"]
+  ["public-access-token", "eyJ..."]   // optional, refreshed JWT
+  ["session-in-event-id", "42"]       // optional, agent-internal resume cursor
+body: ""
+```
+
+| Header | Description |
+| --- | --- |
+| `trigger-control: turn-complete` | Always present on this record. |
+| `public-access-token: <jwt>` (optional) | A refreshed JWT with the same session + run scopes. If present, replace your stored token. |
+| `session-in-event-id: <seq>` (optional) | Internal cursor used by the agent to resume `.in` across worker boots without replaying already-processed user messages. Custom transports should ignore this header — it carries no client-side meaning. |
+
+When you receive this record:
+1. Update `publicAccessToken` if one is included on the headers.
+2. Close the stream reader (unless you want to keep it open across turns — see [Resuming a stream](#resuming-a-stream)).
+3. Wait for the next user message before sending on `.in`.
+
+### `upgrade-required` control record
+
+Signals that the agent cannot handle this message on its current version and a new run has been started. Emitted when the agent calls [`chat.requestUpgrade()`](/ai-chat/patterns/version-upgrades).
+
+```
+headers:
+  ["trigger-control", "upgrade-required"]
+body: ""
+```
+
+The server has already swapped the run on the same session by the time this record is delivered. Subsequent records on the same SSE subscription come from the new run.
+
+When you receive this record:
+1. Treat it as informational — no client action required. The same SSE keeps streaming the new run's chunks on the same session.
+2. Optionally surface a "switched to a newer version" indicator in your UI.
+
+The built-in clients handle this transparently.
+
+### Resuming a stream
+
+If the SSE connection drops, reconnect with the `Last-Event-ID` header set to the **last `record.seq_num` you successfully processed** (decoded from the batch body — not the SSE `id:` line, which is a comma-list internal to S2):
+
+```
+GET /realtime/v1/sessions/{sessionId}/out
+Authorization: Bearer <publicAccessToken>
+Accept: text/event-stream
+Last-Event-ID: 42
+```
+
+The server resumes streaming from `seq_num = 43` onward. `Last-Event-ID` is a single non-negative integer; passing the SSE `id:` line value verbatim (e.g. `0,1,106`) silently falls back to "start from the beginning."
+
+`SSEStreamSubscription` tracks this automatically via its `lastEventId` option.
+
+<Note>
+  **What "resumable" means.** `session.out` is trimmed back to the previous `turn-complete` control record after each turn finishes. In practice:
+
+  - **Resume across a single turn boundary always works** — your bookmark is the last turn's `turn-complete` record, which is still on the stream.
+  - **The S2 trim is eventually consistent** (typically 10-60s), so a quick close-then-reload still reliably sees records that are about to be trimmed.
+  - **Resume across multiple turns of inactivity** may find your bookmark trimmed. The S2 read silently clamps forward to the first surviving record; the cleanest recovery is to fetch the latest snapshot and treat the SSE as fresh from there (or rehydrate via your own DB if you use `hydrateMessages`). See [How history is rebuilt](#how-history-is-rebuilt).
+</Note>
+
+### `X-Peek-Settled` / `X-Session-Settled` — opt-in fast close on idle reconnects
+
+On **reconnect-on-reload** paths (resuming a chat where nothing may be streaming), send `X-Peek-Settled: 1` as a request header when opening the SSE. When present, the server peeks the tail of `.out` and walks past any trailing S2 trim command record to find the most recent data/control record underneath. If that record is a `turn-complete` control record (agent finished a turn and is idle-waiting or exited), the SSE:
+
+- Uses `wait=0` internally — drains any residual records and closes in ~1s instead of long-polling for 60s.
+- Sets the `X-Session-Settled: true` response header so the client can tell the close is terminal rather than a mid-stream drop.
+
+**Do not send `X-Peek-Settled` on the active-send response-stream path.** The peek would race the newly-triggered turn's first chunk — if the agent hasn't written the new turn's first record yet, the peek sees the prior turn's `turn-complete` and closes the SSE before the response lands on S2. The built-in `TriggerChatTransport.reconnectToStream` sets the header; `sendMessages → subscribeToStream` does not.
+
+```ts
+// Reconnect path (page reload)
+const response = await fetch(sseUrl, {
+  headers: {
+    Authorization: `Bearer ${publicAccessToken}`,
+    "X-Peek-Settled": "1",
+    "Last-Event-ID": lastEventId,
+  },
+});
+const settled = response.headers.get("X-Session-Settled") === "true";
+// ...subscribe as normal; if settled and nothing arrives, you're done.
+
+// Active send path — no X-Peek-Settled, keep long-poll semantics
+const liveResponse = await fetch(sseUrl, {
+  headers: {
+    Authorization: `Bearer ${publicAccessToken}`,
+    "Last-Event-ID": lastEventId,
+  },
+});
+```
+
+## Step 3: Send messages, stops, and actions
+
+All client-to-agent signals are appended to the session's `.in` channel:
+
+```
+POST /realtime/v1/sessions/{sessionId}/in/append
+Authorization: Bearer <publicAccessToken>
+Content-Type: application/json
+```
+
+`{sessionId}` accepts the same friendly-or-external forms as `.out`. The `publicAccessToken` from session-create authorizes both.
+
+The body is a JSON-serialized [`ChatInputChunk`](#chatinputchunk) — a tagged union covering messages and stops; actions ride on `kind: "message"` with `trigger: "action"`. Send each chunk as a raw JSON string (not wrapped in a `data` field). On success the response is `200 OK` with body `{ "ok": true }`; on failure it's `4xx`/`5xx` with `{ "ok": false, "error": "<message>" }`. Common failures:
+
+| Status | When |
+| --- | --- |
+| `401` | Missing or invalid `Authorization` header. |
+| `403` | Token doesn't carry `write:sessions:{externalId}`. |
+| `409` | The session is closed — `{ "ok": false, "error": "Cannot append to a closed session" }`. |
+| `413` | Body exceeds 512 KiB. A normal `kind: "message"` payload is a few KB; if you hit this you're shipping more than one message per record. |
+| `500` | Transient backend failure on the durable stream. Safe to retry — appends are idempotent on `(externalId, X-Part-Id)` if you set the optional `X-Part-Id` request header (the built-in clients generate one per request). |
+
+<Warning>
+  **Schema validation of `metadata` happens inside the agent, not at this endpoint.** A `kind: "message"` with bad or missing metadata returns `200 OK` here, but the agent rejects the turn at run time. From the wire the failure looks like a `turn-complete` control record with no preceding `text-delta` — i.e. an empty assistant response.
+
+  **How to detect from the client:** treat "received `turn-complete` after sending a `submit-message` with no `text-delta`/`tool-input-*` chunks in between" as a schema-validation suspect, and surface a sensible error to your user. **How to confirm from the dashboard / Trigger MCP:** the run trace includes a `chat turn N [ERROR]` span followed by `waiting for next message (after error)`; the `[ERROR]` span carries the validation error message in its events. Use `mcp__trigger__get_run_details` (or open the run in the dashboard) on the run ID surfaced in the `runId` field of session-create.
+</Warning>
+
+### `ChatInputChunk`
+
+```ts
+type ChatInputChunk =
+  | { kind: "message"; payload: ChatTaskWirePayload }
+  | { kind: "stop"; message?: string };
+```
+
+The discriminator `kind` drives the agent's dispatch — `"message"` goes to the turn loop, `"stop"` fires the abort controller.
+
+### `ChatTaskWirePayload`
+
+```ts
+type ChatTaskWirePayload<TMessage extends UIMessage = UIMessage, TMetadata = unknown> = {
+  /**
+   * The new message for this turn — at most ONE per record.
+   *  - "submit-message": the new user message, OR a tool-approval-responded
+   *    assistant message (with `state: "approval-responded"` tool parts).
+   *  - "regenerate-message": omitted (the server trims its own tail).
+   *  - "preload" / "close" / "action": omitted.
+   *  - "handover-prepare": omitted (use `headStartMessages` instead — see below).
+   */
+  message?: TMessage;
+
+  /**
+   * Escape hatch for chat.headStart. Ships full UIMessage history on the
+   * very first turn — before any snapshot exists. Used ONLY by
+   * trigger: "handover-prepare" against the customer's own HTTP route
+   * handler. The server ignores this field on any other trigger.
+   */
+  headStartMessages?: TMessage[];
+
+  chatId: string;
+  trigger:
+    | "submit-message"
+    | "regenerate-message"
+    | "preload"
+    | "close"
+    | "action"
+    | "handover-prepare";
+  messageId?: string;
+  /**
+   * Wire envelope for the agent's typed `clientData` (declared via
+   * `chat.withClientData({ schema })`). Whatever you put here is parsed
+   * against that schema at the agent boundary. If the agent declares
+   * `clientData: { userId: string }`, then `metadata.userId` is required.
+   */
+  metadata?: TMetadata;
+  action?: unknown;
+  /**
+   * Informational — the server sets this automatically on continuation
+   * runs (when the prior run is dead). Clients don't need to send it.
+   * Read by the agent's boot gate to skip `onChatStart` and trigger
+   * snapshot read + replay.
+   */
+  continuation?: boolean;
+  /**
+   * Informational — paired with `continuation: true`, set by the server
+   * from the prior run's friendly ID. Surfaced to the agent in
+   * `ctx.previousRunId`. Clients don't need to send it.
+   */
+  previousRunId?: string;
+  idleTimeoutInSeconds?: number;
+  sessionId?: string;
+};
+```
+
+<Note>
+  **`metadata` is the wire envelope for `clientData`.** The agent's `clientData` (typed via `chat.withClientData({ schema })`) is read from this field at run boot. If the agent declares e.g. `{ userId: string, model?: string }`, then every `kind: "message"` payload — and the `triggerConfig.basePayload` you sent at session create — must carry a matching `metadata.userId`. The agent rejects messages whose metadata fails schema validation.
+</Note>
+
+### Sending a message
+
+```
+POST /realtime/v1/sessions/{sessionId}/in/append
+Authorization: Bearer <publicAccessToken>
+Content-Type: application/json
+
+{
+  "kind": "message",
+  "payload": {
+    "message": {
+      "id": "msg-2",
+      "role": "user",
+      "parts": [{ "type": "text", "text": "Tell me more" }]
+    },
+    "chatId": "conversation-123",
+    "trigger": "submit-message",
+    "metadata": { "userId": "user-456" }
+  }
+}
+```
+
+After sending, subscribe to `.out` (if you closed the stream after the previous turn's `turn-complete`) to receive the response.
+
+<Note>
+  Send only the **new** user message — never the full history. The agent rebuilds prior history from a durable S3 snapshot plus a `session.out` replay at run boot. See [How history is rebuilt](#how-history-is-rebuilt).
+</Note>
+
+### Sending a stop
+
+```json
+{ "kind": "stop" }
+```
+
+Interrupts the agent's current turn. `streamText` aborts, the agent emits a `turn-complete` control record, and the run returns to idle.
+
+An optional `message` field surfaces in the agent's stop handler:
+
+```json
+{ "kind": "stop", "message": "user cancelled" }
+```
+
+### Sending an action
+
+Custom actions (undo, rollback, edit) ride on the same `.in` channel using `kind: "message"` with `trigger: "action"` in the payload. Omit `message` — actions don't carry a UIMessage:
+
+```json
+{
+  "kind": "message",
+  "payload": {
+    "chatId": "conversation-123",
+    "trigger": "action",
+    "action": { "type": "undo" },
+    "metadata": { "userId": "user-456" }
+  }
+}
+```
+
+Actions wake the agent from suspension (same as messages) and fire the `onAction` hook — they are not turns, so `run()` and turn lifecycle hooks do not fire. If `onAction` returns a `StreamTextResult`, the response is auto-piped to the frontend (but still no `run()` or `onTurnComplete`). The `action` payload is validated against the agent's `actionSchema`. If the agent didn't register an `actionSchema` (or your `action` payload doesn't match it), validation fails the same way `metadata` does — `.in/append` returns `200 OK`, but the run trace shows `chat turn N [ERROR]` and the wire emits a `turn-complete` control record with no other chunks. See [Actions](/ai-chat/actions) for the agent-side schema setup.
+
+### Regenerating the last response
+
+To regenerate the assistant's last response, send `trigger: "regenerate-message"` with no `message`:
+
+```json
+{
+  "kind": "message",
+  "payload": {
+    "chatId": "conversation-123",
+    "trigger": "regenerate-message",
+    "metadata": { "userId": "user-456" }
+  }
+}
+```
+
+The agent trims trailing assistant messages from its accumulator and re-streams from the prior user turn. The frontend's `useChat()` already removed the trailing assistant locally — the wire signal tells the agent to do the same.
+
+### Tool approval responses
+
+When a tool requires approval (`needsApproval: true`), the agent streams the tool call with an `approval-requested` state and completes the turn. After the user approves or denies, send the **updated assistant message** (with `approval-responded` tool parts) back as a `kind: "message"` chunk — singular, not the full chain:
+
+```json
+{
+  "kind": "message",
+  "payload": {
+    "message": {
+      "id": "asst-msg-1",
+      "role": "assistant",
+      "parts": [
+        { "type": "text", "text": "I'll send that email for you." },
+        {
+          "type": "tool-sendEmail",
+          "toolCallId": "call-1",
+          "state": "approval-responded",
+          "input": { "to": "user@example.com", "subject": "Hello" },
+          "approval": { "id": "approval-1", "approved": true }
+        }
+      ]
+    },
+    "chatId": "conversation-123",
+    "trigger": "submit-message",
+    "metadata": { "userId": "user-456" }
+  }
+}
+```
+
+The agent matches the incoming message by `id` against the rebuilt accumulator. If a match is found, it **replaces** the existing message instead of appending.
+
+<Note>
+  The message `id` must match the one the agent assigned during streaming. `TriggerChatTransport` keeps IDs in sync automatically. Custom transports should use the `messageId` from the stream's `start` chunk.
+</Note>
+
+## How history is rebuilt
+
+The agent rebuilds the full conversation accumulator on every fresh run boot. There are two reconstruction paths, and the agent picks based on what hooks the customer registered:
+
+### Path A — `hydrateMessages` registered
+
+If the agent declares a [`hydrateMessages`](/ai-chat/lifecycle-hooks#hydratemessages) hook, the runtime trusts the customer to be the source of truth for history. Snapshot read and replay are **skipped entirely** at boot. The hook fires per turn — `incomingMessages` always has zero or one entry (since each record carries at most one new message) — and returns the canonical chain from the customer's database.
+
+### Path B — Snapshot + replay (default)
+
+When `hydrateMessages` is not registered, the runtime reconstructs history from durable infrastructure on every run boot:
+
+<Steps>
+  <Step title="Read the latest snapshot">
+    The runtime fetches a per-session JSON snapshot from object storage (S3 or compatible). The snapshot stores `{ messages, lastOutEventId, lastOutTimestamp, savedAt }` — what was true at the moment the previous turn finished. A 404 (no snapshot yet) is fine — treated as empty.
+  </Step>
+  <Step title="Replay session.out tail">
+    The runtime subscribes to `session.out` with `wait=0` starting from the snapshot's `lastOutEventId` (or seq 0 if there is no snapshot). Any chunks since that cursor are fed through the AI SDK's `processUIMessageStream` reducer to materialize fresh `UIMessage[]`. This catches turns whose snapshot write didn't make it before a crash.
+  </Step>
+  <Step title="Merge by id, replay wins">
+    Snapshot messages and replayed messages are merged by `id`. On collision, replay wins — `session.out` is the freshest representation of any assistant message. Partial trailing assistant work from a crashed turn is cleaned up via `cleanupAbortedParts`.
+  </Step>
+  <Step title="Write a fresh snapshot after every turn">
+    When `onTurnComplete` fires, the runtime serializes the accumulator and writes it back to object storage. The write is **awaited** — the run may suspend immediately after, and fire-and-forget would lose the snapshot.
+  </Step>
+</Steps>
+
+Object-store configuration is the same as the rest of Trigger.dev — set `OBJECT_STORE_*` env vars. With no object store configured and no `hydrateMessages` hook, conversations don't survive run boundaries; the runtime logs a warning at registration time.
+
+For a deeper walkthrough of the snapshot model, including OOM-retry interaction and crash semantics, see [Persistence and replay](/ai-chat/patterns/persistence-and-replay).
+
+## Head-start protocol caveat
+
+The [`chat.headStart`](/ai-chat/fast-starts#head-start) flow runs the first turn's LLM call inside the customer's own HTTP route handler, then hands the durable stream off to the agent for tool execution and step 2+. On that first-ever turn no snapshot exists yet — the agent boots empty.
+
+To bridge that gap, the head-start route handler ships **full UIMessage history** through the dedicated `headStartMessages` field with `trigger: "handover-prepare"`. This is the **only** path where a wire-shipped UIMessage[] still seeds the agent's accumulator:
+
+```json
+{
+  "kind": "message",
+  "payload": {
+    "headStartMessages": [
+      { "id": "u1", "role": "user", "parts": [/* ... */] },
+      { "id": "a1", "role": "assistant", "parts": [/* ... */] }
+    ],
+    "chatId": "conversation-123",
+    "trigger": "handover-prepare",
+    "metadata": { "userId": "user-456" }
+  }
+}
+```
+
+Two reasons this exception is safe:
+
+1. **The route handler runs against the customer's own HTTP endpoint**, not `/realtime/v1/sessions/{id}/in/append`. The 512 KiB body cap on the realtime route doesn't apply.
+2. **`headStartMessages` is only honored on `trigger: "handover-prepare"`**. The runtime ignores the field on every other trigger — the one-message-per-record rule still holds for normal turns.
+
+After turn 1 completes, the snapshot is written and turns 2+ run as a normal single-message-per-record chat.
+
+## Pending and steering messages
+
+You can send messages while the agent is still streaming a response. These are **pending messages** — the agent receives them mid-turn and can inject them between tool-call steps.
+
+The wire format is identical to a normal `kind: "message"` send — same `.in` channel, single `message` field. The difference is timing. What happens depends on the agent's `pendingMessages` configuration:
+
+- **With `pendingMessages.shouldInject`**: the message is injected into the model's context at the next `prepareStep` boundary. The agent sees it and can adjust its behavior mid-response.
+- **Without `pendingMessages` config**: the message queues for the next turn.
+
+See [Pending Messages](/ai-chat/pending-messages) for how to configure the agent side.
+
+<Note>
+  Unlike a normal `sendMessage`, pending messages should **not** cancel the active stream subscription. Keep reading — the agent incorporates the message into the same turn or queues it for the next one.
+</Note>
+
+## Continuations
+
+A run can end for several reasons: idle timeout, max turns reached, `chat.requestUpgrade()`, crash, or cancellation. When this happens, the session row stays alive — only the run is gone. The next message you append to `.in` automatically triggers a fresh run on the same session.
+
+**Clients send the wire shape exactly as a normal `submit-message`** — the server detects the absent run and handles the continuation itself:
+
+```json
+{
+  "kind": "message",
+  "payload": {
+    "message": {
+      "id": "u-42",
+      "role": "user",
+      "parts": [{ "type": "text", "text": "Where were we?" }]
+    },
+    "chatId": "conversation-123",
+    "trigger": "submit-message",
+    "metadata": { "userId": "user-456" }
+  }
+}
+```
+
+POST to the same `/realtime/v1/sessions/{sessionId}/in/append` URL with the same `publicAccessToken` you've been using — both stay valid across runs. The server detects the absent run, triggers a new one on the session's `triggerConfig`, and the agent boots, reads the snapshot from the prior run's last turn, replays any tail, and continues. Only `runId` changes — the new run's id is encoded in the next refreshed `publicAccessToken`'s `read:runs:{runId}` scope.
+
+<Note>
+  **You don't need to track `runId` or set `continuation: true` / `previousRunId` yourself.** The server detects continuation when the prior run is in a terminal state and sets those fields on the new run's boot payload automatically. The `continuation` and `previousRunId` fields on `ChatTaskWirePayload` are informational — used internally by the agent's boot path, never required from the client.
+</Note>
+
+<Note>
+  **`onChatStart` does NOT fire on continuation runs.** The hook is once-per-chat — it fires only on the chat's very first user message. Customers who want per-turn setup that also runs on continuation turns should use `onTurnStart` instead.
+</Note>
+
+<Tip>
+  This is how [version upgrades](/ai-chat/patterns/version-upgrades) work transparently — the agent calls `chat.requestUpgrade()`, the run exits, and the client's next message triggers a continuation on the new version. Same session, new run, same snapshot.
+</Tip>
+
+## Closing the conversation
+
+When the user is done with the conversation, close the session:
+
+```
+POST /api/v1/sessions/{sessionId}/close
+Authorization: Bearer <secret-key-or-jwt>
+Content-Type: application/json
+
+{ "reason": "user-ended" }
+```
+
+The body is optional — `{}` (or no body at all) closes the session with no reason set. If provided, `reason` is a free-form string up to 256 characters used for dashboard / audit display. Closing is **idempotent**: re-calling on an already-closed session returns the existing row without clobbering the original `closedAt` / `closedReason`.
+
+A long-running chat that's just between turns is a **live** session, not a closed one — don't close it prematurely. Once closed, the session cannot be reopened; use a different `externalId` if the user wants to start fresh.
+
+## Session state
+
+A client needs to track per-conversation:
+
+| Field | Description |
+| --- | --- |
+| `sessionId` | Durable session ID (`session_*`). Stable for the life of the conversation. |
+| `chatId` | Your stable conversation ID (passed as `externalId` on create). |
+| `runId` | Current run ID. Changes when a run ends and a continuation starts. Only needed if you want to display it. |
+| `publicAccessToken` | JWT for session access. Stable across runs; replace it whenever a `turn-complete` control record carries the optional `public-access-token` header. |
+| `lastEventId` | Last `record.seq_num` received on `.out`. Use to resume mid-stream. |
+
+`sessionId`, `chatId`, and `publicAccessToken` are durable. `runId` is live-run state that refreshes on each new run. On reload, you only need `sessionId` + `publicAccessToken` + `lastEventId` to resume — `runId` is a hint that can be `null` when no run is active.
+
+## Authentication
+
+| Operation | Auth |
+| --- | --- |
+| Create session (`POST /api/v1/sessions`) | Secret API key, or JWT with `write:sessions` super-scope plus a matching `tasks:{taskIdentifier}` scope |
+| Close session (`POST /api/v1/sessions/{id}/close`) | Secret API key, or JWT with `admin:sessions:{id}` / `admin:sessions` super-scope |
+| `.in` append | The session's `publicAccessToken` (carries `write:sessions:{id}`) |
+| `.out` subscribe | The session's `publicAccessToken` (carries `read:sessions:{id}`) |
+
+The `publicAccessToken` returned in the body of `POST /api/v1/sessions` carries both `read:sessions:{externalId}` and `write:sessions:{externalId}` and is **the only token you need** for every `.in`/`.out` operation thereafter. A token minted on the externalId form authorizes both the externalId and the friendlyId URL forms on every read and write route, so use whichever URL form your client already has on hand.
+
+<Warning>
+  **Don't use the `x-trigger-jwt` header from `POST /api/v1/tasks/{taskId}/trigger`.** That header carries `read:runs:{runId}` + `write:inputStreams:{runId}` — run-scoped scopes, not session-scoped. It cannot subscribe to `.out` or append to `.in`. Always use the `publicAccessToken` from the session-create response body.
+</Warning>
+
+## FAQ
+
+<Expandable title="After sending `kind: stop`, can I immediately send the next message?">
+Yes. `.in` records are processed in arrival order — the agent's stop handler aborts the in-flight `streamText`, emits a `turn-complete` control record, and reads the next record. You don't have to wait for `turn-complete` on the wire before posting the next `.in/append`. In practice you usually do anyway, because your UI is gated on the stream coming back to ready.
+</Expandable>
+
+<Expandable title="What's the format of the optional `X-Part-Id` header?">
+Any opaque ASCII string up to ~64 characters. The built-in clients pass a `nanoid(7)` (e.g. `"V1StGXR"`) generated per request. The server uses it as a per-record idempotency key — re-POSTing the same body with the same `X-Part-Id` produces a single S2 record. If you don't send the header, the server generates one for you and idempotency is per-request only.
+</Expandable>
+
+<Expandable title="What happens on rate-limit (429)?">
+The `.in/append` route returns standard rate-limit response headers (`x-ratelimit-limit`, `x-ratelimit-remaining`, `x-ratelimit-reset` — Unix ms epoch when the bucket refills). On `429`, back off until `x-ratelimit-reset` and retry with the same `X-Part-Id` to remain idempotent. Default per-environment limits are generous (millions of requests/window); you'll typically only hit this with runaway client loops.
+</Expandable>
+
+<Expandable title="How do I tell from the `.out` stream that a run has ended (vs idled between turns)?">
+You don't need to. There's no `trigger:run-ended` chunk. The protocol is designed so the client doesn't track run lifecycle:
+
+- A `turn-complete` control record means **the turn finished**, not that the run is gone. The run may still be alive, idle-waiting for the next `.in` record, or it may have suspended / exited shortly after.
+- When you POST the next message to `.in/append`, the server figures out whether the existing run can pick it up or whether to spawn a continuation. Either way you get streamed responses on the same `.out` URL.
+
+If you genuinely need the live `runId` (for displaying the dashboard link, say), read it from the latest `turn-complete` control record's refreshed `public-access-token` header — the JWT's `read:runs:{runId}` scope encodes it. Or call `GET /api/v1/sessions/{sessionId}` (omitted from this page; see the Sessions API reference) to read `currentRunId`.
+</Expandable>
+
+<Expandable title="Does the `seq_num` reset across continuations or runs?">
+No. `seq_num` is monotonic across the entire session — turn 1 might emit seq 0–9, turn 2 picks up at seq 10+, and a continuation run on the same session continues numbering from where the prior run left off. A single `Last-Event-ID` cursor is sufficient to resume across turns and runs.
+</Expandable>
+
+<Expandable title="What's the maximum size of a single `.in/append` body?">
+512 KiB. A typical `kind: "message"` is a few KB. If you're brushing the cap you're shipping more than one message per record, which the protocol forbids. The headStart path (`trigger: "handover-prepare"`) sends through the customer's own HTTP route handler, not `.in/append`, so the cap doesn't apply there.
+</Expandable>
+
+## See also
+
+- [`TriggerChatTransport`](/ai-chat/frontend) — Built-in browser transport (implements this protocol)
+- [`AgentChat`](/ai-chat/server-chat) — Built-in server-side client
+- [Persistence and replay](/ai-chat/patterns/persistence-and-replay) — How the snapshot + replay model works end-to-end
+- [Lifecycle hooks](/ai-chat/lifecycle-hooks) — What the agent does on each event
+- [Version upgrades](/ai-chat/patterns/version-upgrades) — How `chat.requestUpgrade()` uses continuations
diff --git a/docs/ai-chat/compaction.mdx b/docs/ai-chat/compaction.mdx
new file mode 100644
index 00000000000..3ab280a44f0
--- /dev/null
+++ b/docs/ai-chat/compaction.mdx
@@ -0,0 +1,411 @@
+---
+title: "Compaction"
+sidebarTitle: "Compaction"
+description: "Automatic context compaction to keep long conversations within token limits."
+---
+
+import RcBanner from "/snippets/ai-chat-rc-banner.mdx";
+
+<RcBanner />
+
+## Overview
+
+Long conversations accumulate tokens across turns. Eventually the context window fills up, causing errors or degraded responses. Compaction solves this by automatically summarizing the conversation when token usage exceeds a threshold, then using that summary as the context for future turns.
+
+The `compaction` option on `chat.agent()` handles this in both paths:
+
+- **Between tool-call steps** (inner loop) — via the AI SDK's `prepareStep`, compaction runs between tool calls within a single turn
+- **Between turns** (outer loop) — for single-step responses with no tool calls, where `prepareStep` never fires
+
+## Basic usage
+
+Provide `shouldCompact` to decide when to compact and `summarize` to generate the summary:
+
+```ts
+import { chat } from "@trigger.dev/sdk/ai";
+import { streamText, generateText, stepCountIs } from "ai";
+import { anthropic } from "@ai-sdk/anthropic";
+
+export const myChat = chat.agent({
+  id: "my-chat",
+  compaction: {
+    shouldCompact: ({ totalTokens }) => (totalTokens ?? 0) > 80_000,
+    summarize: async ({ messages }) => {
+      const result = await generateText({
+        model: anthropic("claude-haiku-4-5"),
+        messages: [...messages, { role: "user", content: "Summarize this conversation concisely." }],
+      });
+      return result.text;
+    },
+  },
+  run: async ({ messages, signal }) => {
+    return streamText({
+      ...chat.toStreamTextOptions({ registry }),
+      messages,
+      abortSignal: signal,
+      stopWhen: stepCountIs(15),
+    });
+  },
+});
+```
+
+<Note>
+  The `prepareStep` for inner-loop compaction is automatically injected when you spread `chat.toStreamTextOptions()` into your `streamText` call. If you provide your own `prepareStep` after the spread, it overrides the auto-injected one.
+</Note>
+
+## How it works
+
+After each turn completes:
+
+1. `shouldCompact` is called with the current token usage
+2. If it returns `true`, `summarize` generates a summary from the model messages
+3. The **model messages** (sent to the LLM) are replaced with the summary
+4. The **UI messages** (persisted and displayed) are preserved by default
+5. The `onCompacted` hook fires if configured
+
+On the next turn, the LLM receives the compact summary instead of the full history — dramatically reducing token usage while preserving context.
+
+## Customizing what gets persisted
+
+By default, compaction only affects model messages — UI messages stay intact so users see the full conversation after a page refresh. You can customize this with `compactUIMessages`:
+
+### Summary + recent messages
+
+Replace older messages with a summary but keep the last few exchanges visible:
+
+```ts
+import { generateId } from "ai";
+
+export const myChat = chat.agent({
+  id: "my-chat",
+  compaction: {
+    shouldCompact: ({ totalTokens }) => (totalTokens ?? 0) > 80_000,
+    summarize: async ({ messages }) => {
+      return generateText({
+        model: anthropic("claude-haiku-4-5"),
+        messages: [...messages, { role: "user", content: "Summarize." }],
+      }).then((r) => r.text);
+    },
+    compactUIMessages: ({ uiMessages, summary }) => [
+      {
+        id: generateId(),
+        role: "assistant",
+        parts: [{ type: "text", text: `[Conversation summary]\n\n${summary}` }],
+      },
+      ...uiMessages.slice(-4), // Keep the last 4 messages
+    ],
+  },
+  run: async ({ messages, signal }) => {
+    return streamText({ model: anthropic("claude-sonnet-4-5"), messages, abortSignal: signal });
+  },
+});
+```
+
+### Flatten to summary only
+
+Replace all messages with just the summary, matching what the LLM sees:
+
+```ts
+compactUIMessages: ({ summary }) => [
+  {
+    id: generateId(),
+    role: "assistant",
+    parts: [{ type: "text", text: `[Conversation summary]\n\n${summary}` }],
+  },
+],
+```
+
+## Customizing model messages
+
+By default, model messages are replaced with a single summary message. Use `compactModelMessages` to customize what the LLM sees after compaction:
+
+### Summary + recent context
+
+Keep the last few model messages so the LLM has recent detail alongside the summary:
+
+```ts
+compactModelMessages: ({ modelMessages, summary }) => [
+  { role: "user", content: summary },
+  ...modelMessages.slice(-2), // Keep last exchange for detail
+],
+```
+
+### Keep tool results
+
+Preserve tool-call results so the LLM remembers what tools returned:
+
+```ts
+compactModelMessages: ({ modelMessages, summary }) => [
+  { role: "user", content: summary },
+  ...modelMessages.filter((m) => m.role === "tool"),
+],
+```
+
+## shouldCompact event
+
+The `shouldCompact` callback receives context about the current state:
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `messages` | `ModelMessage[]` | Current model messages |
+| `totalTokens` | `number \| undefined` | Total tokens from the triggering step/turn |
+| `inputTokens` | `number \| undefined` | Input tokens |
+| `outputTokens` | `number \| undefined` | Output tokens |
+| `usage` | `LanguageModelUsage` | Full usage object |
+| `totalUsage` | `LanguageModelUsage` | Cumulative usage across all turns |
+| `chatId` | `string` | Chat session ID |
+| `turn` | `number` | Current turn (0-indexed) |
+| `clientData` | `unknown` | Custom data from the frontend |
+| `source` | `"inner" \| "outer"` | Whether this is between steps or between turns |
+| `steps` | `CompactionStep[]` | Steps array (inner loop only) |
+| `stepNumber` | `number` | Step index (inner loop only) |
+
+## summarize event
+
+The `summarize` callback receives similar context:
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `messages` | `ModelMessage[]` | Messages to summarize |
+| `usage` | `LanguageModelUsage` | Usage from the triggering step/turn |
+| `totalUsage` | `LanguageModelUsage` | Cumulative usage |
+| `chatId` | `string` | Chat session ID |
+| `turn` | `number` | Current turn |
+| `clientData` | `unknown` | Custom data from the frontend |
+| `source` | `"inner" \| "outer"` | Where compaction is running |
+| `stepNumber` | `number` | Step index (inner loop only) |
+
+## onCompacted hook
+
+Track compaction events for logging, billing, or analytics:
+
+```ts
+export const myChat = chat.agent({
+  id: "my-chat",
+  compaction: { ... },
+  onCompacted: async ({ summary, totalTokens, messageCount, chatId, turn }) => {
+    logger.info("Compacted", { chatId, turn, totalTokens, messageCount });
+    await db.compactionLog.create({
+      data: { chatId, summary, totalTokens, messageCount },
+    });
+  },
+  run: async ({ messages, signal }) => {
+    return streamText({ model: anthropic("claude-sonnet-4-5"), messages, abortSignal: signal });
+  },
+});
+```
+
+## User-initiated compaction
+
+Sometimes you want the user to decide when to compact — a "Summarize conversation" button, a `/compact` slash command, or a settings toggle. Wire this up with [actions](/ai-chat/actions): the frontend sends a typed action, `onAction` runs the summary, and `chat.history.set()` replaces the conversation.
+
+### Backend
+
+Define a `compact` action that reuses your existing `summarize` function:
+
+```ts
+import { chat } from "@trigger.dev/sdk/ai";
+import { streamText, generateText, generateId, convertToModelMessages } from "ai";
+import { anthropic } from "@ai-sdk/anthropic";
+import { z } from "zod";
+
+// Reusable summarize fn — also used by the automatic compaction config.
+async function summarize(messages: ModelMessage[]) {
+  const result = await generateText({
+    model: anthropic("claude-haiku-4-5"),
+    messages: [...messages, { role: "user", content: "Summarize this conversation concisely." }],
+  });
+  return result.text;
+}
+
+export const myChat = chat.agent({
+  id: "my-chat",
+
+  // Automatic compaction still runs on threshold.
+  compaction: {
+    shouldCompact: ({ totalTokens }) => (totalTokens ?? 0) > 80_000,
+    summarize: async ({ messages }) => summarize(messages),
+  },
+
+  // User-initiated: the frontend sends { type: "compact" }.
+  actionSchema: z.discriminatedUnion("type", [
+    z.object({ type: z.literal("compact") }),
+  ]),
+
+  onAction: async ({ action, uiMessages }) => {
+    if (action.type !== "compact") return;
+
+    const summary = await summarize(convertToModelMessages(uiMessages));
+
+    // Replace the full history with a single summary message.
+    chat.history.set([
+      {
+        id: generateId(),
+        role: "assistant",
+        parts: [{ type: "text", text: `[Conversation summary]\n\n${summary}` }],
+      },
+    ]);
+  },
+
+  run: async ({ messages, signal }) => {
+    return streamText({ model: anthropic("claude-sonnet-4-5"), messages, abortSignal: signal });
+  },
+});
+```
+
+Actions fire `onAction` only (plus `hydrateMessages` if set) — `run()` and `onTurnComplete` do not fire for actions. Persist the compacted state directly inside `onAction` after the `chat.history.set` call. See [Actions](/ai-chat/actions) for the full lifecycle.
+
+### Frontend
+
+Call `transport.sendAction()` from a button or slash command:
+
+```tsx
+import { useTriggerChatTransport } from "@trigger.dev/sdk/chat/react";
+import { useChat } from "@ai-sdk/react";
+
+function ChatView({ chatId }: { chatId: string }) {
+  const transport = useTriggerChatTransport({
+    task: "my-chat",
+    accessToken: ({ chatId }) => mintChatAccessToken(chatId),
+    startSession: ({ chatId, clientData }) =>
+      startChatSession({ chatId, clientData }),
+  });
+  const { messages } = useChat({ id: chatId, transport });
+
+  return (
+    <>
+      <button onClick={() => transport.sendAction(chatId, { type: "compact" })}>
+        Summarize conversation
+      </button>
+      {messages.map(/* ... */)}
+    </>
+  );
+}
+```
+
+The call returns as soon as the backend accepts the action. Because `onAction` replaces the history with the summary (via `chat.history.set()`), `useChat` receives the new state via the normal turn-complete flow — the UI updates automatically.
+
+### Indicating compaction in the UI
+
+For "Compacting..." feedback while the summary generates, append a transient data part from `onAction` via `chat.stream.append()`:
+
+```ts
+onAction: async ({ action, uiMessages }) => {
+  if (action.type !== "compact") return;
+
+  chat.stream.append({ type: "data-compaction", data: { status: "compacting" } });
+  const summary = await summarize(convertToModelMessages(uiMessages));
+  chat.stream.append({ type: "data-compaction", data: { status: "complete" } });
+
+  chat.history.set([ /* ... */ ]);
+},
+```
+
+See [Raw streaming with `chat.stream`](/ai-chat/backend#raw-streaming-with-chat-stream) for the full API.
+
+## Using with chat.createSession()
+
+Pass the same `compaction` config to `chat.createSession()`. The session handles outer-loop compaction automatically inside `turn.complete()`:
+
+```ts
+const session = chat.createSession(payload, {
+  signal,
+  idleTimeoutInSeconds: 60,
+  timeout: "1h",
+  compaction: {
+    shouldCompact: ({ totalTokens }) => (totalTokens ?? 0) > 80_000,
+    summarize: async ({ messages }) =>
+      generateText({ model: anthropic("claude-haiku-4-5"), messages }).then((r) => r.text),
+    compactUIMessages: ({ uiMessages, summary }) => [
+      { id: generateId(), role: "assistant",
+        parts: [{ type: "text", text: `[Summary]\n\n${summary}` }] },
+      ...uiMessages.slice(-4),
+    ],
+  },
+});
+
+for await (const turn of session) {
+  const result = streamText({
+    model: anthropic("claude-sonnet-4-5"),
+    messages: turn.messages,
+    abortSignal: turn.signal,
+    stopWhen: stepCountIs(15),
+  });
+
+  await turn.complete(result);
+  // Outer-loop compaction runs automatically after complete()
+
+  await db.chat.update({
+    where: { id: turn.chatId },
+    data: { messages: turn.uiMessages },
+  });
+}
+```
+
+## Using with raw tasks (MessageAccumulator)
+
+Pass `compaction` to the `MessageAccumulator` constructor. Use `prepareStep()` for inner-loop compaction and `compactIfNeeded()` for the outer loop:
+
+```ts
+const conversation = new chat.MessageAccumulator({
+  compaction: {
+    shouldCompact: ({ totalTokens }) => (totalTokens ?? 0) > 80_000,
+    summarize: async ({ messages }) =>
+      generateText({ model: anthropic("claude-haiku-4-5"), messages }).then((r) => r.text),
+    compactUIMessages: ({ summary }) => [
+      { id: generateId(), role: "assistant",
+        parts: [{ type: "text", text: `[Summary]\n\n${summary}` }] },
+    ],
+  },
+});
+
+for (let turn = 0; turn < 100; turn++) {
+  const messages = await conversation.addIncoming(payload.messages, payload.trigger, turn);
+
+  const result = streamText({
+    model: anthropic("claude-sonnet-4-5"),
+    messages,
+    prepareStep: conversation.prepareStep(), // Inner-loop compaction
+    stopWhen: stepCountIs(15),
+  });
+
+  const response = await chat.pipeAndCapture(result);
+  if (response) await conversation.addResponse(response);
+
+  // Outer-loop compaction
+  const usage = await result.totalUsage;
+  await conversation.compactIfNeeded(usage, { chatId: payload.chatId, turn });
+
+  await db.chat.update({ data: { messages: conversation.uiMessages } });
+  await chat.writeTurnComplete();
+}
+```
+
+## Fully manual compaction
+
+For maximum control, use `chat.compact()` directly inside a custom `prepareStep`:
+
+```ts
+prepareStep: async ({ messages: stepMessages, steps }) => {
+  const result = await chat.compact(stepMessages, steps, {
+    threshold: 80_000,
+    summarize: async (msgs) =>
+      generateText({ model: anthropic("claude-haiku-4-5"), messages: msgs }).then((r) => r.text),
+  });
+  return result.type === "skipped" ? undefined : result;
+},
+```
+
+Or use the `chat.compactionStep()` factory:
+
+```ts
+prepareStep: chat.compactionStep({
+  threshold: 80_000,
+  summarize: async (msgs) =>
+    generateText({ model: anthropic("claude-haiku-4-5"), messages: msgs }).then((r) => r.text),
+}),
+```
+
+<Note>
+  The fully manual APIs only handle inner-loop compaction (between tool-call steps). For outer-loop coverage, use the `compaction` option on `chat.agent()`, `chat.createSession()`, or `MessageAccumulator`.
+</Note>
diff --git a/docs/ai-chat/error-handling.mdx b/docs/ai-chat/error-handling.mdx
new file mode 100644
index 00000000000..9eb6ec1f0e0
--- /dev/null
+++ b/docs/ai-chat/error-handling.mdx
@@ -0,0 +1,415 @@
+---
+title: "Error handling"
+sidebarTitle: "Error handling"
+description: "How errors flow through chat.agent — stream errors, hook errors, run failures — and how to recover."
+---
+
+import RcBanner from "/snippets/ai-chat-rc-banner.mdx";
+
+<RcBanner />
+
+`chat.agent` errors fall into four layers, each with different recovery semantics. The default behavior is **conversation-preserving**: a thrown error in a hook or `run()` does not kill the chat. The current turn ends with an error chunk, and the agent waits for the user's next message.
+
+## Error layers at a glance
+
+| Layer | Source | Default behavior | Recovery |
+|-------|--------|------------------|----------|
+| **Stream** | `streamText` errors mid-response (rate limits, model API failures) | `onError` callback converts to error chunk | Sanitize message via `uiMessageStreamOptions.onError` |
+| **Hook / turn** | Throws in `onValidateMessages`, `onTurnStart`, `run`, etc. | Error chunk + turn-complete written to stream; conversation continues | Catch in your hook, or rely on default |
+| **Run** | Unhandled exception escapes the run | Run fails. No retry by default. Standard task `onFailure` fires. | `onFailure` task hook |
+| **Frontend** | Stream delivers `{ type: "error", errorText }` | `useChat` exposes via `error` field and `onError` callback | Show toast, retry button, etc. |
+
+## Stream errors mid-turn
+
+When the model API errors mid-response (rate limits, network failures, malformed output), the AI SDK's `streamText` calls the `onError` callback. Use `uiMessageStreamOptions.onError` to convert the error to a user-friendly string. The string is sent to the frontend as an error chunk.
+
+```ts
+import { chat } from "@trigger.dev/sdk/ai";
+
+export const myChat = chat.agent({
+  id: "my-chat",
+  uiMessageStreamOptions: {
+    onError: (error) => {
+      console.error("Stream error:", error);
+      if (error instanceof Error && error.message.includes("rate limit")) {
+        return "Rate limited. Please wait a moment and try again.";
+      }
+      if (error instanceof Error && error.message.includes("context_length")) {
+        return "This conversation is too long. Please start a new chat.";
+      }
+      return "Something went wrong while generating a response. Please try again.";
+    },
+  },
+  run: async ({ messages, signal }) => {
+    return streamText({ model: anthropic("claude-sonnet-4-5"), messages, abortSignal: signal });
+  },
+});
+```
+
+<Note>
+  The string you return from `onError` is what gets shown to the user. Do not return raw error messages — they may leak internal details (API keys, stack traces, etc.).
+</Note>
+
+The frontend receives this as an error chunk that `useChat` exposes via its `error` field:
+
+```tsx
+const { messages, error } = useChat({ transport });
+
+{error && <div className="text-red-600">{error.message}</div>}
+```
+
+## Hook and turn errors
+
+If any lifecycle hook (`onValidateMessages`, `onChatStart`, `onTurnStart`, `hydrateMessages`, `onAction`, `prepareMessages`, `onBeforeTurnComplete`, `onTurnComplete`) or `run()` throws an unhandled exception, the turn loop catches it:
+
+1. Writes `{ type: "error", errorText: error.message }` to the stream
+2. Writes a turn-complete chunk to close the turn
+3. Waits for the next user message
+
+The conversation stays alive. The user can send another message and continue.
+
+```ts
+export const myChat = chat.agent({
+  id: "my-chat",
+  onTurnStart: async ({ chatId, uiMessages }) => {
+    // If this throws, the turn ends with an error chunk
+    // and the agent waits for the next message
+    await db.chat.update({ where: { id: chatId }, data: { messages: uiMessages } });
+  },
+  run: async ({ messages, signal }) => {
+    return streamText({ model: anthropic("claude-sonnet-4-5"), messages, abortSignal: signal });
+  },
+});
+```
+
+### Catching errors in your own hooks
+
+For granular control, wrap your hook code in try/catch and decide what to do. Common patterns:
+
+```ts
+onValidateMessages: async ({ messages }) => {
+  try {
+    return await validateUIMessages({ messages, tools: chatTools });
+  } catch (err) {
+    // Log to your error tracking service
+    Sentry.captureException(err);
+    // Throw a user-facing error message — this becomes the error chunk
+    throw new Error("Your message contains invalid data and could not be sent.");
+  }
+},
+```
+
+<Tip>
+  The `Error.message` you throw is sent verbatim to the frontend as the error chunk's `errorText`. Use messages safe for end users.
+</Tip>
+
+### Catching errors inside `run()`
+
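+`run()` is your code — wrap it in try/catch for full control. This is the right place to save partial state to your DB before the error chunk goes out. Note that a try/catch around `streamText` only catches errors thrown synchronously during setup; errors that happen mid-stream go through `uiMessageStreamOptions.onError` instead: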
+
+```ts
+run: async ({ messages, chatId, signal }) => {
+  try {
+    return streamText({ model: anthropic("claude-sonnet-4-5"), messages, abortSignal: signal });
+  } catch (err) {
+    // Save the failed turn for debugging / undo
+    await db.failedTurn.create({
+      data: {
+        chatId,
+        error: err instanceof Error ? err.message : String(err),
+        messages,
+      },
+    });
+    throw err; // Re-throw to trigger the error chunk
+  }
+},
+```
+
+## Saving error state to your DB
+
+To persist errors for debugging or undo, use `onTurnComplete` (which fires even after errors) or the standard task `onFailure` hook.
+
+### Using `onTurnComplete`
+
+`onTurnComplete` fires after every turn — successful **or** errored. The `responseMessage` will be undefined or partial on errors. Use this to mark the turn as failed:
+
+```ts
+onTurnComplete: async ({ chatId, uiMessages, responseMessage, stopped }) => {
+  // Persist the messages regardless of error state
+  await db.chat.update({
+    where: { id: chatId },
+    data: {
+      messages: uiMessages,
+      // Mark the chat as errored if no response message
+      lastTurnStatus: responseMessage ? "ok" : stopped ? "stopped" : "errored",
+    },
+  });
+},
+```
+
+### Using the standard `onFailure` task hook
+
+For run-level failures (the entire run dies), use the standard task `onFailure` hook. This fires when the run terminates with an unhandled exception:
+
+```ts
+chat.agent({
+  id: "my-chat",
+  onFailure: async ({ error, ctx }) => {
+    // Log run-level failure to your monitoring service
+    await monitoring.recordRunFailure({
+      runId: ctx.run.id,
+      chatId: ctx.run.tags.find(t => t.startsWith("chat:"))?.slice(5),
+      error: error.message,
+    });
+  },
+  run: async ({ messages, signal }) => {
+    return streamText({ ... });
+  },
+});
+```
+
+<Info>
+  `chat.agent` uses `retry: { maxAttempts: 1 }` internally, so the run never retries on failure. To add run-level retries, wrap the agent in a parent task or implement your own retry logic in the frontend (re-send the message).
+</Info>
+
+## Recovery patterns
+
+### Pattern 1: Undo to last successful response
+
+A common pattern is to let the user "undo" the failed turn and try again. Combine `chat.history.rollbackTo` with a custom action:
+
+```ts
+chat.agent({
+  id: "my-chat",
+  actionSchema: z.discriminatedUnion("type", [
+    z.object({ type: z.literal("undo") }),
+  ]),
+  onAction: async ({ action, uiMessages }) => {
+    if (action.type === "undo") {
+      // Find the last user message and roll back to it
+      const lastUserIdx = [...uiMessages].reverse().findIndex(m => m.role === "user");
+      if (lastUserIdx !== -1) {
+        const targetIdx = uiMessages.length - 1 - lastUserIdx - 1;
+        const target = uiMessages[targetIdx];
+        if (target) chat.history.rollbackTo(target.id);
+      }
+    }
+  },
+  run: async ({ messages, signal }) => {
+    return streamText({ ... });
+  },
+});
+```
+
+On the frontend, show an "Undo" button when an error occurs:
+
+```tsx
+{error && (
+  <button onClick={() => transport.sendAction(chatId, { type: "undo" })}>
+    Undo and try again
+  </button>
+)}
+```
+
+### Pattern 2: Retry the last message
+
+For transient errors (network blips, rate limits), the simplest recovery is to re-send the last user message. The AI SDK's `useChat` provides `regenerate()`:
+
+```tsx
+const { messages, error, regenerate } = useChat({ transport });
+
+{error && (
+  <button onClick={() => regenerate()}>Retry</button>
+)}
+```
+
+`regenerate()` removes the last assistant response and re-sends. Combined with `onValidateMessages` or `hydrateMessages`, you can reload the canonical state from your DB before retrying.
+
+### Pattern 3: Save partial responses
+
+When a stream errors mid-response, the `responseMessage` in `onBeforeTurnComplete` and `onTurnComplete` contains the partial output. Save it as a "draft" so the user can see what was generated before the error:
+
+```ts
+onBeforeTurnComplete: async ({ chatId, responseMessage, stopped }) => {
+  if (responseMessage && responseMessage.parts.length > 0) {
+    // Save partial response — user can manually accept or discard
+    await db.partialResponse.create({
+      data: {
+        chatId,
+        message: responseMessage,
+        reason: stopped ? "stopped" : "errored",
+      },
+    });
+  }
+},
+```
+
+### Pattern 4: Fall back to a different model
+
+If the primary model errors, try a fallback model in the same turn:
+
+```ts
+run: async ({ messages, signal }) => {
+  try {
+    return streamText({
+      model: anthropic("claude-sonnet-4-5"),
+      messages,
+      abortSignal: signal,
+      stopWhen: stepCountIs(15),
+    });
+  } catch (err) {
+    console.warn("Primary model failed, falling back:", err);
+    return streamText({
+      model: anthropic("claude-sonnet-4-6"),
+      messages,
+      abortSignal: signal,
+      stopWhen: stepCountIs(15),
+    });
+  }
+},
+```
+
+<Note>
+  This only catches errors thrown synchronously by `streamText` setup. Errors that happen mid-stream go through `uiMessageStreamOptions.onError`, not your try/catch.
+</Note>
+
+## What gets written to the stream on error
+
+When an error occurs at any layer, the frontend's `UIMessageChunk` stream surfaces an error chunk:
+
+```json
+{ "type": "error", "errorText": "Rate limited. Please wait a moment and try again." }
+```
+
+A `turn-complete` control record follows on `session.out` (header-form, not a data chunk — see [`turn-complete` control record](/ai-chat/client-protocol#turn-complete-control-record) for the wire format) to mark the turn as done.
+
+The AI SDK's `useChat` processes this and:
+
+1. Sets `useChat`'s `error` field to an `Error` with `message = errorText`
+2. Calls the user's `onError` callback (if set)
+3. Marks the turn as complete (`status` returns to `"ready"`)
+
+```tsx
+const { messages, error, status } = useChat({
+  transport,
+  onError: (err) => {
+    toast.error(err.message);
+  },
+});
+```
+
+## Frontend error handling
+
+### Showing the error to the user
+
+```tsx
+function Chat() {
+  const transport = useTriggerChatTransport({
+    task: "my-chat",
+    accessToken: ({ chatId }) => mintChatAccessToken(chatId),
+    startSession: ({ chatId, clientData }) =>
+      startChatSession({ chatId, clientData }),
+  });
+  const { messages, error, sendMessage } = useChat({ transport });
+
+  return (
+    <div>
+      {messages.map(m => /* ... */)}
+      {error && (
+        <div className="rounded border border-red-300 bg-red-50 p-3">
+          <p className="text-red-700">{error.message}</p>
+        </div>
+      )}
+      <form onSubmit={(e) => { e.preventDefault(); sendMessage(/* ... */); }}>
+        {/* ... */}
+      </form>
+    </div>
+  );
+}
+```
+
+### Distinguishing error types
+
+The `errorText` is just a string, so distinguish error types via prefixes or codes:
+
+```ts
+// Backend
+uiMessageStreamOptions: {
+  onError: (error) => {
+    if (error.message.includes("rate limit")) return "RATE_LIMIT: Please wait and try again.";
+    if (error.message.includes("context_length")) return "CONTEXT_TOO_LONG: Start a new chat.";
+    return "UNKNOWN: Something went wrong.";
+  },
+},
+```
+
+```tsx
+// Frontend
+{error?.message.startsWith("RATE_LIMIT") && <RateLimitNotice />}
+{error?.message.startsWith("CONTEXT_TOO_LONG") && <NewChatPrompt />}
+```
+
+<Tip>
+  For richer error structures, use [`chat.response.write()`](/ai-chat/backend#custom-data-parts) with a custom `data-error` part type. This lets you ship structured error metadata (codes, retry hints, etc.) instead of stringly-typed messages.
+</Tip>
+
+### Errors from `accessToken` / `startSession`
+
+If your `accessToken` or `startSession` callback throws (auth failure, DB write failure, network error), the rejection surfaces through `useChat`'s `error` state — same as a stream error. The transport doesn't retry the callback automatically; you are responsible for handling it.
+
+```tsx
+const transport = useTriggerChatTransport({
+  task: "my-chat",
+  accessToken: async ({ chatId }) => {
+    try {
+      return await mintChatAccessToken(chatId);
+    } catch (err) {
+      // Customer's server action failed (e.g. user lost auth).
+      // Re-throw to surface as a useChat error, or return a sentinel
+      // your UI can detect and prompt re-auth.
+      throw new Error(`AUTH_REFRESH: ${err.message}`);
+    }
+  },
+  startSession: ({ chatId, clientData }) =>
+      startChatSession({ chatId, clientData }),
+});
+```
+
+`startSession` failures most commonly mean your authorization layer rejected the request (no plan, quota exceeded, user not allowed to chat with this agent). Your server should produce a meaningful error message; the transport propagates it verbatim to `useChat`'s `error` state.
+
+## Run-level retries
+
+`chat.agent` uses `retry: { maxAttempts: 1 }` — the run **never retries** on unhandled failure. This is intentional: each turn is conversation-preserving, so a true run failure is severe and shouldn't silently retry (which could send duplicate API calls or mutate state twice).
+
+To add retry-like behavior:
+
+- **Per-turn retries**: handle inside `run()` with try/catch and a fallback model
+- **Per-message retries**: re-send from the frontend (call `sendMessage` or `regenerate` again)
+- **Whole-run retries**: wrap `chat.agent` with a parent task that has `retry` configured, and call the agent's task internally
+
+## Best practices
+
+1. **Always set `uiMessageStreamOptions.onError`** to sanitize stream errors before they reach the user.
+2. **Persist messages in `onTurnStart`** so a mid-stream failure still leaves the user's message visible.
+3. **Use `onTurnComplete` to mark turn status** in your DB (`ok` / `errored` / `stopped`).
+4. **Don't throw raw errors with internal details** in hooks — catch, log, then throw a sanitized user-facing message.
+5. **Provide an undo or retry affordance** in the UI when errors occur.
+6. **Use `onFailure` for run-level monitoring** (Sentry, monitoring dashboards).
+7. **For known transient errors (rate limits, network)**, consider a fallback model inside `run()` instead of failing the turn.
+
+## `ChatChunkTooLargeError`
+
+A specific run-failing error worth flagging on its own. Anything written through the chat output is one record on the underlying realtime stream, capped at ~1 MiB per record. A single chunk over the cap throws `ChatChunkTooLargeError` (named export from `@trigger.dev/sdk`). The most common trigger is a tool whose result object is large enough to overflow as one `tool-output-available` chunk.
+
+The error carries `chunkType`, `chunkSize`, and `maxSize`. Catch with the `isChatChunkTooLargeError` guard and route oversized values out-of-band.
+
+See [Large payloads in chat.agent](/ai-chat/patterns/large-payloads) for the ID-reference pattern that works around the cap, plus guidance on transient data parts and out-of-band logging.
+
+## See also
+
+- [`uiMessageStreamOptions.onError`](/ai-chat/backend#error-handling-with-onerror) — stream error handler details
+- [Custom actions](/ai-chat/actions) — implement undo/retry actions
+- [`chat.history`](/ai-chat/backend#chat-history) — rollback to a previous message
+- [Large payloads](/ai-chat/patterns/large-payloads) — handling the ~1 MiB per-chunk cap
+- [Database persistence](/ai-chat/patterns/database-persistence) — saving conversation state
+- [Standard task hooks](/tasks/overview) — `onFailure`, `onComplete`, `onWait`, etc.
diff --git a/docs/ai-chat/fast-starts.mdx b/docs/ai-chat/fast-starts.mdx
new file mode 100644
index 00000000000..7310988964c
--- /dev/null
+++ b/docs/ai-chat/fast-starts.mdx
@@ -0,0 +1,582 @@
+---
+title: "Fast starts"
+sidebarTitle: "Fast starts"
+description: "Two ways to cut first-turn TTFC: Preload eagerly triggers the run before the first message; Head Start runs step 1 in your warm server while the agent boots in parallel."
+---
+
+import RcBanner from "/snippets/ai-chat-rc-banner.mdx";
+
+<RcBanner />
+
+The first turn of a brand-new conversation pays for the chat.agent run's cold start: dequeue, process boot, `onPreload` / `onChatStart` hooks, and only then the LLM call. Two features address this from different angles.
+
+## Picking an approach
+
+| | [Preload](#preload) | [Head Start](#head-start) |
+|---|---|---|
+| **What it does** | Eagerly triggers the run before the first message | Runs step 1's LLM call in your warm process while the agent boots in parallel |
+| **First-turn TTFC win** | Hides agent boot if the user *does* send a message | ~50% reduction (LLM TTFB floor); boot fully overlaps with TTFB |
+| **When to fire** | Page load / input focus — your call | First message arrival — automatic |
+| **Cost when user never sends** | Idle compute until the preload window times out | Zero (no run was triggered) |
+| **Requires a warm server process** | No — works for browser-only surfaces | Yes — your route handler runs step 1 |
+| **Requires LLM keys client-side?** | No | No — keys stay in your warm server |
+| **Bundle constraints** | None | Route handler must import schema-only tools (no heavy executes) |
+
+**Pick one, not both.** Running both for the same chat is wasted work — Head Start gates on a real first message, so adding Preload on top eats the idle-compute cost Head Start was avoiding.
+
+**Use Preload** when the chat surface is browser-only, when you don't have a warm Node/Bun/Edge process serving the page, or when you can confidently predict the user *will* send a message (the run never goes idle).
+
+**Use Head Start** when the chat lives behind a warm server (Next.js App Router, Hono, SvelteKit, Workers, etc.) and you want first-turn TTFC down at the LLM TTFB floor without any speculative run.
+
+---
+
+## Preload
+
+Preload eagerly triggers a run for a chat before the first message is sent. Initialization (DB setup, context loading) happens while the user is still typing, reducing first-response latency.
+
+### Frontend
+
+Call `transport.preload(chatId)` to start a run early:
+
+```tsx
+import { useEffect } from "react";
+import { useTriggerChatTransport } from "@trigger.dev/sdk/chat/react";
+import { useChat } from "@ai-sdk/react";
+
+export function Chat({ chatId }) {
+  const transport = useTriggerChatTransport({
+    task: "my-chat",
+    accessToken: ({ chatId }) => mintChatAccessToken(chatId),
+    startSession: ({ chatId, clientData }) =>
+      startChatSession({ chatId, clientData }),
+    clientData: { userId: currentUser.id },
+  });
+
+  // Preload on mount: run starts before the user types anything.
+  // Trigger config (idleTimeoutInSeconds, machine, tags) lives in the
+  // server action that wraps `chat.createStartSessionAction`.
+  useEffect(() => {
+    transport.preload(chatId);
+  }, [chatId]);
+
+  const { messages, sendMessage } = useChat({ id: chatId, transport });
+  // ...
+}
+```
+
+Preload is a no-op if a session already exists for this chatId.
+
+Your `accessToken` callback receives `{ chatId }` and is invoked the same way on preload as on any other refresh — no special branching by purpose. See [TriggerChatTransport options](/ai-chat/reference#triggerchattransport-options).
+
+### Backend
+
+The `onPreload` hook fires immediately. The run then waits for the first message. When the user sends a message, `onChatStart` fires with `preloaded: true` so you can skip work that already ran:
+
+```ts
+export const myChat = chat.agent({
+  id: "my-chat",
+  onPreload: async ({ chatId, clientData }) => {
+    // Eagerly initialize: runs before the first message
+    userContext.init(await loadUser(clientData.userId));
+    await db.chat.create({ data: { id: chatId } });
+  },
+  onChatStart: async ({ preloaded }) => {
+    if (preloaded) return; // Already initialized in onPreload
+    // ... fallback initialization for non-preloaded runs
+  },
+  run: async ({ messages, signal }) => {
+    return streamText({ model: anthropic("claude-sonnet-4-5"), messages, abortSignal: signal });
+  },
+});
+```
+
+With `chat.createSession()` or raw tasks, check `payload.trigger === "preload"` and wait for the first message:
+
+```ts
+if (payload.trigger === "preload") {
+  // Initialize early...
+  const result = await chat.messages.waitWithIdleTimeout({
+    idleTimeoutInSeconds: 60,
+    timeout: "1h",
+  });
+  if (!result.ok) return;
+  currentPayload = result.output;
+}
+```
+
+---
+
+## Head Start
+
+Head Start runs step 1's LLM call in your warm server process while the chat.agent run boots in parallel. The user sees one continuous turn: text first from your server, then a clean handover to the agent for tool execution and any further steps.
+
+`chat.headStart` returns a standard [Web Fetch API](https://developer.mozilla.org/en-US/docs/Web/API/Fetch_API) handler — `(req: Request) => Promise<Response>` — so it slots into any runtime that speaks Web Fetch.
+
+**Verified runtimes:** Node 18+, Bun, Deno, Cloudflare Workers, Vercel (Node and Edge), Netlify (Functions and Edge). The handler uses only `fetch` and Web `ReadableStream` / `TransformStream` (no `node:*` imports), and the S2 streaming dependency picks the right transport for each runtime automatically (HTTP/2 on Node/Deno, HTTP/1.1 on Bun/Workers/browsers).
+
+**Compatible frameworks (native Web Fetch):** Next.js App Router, Hono, SvelteKit, Remix, React Router v7, TanStack Start, Astro, Nitro/Nuxt, Elysia. Mount the handler directly.
+
+**Node-only frameworks (Express, Fastify, Koa):** the handler still works, but the framework gives you a Node `IncomingMessage` instead of a Web `Request`. Use a small adapter — examples in [Mounting in your framework](#mounting-in-your-framework) below.
+
+When the first turn is pure text (no tool calls), the agent run boots and exits without ever calling an LLM. You only pay for what the conversation actually needed.
+
+### Measured TTFC
+
+3 runs each, prompt `"say hi in five words"`, same model both sides (Anthropic Claude Sonnet 4):
+
+| | Without Head Start | With Head Start | Δ |
+| --- | --- | --- | --- |
+| TTFT (avg) | 2801 ms | **1218 ms** | **−57%** |
+| TTFT (range) | 2351–3101 ms | 1201–1252 ms | |
+| Total turn | 4180 ms | 2345 ms | −44% |
+
+With Head Start, time-to-first-text is essentially the LLM TTFB floor (50ms spread). Without it, agent boot + hooks stack before the LLM call, adding 750ms of variance.
+
+### How it works
+
+```mermaid
+sequenceDiagram
+    autonumber
+    participant B as Browser
+    participant H as Route handler<br/>(your warm server)
+    participant T as chat.agent run<br/>(Trigger.dev)
+
+    B->>H: POST first message<br/>(headStart URL)
+
+    par Step 1 + agent boot in parallel
+        H->>H: streamText step 1<br/>(your model, schema-only tools)
+        H-->>B: SSE: step 1 chunks
+    and
+        H->>T: createSession + trigger run
+        T->>T: boot → wait on session.in
+    end
+
+    alt finishReason: tool-calls
+        H->>T: handover signal<br/>(partial assistant message)
+        T->>T: execute tools, run step 2 LLM
+        T-->>H: chunks via session.out
+        H-->>B: SSE: step 2 chunks
+        T-->>H: trigger:turn-complete
+    else finishReason: stop (pure text)
+        H->>T: handover-skip signal
+        T->>T: exit (no LLM call)
+    end
+
+    H-->>B: SSE close
+    Note over B,T: Subsequent turns bypass the handler:<br/>browser writes directly to session.in
+```
+
+<Steps>
+  <Step title="Browser POSTs the first message to your route handler">
+    The transport sees `headStart: "/api/chat"` is set and there's no session yet for this chat. It POSTs the wire payload (messages, chatId, metadata) to your route handler.
+  </Step>
+  <Step title="Your handler creates the session and triggers the agent run">
+    A single `apiClient.createSession` round-trip both creates the chat session and triggers an agent run with `trigger: "handover-prepare"`. The agent run boots into a wait state on `session.in`.
+  </Step>
+  <Step title="Your handler runs streamText step 1">
+    `streamText` runs in your warm process with `stopWhen: stepCountIs(1)`. The output is streamed to the browser as SSE while the agent run boots in parallel. Boot time (~488ms) overlaps with LLM TTFB (~389ms), fully hidden.
+  </Step>
+  <Step title="Mid-turn handover">
+    On step 1's `tool-calls` finish, your handler signals the agent and the SDK splices the agent's step-2+ stream into the same SSE response. On pure-text finish, your handler signals `handover-skip` and the agent run exits clean — no LLM call from the trigger side.
+  </Step>
+  <Step title="Subsequent turns bypass the route handler">
+    After turn 1, the transport hydrates the session PAT from response headers and writes turn 2 onward directly to `session.in`. Same direct-trigger path as a regular `chat.agent` setup.
+  </Step>
+</Steps>
+
+### Setup
+
+<Warning>
+**Bundle isolation is the load-bearing constraint.** Head Start only saves time because your route-handler bundle stays lightweight. Anything you import in that handler — and anything those modules import transitively — lands in the bundle. If your tool catalog with heavy `execute` fns (E2B, Puppeteer, native bindings, the trigger SDK runtime, Turndown, image processing, `node:child_process`) ends up in the bundle, you've put cold-start back into a different process.
+
+This is an **import-chain** problem, not a runtime one. A "we'll strip the executes at runtime" helper would not fix it — bundlers resolve imports at build time. The only correct shape is to keep schemas in their own module that imports `ai` and `zod` only.
+</Warning>
+
+<Steps>
+  <Step title="Split your tool definitions into schemas + executes">
+    Schemas in one module (light deps), executes in another (heavy deps). The agent task pulls in both; the route handler pulls in schemas only.
+
+    ```ts lib/chat-tools/schemas.ts
+    // ⚠️ This file MUST NOT import anything heavier than `ai` and `zod`.
+    // Any import here lands in the route-handler bundle.
+    import { tool } from "ai";
+    import { z } from "zod";
+
+    export const fetchPage = tool({
+      description: "Fetch a URL and return text",
+      inputSchema: z.object({ url: z.string().url() }),
+      // No execute — agent task adds it elsewhere.
+    });
+
+    export const headStartTools = { fetchPage };
+    ```
+
+    ```ts trigger/chat-tools.ts
+    // Heavy deps live here. Only the trigger task imports this module.
+    import { tool } from "ai";
+    import TurndownService from "turndown";
+    import { fetchPage as fetchPageSchema } from "@/lib/chat-tools/schemas";
+
+    const turndown = new TurndownService();
+
+    export const fetchPage = tool({
+      ...fetchPageSchema,
+      execute: async ({ url }) => {
+        const res = await fetch(url);
+        return { body: turndown.turndown(await res.text()) };
+      },
+    });
+
+    export const chatTools = { fetchPage };
+    ```
+  </Step>
+  <Step title="Define your chat.agent (heavy executes)">
+    The agent uses the full tool set — these are the executes that run when step 2+ needs them.
+
+    ```ts trigger/chat.ts
+    import { chat } from "@trigger.dev/sdk/ai";
+    import { streamText, stepCountIs } from "ai";
+    import { anthropic } from "@ai-sdk/anthropic";
+    import { chatTools } from "./chat-tools";
+
+    export const myChat = chat.agent({
+      id: "my-chat",
+      run: async ({ messages, signal }) =>
+        streamText({
+          ...chat.toStreamTextOptions({ tools: chatTools }),
+          model: anthropic("claude-sonnet-4-6"),
+          messages,
+          stopWhen: stepCountIs(10),
+          abortSignal: signal,
+        }),
+    });
+    ```
+  </Step>
+  <Step title="Build the head-start handler">
+    Call `chat.headStart({ agentId, run })`. It returns a standard Web Fetch handler: `(req: Request) => Promise<Response>`. Inside the `run` callback you call `streamText` yourself and spread `chat.toStreamTextOptions({ tools })` to inherit the SDK-owned wiring (messages, schema-only tools, `stopWhen: stepCountIs(1)`, abort signal). Add your own `model` and `system` on top.
+
+    ```ts lib/chat-handler.ts
+    import { chat } from "@trigger.dev/sdk/chat-server";
+    import { streamText } from "ai";
+    import { anthropic } from "@ai-sdk/anthropic";
+    import { headStartTools } from "@/lib/chat-tools/schemas";
+
+    export const chatHandler = chat.headStart({
+      agentId: "my-chat",
+      run: async ({ chat: helper }) =>
+        streamText({
+          ...helper.toStreamTextOptions({ tools: headStartTools }),
+          model: anthropic("claude-sonnet-4-6"),
+          system: "You are a helpful assistant.",
+          // Don't set `stopWhen` here: toStreamTextOptions already applies stepCountIs(1), and overriding it breaks the handover.
+        }),
+    });
+    ```
+
+    <Tip>
+      Use the **same model** on both sides (route handler and `chat.agent`) to avoid a tone or style shift between step 1 and step 2+. Your LLM provider keys stay server-side in your warm process — Trigger.dev never holds them in this design.
+    </Tip>
+
+    Mount the handler in whatever framework you use — see [Mounting in your framework](#mounting-in-your-framework) below.
+  </Step>
+  <Step title="Opt in on the transport">
+    Add `headStart: "/api/chat"` to `useTriggerChatTransport`. Subsequent turns bypass this URL automatically — `accessToken` and (optionally) `startSession` still run for the direct-trigger path on turn 2 onward.
+
+    ```tsx components/chat.tsx
+    const transport = useTriggerChatTransport<typeof myChat>({
+      task: "my-chat",
+      accessToken: ({ chatId }) => mintChatAccessToken(chatId),
+      startSession: ({ chatId, clientData }) =>
+        startChatSession({ chatId, clientData }),
+      headStart: "/api/chat",
+    });
+    ```
+  </Step>
+</Steps>
+
+### Mounting in your framework
+
+`chat.headStart` returns a Web Fetch handler — `(req: Request) => Promise<Response>`. Frameworks that natively pass Web `Request` objects mount it as-is. Node-only frameworks (Express, Fastify, Koa) need a small adapter.
+
+#### Web Fetch frameworks (recommended)
+
+<CodeGroup>
+
+```ts Next.js (App Router)
+// app/api/chat/route.ts
+import { chatHandler } from "@/lib/chat-handler";
+
+export const POST = chatHandler;
+// Default function timeout on Vercel is 10s. Bump if your turns
+// run long (multi-step tool use, slow models):
+// export const maxDuration = 60;
+```
+
+```ts Hono
+// src/index.ts
+import { Hono } from "hono";
+import { chatHandler } from "./chat-handler";
+
+const app = new Hono();
+
+app.post("/api/chat", (c) => chatHandler(c.req.raw));
+
+export default app;
+```
+
+```ts SvelteKit
+// src/routes/api/chat/+server.ts
+import type { RequestHandler } from "./$types";
+import { chatHandler } from "$lib/chat-handler";
+
+export const POST: RequestHandler = ({ request }) => chatHandler(request);
+```
+
+```ts Remix / React Router v7
+// app/routes/api.chat.ts
+import type { ActionFunctionArgs } from "@remix-run/node";
+import { chatHandler } from "~/lib/chat-handler";
+
+export async function action({ request }: ActionFunctionArgs) {
+  return chatHandler(request);
+}
+```
+
+```ts TanStack Start
+// app/routes/api/chat.ts
+import { createAPIFileRoute } from "@tanstack/start/api";
+import { chatHandler } from "~/lib/chat-handler";
+
+export const Route = createAPIFileRoute("/api/chat")({
+  POST: ({ request }) => chatHandler(request),
+});
+```
+
+```ts Astro
+// src/pages/api/chat.ts
+import type { APIRoute } from "astro";
+import { chatHandler } from "../../lib/chat-handler";
+
+export const POST: APIRoute = ({ request }) => chatHandler(request);
+```
+
+```ts Nitro / Nuxt
+// server/api/chat.post.ts
+import { chatHandler } from "~/lib/chat-handler";
+
+export default defineEventHandler((event) => chatHandler(toWebRequest(event)));
+```
+
+```ts Elysia
+// src/index.ts
+import { Elysia } from "elysia";
+import { chatHandler } from "./chat-handler";
+
+new Elysia()
+  .post("/api/chat", ({ request }) => chatHandler(request))
+  .listen(3000);
+```
+
+</CodeGroup>
+
+#### Edge / standalone runtimes
+
+<CodeGroup>
+
+```ts Cloudflare Workers
+// src/index.ts
+import { chatHandler } from "./chat-handler";
+
+export default {
+  async fetch(req: Request): Promise<Response> {
+    const url = new URL(req.url);
+    if (req.method === "POST" && url.pathname === "/api/chat") {
+      return chatHandler(req);
+    }
+    return new Response("Not found", { status: 404 });
+  },
+};
+```
+
+```ts Bun (native server)
+// server.ts
+import { chatHandler } from "./chat-handler";
+
+Bun.serve({
+  port: 3000,
+  async fetch(req) {
+    const url = new URL(req.url);
+    if (req.method === "POST" && url.pathname === "/api/chat") {
+      return chatHandler(req);
+    }
+    return new Response("Not found", { status: 404 });
+  },
+});
+```
+
+```ts Deno (Deno.serve)
+// server.ts
+import { chatHandler } from "./chat-handler.ts";
+
+Deno.serve({ port: 3000 }, async (req) => {
+  const url = new URL(req.url);
+  if (req.method === "POST" && url.pathname === "/api/chat") {
+    return chatHandler(req);
+  }
+  return new Response("Not found", { status: 404 });
+});
+```
+
+</CodeGroup>
+
+#### Node-only frameworks
+
+Express, Fastify, and Koa pass Node `IncomingMessage` / `ServerResponse` objects rather than Web `Request` / `Response`. The SDK ships `chat.toNodeListener` that wraps any Web Fetch handler as a Node `(req, res)` listener — body bytes are read upfront, headers translated, the response body streamed chunk-by-chunk, and client disconnect is propagated to the handler via `AbortSignal`.
+
+<CodeGroup>
+
+```ts Express
+import express from "express";
+import { chat } from "@trigger.dev/sdk/chat-server";
+import { chatHandler } from "./chat-handler";
+
+const app = express();
+app.post("/api/chat", chat.toNodeListener(chatHandler));
+app.listen(3000);
+```
+
+```ts Fastify
+import Fastify from "fastify";
+import { chat } from "@trigger.dev/sdk/chat-server";
+import { chatHandler } from "./chat-handler";
+
+const fastify = Fastify();
+const listener = chat.toNodeListener(chatHandler);
+
+fastify.post("/api/chat", (req, reply) => {
+  // Hand the raw Node request/response to the adapter and tell
+  // Fastify we'll handle the response ourselves (no auto-reply).
+  reply.hijack();
+  return listener(req.raw, reply.raw);
+});
+
+fastify.listen({ port: 3000 });
+```
+
+```ts Koa
+import Koa from "koa";
+import Router from "@koa/router";
+import { chat } from "@trigger.dev/sdk/chat-server";
+import { chatHandler } from "./chat-handler";
+
+const app = new Koa();
+const router = new Router();
+const listener = chat.toNodeListener(chatHandler);
+
+router.post("/api/chat", async (ctx) => {
+  ctx.respond = false; // Tell Koa not to send the response itself.
+  await listener(ctx.req, ctx.res);
+});
+
+app.use(router.routes()).listen(3000);
+```
+
+```ts Raw node:http
+import http from "node:http";
+import { chat } from "@trigger.dev/sdk/chat-server";
+import { chatHandler } from "./chat-handler";
+
+const listener = chat.toNodeListener(chatHandler);
+
+http
+  .createServer((req, res) => {
+    if (req.method === "POST" && req.url === "/api/chat") {
+      return listener(req, res);
+    }
+    res.statusCode = 404;
+    res.end();
+  })
+  .listen(3000);
+```
+
+</CodeGroup>
+
+<Warning>
+  Don't run `express.json()` (or any body-parsing middleware) before the head-start route — it consumes the request body before `chat.toNodeListener` can read the raw bytes. Either skip the parser for this route, or scope it to other routes.
+</Warning>
+
+#### Streaming response timeouts
+
+The handler keeps the SSE response open until the agent run signals turn-complete (or skip, on a pure-text turn). Make sure your framework / serverless function timeout accommodates that:
+
+- **Pure-text first turns**: ~LLM TTFB (1–3 s typically).
+- **Tool-calling first turns**: LLM step 1 + agent boot + tool execution + step 2 LLM call. Usually 5–15 s; longer for multi-step tool use.
+- **Vercel**: default function timeout is 10 s on Hobby, 60 s on Pro. Set `export const maxDuration = N;` on the route segment.
+- **Cloudflare Workers**: default 30 s CPU time (paid plans up to 5 min). Streaming wall time is generally not the bottleneck.
+- **AWS Lambda behind API Gateway**: 29 s API Gateway hard limit; Lambda Function URL allows up to 15 min.
+
+### What gets routed where
+
+| | First turn (handover) | Subsequent turns |
+| --- | --- | --- |
+| Browser sends message via | POST to `headStart` URL | Direct write to `session.in` |
+| Step 1 LLM call runs in | Your warm process | Trigger.dev agent run |
+| Tool execution runs in | Trigger.dev agent run | Trigger.dev agent run |
+| Step 2+ LLM call runs in | Trigger.dev agent run | Trigger.dev agent run |
+| `onChatStart` / `onTurnStart` fire | After handover signal arrives | Normally |
+| `onTurnComplete` fires | After turn finishes (handover) or skipped (handover-skip) | Normally |
+
+### The `chat.headStart` API
+
+```ts
+chat.headStart<TTools>({
+  agentId: string,                       // The chat.agent({ id }) you're handing off to
+  run: (args: HeadStartRunArgs<TTools>) => Promise<StreamTextResult<any, any>>,
+  idleTimeoutInSeconds?: number,         // How long the agent waits for the handover signal. Default: 60
+}): (req: Request) => Promise<Response>
+```
+
+The `run` callback receives:
+
+- `messages: UIMessage[]` — user messages parsed from the request body.
+- `signal: AbortSignal` — fires when the request closes or the SDK times out the handover.
+- `chat: HeadStartChatHelper<TTools>` — exposes `chat.toStreamTextOptions({ tools })` and a `chat.session` escape hatch for power users.
+
+`chat.toStreamTextOptions({ tools })` returns options to spread into `streamText`. The SDK owns these keys — overriding them will break the protocol:
+
+| Key | What the SDK sets | Why |
+| --- | --- | --- |
+| `messages` | `convertToModelMessages(uiMessages)` | First-turn user history |
+| `tools` | What you pass | Schema-only tools for step 1 |
+| `stopWhen` | `stepCountIs(1)` | Step 1 only — agent picks up step 2+ |
+| `abortSignal` | Combined request + idle timeout | Safe cleanup on disconnect |
+
+You bring `model`, `system`, `providerOptions`, `prepareStep`, anything else `streamText` accepts.
+
+#### The transport option
+
+```ts
+useTriggerChatTransport({
+  // ... task, accessToken, startSession, ...
+  headStart?: string,  // URL of your chat.headStart route handler
+});
+```
+
+Optional. When set, the FIRST message of a brand-new chat (no existing session state) routes through this URL. Subsequent turns bypass it and use the direct-trigger path.
+
+This is **not** the stock `useChat` `api` endpoint — it's not the canonical request URL for every turn, just the first-turn shortcut.
+
+### Limitations
+
+- **First turn only.** Step 2+ and turn 2+ run on the trigger side. There's no per-turn "head start every turn" mode — the win comes from amortizing agent boot across the LLM call once.
+- **Single step on the warm-server side.** The handler runs `stopWhen: stepCountIs(1)`. Multi-step handover (handler does step 1 + step 2 + ...) is out of scope.
+- **Your server needs an LLM provider key.** The first-turn LLM call runs in your warm process, so that environment needs whatever keys the model requires. The agent's executes still run on the Trigger.dev side with whatever environment variables they need there.
+- **Browser-only chat surfaces don't apply.** Without a warm server process, there's nowhere to run step 1 ahead of the agent run. Use [Preload](#preload) or eat the cold-start tax.
+- **Streaming-capable runtime required.** Your framework / runtime has to support streaming HTTP responses (Web Fetch `Response` body or equivalent). Most modern hosts do — Next.js, Hono, SvelteKit, Workers, Bun, Deno, Vercel, etc. Some legacy platforms that buffer full responses won't deliver chunks until the turn is over, which negates the TTFC benefit (correctness still holds).
+- **Non-`useChat` chat surfaces** (Slack bots, Discord bots, custom protocols) don't fit the `chat.headStart` shape — the API expects the AI SDK transport's wire payload on input. For those, trigger the chat.agent directly from your bot handler.
+
+## Reference
+
+- [`chat.headStart` factory and types](/ai-chat/reference) — full signatures for `HeadStartRunArgs`, `HeadStartChatHelper`, `HeadStartSession`, `HeadStartHandlerOptions`.
+- [`headStart` transport option](/ai-chat/reference#triggerchattransport-options) — alongside `accessToken`, `startSession`, etc.
+- [`onPreload` hook](/ai-chat/lifecycle-hooks#onpreload) — the backend hook that fires when a run is preloaded.
diff --git a/docs/ai-chat/frontend.mdx b/docs/ai-chat/frontend.mdx
new file mode 100644
index 00000000000..d23d3c2628b
--- /dev/null
+++ b/docs/ai-chat/frontend.mdx
@@ -0,0 +1,580 @@
+---
+title: "Frontend"
+sidebarTitle: "Frontend"
+description: "Transport setup, session management, client data, and frontend patterns for AI Chat."
+---
+
+import RcBanner from "/snippets/ai-chat-rc-banner.mdx";
+
+<RcBanner />
+
+## How the transport works
+
+Vanilla `useChat` expects an `api` URL — it POSTs the conversation to your own Next.js route handler, which terminates the stream. `useTriggerChatTransport` replaces that round-trip: instead of an `api` URL, you pass a custom [`ChatTransport`](https://ai-sdk.dev/docs/ai-sdk-ui/transport) that talks directly to the Trigger.dev cloud (or your self-hosted webapp) on behalf of `useChat`.
+
+There's no API route to maintain. The browser uses a short-lived, session-scoped public access token (PAT, minted by your `accessToken` server action) to:
+
+- **Create the session** via your `startSession` action on the first message (or `transport.preload(chatId)`).
+- **Append the new user message** to the session's durable `.in` stream.
+- **Subscribe to the `.out` SSE stream** for the agent's response chunks (text, tool calls, reasoning, custom `data-*` parts).
+
+The transport handles the auth refresh, reconnect, `Last-Event-ID` resume, and stop-signal plumbing transparently. `useChat` sees the result as `UIMessageChunk`s and renders them unchanged.
+
+## Transport setup
+
+Use the `useTriggerChatTransport` hook from `@trigger.dev/sdk/chat/react` to create a memoized transport instance, then pass it to `useChat`:
+
+```tsx
+import { useTriggerChatTransport } from "@trigger.dev/sdk/chat/react";
+import { useChat } from "@ai-sdk/react";
+import type { myChat } from "@/trigger/chat";
+import { mintChatAccessToken, startChatSession } from "@/app/actions";
+
+export function Chat() {
+  const transport = useTriggerChatTransport<typeof myChat>({
+    task: "my-chat",
+    accessToken: ({ chatId }) => mintChatAccessToken(chatId),
+    startSession: ({ chatId, clientData }) =>
+      startChatSession({ chatId, clientData }),
+  });
+
+  const { messages, sendMessage, stop, status } = useChat({ transport });
+  // ... render UI
+}
+```
+
+The transport is created once on first render and reused across re-renders. Pass a type parameter for compile-time validation of the task ID.
+
+The two callbacks have distinct responsibilities:
+
+- **`accessToken`** is a *pure* PAT mint — the transport invokes it on a 401/403 to refresh the session-scoped token. Your server action wraps `auth.createPublicToken({ scopes: { read: { sessions: chatId }, write: { sessions: chatId } } })`, which returns a `Promise<string>` that resolves to the JWT. Return that string from your `accessToken` callback.
+- **`startSession`** wraps `chat.createStartSessionAction(taskId)` and is called when the transport needs to *create* the session (`transport.preload(chatId)`, or lazily on the first `sendMessage` for a chatId without a cached PAT). Your server controls authorization here, alongside any DB writes paired with session creation.
+
+See [Quick start](/ai-chat/quick-start) for the matching server actions.
+
+<Tip>
+  The hook keeps `onSessionChange` and `clientData` up to date via internal refs, so you don't need
+  to memoize callbacks or worry about stale closures when those options change between renders.
+</Tip>
+
+## Typed messages (`chat.withUIMessage`)
+
+If your chat agent is defined with [`chat.withUIMessage<YourUIMessage>()`](/ai-chat/types) (custom `data-*` parts, typed tools, etc.), pass the same message type through `useChat` so `messages` and `message.parts` are narrowed on the client:
+
+```tsx
+import { useChat } from "@ai-sdk/react";
+import { useTriggerChatTransport, type InferChatUIMessage } from "@trigger.dev/sdk/chat/react";
+import type { myChat } from "./myChat";
+
+type Msg = InferChatUIMessage<typeof myChat>;
+
+const transport = useTriggerChatTransport<typeof myChat>({
+  task: "my-chat",
+  accessToken: ({ chatId }) => mintChatAccessToken(chatId),
+  startSession: ({ chatId, clientData }) =>
+    startChatSession({ chatId, clientData }),
+});
+const { messages } = useChat<Msg>({ transport });
+```
+
+See the [Types](/ai-chat/types) guide for defining `YourUIMessage`, default stream options, and backend examples.
+
+### Calling a fetch endpoint instead of a server action
+
+If you want to mint tokens via a REST endpoint instead of a Next.js server action, the same callbacks accept any async function. Import `AccessTokenParams` and `StartSessionParams` from `@trigger.dev/sdk/chat` to type your fetch handler.
+
+```ts
+import type { AccessTokenParams, StartSessionParams } from "@trigger.dev/sdk/chat";
+
+const transport = useTriggerChatTransport({
+  task: "my-chat",
+  accessToken: async ({ chatId }: AccessTokenParams) => {
+    const res = await fetch(`/api/chat/${chatId}/access-token`, { method: "POST" });
+    return res.text();
+  },
+  startSession: async ({ chatId, taskId, clientData }: StartSessionParams) => {
+    const res = await fetch(`/api/chat/${chatId}/start`, {
+      method: "POST",
+      body: JSON.stringify({ taskId, clientData }),
+    });
+    return res.json(); // { publicAccessToken: string }
+  },
+});
+```
+
+The fetch handlers on the server side wrap the same SDK helpers as the server-action variant: `auth.createPublicToken({ scopes: { read: { sessions: chatId }, write: { sessions: chatId } } })` for refresh and `chat.createStartSessionAction(taskId)` for create.
+
+## Session management
+
+Every chat is backed by a durable Session — the row that owns the chat's runs, persists across run lifecycles, and orchestrates handoffs. The transport manages the session for you; what you persist on your side is a small piece of state per chat that lets a fresh tab resume without a round-trip to create a new session.
+
+### What the transport persists per chat
+
+| Field | Type | Notes |
+| --- | --- | --- |
+| `publicAccessToken` | `string` | Session-scoped JWT (`read:sessions:{chatId} + write:sessions:{chatId}`). Refreshed automatically on 401/403 via `accessToken`. |
+| `lastEventId` | `string \| undefined` | Last SSE event received on `.out`. Used to resume mid-stream after a reload. |
+| `isStreaming` | `boolean \| undefined` | **Optional.** The transport sets it internally, but you don't have to persist it — the server decides "nothing is streaming" via the session's [`X-Session-Settled`](/ai-chat/client-protocol#x-session-settled-fast-close-on-idle-reconnects) signal on reconnect. If you do persist it, the transport keeps the fast-path short-circuit. If you drop it, reconnects open the SSE and close fast on settled sessions. |
+
+### Session cleanup (frontend)
+
+Since session creation and updates are handled server-side, the frontend only needs to handle session deletion when a run ends:
+
+```tsx
+const transport = useTriggerChatTransport<typeof myChat>({
+  task: "my-chat",
+  accessToken: ({ chatId }) => mintChatAccessToken(chatId),
+  startSession: ({ chatId, clientData }) =>
+    startChatSession({ chatId, clientData }),
+  sessions: loadedSessions, // Restored from DB on page load
+  onSessionChange: (chatId, session) => {
+    if (!session) {
+      deleteSession(chatId); // Server action — run ended
+    }
+  },
+});
+```
+
+### Restoring on page load
+
+On page load, fetch both the messages and the session state from your database, then pass them to `useChat` and the transport. Pass `resume: true` to `useChat` when there's an existing conversation — this tells the AI SDK to reconnect to the stream via the transport.
+
+Because the underlying Session row outlives individual runs, a chat you were in yesterday resumes against the same session — even if the original run has long since exited. The transport hydrates from the persisted state and uses `lastEventId` to resubscribe; if the client tries to send a new message and no run is alive, the server triggers a fresh continuation run on the same session before the message is appended.
+
+```tsx app/chat/[chatId]/ChatPage.tsx
+"use client";
+
+import { useEffect, useState } from "react";
+import { useTriggerChatTransport } from "@trigger.dev/sdk/chat/react";
+import { useChat } from "@ai-sdk/react";
+import {
+  mintChatAccessToken,
+  startChatSession,
+  getChatMessages,
+  getSession,
+  deleteSession,
+} from "@/app/actions";
+
+// Rendered from `app/chat/[chatId]/page.tsx`, which awaits `params`
+// and forwards `chatId` into this client component:
+//
+//   export default async function Page({ params }: { params: Promise<{ chatId: string }> }) {
+//     const { chatId } = await params;
+//     return <ChatPage chatId={chatId} />;
+//   }
+export default function ChatPage({ chatId }: { chatId: string }) {
+  const [initialMessages, setInitialMessages] = useState([]);
+  const [initialSession, setInitialSession] = useState(undefined);
+  const [loaded, setLoaded] = useState(false);
+
+  useEffect(() => {
+    async function load() {
+      const [messages, session] = await Promise.all([getChatMessages(chatId), getSession(chatId)]);
+      setInitialMessages(messages);
+      setInitialSession(session ? { [chatId]: session } : undefined);
+      setLoaded(true);
+    }
+    load();
+  }, [chatId]);
+
+  if (!loaded) return null;
+
+  return (
+    <ChatClient
+      chatId={chatId}
+      initialMessages={initialMessages}
+      initialSessions={initialSession}
+    />
+  );
+}
+
+function ChatClient({ chatId, initialMessages, initialSessions }) {
+  const transport = useTriggerChatTransport({
+    task: "my-chat",
+    accessToken: ({ chatId }) => mintChatAccessToken(chatId),
+    startSession: ({ chatId, clientData }) =>
+      startChatSession({ chatId, clientData }),
+    sessions: initialSessions,
+    onSessionChange: (id, session) => {
+      if (!session) deleteSession(id);
+    },
+  });
+
+  const { messages, sendMessage, stop, status } = useChat({
+    id: chatId,
+    messages: initialMessages,
+    transport,
+    resume: initialMessages.length > 0, // Resume if there's an existing conversation
+  });
+
+  // ... render UI
+}
+```
+
+<Info>
+  `resume: true` causes `useChat` to call `reconnectToStream` on the transport when the component
+  mounts. The transport uses the session's `lastEventId` to skip past already-seen stream events, so
+  the frontend only receives new data. Only enable `resume` when there are existing messages — for
+  brand new chats, there's nothing to reconnect to.
+</Info>
+
+<Note>
+  After resuming, `useChat`'s built-in `stop()` won't send the stop signal to the backend because
+  the AI SDK doesn't pass its abort signal through `reconnectToStream`. Use
+  `transport.stopGeneration(chatId)` for reliable stop behavior after resume — see
+  [Stop generation](#stop-generation) for the recommended pattern.
+</Note>
+
+<Warning>
+  In React strict mode (enabled by default in Next.js dev), you may see a `TypeError: Cannot read
+  properties of undefined (reading 'state')` in the console when using `resume`. This is a [known
+  bug in the AI SDK](https://github.com/vercel/ai/issues/8477) caused by React strict mode
+  double-firing the resume effect. The error is caught internally and **does not affect
+  functionality** — streaming and message display work correctly. It only appears in development and
+  will not occur in production builds.
+</Warning>
+
+### Network resilience
+
+You don't need to handle network drops, mobile background-kills, or Safari bfcache restores. The transport retries indefinitely with bounded backoff, reconnects on `online` / tab refocus / `pageshow` with `event.persisted`, and uses `Last-Event-ID` to resume without dropping chunks. See the [changelog entry](/ai-chat/changelog) for the gory details.
+
+## Client data and metadata
+
+### Transport-level client data
+
+Set default client data on the transport that's included in every request. When the task uses `clientDataSchema`, this is type-checked to match:
+
+```ts
+const transport = useTriggerChatTransport<typeof myChat>({
+  task: "my-chat",
+  accessToken: ({ chatId }) => mintChatAccessToken(chatId),
+  startSession: ({ chatId, clientData }) =>
+    startChatSession({ chatId, clientData }),
+  clientData: { userId: currentUser.id },
+});
+```
+
+The transport threads `clientData` through automatically in three ways: it passes it to `startSession` as `params.clientData` (which becomes the first run's `payload.metadata`), it attaches it as per-turn `metadata` on every `.in/append` chunk, and it picks up live updates when the option value changes between renders (so React-driven values like the current user work without reconstructing the transport).
+
+### Per-message metadata
+
+Pass metadata with individual messages via `sendMessage`. Per-message values are merged with transport-level client data (per-message wins on conflicts):
+
+```ts
+sendMessage({ text: "Hello" }, { metadata: { model: "gpt-4o", priority: "high" } });
+```
+
+### Typed client data with clientDataSchema
+
+Instead of manually parsing `clientData` with Zod in every hook, pass a `clientDataSchema` to `chat.agent`. The schema validates the data once per turn, and `clientData` is typed in all hooks and `run`:
+
+```ts
+import { chat } from "@trigger.dev/sdk/ai";
+import { streamText, stepCountIs } from "ai";
+import { openai } from "@ai-sdk/openai";
+import { z } from "zod";
+
+export const myChat = chat.agent({
+  id: "my-chat",
+  clientDataSchema: z.object({
+    model: z.string().optional(),
+    userId: z.string(),
+  }),
+  onChatStart: async ({ chatId, clientData }) => {
+    // clientData is typed as { model?: string; userId: string }
+    await db.chat.create({
+      data: { id: chatId, userId: clientData.userId },
+    });
+  },
+  run: async ({ messages, clientData, signal }) => {
+    // Same typed clientData — no manual parsing needed
+    return streamText({
+      model: openai(clientData?.model ?? "gpt-4o"),
+      messages,
+      abortSignal: signal,
+      stopWhen: stepCountIs(15),
+    });
+  },
+});
+```
+
+The schema also types the `clientData` option on the frontend transport:
+
+```ts
+// TypeScript enforces that clientData matches the schema
+const transport = useTriggerChatTransport<typeof myChat>({
+  task: "my-chat",
+  accessToken: ({ chatId }) => mintChatAccessToken(chatId),
+  startSession: ({ chatId, clientData }) =>
+    startChatSession({ chatId, clientData }),
+  clientData: { userId: currentUser.id },
+});
+```
+
+Supports Zod, ArkType, Valibot, and other schema libraries supported by the SDK.
+
+## Stop generation
+
+Use `transport.stopGeneration(chatId)` to stop the current generation. This sends a stop signal to the running task via input streams, aborting the current `streamText` call while keeping the run alive for the next message.
+
+`stopGeneration` works in all scenarios — including after a page refresh when the stream was reconnected via `resume`. Call it alongside `useChat`'s `stop()` to also update the frontend state:
+
+```tsx
+const { messages, sendMessage, stop: aiStop, status } = useChat({ transport });
+
+// Wrap both calls in a single stop handler
+const stop = useCallback(() => {
+  transport.stopGeneration(chatId);
+  aiStop();
+}, [transport, chatId, aiStop]);
+
+{
+  status === "streaming" && (
+    <button type="button" onClick={stop}>
+      Stop
+    </button>
+  );
+}
+```
+
+<Info>
+  `transport.stopGeneration(chatId)` handles the backend stop signal and closes
+  the SSE connection, while `aiStop()` (from `useChat`) updates the frontend
+  status to `"ready"` and fires the `onFinish` callback.
+</Info>
+
+<Tip>
+  A [PR to the AI SDK](https://github.com/vercel/ai/pull/14350) has been
+  submitted to pass `abortSignal` through `reconnectToStream`, which would make
+  `useChat`'s built-in `stop()` work after resume without needing
+  `stopGeneration`. Until that lands, use the pattern above for reliable stop
+  behavior after page refresh.
+</Tip>
+
+See [Stop generation](/ai-chat/backend#stop-generation) in the backend docs for how to handle stop signals in your task.
+
+## Tool approvals
+
+The AI SDK supports tools that require human approval before execution. To use this with `chat.agent`, define a tool with `needsApproval: true` on the backend, then handle the approval UI and configure `sendAutomaticallyWhen` on the frontend.
+
+### Backend: define an approval-required tool
+
+```ts
+import { tool } from "ai";
+import { z } from "zod";
+
+const sendEmail = tool({
+  description: "Send an email. Requires human approval before sending.",
+  inputSchema: z.object({
+    to: z.string(),
+    subject: z.string(),
+    body: z.string(),
+  }),
+  needsApproval: true,
+  execute: async ({ to, subject, body }) => {
+    await emailService.send({ to, subject, body });
+    return { sent: true, to, subject };
+  },
+});
+```
+
+Pass the tool to `streamText` in your `run` function as usual. When the model calls the tool, `chat.agent` streams a `tool-approval-request` chunk. The turn completes and the run waits for the next message.
+
+### Frontend: approval UI
+
+Import `lastAssistantMessageIsCompleteWithApprovalResponses` from the AI SDK and pass it to `sendAutomaticallyWhen`. This tells `useChat` to automatically re-send messages once all approvals have been responded to.
+
+Destructure `addToolApprovalResponse` from `useChat` and wire it to your approval buttons:
+
+```tsx
+import { useChat } from "@ai-sdk/react";
+import { lastAssistantMessageIsCompleteWithApprovalResponses } from "ai";
+
+function Chat({ chatId, transport }) {
+  const { messages, sendMessage, addToolApprovalResponse, status } = useChat({
+    id: chatId,
+    transport,
+    sendAutomaticallyWhen: lastAssistantMessageIsCompleteWithApprovalResponses,
+  });
+
+  const handleApprove = (approvalId: string) => {
+    addToolApprovalResponse({ id: approvalId, approved: true });
+  };
+
+  const handleDeny = (approvalId: string) => {
+    addToolApprovalResponse({ id: approvalId, approved: false, reason: "User denied" });
+  };
+
+  return (
+    <div>
+      {messages.map((msg) =>
+        msg.parts.map((part, i) => {
+          if (part.state === "approval-requested") {
+            return (
+              <div key={i}>
+                <p>Tool "{part.type}" wants to run with input:</p>
+                <pre>{JSON.stringify(part.input, null, 2)}</pre>
+                <button onClick={() => handleApprove(part.approval.id)}>Approve</button>
+                <button onClick={() => handleDeny(part.approval.id)}>Deny</button>
+              </div>
+            );
+          }
+          // ... render other parts
+        })
+      )}
+    </div>
+  );
+}
+```
+
+### How it works
+
+1. Model calls a tool with `needsApproval: true` — the turn completes with the tool in `approval-requested` state
+2. Frontend shows Approve/Deny buttons
+3. User clicks Approve — `addToolApprovalResponse` updates the tool part to `approval-responded`
+4. `sendAutomaticallyWhen` returns `true` — `useChat` re-sends the updated assistant message
+5. The transport sends the message via input streams — the backend matches it by ID and replaces the existing assistant message in the accumulator
+6. `streamText` sees the approved tool, executes it, and streams the result
+
+<Info>
+  Message IDs are kept in sync between frontend and backend automatically. The backend always
+  includes a `generateMessageId` function when streaming responses, ensuring the `start` chunk
+  carries a `messageId` that the frontend uses. This makes the ID-based matching reliable
+  for tool approval updates.
+</Info>
+
+## Sending actions
+
+Send custom actions (undo, rollback, edit) to the agent via `transport.sendAction()`. Actions wake the agent and fire only `hydrateMessages` (if configured) and `onAction` — they're not turns, so `onTurnStart` / `prepareMessages` / `onBeforeTurnComplete` / `onTurnComplete` and `run()` do not fire.
+
+For optimistic UI, mirror the action's effect on the `useChat` state via `setMessages` while the request is in flight:
+
+```tsx
+function ChatControls({ chatId }: { chatId: string }) {
+  const transport = useTriggerChatTransport({
+    task: "my-chat",
+    accessToken: ({ chatId }) => mintChatAccessToken(chatId),
+    startSession: ({ chatId, clientData }) =>
+      startChatSession({ chatId, clientData }),
+  });
+
+  const { setMessages } = useChat({ transport });
+
+  return (
+    <div>
+      <button
+        onClick={() => {
+          void transport.sendAction(chatId, { type: "undo" });
+          setMessages((prev) => prev.slice(0, -2));
+        }}
+      >
+        Undo last exchange
+      </button>
+      <button
+        onClick={() => transport.sendAction(chatId, { type: "rollback", targetMessageId: "msg-5" })}
+      >
+        Rollback to message
+      </button>
+    </div>
+  );
+}
+```
+
+The action payload is validated against the agent's `actionSchema` on the backend — invalid actions are rejected. See [Actions](/ai-chat/actions) for the backend setup.
+
+<Note>
+  `sendAction` returns a `ReadableStream<UIMessageChunk>`. For side-effect-only actions (where `onAction` returns `void`), the stream completes immediately with `trigger:turn-complete`. For actions where `onAction` returns a `StreamTextResult`, the stream carries the assistant chunks the same way `sendMessages` does — `useChat` consumes them automatically.
+</Note>
+
+For server-to-server usage, `AgentChat` has the same method:
+
+```ts
+const stream = await agentChat.sendAction({ type: "undo" });
+for await (const chunk of stream) {
+  if (chunk.type === "text-delta") process.stdout.write(chunk.delta);
+}
+```
+
+## Multi-tab coordination
+
+When the same chat is open in multiple browser tabs, `multiTab: true` prevents duplicate messages and syncs conversation state across tabs. Only one tab can send at a time. Other tabs enter read-only mode with real-time message updates.
+
+```tsx
+import { useTriggerChatTransport } from "@trigger.dev/sdk/chat/react";
+import { useMultiTabChat } from "@trigger.dev/sdk/chat/react";
+import { useChat } from "@ai-sdk/react";
+
+function Chat({ chatId }: { chatId: string }) {
+  const transport = useTriggerChatTransport({
+    task: "my-chat",
+    accessToken: ({ chatId }) => mintChatAccessToken(chatId),
+    startSession: ({ chatId, clientData }) =>
+      startChatSession({ chatId, clientData }),
+    multiTab: true,
+  });
+
+  const { messages, setMessages, sendMessage } = useChat({
+    id: chatId,
+    transport,
+  });
+
+  const { isReadOnly } = useMultiTabChat(transport, chatId, messages, setMessages);
+
+  return (
+    <div>
+      {isReadOnly && (
+        <div className="bg-amber-50 text-amber-700 p-2 text-sm">
+          This chat is active in another tab. Messages are read-only.
+        </div>
+      )}
+      {/* message list */}
+      <input
+        disabled={isReadOnly}
+        placeholder={isReadOnly ? "Active in another tab" : "Type a message..."}
+      />
+    </div>
+  );
+}
+```
+
+### How it works
+
+1. When a tab sends a message, the transport "claims" the chatId via `BroadcastChannel`
+2. Other tabs detect the claim and enter read-only mode (`isReadOnly: true`)
+3. The active tab broadcasts its messages so read-only tabs see updates in real-time
+4. When the turn completes, the claim is released. Any tab can send next.
+5. Heartbeats detect crashed tabs (10s timeout clears stale claims)
+
+### What `useMultiTabChat` does
+
+- Returns `{ isReadOnly }` for disabling the input UI
+- Broadcasts `messages` from the active tab to other tabs
+- Calls `setMessages` on read-only tabs when messages arrive from the active tab
+- Tracks read-only state via the transport's `BroadcastChannel` coordinator
+
+<Note>
+  Multi-tab coordination is same-browser only (`BroadcastChannel` is a browser API). It gracefully degrades to a no-op in Node.js, SSR, or browsers without `BroadcastChannel` support. Cross-device coordination requires server-side involvement.
+</Note>
+
+## Self-hosting
+
+If you're self-hosting Trigger.dev, pass the `baseURL` option:
+
+```ts
+const transport = useTriggerChatTransport({
+  task: "my-chat",
+  accessToken: ({ chatId }) => mintChatAccessToken(chatId),
+  startSession: ({ chatId, clientData }) =>
+    startChatSession({ chatId, clientData }),
+  baseURL: "https://your-trigger-instance.com",
+});
+```
+
+`baseURL` also accepts a function so you can route per endpoint — useful when fronting `.in/append` with an edge proxy (e.g. to inject server-trusted signals into the wire) while keeping `.out` SSE direct:
+
+```ts
+baseURL: ({ endpoint }) =>
+  endpoint === "out" ? "https://api.trigger.dev" : "https://chat-proxy.example.com",
+```
+
+For per-request control beyond URL routing (header injection, custom retries, tracing), pass a `fetch` override. See [Trusted edge signals](/ai-chat/patterns/trusted-edge-signals) for a full proxy walkthrough.
diff --git a/docs/ai-chat/how-it-works.mdx b/docs/ai-chat/how-it-works.mdx
new file mode 100644
index 00000000000..ecde885f4ac
--- /dev/null
+++ b/docs/ai-chat/how-it-works.mdx
@@ -0,0 +1,230 @@
+---
+title: "How it works"
+sidebarTitle: "How it works"
+description: "End-to-end mechanics of a chat.agent turn: the two durable channels per session, the long-lived task that reads and writes them, and how a chat survives refreshes, deploys, and idle gaps."
+---
+
+import RcBanner from "/snippets/ai-chat-rc-banner.mdx";
+
+<RcBanner />
+
+This page explains how `chat.agent` is put together, what each piece does on a single turn, and how a chat survives across turns. It is not an API tour — for that, see [Backend](/ai-chat/backend), [Frontend](/ai-chat/frontend), and the [Reference](/ai-chat/reference). For the byte-level wire format, see [Client Protocol](/ai-chat/client-protocol).
+
+<Note>
+**What you don't have to think about**: SSE reconnects, WebSocket backpressure, container cold starts, whether a worker is currently running, or how to re-deliver chunks the client missed during a reload. The platform handles those. **What you do have to think about**: idempotency in your `run()` function, and how much state you keep in memory between turns versus persist in your own database.
+</Note>
+
+## The primary noun: a chat session is a pair of streams and a task
+
+A **chat session** is the unit chat.agent owns. It is three things bound together:
+
+- An **inbox** channel called `.in` — every user message lands here as a record.
+- An **outbox** channel called `.out` — every assistant chunk leaves through here.
+- A long-lived **agent task** that reads from `.in` and writes to `.out`.
+
+Both channels are S2 ([s2.dev](https://s2.dev)) durable append-only streams, keyed by the session. Think of them as a pair of per-session topics on a tiny Kafka: records have monotonically increasing sequence numbers, readers resume from a cursor, writers append to the tail. We chose S2 because reads are resumable from an offset — so a browser reload can replay the response stream without re-running the LLM, and a crashed run can rejoin mid-conversation by reading from where it left off.
+
+A chat ID identifies the session for the lifetime of the conversation. The same session can be served by **many runs**: one run handles a turn (or several), goes idle, eventually exits, and the next user message triggers a fresh continuation run on the same session. Sessions are the durable identity; runs are the ephemeral compute.
+
+## The lifecycle states
+
+A run moves through a small state machine over its lifetime. Each state is named below, with the trigger that moves it to the next.
+
+### Cold start
+
+There is no run yet for this session. The frontend's first `sendMessage` posts to the session's `.in` channel; the server sees no live `currentRunId` and triggers a fresh `chat.agent` run with `continuation: false`. Moves to **Streaming** as soon as the task wakes and begins consuming `.in`.
+
+### Streaming
+
+The agent task is running. It reads the new message off `.in`, fires `onTurnStart`, runs your `run()` function, and pipes `streamText()` chunks onto `.out`. The browser is SSE-subscribed to `.out` and renders chunks as they land. When `streamText()` ends, the task writes a `trigger:turn-complete` control record (an S2 record with an empty body and a special header) and immediately trims `.out` back to the *previous* turn's completion marker — keeping the outbox bounded to roughly one turn of chunks at steady state. Moves to **Idle** after `onTurnComplete` runs and the post-turn snapshot is written.
+
+### Idle (awaiting next message)
+
+The turn is over. The task is alive but not doing work — it is parked in a waitpoint on `.in`, waiting for the next user message. If one arrives, it goes back to **Streaming** for the next turn. If `idleTimeoutInSeconds` (defaulting to a few minutes) passes with no new message, it moves to **Suspended**.
+
+### Suspended
+
+The task fires `onChatSuspend`, then the engine **checkpoints** the run's whole process state and frees the compute. The session is still live (the row exists, the `.out` stream is still readable, the chat ID still works), but no machine is dedicated to it. This is the same Checkpoint-Resume System that powers every Trigger.dev task — covered in detail at [How it works → Checkpoint-Resume](/how-it-works#the-checkpoint-resume-system). Moves to **Resuming** when the next message lands in `.in`.
+
+### Resuming
+
+The engine restores the suspended run from its checkpoint. The same JS process picks up exactly where it parked — `chat.local` values, the accumulator, in-flight promises, in-memory caches all preserved as they were. `onChatResume` fires immediately after the restore, then the task transitions to **Streaming**. No boot work, no snapshot read, no SDK reinitialization. This is the cheap path.
+
+### Continuation (after exit)
+
+If the run has fully exited (because it hit `maxTurns`, the customer called `chat.endRun()` or `chat.requestUpgrade()`, or it was cancelled or crashed), the next user message can't resume it — there is nothing to resume. Instead, the server triggers a brand-new run with `continuation: true`. The new run does a cold boot, reads the prior conversation's S3 snapshot, replays any `.out` chunks after the snapshot cursor, AND replays any `.in` records past the last `turn-complete` cursor (the user messages a dead run never acknowledged). If the predecessor died mid-stream and left a partial assistant response in `.out`, the smart default splices `[firstInFlightUser, partialAssistant]` onto the chain so any follow-up has full context — see [Recovery boot](/ai-chat/patterns/recovery-boot). The new run then enters **Streaming** with `turn === 0` of the new run but `messageCount > 0`.
+
+### Closed
+
+`POST /api/v1/sessions/:id/close` flips `closedAt` on the session row. Future appends are rejected. Reads still work for transcript viewing. The session is terminal.
+
+## One turn, end to end
+
+Here is a typical cold turn — user opens the page, types "What's the weather?", reads the response — traced through every component.
+
+<Steps>
+  <Step title="Browser: useChat calls transport.sendMessages">
+    The Vercel AI SDK's `useChat` hook serializes the user's message into the slim wire format: `{ chatId, trigger: "submit-message", message, metadata }`. Only the new message goes on the wire, not the full history.
+  </Step>
+  <Step title="Browser: transport posts to /append">
+    The transport calls `POST /realtime/v1/sessions/:chatId/in/append`, authenticated with the session's public access token. The body is one S2 record.
+  </Step>
+  <Step title="Server: route ensures a run exists">
+    The append route resolves the session, then calls `ensureRunForSession()`. The session's `currentRunId` is null (cold start), so it triggers a new `chat.agent` run on the project's dev/prod environment and atomically claims the slot via an optimistic version counter.
+  </Step>
+  <Step title="Server: route appends the record to S2 .in">
+    The route writes the message to `s2://sessions/:chatId/in` as a single record. S2 assigns a sequence number. Any waitpoints registered on this channel fire, which would wake an existing run — but there is no run waiting yet, so this is a no-op for now.
+  </Step>
+  <Step title="Browser: transport opens an SSE subscription to .out">
+    In parallel with the send, the transport opens `GET /realtime/v1/sessions/:chatId/out` (server-sent events). It passes its `lastEventId` if it has one cached; on a brand-new chat it does not. Any chunks the agent writes from now on will be delivered to this stream.
+  </Step>
+  <Step title="Task: agent run boots">
+    The newly-triggered run starts. `onBoot` fires once per worker process. Because this is a fresh chat, no snapshot is read.
+  </Step>
+  <Step title="Task: enters the turn loop, reads the message from .in">
+    The agent reads the pending record off `.in` via a waitpoint. `onChatStart` fires (once per chat lifetime). `onTurnStart` fires (every turn).
+  </Step>
+  <Step title="Task: runs your run() function, streams chunks to .out">
+    Your code calls `streamText({ model, messages })`. Each `UIMessageChunk` it produces is appended to `s2://sessions/:chatId/out` as a record. The browser sees them arrive on the SSE stream and the AI SDK renders them.
+  </Step>
+  <Step title="Task: writes the turn-complete control record">
+    When `streamText()` finishes, the agent writes a record with header `trigger:turn-complete` and an empty body. The browser transport sees this header and closes the per-turn readable stream.
+  </Step>
+  <Step title="Task: trims .out back to the previous turn-complete">
+    Immediately after writing the new turn-complete marker, the agent issues an S2 trim command targeting the *previous* turn-complete's sequence number. This bounds the stream's storage to roughly one turn of chunks plus the latest control record.
+  </Step>
+  <Step title="Task: fires onTurnComplete, writes snapshot to S3">
+    `onTurnComplete` runs (your hook for persistence). Then the agent writes `ChatSnapshotV1` — `{ version: 1, messages, lastOutEventId, lastOutTimestamp }` — to S3 at `sessions/:chatId/snapshot.json`. This write is awaited, not fire-and-forget, so the next run is guaranteed to find it.
+  </Step>
+  <Step title="Task: goes idle, then suspends">
+    The agent re-enters the waitpoint on `.in`. After `idleTimeoutInSeconds` elapses with no new message, `onChatSuspend` fires and the engine checkpoints the run. Compute is freed.
+  </Step>
+</Steps>
+
+## Three layers of persistence
+
+chat.agent survives idle gaps, deploys, refreshes, and crashes because three separate persistence mechanisms work at three different layers of the stack. They're orthogonal — each protects against a different failure mode, and conflating them is a common source of bugs.
+
+### Layer 1: the engine checkpoint (compute)
+
+When a run enters the Suspended state, the engine **checkpoints** the running process — its memory, CPU registers, and open file descriptors — and frees the compute. Today this is done via [CRIU](https://criu.org/) (Checkpoint/Restore in Userspace), the same mechanism that powers every Trigger.dev task's suspend/resume. On the new microVM compute runtime (currently in [private beta](/compute-private-beta)), it becomes a full Firecracker VM snapshot: every byte of memory plus filesystem state plus every kernel object inside the VM.
+
+When the next message arrives, the engine **restores** the checkpoint. The same JS process picks up at the exact instruction it parked on. From your code's perspective, the line right after the `messagesInput.wait()` waitpoint just continues executing. Anything in process memory survives: `chat.local`, the message accumulator, in-flight Promises, in-memory caches, open DB connections. The runId is unchanged.
+
+This is what lets you write `run()` as a single long-lived function with stateful closures, even though the underlying compute actually goes through checkpoint/restore cycles between turns. `onChatSuspend` fires immediately before the checkpoint; `onChatResume` fires immediately after the restore.
+
+### Layer 2: the chat snapshot (S3)
+
+After every turn the agent writes a `ChatSnapshotV1` blob to S3 — full accumulated `UIMessage[]` plus the current `lastOutEventId` cursor. This is chat-specific and lives one layer above the engine. It has nothing to do with CRIU or Firecracker.
+
+The chat snapshot bridges run *boundaries*. If a run exits — because it hit `maxTurns`, called `chat.endRun()` or `chat.requestUpgrade()`, was cancelled, crashed, or got bumped to a new version after a deploy — the engine checkpoint is gone with it. When the next user message arrives, the server triggers a fresh run with `continuation: true`. That new run reads the S3 snapshot, replays any post-snapshot chunks from `.out`, merges by message ID, and starts its first turn with the full conversation history already in memory.
+
+The chat snapshot carries only message history — not process memory. `chat.local`, in-memory caches, open connections all need to be reinitialized on a continuation. This is why `onBoot` (every fresh worker) is the right place to initialize `chat.local`, not `onChatStart` (only the very first turn of the chat). See [Persistence and replay](/ai-chat/patterns/persistence-and-replay) for the full snapshot model.
+
+If your task registers a `hydrateMessages` hook, the chat snapshot is skipped entirely — your hook is the single source of truth for history.
+
+### Layer 3: the `lastEventId` cursor (browser)
+
+The transport stores `lastEventId` — the S2 sequence number of the most recent chunk it processed — in its session state. On page reload, it reopens the SSE stream with `Last-Event-ID: <cursor>` as a header. S2 resumes from that cursor; chunks the browser already saw are not redelivered. If the agent was mid-turn when the browser reloaded, the rest of the turn streams in. If the turn had already completed, the stream closes immediately via an `X-Session-Settled` header so the client doesn't long-poll for nothing.
+
+Unlike the other two layers, this one is client-side. The server doesn't even need to know the browser refreshed — the agent run keeps running (or stays suspended) regardless.
+
+### Which layer covers which failure mode
+
+| What happened | Recovery layer | Same run? | In-memory state preserved? |
+| --- | --- | --- | --- |
+| Idle gap mid-conversation (suspend → resume) | Engine checkpoint | Yes | Yes |
+| Run exited cleanly (`endRun`, `requestUpgrade`, `maxTurns`) | Chat snapshot | No (fresh continuation run) | No |
+| Run crashed mid-turn (OOM, exception) | Chat snapshot + `.out` tail replay | (retried as a new attempt) | No |
+| Browser tab reloaded mid-stream | `lastEventId` cursor on `.out` | (run unaffected) | (n/a) |
+| Deploy rolled out a new version mid-chat | Chat snapshot, via `requestUpgrade` flow | No | No |
+
+No single layer covers every case. The engine checkpoint alone can't survive a run exit (there's nothing to restore). The chat snapshot alone can't survive a tab refresh mid-turn (chunks already streamed would be lost). The `lastEventId` cursor alone can't bridge run boundaries (the new run wouldn't know the history). Together they cover every realistic failure.
+
+## Warm vs cold: same chat, three different timings
+
+Take the same conversation — "What's the weather?" then "What about tomorrow?" — and look at how each second turn lands.
+
+**Warm second turn (within a few seconds).** The first turn finished, the agent is parked on the `.in` waitpoint, status is **Idle**. The new message hits `/append`, the waitpoint fires, the agent wakes inside the same run with all memory intact, runs `onTurnStart` for turn 2, streams the response. No checkpoint involved — the process never went to sleep. Latency to first chunk: dominated by the LLM, not the platform.
+
+**Resumed second turn (a few minutes later).** The first turn finished and the agent suspended — the engine checkpoint is stored, compute is freed. The new message hits `/append`. The engine restores the checkpoint, fires `onChatResume`, and the task picks up exactly where it parked — all in-memory state preserved (`chat.local`, the accumulator, the lot). Latency to first chunk: the engine's restore overhead, then the LLM.
+
+**Continuation second turn (an hour later, or after a deploy).** The first turn finished and the run eventually exited. The new message hits `/append`, the server triggers a fresh run with `continuation: true`. The new run boots cold, `onBoot` fires, the agent reads the S3 chat snapshot, replays the `.out` tail, then enters the turn loop with the full conversation already accumulated. The previous run's in-memory state is gone — anything in `chat.local` has to be re-initialized in `onBoot`. Latency to first chunk: cold start plus snapshot read, then the LLM.
+
+All three look identical to the browser. Only the agent task knows which path it took, via `payload.continuation` and `ctx.attempt.number`.
+
+## Lifecycle hooks: where you plug in
+
+| Hook | When it fires | Typical use |
+| --- | --- | --- |
+| `onBoot` | Once per worker process, before any chat work | Initialize `chat.local` resources |
+| `onPreload` | Once per chat lifetime, if the chat was preloaded before the first message | Warm caches, fetch the user's profile |
+| `onChatStart` | Once per chat lifetime, on the first turn of a fresh chat (not on continuation) | First-message persistence, system-prompt setup |
+| `onValidateMessages` | Every turn, before merging the incoming message | Reject or transform user input |
+| `hydrateMessages` | Every turn, instead of snapshot+replay | Use your DB as the source of truth |
+| `onTurnStart` | Every turn, before `run()` | Compact history, persist the user message |
+| `onBeforeTurnComplete` | Every turn, after streaming, before the turn-complete record | Emit a final custom chunk |
+| `onTurnComplete` | Every turn, after the turn-complete record is written | Persist the assistant message and `lastEventId` |
+| `onChatSuspend` / `onChatResume` | At the idle → suspend / suspend → wake transitions | Release/reacquire expensive resources |
+
+See [Lifecycle hooks](/ai-chat/lifecycle-hooks) for the full signatures and firing order.
+
+## When chat.agent is the right primitive
+
+**Good fit**:
+- Multi-turn conversational agents where the user is expected to come back later.
+- Long-running agent loops with tool calls, where a single turn can take a minute or more.
+- Cases where you want page reloads to resume the in-flight response without re-running the model.
+- Cases where you can't predict idle gaps — humans go to lunch.
+
+**Not a good fit**:
+- Single-shot completions where you don't need durability or resume. Call your model directly.
+- Workflows where you control both ends and want a custom protocol. Use a [raw `task()` with chat primitives](/ai-chat/backend#raw-task-with-primitives) directly without the `chat.agent` wrapper.
+- High-fanout broadcasting (one source, many subscribers). Use Trigger.dev realtime streams against a regular task instead.
+
+## Putting it together
+
+```mermaid
+sequenceDiagram
+    participant Browser
+    participant API as Trigger.dev API
+    participant S2_in as S2 .in
+    participant S2_out as S2 .out
+    participant Agent as chat.agent task
+    participant S3 as S3 snapshot
+
+    Note over Agent: Cold start
+    Browser->>API: POST /sessions/:id/in/append
+    API->>S2_in: append(message)
+    API->>Agent: trigger run (continuation: false)
+    Browser->>API: GET /sessions/:id/out (SSE)
+    API->>S2_out: read stream
+    Agent->>S2_in: read message (waitpoint)
+    Agent->>S2_out: append chunk(s)
+    S2_out-->>Browser: SSE chunks
+    Agent->>S2_out: append turn-complete (control)
+    Agent->>S2_out: trim < previous turn-complete
+    Agent->>S3: write snapshot
+    Note over Agent: Idle on waitpoint
+
+    Note over Agent: ...time passes...
+    Note over Agent: Suspended
+
+    Browser->>API: POST /sessions/:id/in/append
+    API->>S2_in: append(message)
+    API->>Agent: restore from suspend
+    Agent->>S2_in: read message
+    Agent->>S2_out: append chunk(s)
+    S2_out-->>Browser: SSE chunks
+    Agent->>S2_out: append turn-complete
+    Agent->>S3: write snapshot
+    Note over Agent: Idle again
+```
+
+## Where to go next
+
+- [Quick start](/ai-chat/quick-start) — get a chat running in a few minutes.
+- [Backend](/ai-chat/backend) — the `chat.agent()` API in detail.
+- [Lifecycle hooks](/ai-chat/lifecycle-hooks) — every hook, what fires when.
+- [Persistence and replay](/ai-chat/patterns/persistence-and-replay) — deeper on the snapshot model.
+- [Client protocol](/ai-chat/client-protocol) — wire format if you're writing a custom transport.
diff --git a/docs/ai-chat/lifecycle-hooks.mdx b/docs/ai-chat/lifecycle-hooks.mdx
new file mode 100644
index 00000000000..c6ea62cbc81
--- /dev/null
+++ b/docs/ai-chat/lifecycle-hooks.mdx
@@ -0,0 +1,516 @@
+---
+title: "Lifecycle hooks"
+sidebarTitle: "Lifecycle hooks"
+description: "Hook into every stage of a chat agent's run: preload, turn start, turn complete, suspend, resume, and more."
+---
+
+import RcBanner from "/snippets/ai-chat-rc-banner.mdx";
+
+<RcBanner />
+
+`chat.agent({ ... })` accepts a set of lifecycle hooks for persisting state, validating input, transforming messages, and reacting to suspension and resumption. They fire at well-defined points in the chat agent's lifetime.
+
+**Once per worker process (every fresh run boot):** `onBoot` → `onPreload` (preloaded runs only).
+
+**Once per chat (first message of the chat's lifetime):** `onChatStart`.
+
+**Per-turn order:** `onValidateMessages` → `hydrateMessages` → `onChatStart` (chat's first message only) → `onTurnStart` → `run()` → `onBeforeTurnComplete` → `onTurnComplete`.
+
+**Suspend / resume:** `onChatSuspend` fires when the run transitions from idle to suspended (waiting on the next message); `onChatResume` fires on wake.
+
+**Four scopes to keep straight:**
+
+| Scope | Fires when | Use for |
+| --- | --- | --- |
+| **Process** ([`onBoot`](#onboot)) | Every fresh worker boots — initial, preloaded, and reactive continuation (post-cancel/crash/`endRun`/upgrade). | Initialize `chat.local`, open per-process resources, re-hydrate state from your DB on continuation. |
+| **Recovery** ([`onRecoveryBoot`](#onrecoveryboot)) | Continuation boot where the dead run was mid-stream — a partial assistant survives on `session.out`. | Override the smart default — drop the partial, synthesize tool results, emit a recovery banner. |
+| **Chat** ([`onChatStart`](#onchatstart)) | First message of a chat's lifetime. Does NOT fire on continuation runs or OOM retries. | One-time DB rows for the chat, resources tied to the chat's lifetime. |
+| **Turn** ([`onTurnStart`](#onturnstart), [`onTurnComplete`](#onturncomplete), etc.) | Every turn. | Persist messages, post-process responses. |
+
+## Task context (`ctx`)
+
+Every chat lifecycle callback and the `run` payload include `ctx`: the same run context object as `task({ run: (payload, { ctx }) => ... })`. Import the type with `import type { TaskRunContext } from "@trigger.dev/sdk"` (the `Context` export is the same type). Use `ctx` for tags, metadata, or any API that needs the full run record. The string `runId` on chat events is always `ctx.run.id` (both are provided for convenience). See [Task context (`ctx`)](/ai-chat/reference#task-context-ctx) in the API reference.
+
+Standard [task lifecycle hooks](/tasks/overview) such as `onWait`, `onResume`, `onComplete`, and `onFailure` are also available on `chat.agent()` with the same shapes as on a normal `task()` — but prefer the chat-specific [`onChatSuspend` / `onChatResume`](#onchatsuspend--onchatresume) for any chat-related work. The generic hooks fire on every wait/resume (including ones the runtime uses internally for non-chat reasons); the chat-specific ones fire only at the idle-to-suspended transition you actually care about and carry full chat context.
+
+## onBoot
+
+Fires **once per worker process picking up the chat** — for the initial run, for preloaded runs, AND for reactive continuation runs (post-cancel, crash, `endRun`, `requestUpgrade`, OOM retry). Does NOT fire when the same run resumes from its engine checkpoint via the idle-window suspend/resume path — use [`onChatResume`](#onchatsuspend--onchatresume) for that.
+
+This is the right place to initialize anything that lives in the JS process for the lifetime of the run: [`chat.local`](/ai-chat/chat-local) state, DB connections, sandboxes, in-memory caches. It runs before `onPreload`, `onChatStart`, the continuation-wait branch, and any turn — so anything you set up here is available everywhere downstream.
+
+<Warning>
+  If you initialize `chat.local` only in `onChatStart`, your `run()` will crash on continuation runs with `chat.local can only be modified after initialization`. `onChatStart` is once-per-chat by contract; `chat.local` is per-process and needs `onBoot`.
+</Warning>
+
+Branch on `continuation` to decide whether to load existing state from your DB or start fresh:
+
+```ts
+export const myChat = chat.agent({
+  id: "my-chat",
+  clientDataSchema: z.object({ userId: z.string() }),
+  onBoot: async ({ chatId, clientData, continuation, previousRunId }) => {
+    const user = await db.user.findUnique({ where: { id: clientData.userId } });
+    userContext.init({ name: user.name, plan: user.plan });
+
+    if (continuation) {
+      // Re-hydrate per-chat in-memory state from your DB.
+      // `previousRunId` is the public id of the prior run (use it for
+      // logging or to look up persisted state keyed on run id).
+      const saved = await db.chatState.findUnique({ where: { chatId } });
+      if (saved) {
+        // Re-apply your saved per-chat state into wherever your
+        // run() reads it from (a chat.local slot, an in-memory map, etc.).
+        userContext.applySaved(saved);
+      }
+    }
+  },
+  run: async ({ messages, signal }) => {
+    return streamText({ model: anthropic("claude-sonnet-4-5"), messages, abortSignal: signal });
+  },
+});
+```
+
+| Field             | Type                          | Description                                                                 |
+| ----------------- | ----------------------------- | --------------------------------------------------------------------------- |
+| `ctx`             | `TaskRunContext`              | Full task run context. See [reference](/ai-chat/reference#task-context-ctx). |
+| `chatId`          | `string`                      | Chat session ID                                                             |
+| `runId`           | `string`                      | The Trigger.dev run ID for this run boot                                    |
+| `chatAccessToken` | `string`                      | Scoped access token for this run                                            |
+| `clientData`      | Typed by `clientDataSchema`   | Custom data from the frontend                                               |
+| `continuation`    | `boolean`                     | `true` when this run is taking over from a prior dead run                   |
+| `previousRunId`   | `string \| undefined`         | Public id of the prior run when `continuation` is true                      |
+| `preloaded`       | `boolean`                     | Whether this run was triggered as a preload                                 |
+
+<Tip>
+  `onBoot` and `onChatStart` are complementary — keep DB-row creation in `onChatStart` (it only needs to happen once per chat) and put process-level setup (`chat.local`, connections, caches) in `onBoot` (it needs to happen on every fresh worker).
+</Tip>
+
+## onRecoveryBoot
+
+Fires once on a continuation boot when the dead predecessor was mid-stream — a partial assistant survives on `session.out`. The runtime reconstructs context automatically via a smart default; this hook is the override path for policies that need something different.
+
+The hook does NOT fire when there's no partial — clean continuations after `chat.endRun()` or `chat.requestUpgrade()`, fresh chats, OOM retries on top of a complete snapshot. Those paths dispatch any in-flight user message as a normal turn on the new run without involving the hook. It also does NOT fire when [`hydrateMessages`](#hydratemessages) is registered (the customer owns persistence).
+
+```ts
+export const myChat = chat.agent({
+  id: "my-chat",
+  onRecoveryBoot: async ({ partialAssistant, inFlightUsers, writer, cause, previousRunId }) => {
+    writer.write({
+      type: "data-chat-recovery",
+      data: { cause, previousRunId, partialPresent: partialAssistant !== undefined },
+      transient: true,
+    });
+    // Return nothing → fall through to the smart default
+    // (splice partial + first user into chain, dispatch the rest).
+  },
+  run: async ({ messages, signal }) =>
+    streamText({ model: anthropic("claude-sonnet-4-5"), messages, abortSignal: signal }),
+});
+```
+
+| Field              | Type                                                              | Description                                                                                          |
+| ------------------ | ----------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------- |
+| `ctx`              | `TaskRunContext`                                                  | Full task run context                                                                                |
+| `chatId`           | `string`                                                          | Chat session ID                                                                                      |
+| `runId`            | `string`                                                          | The Trigger.dev run ID for this run boot                                                             |
+| `previousRunId`    | `string`                                                          | Public id of the prior run that died                                                                 |
+| `cause`            | `"cancelled" \| "crashed" \| "unknown"`                           | Best-effort cause. Currently always `"unknown"` — don't branch on it                                 |
+| `settledMessages`  | `TUIMessage[]`                                                    | The chain persisted by the predecessor's last `onTurnComplete`                                       |
+| `inFlightUsers`    | `TUIMessage[]`                                                    | User messages on `session.in` past the cursor — the message(s) the predecessor never acknowledged    |
+| `partialAssistant` | `TUIMessage \| undefined`                                         | The trailing assistant message whose stream never received `finish`                                  |
+| `pendingToolCalls` | `Array<{ toolCallId, toolName, input, partIndex }>`               | Tool calls in `input-available` state extracted from `partialAssistant`                              |
+| `writer`           | `ChatWriter`                                                      | Lazy session.out writer — write a recovery banner / signal here                                      |
+
+Returns `{ chain?, recoveredTurns?, beforeBoot? }` — every field optional. Omitted fields fall through to the smart default. See [Recovery boot](/ai-chat/patterns/recovery-boot) for the full guide, examples (drop partial, synthesize tool results, persist before boot), and interaction notes.
+
+<Tip>
+  Don't put `chat.local` initialization in `onRecoveryBoot` — use [`onBoot`](#onboot). `onRecoveryBoot` is for recovery decisions, not per-process setup. `onBoot` fires first.
+</Tip>
+
+## onPreload
+
+Fires when a **preloaded run** starts, before any messages arrive. Use it to eagerly create chat-scoped DB rows (the Chat row, the ChatSession row) while the user is still typing — so the very first message lands fast.
+
+Preloaded runs are triggered by calling `transport.preload(chatId)` on the frontend. See [Preload](/ai-chat/fast-starts#preload) for details.
+
+Per-process state (anything in [`chat.local`](/ai-chat/chat-local), DB connections, etc.) belongs in [`onBoot`](#onboot) — `onBoot` fires before `onPreload` on every fresh worker, including on continuation runs where `onPreload` never fires.
+
+```ts
+export const myChat = chat.agent({
+  id: "my-chat",
+  clientDataSchema: z.object({ userId: z.string() }),
+  onBoot: async ({ clientData }) => {
+    // Per-process state — runs on every fresh worker (initial,
+    // preloaded, continuation). See onBoot above.
+    const user = await db.user.findUnique({ where: { id: clientData.userId } });
+    userContext.init({ name: user.name, plan: user.plan });
+  },
+  onPreload: async ({ chatId, clientData, runId, chatAccessToken }) => {
+    // Chat-scoped DB rows — only matters on preload (and onChatStart as
+    // a fallback when not preloaded).
+    await db.chat.create({ data: { id: chatId, userId: clientData.userId } });
+    await db.chatSession.upsert({
+      where: { id: chatId },
+      create: { id: chatId, runId, publicAccessToken: chatAccessToken },
+      update: { runId, publicAccessToken: chatAccessToken },
+    });
+  },
+  onChatStart: async ({ preloaded }) => {
+    if (preloaded) return; // Already initialized in onPreload
+    // ... non-preloaded chat-row initialization
+  },
+  run: async ({ messages, signal }) => {
+    return streamText({ model: anthropic("claude-sonnet-4-5"), messages, abortSignal: signal });
+  },
+});
+```
+
+| Field             | Type                                          | Description                      |
+| ----------------- | --------------------------------------------- | -------------------------------- |
+| `ctx`             | `TaskRunContext`                              | Full task run context. See [reference](/ai-chat/reference#task-context-ctx). |
+| `chatId`          | `string`                                      | Chat session ID                  |
+| `runId`           | `string`                                      | The Trigger.dev run ID           |
+| `chatAccessToken` | `string`                                      | Scoped access token for this run |
+| `clientData`      | Typed by `clientDataSchema`                   | Custom data from the frontend    |
+| `writer`          | [`ChatWriter`](/ai-chat/reference#chatwriter) | Stream writer for custom chunks  |
+
+Most lifecycle callbacks receive a `writer`, a lazy stream writer that lets you send custom `UIMessageChunk` parts (like `data-*` parts) to the frontend; hooks that fire after the stream has closed, such as `onTurnComplete`, do not. Non-transient `data-*` chunks written via the `writer` are automatically added to the response message and available in `onTurnComplete`. Add `transient: true` for ephemeral chunks (progress indicators, etc.) that should not persist. See [Custom data parts](/ai-chat/backend#custom-data-parts).
+
+## onChatStart
+
+Fires **exactly once per chat**, on the very first user message of the chat's lifetime, before `run()` executes. Use it for one-time chat-scoped setup — create the Chat DB row, mint resources tied to the chat's lifetime.
+
+`onChatStart` does **not** fire on:
+
+- **Continuation runs** — a new run picking up an existing session after the prior run ended (`chat.endRun`, waitpoint timeout, `chat.requestUpgrade`, cancel, crash). The chat already started.
+- **OOM-retry attempts** — same chat, same conversation, just on a larger machine.
+
+For per-process state that has to be initialized on every fresh worker (including continuation runs), use [`onBoot`](#onboot). For per-turn setup, use [`onTurnStart`](#onturnstart).
+
+<Warning>
+  Do not initialize [`chat.local`](/ai-chat/chat-local) here. `chat.local` is per-process state that must survive continuation runs, but `onChatStart` only fires on the chat's very first message. Use [`onBoot`](#onboot) instead.
+</Warning>
+
+The `preloaded` field tells you whether [`onPreload`](#onpreload) already ran for this chat — useful for skipping setup work that's already done.
+
+<Note>
+  Because `onChatStart` fires only on the chat's first ever message, `messages` is either empty (when no message exists yet — e.g. a preloaded run that hasn't received its first turn) or contains just the first user message. There's no prior history to load here.
+</Note>
+
+```ts
+export const myChat = chat.agent({
+  id: "my-chat",
+  onChatStart: async ({ chatId, clientData, preloaded }) => {
+    if (preloaded) return; // Already set up in onPreload
+
+    const { userId } = clientData as { userId: string };
+    await db.chat.create({
+      data: { id: chatId, userId, title: "New chat" },
+    });
+  },
+  run: async ({ messages, signal }) => {
+    return streamText({ model: anthropic("claude-sonnet-4-5"), messages, abortSignal: signal });
+  },
+});
+```
+
+<Tip>
+  `clientData` contains custom data from the frontend: either the `clientData` option on the
+  transport constructor (sent with every message) or the `metadata` option on `sendMessage()`
+  (per-message). See [Client data and metadata](/ai-chat/frontend#client-data-and-metadata).
+</Tip>
+
+## onValidateMessages
+
+Validate or transform incoming `UIMessage[]` before they are converted to model messages. Fires once per turn with the raw messages from the wire payload (after cleanup of aborted tool parts), **before** accumulation and `toModelMessages()`.
+
+Return the validated messages array. Throw to abort the turn with an error.
+
+This is the right place to call the AI SDK's [`validateUIMessages`](https://ai-sdk.dev/docs/ai-sdk-ui/chatbot-message-persistence#validating-messages-on-the-server) to catch malformed messages from storage or untrusted input before they reach the model. It is especially useful when persisting conversations to a database, where tool schemas may drift between deploys.
+
+| Field     | Type                                                            | Description                              |
+| --------- | --------------------------------------------------------------- | ---------------------------------------- |
+| `messages` | `UIMessage[]`                                                  | Incoming UI messages for this turn       |
+| `chatId`  | `string`                                                        | Chat session ID                          |
+| `turn`    | `number`                                                        | Turn number (0-indexed)                  |
+| `trigger` | `"submit-message" \| "regenerate-message" \| "preload" \| "close"` | The trigger type for this turn        |
+
+```ts
+import { validateUIMessages } from "ai";
+
+export const myChat = chat.agent({
+  id: "my-chat",
+  onValidateMessages: async ({ messages }) => {
+    return validateUIMessages({ messages, tools: chatTools });
+  },
+  run: async ({ messages, signal }) => {
+    return streamText({ model: anthropic("claude-sonnet-4-5"), messages, tools: chatTools, abortSignal: signal });
+  },
+});
+```
+
+<Note>
+  `onValidateMessages` fires **before** `onTurnStart` and message accumulation. If you need to validate messages loaded from a database, do the loading in `onChatStart` or `onPreload` and let `onValidateMessages` validate the full incoming set each turn.
+</Note>
+
+## hydrateMessages
+
+Load the full message history from your backend on every turn, replacing the built-in linear accumulator. When set, the hook's return value becomes the accumulated state; the normal accumulation logic (append for submit, replace for regenerate) is skipped entirely.
+
+Use this when the backend should be the source of truth for message history: abuse prevention, branching conversations (DAGs), or rollback/undo support.
+
+| Field              | Type                                                  | Description                                               |
+| ------------------ | ----------------------------------------------------- | --------------------------------------------------------- |
+| `chatId`           | `string`                                              | Chat session ID                                           |
+| `turn`             | `number`                                              | Turn number (0-indexed)                                   |
+| `trigger`          | `"submit-message" \| "regenerate-message" \| "action"` | The trigger type for this turn                           |
+| `incomingMessages` | `UIMessage[]`                                         | Validated wire messages from the frontend — 0-or-1-length (empty for actions, regenerates, and continuations; one element for normal `submit-message` and tool-approval responses) |
+| `previousMessages` | `UIMessage[]`                                         | Accumulated UI messages before this turn (`[]` on turn 0) |
+| `clientData`       | Typed by `clientDataSchema`                           | Custom data from the frontend                             |
+| `continuation`     | `boolean`                                             | Whether this run is continuing an existing chat           |
+| `previousRunId`    | `string \| undefined`                                 | The previous run ID (if continuation)                     |
+
+```ts
+export const myChat = chat.agent({
+  id: "my-chat",
+  hydrateMessages: async ({ chatId, trigger, incomingMessages }) => {
+    const record = await db.chat.findUnique({ where: { id: chatId } });
+    const stored = record?.messages ?? [];
+
+    // Append the new user message and persist
+    if (trigger === "submit-message" && incomingMessages.length > 0) {
+      const newMsg = incomingMessages[incomingMessages.length - 1]!;
+      stored.push(newMsg);
+      await db.chat.update({
+        where: { id: chatId },
+        data: { messages: stored },
+      });
+    }
+
+    return stored;
+  },
+  run: async ({ messages, signal }) => {
+    return streamText({ model: anthropic("claude-sonnet-4-5"), messages, abortSignal: signal });
+  },
+});
+```
+
+**Lifecycle position:** `onValidateMessages` → **`hydrateMessages`** → `onChatStart` (chat's first message only) → `onTurnStart` → `run()`
+
+After the hook returns, any incoming wire message whose ID matches a hydrated message is auto-merged. This makes [tool approvals](/ai-chat/frontend#tool-approvals) work transparently with hydration.
+
+<Note>
+  `hydrateMessages` also fires for [action](/ai-chat/actions) turns (`trigger: "action"`) with empty `incomingMessages`. This lets the action handler work with the latest DB state.
+</Note>
+
+<Tip>
+  Registering `hydrateMessages` short-circuits the runtime's [snapshot + replay](/ai-chat/patterns/persistence-and-replay) reconstruction at run boot — your hook is the single source of truth for history, so the runtime skips reading or writing the snapshot entirely. No object storage traffic, no replay cost. The trade-off is that you own persistence end-to-end.
+</Tip>
+
+<Note>
+  `incomingMessages` is **0-or-1-length** consistently. `submit-message` and tool-approval responses ship a single message; `regenerate-message`, continuations, and actions ship none. Patterns like [tool-result auditing](/ai-chat/patterns/tool-result-auditing) work the same regardless — iterate the array and the loop runs zero or one times.
+</Note>
+
+## onTurnStart
+
+Fires at the start of **every turn** — including the first turn of a continuation run, where `onChatStart` doesn't fire. Runs after message accumulation and (when applicable) `onChatStart`, but **before** `run()` executes. Use it to persist messages before streaming begins so a mid-stream page refresh still shows the user's message.
+
+| Field             | Type                                          | Description                                     |
+| ----------------- | --------------------------------------------- | ----------------------------------------------- |
+| `ctx`             | `TaskRunContext`                              | Full task run context. See [reference](/ai-chat/reference#task-context-ctx). |
+| `chatId`          | `string`                                      | Chat session ID                                 |
+| `messages`        | `ModelMessage[]`                              | Full accumulated conversation (model format)    |
+| `uiMessages`      | `UIMessage[]`                                 | Full accumulated conversation (UI format)       |
+| `turn`            | `number`                                      | Turn number (0-indexed)                         |
+| `runId`           | `string`                                      | The Trigger.dev run ID                          |
+| `chatAccessToken` | `string`                                      | Scoped access token for this run                |
+| `continuation`    | `boolean`                                     | Whether this run is continuing an existing chat |
+| `preloaded`       | `boolean`                                     | Whether this run was preloaded                  |
+| `clientData`      | Typed by `clientDataSchema`                   | Custom data from the frontend                   |
+| `writer`          | [`ChatWriter`](/ai-chat/reference#chatwriter) | Stream writer for custom chunks                 |
+
+```ts
+export const myChat = chat.agent({
+  id: "my-chat",
+  onTurnStart: async ({ chatId, uiMessages, runId, chatAccessToken }) => {
+    await db.chat.update({
+      where: { id: chatId },
+      data: { messages: uiMessages },
+    });
+    await db.chatSession.upsert({
+      where: { id: chatId },
+      create: { id: chatId, runId, publicAccessToken: chatAccessToken },
+      update: { runId, publicAccessToken: chatAccessToken },
+    });
+  },
+  run: async ({ messages, signal }) => {
+    return streamText({ model: anthropic("claude-sonnet-4-5"), messages, abortSignal: signal });
+  },
+});
+```
+
+<Tip>
+  By persisting in `onTurnStart`, the user's message is saved to your database before the AI starts
+  streaming. If the user refreshes mid-stream, the message is already there.
+</Tip>
+
+## onBeforeTurnComplete
+
+Fires after the response is captured but **before** the stream closes. The `writer` can send custom chunks that appear in the current turn. Use this for post-processing indicators, compaction progress, or any data the user should see before the turn ends.
+
+```ts
+export const myChat = chat.agent({
+  id: "my-chat",
+  onBeforeTurnComplete: async ({ writer, usage, uiMessages }) => {
+    // Write a custom data part while the stream is still open
+    writer.write({
+      type: "data-usage-summary",
+      data: {
+        tokens: usage?.totalTokens,
+        messageCount: uiMessages.length,
+      },
+    });
+
+    // You can also compact messages here and write progress
+    if (usage?.totalTokens && usage.totalTokens > 50_000) {
+      writer.write({ type: "data-compaction", data: { status: "compacting" } });
+      chat.setMessages(compactedMessages); // compactedMessages comes from your own compaction logic (not shown)
+      writer.write({ type: "data-compaction", data: { status: "complete" } });
+    }
+  },
+  run: async ({ messages, signal }) => {
+    return streamText({ model: anthropic("claude-sonnet-4-5"), messages, abortSignal: signal });
+  },
+});
+```
+
+Receives the same fields as [`TurnCompleteEvent`](/ai-chat/reference#turncompleteevent), plus a [`writer`](/ai-chat/reference#chatwriter).
+
+## onTurnComplete
+
+Fires after each turn completes, after the response is captured and the stream is closed. This is the primary hook for persisting the assistant's response. Does not include a `writer` since the stream is already closed.
+
+| Field                | Type                     | Description                                                                                  |
+| -------------------- | ------------------------ | -------------------------------------------------------------------------------------------- |
+| `ctx`                | `TaskRunContext`         | Full task run context. See [reference](/ai-chat/reference#task-context-ctx).                |
+| `chatId`             | `string`                 | Chat session ID                                                                              |
+| `messages`           | `ModelMessage[]`         | Full accumulated conversation (model format)                                                 |
+| `uiMessages`         | `UIMessage[]`            | Full accumulated conversation (UI format)                                                    |
+| `newMessages`        | `ModelMessage[]`         | Only this turn's messages (model format)                                                     |
+| `newUIMessages`      | `UIMessage[]`            | Only this turn's messages (UI format)                                                        |
+| `responseMessage`    | `UIMessage \| undefined` | The assistant's response for this turn                                                       |
+| `turn`               | `number`                 | Turn number (0-indexed)                                                                      |
+| `runId`              | `string`                 | The Trigger.dev run ID                                                                       |
+| `chatAccessToken`    | `string`                 | Scoped access token for this run                                                             |
+| `lastEventId`        | `string \| undefined`    | Stream position for resumption. Persist this with the session.                               |
+| `stopped`            | `boolean`                | Whether the user stopped generation during this turn                                         |
+| `continuation`       | `boolean`                | Whether this run is continuing an existing chat                                              |
+| `rawResponseMessage` | `UIMessage \| undefined` | The raw assistant response before abort cleanup (same as `responseMessage` when not stopped) |
+
+```ts
+export const myChat = chat.agent({
+  id: "my-chat",
+  onTurnComplete: async ({ chatId, uiMessages, runId, chatAccessToken, lastEventId }) => {
+    // Atomic write — see Database persistence for the race-condition rationale
+    await db.$transaction([
+      db.chat.update({
+        where: { id: chatId },
+        data: { messages: uiMessages },
+      }),
+      db.chatSession.upsert({
+        where: { id: chatId },
+        create: { id: chatId, runId, publicAccessToken: chatAccessToken, lastEventId },
+        update: { runId, publicAccessToken: chatAccessToken, lastEventId },
+      }),
+    ]);
+  },
+  run: async ({ messages, signal }) => {
+    return streamText({ model: anthropic("claude-sonnet-4-5"), messages, abortSignal: signal });
+  },
+});
+```
+
+<Tip>
+  Use `uiMessages` to overwrite the full conversation each turn (simplest). Use `newUIMessages` if
+  you prefer to store messages individually, e.g. one database row per message.
+</Tip>
+
+<Tip>
+  Persist `lastEventId` alongside the session. When the transport reconnects after a page refresh,
+  it uses this to skip past already-seen events, preventing duplicate messages.
+</Tip>
+
+<Tip>
+  For a full **conversation + session** persistence pattern (including preload, continuation, and token renewal), see [Database persistence](/ai-chat/patterns/database-persistence).
+</Tip>
+
+## onChatSuspend / onChatResume
+
+Chat-specific hooks that fire around the **idle-to-suspended** transition: `onChatSuspend` fires at the moment the run stops using compute and waits for the next message, and `onChatResume` fires when the run wakes up again. These replace the need for the generic `onWait` / `onResume` task hooks for chat-specific work.
+
+The `phase` discriminator tells you **when** the suspend/resume happened:
+
+- `"preload"`: after `onPreload`, waiting for the first message
+- `"turn"`: after `onTurnComplete`, waiting for the next message
+
+```ts
+export const myChat = chat.agent({
+  id: "my-chat",
+  onChatSuspend: async (event) => {
+    // Tear down expensive resources before suspending
+    await disposeCodeSandbox(event.ctx.run.id);
+    if (event.phase === "turn") {
+      logger.info("Suspending after turn", { turn: event.turn });
+    }
+  },
+  onChatResume: async (event) => {
+    // Re-initialize after waking up
+    logger.info("Resumed", { phase: event.phase });
+  },
+  run: async ({ messages, signal }) => {
+    return streamText({ model: anthropic("claude-sonnet-4-5"), messages, abortSignal: signal });
+  },
+});
+```
+
+| Field        | Type             | Description                                                  |
+| ------------ | ---------------- | ------------------------------------------------------------ |
+| `phase`      | `"preload" \| "turn"` | Whether this is a preload or post-turn suspension       |
+| `ctx`        | `TaskRunContext` | Full task run context                                        |
+| `chatId`     | `string`         | Chat session ID                                              |
+| `runId`      | `string`         | The Trigger.dev run ID                                       |
+| `clientData` | Typed by `clientDataSchema` | Custom data from the frontend                   |
+| `turn`       | `number`         | Turn number (**`"turn"` phase only**)                        |
+| `messages`   | `ModelMessage[]` | Accumulated model messages (**`"turn"` phase only**)         |
+| `uiMessages` | `UIMessage[]`    | Accumulated UI messages (**`"turn"` phase only**)            |
+
+<Tip>
+  Unlike `onWait` (which fires for all wait types: duration, task, batch, token), `onChatSuspend` fires only at chat suspension points with full chat context. No need to filter on `wait.type`.
+</Tip>
+
+## exitAfterPreloadIdle
+
+When set to `true`, a preloaded run completes successfully after the preload idle timeout (`preloadIdleTimeoutInSeconds`) elapses instead of suspending. Use this for "fire and forget" preloads: if the user doesn't send a message during the idle window, the run ends cleanly.
+
+```ts
+export const myChat = chat.agent({
+  id: "my-chat",
+  preloadIdleTimeoutInSeconds: 10,
+  exitAfterPreloadIdle: true,
+  onPreload: async ({ chatId, clientData }) => {
+    // Eagerly set up state. If no message comes, the run just ends.
+    await initializeChat(chatId, clientData);
+  },
+  run: async ({ messages, signal }) => {
+    return streamText({ model: anthropic("claude-sonnet-4-5"), messages, abortSignal: signal });
+  },
+});
+```
+
+## See also
+
+- [Reference](/ai-chat/reference) for full event-type definitions
+- [Database persistence](/ai-chat/patterns/database-persistence) for the canonical persistence pattern
+- [Code execution sandbox](/ai-chat/patterns/code-sandbox) for an `onChatSuspend` use case
+- [Backend](/ai-chat/backend) for `chat.agent({ ... })` itself, prompts, stop signals, persistence overview, and runtime configuration
diff --git a/docs/ai-chat/mcp.mdx b/docs/ai-chat/mcp.mdx
new file mode 100644
index 00000000000..63c0d8ece00
--- /dev/null
+++ b/docs/ai-chat/mcp.mdx
@@ -0,0 +1,101 @@
+---
+title: "MCP Server"
+sidebarTitle: "MCP Server"
+description: "Chat with your agents from any AI coding tool using the Trigger.dev MCP server."
+---
+
+import RcBanner from "/snippets/ai-chat-rc-banner.mdx";
+
+<RcBanner />
+
+The Trigger.dev MCP server includes tools for having conversations with your chat agents directly from AI coding tools like Claude Code, Cursor, Windsurf, and others. This lets your AI assistant interact with your agents without writing any code.
+
+## Available tools
+
+| Tool | Description |
+| --- | --- |
+| `list_agents` | List all agents in the current worker |
+| `start_agent_chat` | Start a conversation with an agent |
+| `send_agent_message` | Send a message and get the response |
+| `close_agent_chat` | Close a conversation |
+
+See the [MCP Tools Reference](/mcp-tools#agent-chat-tools) for full details on each tool.
+
+## Typical workflow
+
+<Steps>
+  <Step title="List available agents">
+    Ask your AI assistant to list agents in your project. This calls `list_agents` which returns all tasks created with [`chat.agent()`](/ai-chat/backend#chat-agent) or [`chat.customAgent()`](/ai-chat/backend#raw-task-with-primitives).
+  </Step>
+  <Step title="Start a chat">
+    Start a conversation with an agent using `start_agent_chat`. This triggers a run and optionally preloads the agent so it's ready to respond immediately.
+
+    If the agent uses `withClientData`, pass the required fields via `clientData`.
+  </Step>
+  <Step title="Send messages">
+    Send messages with `send_agent_message`. The agent processes each message and returns the full response text, including which tools were used. The agent retains full context across messages in the same chat.
+  </Step>
+  <Step title="Close the chat">
+    When done, close the conversation with `close_agent_chat`. This signals the agent to exit gracefully. If you skip this step, the agent will close on its own when its idle timeout expires.
+  </Step>
+</Steps>
+
+## Example conversation
+
+Here's what a typical MCP conversation with an agent looks like:
+
+```
+You: List my agents
+
+AI: Found 3 agents in worker 20250402.12 (dev):
+    - support-agent (src/trigger/support.ts)
+    - pr-review (src/trigger/pr-review.ts)
+    - research (src/trigger/research.ts)
+
+You: Start a chat with the support agent
+
+AI: Agent chat started and preloaded.
+    - Chat ID: abc-123
+    - Agent: support-agent
+    - Run ID: run_xyz
+
+You: Ask it to look up order #4821
+
+AI: I found order #4821. It was placed on March 15 and shipped via
+    FedEx (tracking: 7891234). Current status: In Transit, estimated
+    delivery March 19.
+
+    Tools used: lookupOrder, getShippingStatus
+
+You: Close the chat
+
+AI: Chat abc-123 closed.
+```
+
+## Setup
+
+If you haven't set up the MCP server yet, see the [MCP Server introduction](/mcp-introduction) for installation and client configuration.
+
+Agent chat tools require:
+- A running dev server (`trigger dev`) or a deployed worker
+- At least one agent defined with [`chat.agent()`](/ai-chat/backend#chat-agent) or [`chat.customAgent()`](/ai-chat/backend#raw-task-with-primitives)
+
+## How it works
+
+Under the hood, the MCP tools use the same protocol as the [frontend transport](/ai-chat/frontend) and [AgentChat SDK](/ai-chat/server-chat):
+
+1. **`start_agent_chat`** triggers a task run with the `preload` trigger and stores the session (run ID, chat ID) in memory.
+2. **`send_agent_message`** sends the message via the run's input stream and subscribes to the output SSE stream to collect the agent's full response.
+3. **`close_agent_chat`** sends a close signal via the input stream and removes the session.
+
+Sessions are held in-memory within the MCP server process. If the MCP server restarts, active sessions are lost — but the underlying agent runs continue until their idle timeout.
+
+<Note>
+  The `get_current_worker` tool also labels agents with `[agent]` in its output, making it easy to identify which tasks are agents even when listing all tasks.
+</Note>
+
+## See also
+
+- [AgentChat SDK](/ai-chat/server-chat) — programmatic server-side access to agents
+- [Sub-Agents](/ai-chat/patterns/sub-agents) — agents calling other agents
+- [MCP Tools Reference](/mcp-tools#agent-chat-tools) — full tool parameter reference
diff --git a/docs/ai-chat/overview.mdx b/docs/ai-chat/overview.mdx
new file mode 100644
index 00000000000..30de8ad1baa
--- /dev/null
+++ b/docs/ai-chat/overview.mdx
@@ -0,0 +1,84 @@
+---
+title: "AI Agents"
+sidebarTitle: "Overview"
+description: "Durable multi-turn AI chats — one Trigger.dev task per conversation, surviving refreshes, deploys, and crashes."
+---
+
+import RcBanner from "/snippets/ai-chat-rc-banner.mdx";
+
+<RcBanner />
+
+An AI chat isn't a request — it's a session. `chat.agent` runs every conversation as a single long-lived Trigger.dev task: you write the loop, it wakes up when a message arrives, freezes when none do, and the same in-memory state and on-disk workspace survive across page refreshes, deploys, idle gaps, and crashes. The substrate handles the parts most teams stitch together by hand — turn lifecycle, mid-stream resume, recovery from cancel/crash/OOM, HITL approvals, deploy upgrades — so your code is the loop you'd write anyway: messages in, `streamText` out.
+
+## A minimal example
+
+A `chat.agent` task takes `messages`, calls `streamText`, and returns the result. The frontend wires the [Vercel AI SDK's `useChat`](https://ai-sdk.dev/docs/reference/ai-sdk-ui/use-chat) to a `TriggerChatTransport`. No API routes.
+
+```ts trigger/chat.ts
+import { chat } from "@trigger.dev/sdk/ai";
+import { streamText, stepCountIs } from "ai";
+import { anthropic } from "@ai-sdk/anthropic";
+
+export const myChat = chat.agent({
+  id: "my-chat",
+  run: async ({ messages, signal }) =>
+    streamText({
+      model: anthropic("claude-sonnet-4-5"),
+      messages,
+      abortSignal: signal,
+      stopWhen: stepCountIs(15),
+    }),
+});
+```
+
+```tsx app/components/Chat.tsx
+import { useChat } from "@ai-sdk/react";
+import { useTriggerChatTransport } from "@trigger.dev/sdk/chat/react";
+
+export function Chat() {
+  const transport = useTriggerChatTransport<typeof myChat>({
+    task: "my-chat",
+    accessToken: ({ chatId }) => mintChatAccessToken(chatId),
+    startSession: ({ chatId, clientData }) =>
+      startChatSession({ chatId, clientData }),
+  });
+  const { messages, sendMessage } = useChat({ transport });
+  // ... render UI
+}
+```
+
+See [Quick Start](/ai-chat/quick-start) for the matching server actions and a runnable project.
+
+## Why use AI Agents on Trigger.dev
+
+- **Resume across refreshes, deploys, and crashes.** A chat in progress when you redeploy keeps streaming on the new version. Mid-stream refreshes pick up where they left off.
+- **Native AI SDK support.** Text, tool calls, reasoning, and custom `data-*` parts all flow through `useChat` over a custom `ChatTransport`. No custom protocol to maintain.
+- **Multi-turn for free.** Each turn is a step inside the same durable task; conversation history accumulates server-side, so clients only ship the new message.
+- **Fast cold starts.** Opt-in [Head Start](/ai-chat/fast-starts#head-start) runs the first `streamText` step in your warm Next.js / Hono / SvelteKit server while the agent boots in parallel — cuts time-to-first-chunk roughly in half.
+- **Production primitives ship in the box.** Stop generation, steering, edits, branching, sub-agents, HITL tool approvals, version upgrades, recovery from cancel/crash/OOM — all first-class.
+- **Observable.** Every turn is a span in the Trigger.dev dashboard. Sessions are queryable via `sessions.list` for inbox-style UIs.
+
+## How it fits together
+
+Three primitives, related but distinct:
+
+- **Chat agents** — the SDK surface you define with [`chat.agent()`](/ai-chat/backend#chat-agent). Owns the turn loop, lifecycle hooks, and the response stream.
+- **Sessions** — the durable, bi-directional channel keyed on `chatId` that holds the conversation across run boundaries. A chat agent runs *on top of* a [Session](/ai-chat/sessions).
+- **Sub-agents** — agents that another agent delegates work to via [`AgentChat`](/ai-chat/patterns/sub-agents). The sub-agent runs as its own durable agent on its own session; its response streams back through the parent as preliminary tool results, so the frontend sees the sub-agent working inside the parent's tool card.
+
+## Next steps
+
+<CardGroup cols={2}>
+  <Card title="Quick Start" icon="rocket" href="/ai-chat/quick-start">
+    Get a working chat in three steps — agent, token, frontend.
+  </Card>
+  <Card title="How it works" icon="diagram-project" href="/ai-chat/how-it-works">
+    Sessions, the turn loop, durable streams, and what survives a refresh.
+  </Card>
+  <Card title="Backend" icon="server" href="/ai-chat/backend">
+    `chat.agent` options, lifecycle hooks, and the raw-task primitives.
+  </Card>
+  <Card title="Patterns" icon="puzzle-piece" href="/ai-chat/patterns/sub-agents">
+    HITL approvals, branching, sub-agents, OOM/crash recovery.
+  </Card>
+</CardGroup>
diff --git a/docs/ai-chat/patterns/branching-conversations.mdx b/docs/ai-chat/patterns/branching-conversations.mdx
new file mode 100644
index 00000000000..8a313921f41
--- /dev/null
+++ b/docs/ai-chat/patterns/branching-conversations.mdx
@@ -0,0 +1,284 @@
+---
+title: "Branching conversations"
+sidebarTitle: "Branching conversations"
+description: "Build ChatGPT-style conversation trees with edit, regenerate, undo, and branch switching using hydrateMessages, chat.history, and actions."
+---
+
+import RcBanner from "/snippets/ai-chat-rc-banner.mdx";
+
+<RcBanner />
+
+Most chat UIs treat conversations as linear sequences. But real conversations branch — users edit previous messages, regenerate responses, undo exchanges, and explore alternative paths. This pattern shows how to build a branching conversation system using `hydrateMessages`, `chat.history`, and custom actions.
+
+## Data model
+
+The standard approach (used by ChatGPT, Open WebUI, LibreChat, and others) stores messages as a tree with parent pointers:
+
+```ts
+// Each message is a node in the tree
+type ChatNode = {
+  id: string;
+  chatId: string;
+  parentId: string | null; // null for root
+  role: "user" | "assistant";
+  message: UIMessage; // the full AI SDK message
+  createdAt: Date;
+};
+```
+
+A conversation is a tree of nodes. The **active branch** is resolved by walking from a leaf node up through `parentId` pointers to the root, then reversing:
+
+```
+root
+├── user: "Hello"
+│   └── assistant: "Hi there!"
+│       ├── user: "What's the weather?" ← branch A
+│       │   └── assistant: "It's sunny!"
+│       └── user: "Tell me a joke" ← branch B (active)
+│           └── assistant: "Why did the..."
+```
+
+Switching branches means changing which leaf is "active" — the same tree, different path.
+
+## Backend setup
+
+### Store: tree operations
+
+Define helpers that read and write the node tree. Adapt to your database:
+
+```ts
+// Resolve the active path: walk from leaf to root, reverse
+async function getActiveBranch(chatId: string): Promise<UIMessage[]> {
+  const nodes = await db.chatNode.findMany({ where: { chatId } });
+  const byId = new Map(nodes.map((n) => [n.id, n]));
+
+  // Find active leaf (most recently created leaf node)
+  const childIds = new Set(nodes.map((n) => n.parentId).filter(Boolean));
+  const leaves = nodes.filter((n) => !childIds.has(n.id));
+  const activeLeaf = leaves.sort((a, b) => b.createdAt - a.createdAt)[0];
+  if (!activeLeaf) return [];
+
+  // Walk to root
+  const path: UIMessage[] = [];
+  let current: ChatNode | undefined = activeLeaf;
+  while (current) {
+    path.unshift(current.message);
+    current = current.parentId ? byId.get(current.parentId) : undefined;
+  }
+  return path;
+}
+
+// Append a message as a child of the current leaf
+async function appendMessage(chatId: string, message: UIMessage): Promise<void> {
+  const branch = await getActiveBranch(chatId);
+  const parentId = branch.length > 0 ? branch[branch.length - 1]!.id : null;
+
+  await db.chatNode.create({
+    data: { id: message.id, chatId, parentId, role: message.role, message, createdAt: new Date() },
+  });
+}
+```
+
+### Agent: hydration + actions
+
+```ts
+import { chat } from "@trigger.dev/sdk/ai";
+import { streamText, stepCountIs } from "ai";
+import { anthropic } from "@ai-sdk/anthropic";
+import { z } from "zod";
+
+export const myChat = chat.agent({
+  id: "branching-chat",
+
+  // Load the active branch from the DB on every turn.
+  // The frontend's message array is ignored — the tree is the source of truth.
+  hydrateMessages: async ({ chatId, trigger, incomingMessages }) => {
+    if (trigger === "submit-message" && incomingMessages.length > 0) {
+      await appendMessage(chatId, incomingMessages[incomingMessages.length - 1]!);
+    }
+    return getActiveBranch(chatId);
+  },
+
+  actionSchema: z.discriminatedUnion("type", [
+    // Edit a previous user message — creates a sibling node in the tree
+    z.object({ type: z.literal("edit"), messageId: z.string(), text: z.string() }),
+    // Switch to a different branch by selecting a leaf node
+    z.object({ type: z.literal("switch-branch"), leafId: z.string() }),
+    // Undo the last user + assistant exchange
+    z.object({ type: z.literal("undo") }),
+  ]),
+
+  onAction: async ({ action, chatId }) => {
+    switch (action.type) {
+      case "edit": {
+        // Find the original message's parent, create a sibling with new content
+        const original = await db.chatNode.findUnique({ where: { id: action.messageId } });
+        if (!original) break;
+
+        const newId = generateId();
+        await db.chatNode.create({
+          data: {
+            id: newId,
+            chatId,
+            parentId: original.parentId, // same parent = sibling
+            role: "user",
+            message: { id: newId, role: "user", parts: [{ type: "text", text: action.text }] },
+            createdAt: new Date(),
+          },
+        });
+        // Active branch now resolves through the new sibling (most recent leaf)
+        break;
+      }
+
+      case "switch-branch": {
+        // Mark this leaf as the most recently accessed so getActiveBranch picks it
+        await db.chatNode.update({
+          where: { id: action.leafId },
+          data: { createdAt: new Date() },
+        });
+        break;
+      }
+
+      case "undo": {
+        // Remove the last two nodes (user + assistant) from the active branch
+        const branch = await getActiveBranch(chatId);
+        if (branch.length >= 2) {
+          const lastTwo = branch.slice(-2);
+          await db.chatNode.deleteMany({
+            where: { id: { in: lastTwo.map((m) => m.id) } },
+          });
+        }
+        break;
+      }
+    }
+
+    // Reload the (now modified) active branch into the accumulator
+    const updated = await getActiveBranch(chatId);
+    chat.history.set(updated);
+  },
+
+  onTurnComplete: async ({ chatId, responseMessage }) => {
+    // Persist the assistant's response as a new node
+    if (responseMessage) {
+      await appendMessage(chatId, responseMessage);
+    }
+  },
+
+  run: async ({ messages, signal }) => {
+    return streamText({
+      model: anthropic("claude-sonnet-4-5"),
+      messages,
+      abortSignal: signal,
+      stopWhen: stepCountIs(15),
+    });
+  },
+});
+```
+
+## Frontend
+
+### Sending actions
+
+Wire up edit, undo, and branch switching to the transport:
+
+```tsx
+function MessageActions({ message, chatId }: { message: UIMessage; chatId: string }) {
+  const transport = useTransport();
+  const [editing, setEditing] = useState(false);
+  const [editText, setEditText] = useState("");
+
+  if (message.role !== "user") return null;
+
+  return (
+    <div>
+      {editing ? (
+        <form onSubmit={() => {
+          transport.sendAction(chatId, { type: "edit", messageId: message.id, text: editText });
+          setEditing(false);
+        }}>
+          <input value={editText} onChange={(e) => setEditText(e.target.value)} />
+          <button type="submit">Save</button>
+        </form>
+      ) : (
+        <button onClick={() => { setEditText(getMessageText(message)); setEditing(true); }}>
+          Edit
+        </button>
+      )}
+    </div>
+  );
+}
+```
+
+### Branch navigation
+
+To show the `< 2/3 >` sibling switcher, query the tree for siblings at each fork point. This is a frontend concern — the backend exposes the data, the UI navigates it.
+
+```tsx
+function BranchSwitcher({ message, chatId, siblings }: {
+  message: UIMessage;
+  chatId: string;
+  siblings: { id: string; createdAt: string }[];
+}) {
+  const transport = useTransport();
+  if (siblings.length <= 1) return null;
+
+  const currentIndex = siblings.findIndex((s) => s.id === message.id);
+
+  return (
+    <div>
+      <button
+        disabled={currentIndex === 0}
+        onClick={() => {
+          // Find the leaf of the previous sibling's subtree
+          transport.sendAction(chatId, {
+            type: "switch-branch",
+            leafId: siblings[currentIndex - 1]!.id,
+          });
+        }}
+      >
+        &lt;
+      </button>
+      <span>{currentIndex + 1}/{siblings.length}</span>
+      <button
+        disabled={currentIndex === siblings.length - 1}
+        onClick={() => {
+          transport.sendAction(chatId, {
+            type: "switch-branch",
+            leafId: siblings[currentIndex + 1]!.id,
+          });
+        }}
+      >
+        &gt;
+      </button>
+    </div>
+  );
+}
+```
+
+<Note>
+  The sibling data (which messages share the same parent) needs to come from your database — query it when loading the chat or include it as client data. The agent only returns the active branch via `hydrateMessages`.
+</Note>
+
+## How it works
+
+| Operation | What happens |
+|-----------|-------------|
+| **Send message** | `hydrateMessages` appends the new message as a child of the current leaf, returns the active path |
+| **Edit message** | `onAction` creates a sibling node with the same parent. The new node becomes the latest leaf, so `hydrateMessages` resolves through it. LLM responds to the edited history |
+| **Regenerate** | Same as edit — create a new assistant sibling. The AI SDK's `regenerate()` handles this via `trigger: "regenerate-message"` |
+| **Undo** | `onAction` removes the last two nodes. `chat.history.set()` updates the accumulator. LLM responds to the earlier state |
+| **Switch branch** | `onAction` updates which leaf is "active". `hydrateMessages` loads the new path. LLM responds to the switched context |
+
+## Design notes
+
+- **Messages are immutable** — edits create siblings, not mutations. This preserves full history for analytics and auditing.
+- **The tree lives in your database** — the agent loads a linear path from it via `hydrateMessages`. The agent itself doesn't know about the tree structure.
+- **`hydrateMessages` + `onAction` + `chat.history`** are the three primitives. Hydration loads the active path, actions modify the tree, and `chat.history.set()` syncs the accumulator after tree modifications.
+- **Frontend owns navigation** — the `< 2/3 >` UI, sibling queries, and branch switching triggers are client-side concerns. The backend just processes actions and returns responses.
+
+## See also
+
+- [`hydrateMessages`](/ai-chat/lifecycle-hooks#hydratemessages) — backend-controlled message history
+- [Actions](/ai-chat/actions) — custom actions with `actionSchema` and `onAction`
+- [`chat.history`](/ai-chat/backend#chat-history) — imperative history mutations
+- [Database persistence](/ai-chat/patterns/database-persistence) — basic persistence pattern (linear)
diff --git a/docs/ai-chat/patterns/code-sandbox.mdx b/docs/ai-chat/patterns/code-sandbox.mdx
new file mode 100644
index 00000000000..d4f17e271f9
--- /dev/null
+++ b/docs/ai-chat/patterns/code-sandbox.mdx
@@ -0,0 +1,126 @@
+---
+title: "Code execution sandbox"
+sidebarTitle: "Code sandbox"
+description: "Warm an isolated sandbox on each chat turn, run an AI SDK executeCode tool, and tear down right before the run suspends — using chat.agent hooks and chat.local."
+---
+
+import RcBanner from "/snippets/ai-chat-rc-banner.mdx";
+
+<RcBanner />
+
+Use a **hosted code sandbox** (for example [E2B](https://e2b.dev)) when the model should run short scripts to analyze tool output (PostHog queries, CSV-like data, math) without executing arbitrary code on the Trigger worker host.
+
+This page describes a **durable chat** pattern that fits `chat.agent()`:
+
+- **Warm** the sandbox at the start of each turn (**non-blocking**).
+- **Reuse** it for every `executeCode` tool call during that turn (and across turns in the same run if you keep the handle).
+- **Dispose** it **right before the run suspends** waiting for the next user message — using the **`onChatSuspend`** hook, not `onTurnComplete`.
+
+
+## Why not tear down in `onTurnComplete`?
+
+After a turn finishes, the chat runtime still goes through an **idle** window and only then suspends. During that window the run is still executing — useful for `chat.defer()` work — so `onTurnComplete` fires too early to be the teardown point: the run hasn't suspended yet.
+
+The boundary you want for “turn done, about to sleep” is **`onChatSuspend`**, which fires right before the run transitions from idle to suspended. It provides the `phase` (`"preload"` or `"turn"`) and full chat context. See [onChatSuspend / onChatResume](/ai-chat/lifecycle-hooks#onchatsuspend--onchatresume).
+
+```mermaid
+sequenceDiagram
+  participant TurnStart as onTurnStart
+  participant Run as run / streamText
+  participant TurnDone as onTurnComplete
+  participant Idle as Idle window
+  participant Suspend as onChatSuspend
+  participant Sleep as suspended
+
+  TurnStart->>Run: warm sandbox (async)
+  Run->>TurnDone: persist / inject / etc.
+  TurnDone->>Idle: still running
+  Idle->>Suspend: dispose sandbox
+  Suspend->>Sleep: waiting for next message
+```
+
+## Recommended provider: E2B
+
+- **API key** auth — works from any Trigger.dev worker; no Vercel-only OIDC.
+- **Code Interpreter** SDK (`@e2b/code-interpreter`): long-lived sandbox, `runCode()`, `kill()`.
+
+Alternatives (Modal, Daytona, raw Docker) are fine but more DIY. Vercel’s sandbox + AI SDK helpers are a better fit when execution stays **on Vercel**, not on the Trigger worker.
+
+## Implementation sketch
+
+### 1. Run-scoped sandbox map
+
+Keep a `Map<runId, Promise<Sandbox>>` (or similar) in a **task-only module** so your Next.js app never imports it.
+
+### 2. `onTurnStart` — warm without blocking
+
+```ts
+onTurnStart: async ({ runId, ctx, ...rest }) => {
+  warmCodeSandbox(runId); // fire-and-forget Sandbox.create()
+  // ...persist messages, writer, etc.
+},
+```
+
+### 3. `chat.local` — run id for tools
+
+Tool `execute` functions do not receive hook payloads. Use [`chat.local()`](/ai-chat/chat-local) to store the current run id for the sandbox key, **initialized from `onTurnStart`** (same `runId` as the map):
+
+```ts
+// In the same task module as your tools
+import { chat } from "@trigger.dev/sdk/ai";
+
+export const codeSandboxRun = chat.local<{ runId: string }>({ id: "codeSandboxRun" });
+
+export function warmCodeSandbox(runId: string) {
+  codeSandboxRun.init({ runId });
+  // ...start Sandbox.create(), store promise in Map by runId
+}
+```
+
+The **`executeCode`** tool reads `codeSandboxRun.runId` and awaits the sandbox promise before `runCode`.
+
+### 4. `onChatSuspend` / `onComplete` — teardown
+
+Use **`onChatSuspend`** to dispose the sandbox right before the run suspends, and **`onComplete`** as a safety net when the run ends entirely.
+
+```ts
+export const aiChat = chat.agent({
+  id: "ai-chat",
+  // ...
+  onChatSuspend: async ({ phase, ctx }) => {
+    await disposeCodeSandboxForRun(ctx.run.id);
+  },
+  onComplete: async ({ ctx }) => {
+    await disposeCodeSandboxForRun(ctx.run.id);
+  },
+});
+```
+
+Unlike `onWait` (which fires for all wait types), `onChatSuspend` only fires at chat suspension points — no need to filter on `wait.type`. The `phase` discriminator tells you if this is a preload or post-turn suspension.
+
+Optional **`onChatResume`**: log or reset flags; a fresh sandbox can be warmed again on the next **`onTurnStart`**.
+
+### 5. AI SDK tool
+
+Wrap the provider in a normal AI SDK `tool({ inputSchema, execute })` (same pattern as `webFetch`). Keep tool definitions in **task code**, not in the Next.js server bundle.
+
+### 6. Environment
+
+Set **`E2B_API_KEY`** (or your provider’s secret) on the **Trigger environment** for the worker — not in public client env.
+
+## Typing `ctx`
+
+Every `chat.agent` lifecycle event and the `run` payload include **`ctx`**: the same **[`TaskRunContext`](/ai-chat/reference#task-context-ctx)** shape as `task({ run: (payload, { ctx }) => ... })`.
+
+```ts
+import type { TaskRunContext } from "@trigger.dev/sdk";
+```
+
+The alias **`Context`** is also exported from `@trigger.dev/sdk` and is the same type.
+
+## See also
+
+- [Database persistence for chat](/ai-chat/patterns/database-persistence) — conversation + session rows, hooks, token renewal
+- [Lifecycle hooks](/ai-chat/lifecycle-hooks)
+- [API Reference — `ctx` on events](/ai-chat/reference#task-context-ctx)
+- [Per-run data with `chat.local`](/ai-chat/chat-local)
diff --git a/docs/ai-chat/patterns/database-persistence.mdx b/docs/ai-chat/patterns/database-persistence.mdx
new file mode 100644
index 00000000000..5ee32f8a6bd
--- /dev/null
+++ b/docs/ai-chat/patterns/database-persistence.mdx
@@ -0,0 +1,401 @@
+---
+title: "Database persistence for chat"
+sidebarTitle: "Database persistence"
+description: "Split conversation state and live session metadata across hooks — preload, turn start, turn complete — without tying the pattern to a specific ORM or schema."
+---
+
+import RcBanner from "/snippets/ai-chat-rc-banner.mdx";
+
+<RcBanner />
+
+Durable chat runs can span **hours** and **many turns**. You usually want:
+
+1. **Conversation state** — full **`UIMessage[]`** (or equivalent) keyed by **`chatId`**, so reloads and history views work.
+2. **Live session state** — a **scoped access token** for the session and optionally **`lastEventId`** for stream resume.
+
+This page describes a **hook mapping** that works with any database. Adapt table and column names to your stack.
+
+## Conceptual data model
+
+You can use one table or two; the important split is **semantic**:
+
+| Concept | Purpose | Typical fields |
+| ------- | ------- | -------------- |
+| **Conversation** | Durable transcript + display metadata | Stable id (same as **`chatId`**), serialized **`uiMessages`**, title, model choice, owner/user id, timestamps |
+| **Active session** | Hydrate the transport on page reload | Same **`chatId`** as key (or FK), **`publicAccessToken`**, optional **`lastEventId`** |
+
+The **conversation** row is what your UI lists as "chats." The **session** row is what the **transport** needs after a refresh: a session-scoped PAT (so the transport doesn't have to re-mint on first paint) and the SSE resume cursor.
+
+Storing the current **`runId`** is optional — useful for telemetry / dashboard linking ("View this run") but not required for resume. The Session row owns its current run server-side; the transport reads from `session.out` keyed on `chatId`, so a run swap (continuation, upgrade) is invisible to your DB schema.
+
+<Note>
+  Store **`UIMessage[]`** in a JSON-compatible column, or normalize to a messages table — the pattern is *when* you read/write, not *how* you encode rows.
+</Note>
+
+## Where each hook writes
+
+This pattern covers **durable DB rows** (the conversation and the active session). Per-process in-memory state ([`chat.local`](/ai-chat/chat-local), DB connection pools, sandboxes, etc.) belongs in [`onBoot`](/ai-chat/lifecycle-hooks#onboot) — it fires on every fresh worker including continuation runs, where `onPreload` and `onChatStart` do not.
+
+### `onPreload` (optional)
+
+When the user triggers [preload](/ai-chat/fast-starts#preload), the run starts **before** the first user message.
+
+- Ensure the **conversation** row exists (create or no-op).
+- **Upsert session**: **`chatAccessToken`** from the event (a session-scoped PAT covering both `read:sessions:{chatId}` and `write:sessions:{chatId}`).
+- Load any **user / tenant context** you need for prompts (`clientData`).
+
+If you skip preload, do the equivalent in **`onChatStart`** when **`preloaded`** is false.
+
+### `onChatStart` (chat's first message, non-preloaded path)
+
+- Fires **once per chat**, on the very first user message. Does NOT fire on continuation runs (post-`endRun`, post-waitpoint-timeout, post-`chat.requestUpgrade`) or on OOM-retry attempts.
+- If **`preloaded`** is true, return early — **`onPreload`** already ran.
+- Otherwise mirror preload: user/context, conversation create, session upsert.
+- No need to gate the conversation create on `continuation` — it's always a brand-new chat at this point.
+- For continuation runs that need to refresh per-run state (new PAT, new `lastEventId`), do it in **`onTurnStart`** / **`onTurnComplete`** — both fire on every turn including the first turn of a continuation run.
+
+### `onTurnStart`
+
+- **`await`** the write that persists **`uiMessages`** (full accumulated history including the new user turn) **before** the hook returns — `chat.agent` does not begin streaming until `onTurnStart` resolves, so the awaited write is what guarantees the user message is durable before the stream starts.
+
+<Warning>
+**Don't use [`chat.defer()`](/ai-chat/background-injection#chat-defer-standalone) for the message write here.** `chat.defer` is fire-and-forget — the hook resolves before the write lands and the stream starts immediately. If the user refreshes mid-stream, the next page load reads `[]` from your DB, the resumed SSE stream pushes the assistant into an empty array, and the user's message disappears from the rendered conversation forever.
+
+```ts
+// ❌ Bad — non-blocking write, mid-stream refresh drops the user message.
+onTurnStart: async ({ chatId, uiMessages }) => {
+  chat.defer(db.chat.update({ where: { id: chatId }, data: { messages: uiMessages } }));
+},
+
+// ✅ Good — awaited, durable before the model starts.
+onTurnStart: async ({ chatId, uiMessages }) => {
+  await db.chat.update({ where: { id: chatId }, data: { messages: uiMessages } });
+},
+```
+
+`chat.defer` is for writes whose timing doesn't matter for resume — analytics, audit logs, search-index updates, etc. Anything the next page load reads needs to land before the stream begins.
+</Warning>
+
+### `onTurnComplete`
+
+- Persist **`uiMessages`** again with the **assistant** reply finalized.
+- **Upsert session** with the fresh **`chatAccessToken`** and **`lastEventId`** from the event.
+
+**`lastEventId`** lets the frontend [resume](/ai-chat/frontend) without replaying SSE events it already applied. Treat it as part of session state, not optional polish, if you care about duplicate chunks after refresh.
+
+<Warning>
+**Write the messages and `lastEventId` in a single transaction.** The two values are read by separate, parallel queries on the next page load (one fetches the conversation, the other fetches the session). If a refresh races between the two writes, the page can see the full history with the assistant message already persisted, but a stale `lastEventId` from the previous turn. The transport then resumes from that stale cursor and replays this turn's chunks on top of the already-persisted assistant message, producing a duplicated render.
+
+```ts
+// ✅ Atomic — refresh on the next page load reads both writes consistently.
+await db.$transaction([
+  db.chat.update({ where: { id: chatId }, data: { messages: uiMessages } }),
+  db.chatSession.upsert({
+    where: { id: chatId },
+    create: { id: chatId, publicAccessToken: chatAccessToken, lastEventId },
+    update: { publicAccessToken: chatAccessToken, lastEventId },
+  }),
+]);
+
+// ❌ Two awaits — narrow race window where messages are post-write but
+// lastEventId is still pre-write. A page refresh that lands here will
+// duplicate the assistant message on resume.
+await db.chat.update({ where: { id: chatId }, data: { messages: uiMessages } });
+await db.chatSession.upsert({ /* ... */ });
+```
+</Warning>
+
+## Token renewal (app server)
+
+The persisted PAT has a TTL (see **`chatAccessTokenTTL`** on **`chat.agent`**, default 1h). When the transport gets a **401** on a session-PAT-authed request, it calls your **`accessToken`** callback to mint a fresh PAT — no DB lookup required, since the session is keyed on `chatId` (which the transport already has).
+
+Your `accessToken` callback typically just wraps `auth.createPublicToken`:
+
+```ts
+"use server";
+import { auth } from "@trigger.dev/sdk";
+
+export async function mintChatAccessToken(chatId: string) {
+  return auth.createPublicToken({
+    scopes: { read: { sessions: chatId }, write: { sessions: chatId } },
+    expirationTime: "1h",
+  });
+}
+```
+
+If you want to keep your DB session row in sync, the transport's **`onSessionChange`** callback fires every time the cached PAT changes — persist the new value there.
+
+No Trigger task code needs to run for renewal.
+
+## Minimal pseudocode
+
+```typescript
+// Pseudocode — replace the ensureUser / upsert* / save* helpers below with your DB layer.
+
+chat.agent({
+  id: "my-chat",
+  clientDataSchema: z.object({ userId: z.string() }),
+
+  onPreload: async ({ chatId, chatAccessToken, clientData }) => {
+    if (!clientData) return;
+    await ensureUser(clientData.userId);
+    await upsertConversation({ id: chatId, userId: clientData.userId /* ... */ });
+    await upsertSession({ chatId, publicAccessToken: chatAccessToken });
+  },
+
+  onChatStart: async ({ chatId, chatAccessToken, clientData, preloaded }) => {
+    if (preloaded) return;
+    // Fires once per chat — no continuation gate needed.
+    await ensureUser(clientData.userId);
+    await upsertConversation({ id: chatId, userId: clientData.userId /* ... */ });
+    await upsertSession({ chatId, publicAccessToken: chatAccessToken });
+  },
+
+  onTurnStart: async ({ chatId, uiMessages }) => {
+    // Awaited, not chat.defer — see the warning in `onTurnStart` above.
+    await saveConversationMessages(chatId, uiMessages);
+  },
+
+  onTurnComplete: async ({ chatId, uiMessages, chatAccessToken, lastEventId }) => {
+    // Atomic: messages + lastEventId must be readable consistently on resume.
+    // See the warning above for why a non-atomic write causes duplicate renders.
+    await db.$transaction([
+      saveConversationMessagesQuery(chatId, uiMessages),
+      upsertSessionQuery({ chatId, publicAccessToken: chatAccessToken, lastEventId }),
+    ]);
+  },
+
+  run: async ({ messages, signal }) => {
+    /* streamText, etc. */
+  },
+});
+```
+
+## Alternative: `hydrateMessages`
+
+For apps that need the backend to be the single source of truth for message history — abuse prevention, branching conversations, or rollback support — use [`hydrateMessages`](/ai-chat/lifecycle-hooks#hydratemessages) instead of relying on the frontend's accumulated state.
+
+With hydration, the hook loads messages from your database on every turn. The frontend's messages are ignored (except for the new user message, which arrives in `incomingMessages`):
+
+```ts
+export const myChat = chat.agent({
+  id: "my-chat",
+  hydrateMessages: async ({ chatId, trigger, incomingMessages }) => {
+    const record = await db.chat.findUnique({ where: { id: chatId } });
+    const stored = record?.messages ?? [];
+
+    if (trigger === "submit-message" && incomingMessages.length > 0) {
+      stored.push(incomingMessages[incomingMessages.length - 1]!);
+      await db.chat.update({ where: { id: chatId }, data: { messages: stored } });
+    }
+
+    return stored;
+  },
+  onTurnComplete: async ({ chatId, uiMessages, chatAccessToken, lastEventId }) => {
+    // Persist the response and refresh session state atomically — see the
+    // warning in the previous section for why these two writes have to be
+    // in the same transaction.
+    await db.$transaction([
+      db.chat.update({ where: { id: chatId }, data: { messages: uiMessages } }),
+      db.chatSession.upsert({
+        where: { id: chatId },
+        create: { id: chatId, publicAccessToken: chatAccessToken, lastEventId },
+        update: { publicAccessToken: chatAccessToken, lastEventId },
+      }),
+    ]);
+  },
+  run: async ({ messages, signal }) => {
+    return streamText({ model: anthropic("claude-sonnet-4-5"), messages, abortSignal: signal });
+  },
+});
+```
+
+This replaces the `onTurnStart` persistence pattern — the hook handles both loading and persisting the new message in one place.
+
+## Design notes
+
+- **`chatId`** is stable for the life of a thread and is the only identifier the transport persists. Runs come and go (idle continuation, upgrade, cancel/restart) but the chat keeps its identity.
+- **`continuation: true`** means "same logical chat, new run" — refresh the persisted PAT, don't assume an empty conversation.
+- The current `runId` is available on every hook event for telemetry / dashboard linking ("View this run"), but you don't need to persist it for resume to work — the transport addresses by `chatId`.
+- Keep **task modules** that perform writes **out of** browser bundles; the pattern assumes persistence runs **in the worker** (or your BFF that the task calls).
+
+## Complete example
+
+End-to-end implementation across the three files involved: agent task, server actions, and React component.
+
+<Warning>
+  The example below trusts raw `chatId` and returns rows without filtering by user. In a real multi-user app, **scope every query by the authenticated user** — read the user from your auth/session in each server action and add `where: { userId }` to all `db.chat.*` and `db.chatSession.*` queries. Without that, one client could read or delete another user's chat state, and `getAllSessions()` would leak other users' `publicAccessToken`s. The snippet keeps auth out of the way to focus on the persistence shape.
+</Warning>
+
+<CodeGroup>
+```ts trigger/chat.ts
+import { chat } from "@trigger.dev/sdk/ai";
+import { streamText, stepCountIs } from "ai";
+import { anthropic } from "@ai-sdk/anthropic";
+import { z } from "zod";
+import { db } from "@/lib/db";
+
+export const myChat = chat.agent({
+  id: "my-chat",
+  clientDataSchema: z.object({
+    userId: z.string(),
+  }),
+  onChatStart: async ({ chatId, clientData }) => {
+    await db.chat.create({
+      data: { id: chatId, userId: clientData.userId, title: "New chat", messages: [] },
+    });
+  },
+  onTurnStart: async ({ chatId, uiMessages, runId, chatAccessToken }) => {
+    // Persist messages + session before streaming
+    await db.chat.update({
+      where: { id: chatId },
+      data: { messages: uiMessages },
+    });
+    await db.chatSession.upsert({
+      where: { id: chatId },
+      create: { id: chatId, runId, publicAccessToken: chatAccessToken },
+      update: { runId, publicAccessToken: chatAccessToken },
+    });
+  },
+  onTurnComplete: async ({ chatId, uiMessages, runId, chatAccessToken, lastEventId }) => {
+    // Persist assistant response + stream position atomically — see the
+    // race-condition warning earlier on this page.
+    await db.$transaction([
+      db.chat.update({
+        where: { id: chatId },
+        data: { messages: uiMessages },
+      }),
+      db.chatSession.upsert({
+        where: { id: chatId },
+        create: { id: chatId, runId, publicAccessToken: chatAccessToken, lastEventId },
+        update: { runId, publicAccessToken: chatAccessToken, lastEventId },
+      }),
+    ]);
+  },
+  run: async ({ messages, signal }) => {
+    return streamText({
+      model: anthropic("claude-sonnet-4-5"),
+      messages,
+      abortSignal: signal,
+      stopWhen: stepCountIs(15),
+    });
+  },
+});
+```
+
+```ts app/actions.ts
+"use server";
+
+import { auth } from "@trigger.dev/sdk";
+import { chat } from "@trigger.dev/sdk/ai";
+import { db } from "@/lib/db";
+
+export const startChatSession = chat.createStartSessionAction("my-chat");
+
+export async function mintChatAccessToken(chatId: string) {
+  return auth.createPublicToken({
+    scopes: { read: { sessions: chatId }, write: { sessions: chatId } },
+    expirationTime: "1h",
+  });
+}
+
+export async function getChatMessages(chatId: string) {
+  const found = await db.chat.findUnique({ where: { id: chatId } });
+  return found?.messages ?? [];
+}
+
+export async function getAllSessions() {
+  const sessions = await db.chatSession.findMany();
+  const result: Record<
+    string,
+    {
+      publicAccessToken: string;
+      lastEventId?: string;
+    }
+  > = {};
+  for (const s of sessions) {
+    result[s.id] = {
+      publicAccessToken: s.publicAccessToken,
+      lastEventId: s.lastEventId ?? undefined,
+    };
+  }
+  return result;
+}
+
+export async function deleteSession(chatId: string) {
+  await db.chatSession.delete({ where: { id: chatId } }).catch(() => {});
+}
+```
+
+```tsx app/components/chat.tsx
+"use client";
+
+import { useChat } from "@ai-sdk/react";
+import { useTriggerChatTransport } from "@trigger.dev/sdk/chat/react";
+import type { myChat } from "@/trigger/chat";
+import { mintChatAccessToken, startChatSession, deleteSession } from "@/app/actions";
+
+export function Chat({ chatId, initialMessages, initialSessions }) {
+  const transport = useTriggerChatTransport<typeof myChat>({
+    task: "my-chat",
+    accessToken: ({ chatId }) => mintChatAccessToken(chatId),
+    startSession: ({ chatId, clientData }) =>
+      startChatSession({ chatId, clientData }),
+    clientData: { userId: currentUser.id }, // Type-checked against clientDataSchema
+    sessions: initialSessions,
+    onSessionChange: (id, session) => {
+      if (!session) deleteSession(id);
+    },
+  });
+
+  const { messages, sendMessage, stop, status } = useChat({
+    id: chatId,
+    messages: initialMessages,
+    transport,
+    resume: initialMessages.length > 0,
+  });
+
+  return (
+    <div>
+      {messages.map((m) => (
+        <div key={m.id}>
+          <strong>{m.role}:</strong>
+          {m.parts.map((part, i) =>
+            part.type === "text" ? <span key={i}>{part.text}</span> : null
+          )}
+        </div>
+      ))}
+
+      <form
+        onSubmit={(e) => {
+          e.preventDefault();
+          const input = e.currentTarget.querySelector("input");
+          if (input?.value) {
+            sendMessage({ text: input.value });
+            input.value = "";
+          }
+        }}
+      >
+        <input placeholder="Type a message..." />
+        <button type="submit" disabled={status === "streaming"}>
+          Send
+        </button>
+        {status === "streaming" && (
+          <button type="button" onClick={stop}>
+            Stop
+          </button>
+        )}
+      </form>
+    </div>
+  );
+}
+```
+
+</CodeGroup>
+
+## See also
+
+- [Lifecycle hooks](/ai-chat/lifecycle-hooks)
+- [Session management](/ai-chat/frontend#session-management) — `resume`, `lastEventId`, transport
+- [`chat.defer()`](/ai-chat/background-injection#chat-defer-standalone) — non-blocking writes during a turn
+- [Code execution sandbox](/ai-chat/patterns/code-sandbox) — combines **`onChatSuspend`** / **`onComplete`** with this persistence model
diff --git a/docs/ai-chat/patterns/human-in-the-loop.mdx b/docs/ai-chat/patterns/human-in-the-loop.mdx
new file mode 100644
index 00000000000..e53182fc954
--- /dev/null
+++ b/docs/ai-chat/patterns/human-in-the-loop.mdx
@@ -0,0 +1,272 @@
+---
+title: "Human-in-the-loop"
+sidebarTitle: "Human-in-the-loop"
+description: "Pause the agent mid-response to ask the user a clarifying question, then resume with their answer."
+---
+
+import RcBanner from "/snippets/ai-chat-rc-banner.mdx";
+
+<RcBanner />
+
+Some turns need to stop and ask the user something before they can finish — picking between options, confirming a destructive action, or clarifying an ambiguous request. The AI SDK calls this **human-in-the-loop** (HITL), and the building block is a tool with no `execute` function.
+
+When the LLM calls a tool that has no `execute`, `streamText` ends with the tool call still pending. The turn completes cleanly, the frontend renders UI to collect the answer, and when the user responds, a new turn resumes with the answer merged into the same assistant message.
+
+## How it works
+
+```
+Turn N:
+  User message → run()
+  LLM streams text → calls askUser tool (no execute)
+  streamText ends with tool-call in `input-available` state
+  onTurnComplete fires (finishReason = "tool-calls")
+  Agent idle
+
+Frontend:
+  Renders question + option buttons from tool input
+  User clicks → addToolOutput({ tool, toolCallId, output })
+  sendAutomaticallyWhen: lastAssistantMessageIsCompleteWithToolCalls
+  → sendMessage() fires next turn
+
+Turn N+1:
+  hydrateMessages / accumulator sees the updated assistant message
+  run() is called, LLM continues from the tool result
+  onTurnComplete fires (finishReason = "stop", responseMessage is the FULL merged message)
+```
+
+The AI SDK's `toUIMessageStream` automatically reuses the assistant message ID across the pause (we pass `originalMessages` internally), so `responseMessage` in the post-resume `onTurnComplete` is the **full merged message** — the original text, the completed tool call, and any follow-up content — not just the new parts.
+
+## Backend: define the tool
+
+A HITL tool has an `inputSchema` describing what the model can ask, but **no `execute` function**. When the LLM calls it, `streamText` returns control to your agent.
+
+```ts trigger/my-chat.ts
+import { chat } from "@trigger.dev/sdk/ai";
+import { streamText, tool, stepCountIs } from "ai";
+import { anthropic } from "@ai-sdk/anthropic";
+import { z } from "zod";
+
+const askUser = tool({
+  description:
+    "Ask the user a clarifying question when you need their input. " +
+    "Present 2-4 options for them to pick from.",
+  inputSchema: z.object({
+    question: z.string(),
+    options: z
+      .array(
+        z.object({
+          id: z.string(),
+          label: z.string(),
+          description: z.string().optional(),
+        })
+      )
+      .min(2)
+      .max(4),
+  }),
+  // No execute function — streamText ends, the frontend supplies the output
+  // via addToolOutput, and the next turn continues from the result.
+});
+
+export const myChat = chat.agent({
+  id: "my-chat",
+  run: async ({ messages, signal }) => {
+    return streamText({
+      model: anthropic("claude-sonnet-4-5"),
+      messages,
+      tools: { askUser },
+      abortSignal: signal,
+      stopWhen: stepCountIs(15),
+    });
+  },
+});
+```
+
+## Frontend: render the question and collect the answer
+
+Two pieces on the client:
+
+1. **UI for the pending tool call** — render when the tool part is in `input-available` state, i.e. the LLM has called the tool but there's no output yet.
+2. **Auto-send on resolution** — use `sendAutomaticallyWhen: lastAssistantMessageIsCompleteWithToolCalls` so answering kicks off the next turn without the user having to hit "send."
+
+```tsx
+import { useChat, lastAssistantMessageIsCompleteWithToolCalls } from "@ai-sdk/react";
+import { useTriggerChatTransport } from "@trigger.dev/sdk/chat/react";
+
+function ChatView({ chatId }: { chatId: string }) {
+  const transport = useTriggerChatTransport({
+    task: "my-chat",
+    accessToken: ({ chatId }) => mintChatAccessToken(chatId),
+    startSession: ({ chatId, clientData }) =>
+      startChatSession({ chatId, clientData }),
+  });
+  const { messages, sendMessage, addToolOutput } = useChat({
+    id: chatId,
+    transport,
+    sendAutomaticallyWhen: lastAssistantMessageIsCompleteWithToolCalls,
+  });
+
+  return (
+    <>
+      {messages.map((m) =>
+        m.parts.map((part, i) => {
+          if (part.type === "tool-askUser" && part.state === "input-available") {
+            return (
+              <AskUserCard
+                key={i}
+                question={part.input.question}
+                options={part.input.options}
+                onAnswer={(opt) =>
+                  addToolOutput({
+                    tool: "askUser",
+                    toolCallId: part.toolCallId,
+                    output: { optionId: opt.id, label: opt.label },
+                  })
+                }
+              />
+            );
+          }
+          if (part.type === "text") return <Markdown key={i}>{part.text}</Markdown>;
+          return null;
+        })
+      )}
+    </>
+  );
+}
+```
+
+`addToolOutput` patches the assistant message locally with `state: "output-available"` and fills in `output`. `lastAssistantMessageIsCompleteWithToolCalls` detects that every pending tool call now has a result, and `useChat` fires a new `sendMessage` — the backend picks it up as the next turn.
+
+## Detecting a paused turn in `onTurnComplete`
+
+Two ways to detect "this turn paused for user input" vs "this turn finished normally":
+
+### Via `finishReason` (recommended)
+
+The AI SDK's finish reason is surfaced on every `onTurnComplete` event. If the model stopped on tool calls, it's `"tool-calls"`:
+
+```ts
+onTurnComplete: async ({ finishReason, responseMessage }) => {
+  if (finishReason === "tool-calls") {
+    // Turn paused — assistant message has pending tool call(s)
+    const pending = responseMessage?.parts.filter(
+      (p) => p.type.startsWith("tool-") && p.state === "input-available"
+    );
+    // Persist as a checkpoint / partial turn
+  } else {
+    // finishReason === "stop" — normal completion
+    // Persist as a completed turn
+  }
+};
+```
+
+<Note>
+  `finishReason` is undefined only for manual `chat.pipe()` flows or aborted streams. For the common `run() → return streamText(...)` pattern, it's always populated.
+</Note>
+
+### Via response parts
+
+If you need more nuance (e.g. which specific tool is pending), use `chat.history.getPendingToolCalls()`:
+
+```ts
+const pending = chat.history.getPendingToolCalls();
+// [{ toolCallId, toolName, messageId }]
+```
+
+The result reflects the most recent assistant message: the one waiting on `addToolOutput`. Use it from `onAction` to gate fresh user turns ("can't send a new message while a HITL is open"), or from `onTurnComplete` to decide what to persist.
+
+The checks `finishReason === "tool-calls"` and `chat.history.getPendingToolCalls().length > 0` are equivalent in practice. Use `finishReason` for dispatch and the helper for detail.
+
+### Acting once per net-new tool result
+
+When the user's `addToolOutput` round-trips a tool answer back to the agent, the wire message carries the resolved tool part. If you want to fire side effects (audit log, billing, notifications) exactly once per resolved tool call, do it in `hydrateMessages`, before the runtime merges the incoming message into the chain. `chat.history.extractNewToolResults(message)` returns only the parts whose `toolCallId` isn't already resolved on the chain:
+
+```ts
+hydrateMessages: async ({ incomingMessages }) => {
+  for (const msg of incomingMessages) {
+    if (msg.role !== "assistant") continue;
+    for (const r of chat.history.extractNewToolResults(msg)) {
+      await auditLog.record({
+        toolCallId: r.toolCallId,
+        toolName: r.toolName,
+        output: r.output,
+        errorText: r.errorText, // set only for output-error parts
+      });
+    }
+  }
+  return incomingMessages;
+},
+```
+
+`extractNewToolResults` compares against the current `chat.history`. By the time `onTurnComplete` fires, the chain already contains `responseMessage`, so the helper returns `[]` there. Use it where the message is from outside the accumulator: `hydrateMessages`, `onAction` if the action carries a message, or any custom pre-merge code path.
+
+## Persistence: one message vs one record per pause
+
+Because the AI SDK reuses the assistant message ID across the pause, the "same turn" from the user's perspective maps to **two `onTurnComplete` firings** on the server — but both receive a `responseMessage` with the **same `id`**, and the second firing's `responseMessage` contains the fully merged content.
+
+Two common persistence patterns:
+
+### Overwrite on every turn (simplest)
+
+Just store the latest `uiMessages` array on every `onTurnComplete`. The paused-turn write is overwritten by the resume-turn write; the final DB state has the full merged message.
+
+```ts
+onTurnComplete: async ({ chatId, uiMessages }) => {
+  await db.chat.update({
+    where: { id: chatId },
+    data: { messages: uiMessages },
+  });
+},
+```
+
+Use this unless you specifically need an audit trail.
+
+### Checkpoint nodes (immutable history)
+
+For apps that want every pause point recorded as its own immutable snapshot (branching, replay, diff review), save a checkpoint when paused and a sibling when complete:
+
+```ts
+onTurnComplete: async ({ chatId, responseMessage, finishReason, uiMessages }) => {
+  if (!responseMessage) return;
+
+  if (finishReason === "tool-calls") {
+    // Paused — save a checkpoint
+    await db.turnCheckpoint.create({
+      data: {
+        chatId,
+        messageId: responseMessage.id,
+        parts: responseMessage.parts,
+        kind: "partial",
+      },
+    });
+  } else {
+    // Completed — save a sibling with the merged full message
+    await db.turnCheckpoint.create({
+      data: {
+        chatId,
+        messageId: responseMessage.id,
+        parts: responseMessage.parts,
+        kind: "final",
+      },
+    });
+  }
+
+  // Always update the canonical chat record for `hydrateMessages` to load
+  await db.chat.update({
+    where: { id: chatId },
+    data: { messages: uiMessages },
+  });
+};
+```
+
+Both writes carry the same `responseMessage.id` — they're checkpoints of the same logical message. Grouping by `messageId` and ordering by `createdAt` gives you the progression.
+
+## Multi-pause turns
+
+A single logical turn can pause more than once — the LLM asks question A, gets the answer, thinks, then asks question B before finishing. Each pause fires its own `onTurnComplete` with `finishReason === "tool-calls"`; only the last firing has `finishReason === "stop"`. The checkpoint pattern above handles this naturally — each pause adds a new checkpoint sharing the same `responseMessage.id`.
+
+## Gotchas
+
+- **Don't set an `execute` function on the HITL tool.** If it has one, `streamText` will call it immediately instead of handing control back.
+- **The frontend must use `sendAutomaticallyWhen`.** Without it, the user has to press Enter after answering — `addToolOutput` updates local state but doesn't fire a new turn by itself.
+- **Don't mutate `responseMessage` in `onTurnComplete`.** It's the captured snapshot. To add custom parts, use `chat.response.append()` in `onBeforeTurnComplete` (while the stream is open).
+- **Stop handling.** If the user stops the run while a pause is active (`chat.stop()` on the transport), `onTurnComplete` fires with `stopped: true` and `finishReason` reflecting the last successful step. Treat stopped paused turns the same as stopped normal turns.
diff --git a/docs/ai-chat/patterns/large-payloads.mdx b/docs/ai-chat/patterns/large-payloads.mdx
new file mode 100644
index 00000000000..859c51c0d62
--- /dev/null
+++ b/docs/ai-chat/patterns/large-payloads.mdx
@@ -0,0 +1,169 @@
+---
+title: "Large payloads in chat.agent"
+sidebarTitle: "Large payloads"
+description: "Why a single chunk on the chat stream is capped at ~1 MiB, what error you'll see, and how to work around it with ID references."
+---
+
+import RcBanner from "/snippets/ai-chat-rc-banner.mdx";
+
+<RcBanner />
+
+The realtime stream that backs `chat.agent` enforces a **per-record cap of ~1 MiB** (`1048576` bytes minus a small envelope reserve). Anything written through the chat output — auto-piped LLM chunks, `chat.response.write`, custom `writer.write` parts — counts as one record per chunk and is rejected if it crosses the cap.
+
+This is a platform-level limit and cannot be raised per project or per stream.
+
+## What you'll see
+
+When a chunk crosses the cap, the run fails with a typed [`ChatChunkTooLargeError`](/ai-chat/error-handling):
+
+```
+ChatChunkTooLargeError: chat.agent chunk of type "tool-output-available" is 2000126 bytes,
+over the realtime stream's per-record cap of 1047552 bytes. For oversized payloads
+(e.g. large tool outputs), write the value to your own store and emit only an id/url
+through the chat stream — see https://trigger.dev/docs/ai-chat/patterns/large-payloads.
+```
+
+The error includes:
+
+- `chunkType` — discriminant on the chunk that failed (e.g. `tool-output-available`, `data-handover`, `text-delta`).
+- `chunkSize` — UTF-8 byte count of the JSON-serialized record.
+- `maxSize` — the effective cap.
+
+You can catch and re-throw / log it explicitly:
+
+```ts
+import { ChatChunkTooLargeError, isChatChunkTooLargeError } from "@trigger.dev/sdk";
+
+try {
+  await someWrite();
+} catch (err) {
+  if (isChatChunkTooLargeError(err)) {
+    logger.error("Oversized chunk", { type: err.chunkType, size: err.chunkSize });
+  }
+  throw err;
+}
+```
+
+## Most common cause: large tool outputs
+
+If you return a `streamText` result from `run()`, the AI SDK auto-pipes its `UIMessageStream` into the chat output. A tool whose result object is large (a fetched HTML body, a CSV blob, an image as base64, a deep DB row dump) gets emitted as one `tool-output-available` chunk — and that's the chunk that overruns.
+
+**Diagnose first**: log tool sizes during development.
+
+```ts
+const fetchPage = tool({
+  inputSchema: z.object({ url: z.string().url() }),
+  execute: async ({ url }) => {
+    const html = await (await fetch(url)).text();
+    if (html.length > 500_000) {
+      logger.warn("Large tool output", { tool: "fetchPage", bytes: html.length });
+    }
+    return { html };
+  },
+});
+```
+
+If the size is unbounded by input, fix the tool — not the stream.
+
+## ID-reference pattern
+
+Store the large value in your own database (or object store) and emit only an identifier through the chat stream. The frontend fetches the full payload separately on demand.
+
+This keeps the chat stream small, predictable, and resumable, and lets you reuse the value across turns or sessions without re-streaming it.
+
+<CodeGroup>
+
+```ts task.ts
+import { chat } from "@trigger.dev/sdk/ai";
+import { tool } from "ai";
+import { z } from "zod";
+
+const fetchPage = tool({
+  description: "Fetch a URL and store the HTML for later inspection.",
+  inputSchema: z.object({ url: z.string().url() }),
+  execute: async ({ url }) => {
+    const html = await (await fetch(url)).text();
+    const docId = await db.documents.create({
+      data: { url, html, byteSize: html.length },
+    });
+
+    // Tool result is small — just an id and metadata.
+    // The model and the UI both work with this lightweight handle.
+    return {
+      docId,
+      url,
+      byteSize: html.length,
+      preview: html.slice(0, 500),
+    };
+  },
+});
+```
+
+```ts api/document/[id]/route.ts
+// Frontend fetches the full document on demand.
+import { auth, currentUser } from "@/lib/auth";
+
+export async function GET(_req: Request, { params }: { params: { id: string } }) {
+  const user = await currentUser();
+  const doc = await db.documents.findUniqueOrThrow({
+    where: { id: params.id, userId: user.id },
+  });
+  return new Response(doc.html, { headers: { "content-type": "text/html" } });
+}
+```
+
+```tsx component.tsx
+function ToolResultCard({ part }: { part: ToolUIPart<"fetchPage"> }) {
+  const { docId, url, byteSize, preview } = part.output;
+  return (
+    <div>
+      <p>{url} — {(byteSize / 1024).toFixed(0)} KB</p>
+      <pre>{preview}…</pre>
+      <a href={`/api/document/${docId}`}>Open full HTML</a>
+    </div>
+  );
+}
+```
+
+</CodeGroup>
+
+The same pattern works for `chat.response.write` — push the heavy value to your DB, then emit a small data part with the id:
+
+```ts
+const id = await db.attachments.create({ data: { content: hugeReport } });
+chat.response.write({ type: "data-report", data: { id, summary: shortSummary } });
+```
+
+<Tip>
+  Persist the large value **before** you emit the id chunk. If the chunk reaches the UI before the row is written, the frontend gets a 404 on the follow-up fetch.
+</Tip>
+
+## Transient UI parts
+
+For progress indicators or status data that should stream to the UI but not persist into the response message, use `chat.response.write` with `transient: true`. The chunk still travels on the chat stream (so the 1 MiB per-record cap still applies), but it never lands in `responseMessage` or `uiMessages`:
+
+```ts
+chat.response.write({
+  type: "data-progress",
+  data: { percent: 50 },
+  transient: true,
+});
+```
+
+For genuinely high-volume diagnostic data (per-token traces, large debug dumps), don't try to ship it through the realtime stream at all. Log to your own store (DB, object storage, OTel logger) and surface it through a separate UI route that isn't tied to the chat session.
+
+## What does **not** trigger the cap
+
+These calls don't go through the realtime stream and have no per-record cap:
+
+- [`chat.history.set` / `slice` / `replace` / `remove`](/ai-chat/backend#chat-history) — locals-only mutations on the in-memory message list.
+- [`chat.inject`](/ai-chat/background-injection#chat-inject) — appends to the run's pending message queue, not the stream.
+- [`chat.defer`](/ai-chat/background-injection#chat-defer-standalone) — promise registry; awaited at turn boundaries, never serialized to the stream.
+
+The control markers `chat.agent` emits internally (`trigger:turn-complete`, `trigger:upgrade-required`) are tiny by construction.
+
+## See also
+
+- [Error handling](/ai-chat/error-handling) — how `ChatChunkTooLargeError` flows through the layers.
+- [Database persistence](/ai-chat/patterns/database-persistence) — your own store as the durable backing for ID references.
+- [Client protocol](/ai-chat/client-protocol) — chunk shapes that travel on the chat stream.
diff --git a/docs/ai-chat/patterns/oom-resilience.mdx b/docs/ai-chat/patterns/oom-resilience.mdx
new file mode 100644
index 00000000000..097fe796cf7
--- /dev/null
+++ b/docs/ai-chat/patterns/oom-resilience.mdx
@@ -0,0 +1,120 @@
+---
+title: "OOM resilience"
+sidebarTitle: "OOM resilience"
+description: "Recover from out-of-memory errors mid-turn by automatically retrying the failed turn on a larger machine — without losing the in-flight user message or re-processing completed turns."
+---
+
+import RcBanner from "/snippets/ai-chat-rc-banner.mdx";
+
+<RcBanner />
+
+When a `chat.agent` turn runs out of memory, the worker process dies and everything in it is gone: the in-flight LLM call, the accumulator, any tool execution mid-flight. By default, Trigger.dev surfaces the OOM as a run failure.
+
+Setting `oomMachine` opts the agent into automatic recovery: the failed turn re-runs on a larger machine, picks up the user message that triggered the OOM (without re-processing earlier completed turns), and produces a normal response.
+
+## Setup
+
+```ts
+import { chat } from "@trigger.dev/sdk/ai";
+
+export const myChat = chat.agent({
+  id: "my-chat",
+  machine: "small-1x",         // default machine
+  oomMachine: "medium-2x",     // fallback on OOM
+  run: async ({ messages, signal }) =>
+    streamText({ model, messages, abortSignal: signal }),
+});
+```
+
+That's the entire opt-in. With `oomMachine` set, the agent gets:
+
+- **`retry.maxAttempts: 2`** internally — one retry for OOM only; non-OOM errors don't retry.
+- **`retry.outOfMemory.machine: oomMachine`** — the fresh attempt boots on the larger machine.
+- **`session.in` cursor recovery** — the new attempt skips records belonging to turns that already completed on the prior attempt and only re-runs the OOM'd turn.
+
+`chat.agent` does not expose generic `retry` options. OOM recovery is the only retry path because retrying an LLM-driven loop on non-OOM errors tends to be expensive and side-effecting. Drop down to a [raw `task()` with chat primitives](/ai-chat/backend#raw-task-with-primitives) if you need richer retry semantics.
+
+## How recovery works
+
+The recovery doesn't need any customer-side persistence to avoid duplicate processing. It uses two pieces of durable state Trigger already maintains for every chat:
+
+- **`session.out`** — the durable response stream. Every successful turn writes a `trigger:turn-complete` chunk here.
+- **`session.in`** — the durable input stream. Every user message after the first turn lands here as a record with a server-assigned timestamp.
+
+On retry boot, the SDK:
+
+1. Scans `session.out` for the latest `trigger:turn-complete` chunk and reads its timestamp. Call this `T_last_complete`.
+2. Sets a per-stream filter on `session.in` so any record with `timestamp <= T_last_complete` is dropped before it reaches the turn loop.
+3. Begins normal processing. The first record that passes the filter is the message that triggered the OOM, followed by any newer messages that arrived during the retry window.
+
+Result: turns 1..N-1 are not re-processed, turn N runs on the larger machine, and the conversation continues.
+
+```mermaid
+sequenceDiagram
+  participant User
+  participant Run as chat.agent run
+  participant SessionIn as session.in
+  participant SessionOut as session.out
+
+  User->>SessionIn: u2 (turn 2)
+  Run->>SessionIn: read u2
+  Run->>SessionOut: turn-complete (T1)
+  User->>SessionIn: u3 (turn 3)
+  Run->>SessionIn: read u3
+  Run->>SessionOut: turn-complete (T2)
+  User->>SessionIn: u4 (turn 4)
+  Run->>SessionIn: read u4
+  Note over Run: OOM mid-turn
+  Run->>Run: ⚠️ killed
+  Note over Run: Attempt 2 boots on oomMachine
+  Run->>SessionOut: scan → T_last_complete = T2
+  Run->>SessionIn: read with filter (ts > T2)
+  SessionIn-->>Run: u2 (filtered, ts < T2)
+  SessionIn-->>Run: u3 (filtered, ts < T2)
+  SessionIn-->>Run: u4 (passes — the OOM'd turn)
+  Run->>SessionOut: turn 4 complete
+```
+
+The scan on `session.out` is streaming and bounded in memory: each chunk is inspected and discarded one at a time, so a long-running chat doesn't bloat the retry-boot worker. Bandwidth scales linearly with `session.out` size, but only on the OOM-retry path — a rare event.
+
+## With `hydrateMessages`
+
+If your agent uses [`hydrateMessages`](/ai-chat/lifecycle-hooks#hydratemessages) to load the durable conversation history per turn, the OOM'd turn re-runs against the full prior accumulator: the model sees `[u1, a1, u2, a2, ..., u_N]` and responds in context. This is the recommended pattern for production chats.
+
+## Without `hydrateMessages`
+
+Recovery boot reconstructs context automatically. The boot reads both the durable `session.out` snapshot (settled turns) and the `session.out` tail past the snapshot cursor (the partial assistant chunks the OOM'd turn streamed before dying). When the new attempt processes the OOM'd user message, the model sees the full prior conversation **plus** the partial assistant message that was cut off — so a "keep going" follow-up continues naturally, and any other follow-up has the same context the original turn had.
+
+`hydrateMessages` is still the right choice if you want a single source of truth in your own database (branching conversations, message-level access control, etc.). It's no longer required for OOM continuity.
+
+For full control over recovery — drop the partial, synthesize tool results for an interrupted tool call, emit a recovery banner to the UI — register [`onRecoveryBoot`](/ai-chat/patterns/recovery-boot).
+
+## Tool execute idempotency
+
+If an OOM hits mid-tool-execution, the new attempt re-runs the entire turn — including the tool call. Make tool `execute` functions idempotent or checkpoint their progress externally. Trigger doesn't roll back side effects automatically.
+
+```ts
+import { tool } from "ai";
+
+export const sendEmail = tool({
+  description: "Send an email",
+  inputSchema: z.object({ to: z.string(), idempotencyKey: z.string() }),
+  execute: async ({ to, idempotencyKey }) => {
+    // Stripe-style: dedupe at the side-effect layer with a customer-supplied key.
+    return await mailer.send({ to, idempotencyKey });
+  },
+});
+```
+
+## Limitations
+
+- **One OOM retry per run.** `chat.agent` sets `maxAttempts: 2`. If attempt 2 also OOMs, the run fails. Use a sufficiently large `oomMachine` to avoid this.
+- **Single fallback tier.** Only one `oomMachine`. There's no "tiered retry" (small → medium → large). If you need that, drop down to a [raw `task()` with chat primitives](/ai-chat/backend#raw-task-with-primitives) and configure `retry` directly.
+- **Non-OOM errors don't retry.** Schema errors, model-call rejections, tool throws, etc. fail the run as before. Out-of-memory is the only retry trigger.
+- **Tools mid-execution are not checkpointed.** A partially-run tool re-runs from scratch on the new attempt. Make them idempotent.
+
+## See also
+
+- [Recovery boot](/ai-chat/patterns/recovery-boot) — the underlying hook + smart default that gives OOM recovery its full-context behavior
+- [Lifecycle hooks](/ai-chat/lifecycle-hooks) — `onChatResume` fires on every retry attempt with `phase: "preload"` or `"turn"`
+- [Database persistence](/ai-chat/patterns/database-persistence) — the `hydrateMessages` pattern for branching, ACL, and DB-as-source-of-truth scenarios
diff --git a/docs/ai-chat/patterns/persistence-and-replay.mdx b/docs/ai-chat/patterns/persistence-and-replay.mdx
new file mode 100644
index 00000000000..f1008dda260
--- /dev/null
+++ b/docs/ai-chat/patterns/persistence-and-replay.mdx
@@ -0,0 +1,204 @@
+---
+title: "Persistence and replay"
+sidebarTitle: "Persistence and replay"
+description: "How chat.agent rebuilds conversation history at run boot — durable JSON snapshot in object storage plus session.out replay, with a hydrateMessages short-circuit for backend-owned history."
+---
+
+import RcBanner from "/snippets/ai-chat-rc-banner.mdx";
+
+<RcBanner />
+
+`chat.agent` runs are processes — they boot, stream a turn, and either suspend (waiting for the next message) or exit. When the next message arrives at a session whose previous run already exited, a **fresh** run boots with no in-memory state. Something has to rebuild the conversation history before that turn can produce a coherent response.
+
+This page walks through the **snapshot + replay** model the runtime uses by default, and the [`hydrateMessages`](/ai-chat/lifecycle-hooks#hydratemessages) short-circuit that turns the whole thing off when the customer owns history.
+
+## Why a snapshot at all
+
+The wire is delta-only: each `.in/append` carries at most one new `UIMessage` (see [Client Protocol](/ai-chat/client-protocol#chattaskwirepayload)). A long conversation might be 50 turns deep with megabytes of tool results — the wire never carries that. So when run #2 boots to handle turn 51, the wire alone tells it almost nothing about turns 1–50.
+
+Two existing pieces of durable state already capture everything that happened:
+
+- **`session.in`** — every user message and tool-approval response ever sent.
+- **`session.out`** — every assistant token, tool call, and tool result the agent emitted, ordered.
+
+Replaying `session.out` from the beginning is correct but expensive — bandwidth scales with chat length, and parsing N megabytes of streamed chunks at every boot adds latency. So the runtime writes a **snapshot** after every turn and reads it on the next boot. Replay only covers the gap between the snapshot's cursor and now.
+
+## The model end-to-end
+
+```mermaid
+sequenceDiagram
+  participant User
+  participant Run1 as Run 1 (turn 1)
+  participant Snapshot as Object storage
+  participant SessionOut as session.out
+  participant Run2 as Run 2 (turn 2+)
+
+  User->>Run1: u1
+  Run1->>SessionOut: assistant chunks for a1
+  Run1->>Run1: onTurnComplete
+  Run1->>Snapshot: write { messages: [u1, a1], lastOutEventId, lastOutTimestamp }
+  Note over Run1: idle suspend (or exit)
+
+  User->>Run2: u2 (delta only)
+  Run2->>Snapshot: read snapshot
+  Run2->>SessionOut: subscribe(lastOutEventId, wait=0)
+  SessionOut-->>Run2: (empty — nothing since snapshot)
+  Note over Run2: accumulator = [u1, a1]
+  Run2->>Run2: append u2 from wire
+  Run2->>SessionOut: assistant chunks for a2
+  Run2->>Run2: onTurnComplete
+  Run2->>Snapshot: write { messages: [u1, a1, u2, a2], ... }
+```
+
+### Run 1 — first turn
+
+The accumulator starts empty. The wire delivers `u1`. After the model finishes, `onTurnComplete` fires, then the runtime serializes the full accumulator and writes:
+
+```json
+{
+  "version": 1,
+  "savedAt": 1715180400000,
+  "messages": [u1, a1],
+  "lastOutEventId": "42",
+  "lastOutTimestamp": 1715180399000
+}
+```
+
+The key is `packets/{projectRef}/{envSlug}/sessions/{sessionId}/snapshot.json` — overwritten every turn, never appended. The write is **awaited**, not fire-and-forget — if the run idle-suspends immediately after, in-flight promises don't reliably complete and the snapshot would be lost.
+
+### Run 2 — boot
+
+A new run boots when the user sends `u2`. Run 1 has long since exited. Run 2 has no in-memory state. The boot sequence:
+
+<Steps>
+  <Step title="Read the snapshot">
+    GET the JSON blob. On a 404 (no snapshot yet — first-ever turn), a read error, or a version mismatch, treat the snapshot as empty and continue. Snapshot misses are non-fatal — replay alone may still be sufficient.
+  </Step>
+  <Step title="Replay session.out tail">
+    Subscribe to `session.out` with `wait=0` starting from `snapshot.lastOutEventId`. Drain whatever's there and close. Returns:
+    - **Settled messages** — closed assistant turns past the snapshot cursor (the chunks of a turn that completed after the snapshot was written but before the run exited cleanly).
+    - **A partial assistant** — the trailing message if its stream never received a `finish` chunk. The dead run was mid-response when it died. `cleanupAbortedParts` has already stripped streaming-in-progress fragments.
+
+    In the steady state this returns empty. In recovery, it returns whatever the dead run was in the middle of.
+  </Step>
+  <Step title="Replay session.in tail">
+    GET `session.in` records past the last `turn-complete`'s `session-in-event-id` cursor. Returns the user messages the dead run hadn't acknowledged — typically the message that triggered the cancelled / crashed turn, plus anything the customer typed after.
+  </Step>
+  <Step title="Reconstruct the chain (smart default)">
+    Snapshot messages merge with the settled replay (replay wins on `id` collision). Then:
+
+    - If there's a partial assistant **and** at least one in-flight user message, splice `[firstInFlightUser, partialAssistant]` onto the end of the chain. The model sees the prior turn's incomplete attempt and can continue, abandon, or pivot based on the next user message.
+    - Remaining in-flight users dispatch as fresh turns after the recovered first one.
+    - If there's no partial OR no in-flight users, the chain is just the settled chain and any in-flight users dispatch normally.
+
+    Customers can override this entirely via [`onRecoveryBoot`](/ai-chat/patterns/recovery-boot).
+  </Step>
+  <Step title="Append the new wire message">
+    Append `u2` from the wire payload, exactly as on turn 1.
+  </Step>
+</Steps>
+
+The model now sees `[u1, a1, u2]` and produces `a2`. After `onTurnComplete`, the runtime overwrites the snapshot with `[u1, a1, u2, a2]` and the cycle repeats.
+
+### Crash mid-turn — replay carries the load
+
+Suppose Run 1's turn 1 streams partial assistant chunks to `session.out` and then crashes (OOM, exception, server-side cancel) before `onTurnComplete` fires. No snapshot was written. The next run boots and:
+
+1. Snapshot read returns 404 → empty.
+2. `session.out` tail replay picks up the partial assistant chunks emitted before the crash. `cleanupAbortedParts` strips streaming-in-progress fragments but keeps the cleaned trailing message as the `partialAssistant`.
+3. `session.in` tail replay finds the user message the dead run was answering (no `turn-complete` was written, so the cursor never advanced past it).
+4. Smart default splices `[firstInFlightUser, partialAssistant]` onto the chain. Any later user messages (including the customer's follow-up) dispatch as fresh turns.
+5. The model sees full prior context and responds in kind — continuing a cut-off essay on "keep going", answering a fresh question on "actually, what's 7+8?", abandoning the prior work on "scrap that, do X instead".
+
+Replay carries the conversation across the crash boundary with zero customer code. For policies different from "preserve context" — drop the partial entirely, synthesize tool results for an interrupted tool call, write a recovery banner to the UI — register [`onRecoveryBoot`](/ai-chat/patterns/recovery-boot).
+
+## OOM-retry interaction
+
+The runtime already had an OOM-retry path that scans `session.out` for the latest `trigger:turn-complete` timestamp to use as a cutoff for `session.in` (so the retry doesn't re-process completed turns — see [OOM resilience](/ai-chat/patterns/oom-resilience)). The snapshot includes a `lastOutTimestamp` field that is exactly that high-water mark.
+
+When a snapshot exists, the OOM-retry path reads `lastOutTimestamp` directly instead of scanning `session.out`. One fewer stream subscription per retry. Free win.
+
+If no snapshot exists (first turn, or `hydrateMessages` registered), the path falls back to the scan.
+
+## Action turns — no snapshot write
+
+[Action turns](/ai-chat/actions) (`trigger: "action"`) don't fire `onTurnComplete` — they fire `onAction` only. The snapshot write site is gated on `onTurnComplete`, so action turns don't snapshot.
+
+If `onAction` mutates `chat.history.*` and then the run crashes before the next regular turn, the mutation is lost. The user re-fires the action. This matches `chat.history` semantics in general — mutations are persisted at turn boundaries, not action boundaries.
+
+## The `hydrateMessages` short-circuit
+
+When the customer registers a [`hydrateMessages`](/ai-chat/lifecycle-hooks#hydratemessages) hook, the runtime trusts the hook to be the source of truth for history. Snapshot read and replay are **skipped entirely** at boot. The hook fires per turn, returns the canonical chain from the customer's database, and the accumulator is set to whatever the hook returned.
+
+```ts
+import { chat } from "@trigger.dev/sdk/ai";
+import { db } from "@/lib/db";
+
+export const myChat = chat.agent({
+  id: "my-chat",
+  hydrateMessages: async ({ chatId, trigger, incomingMessages }) => {
+    const stored = (await db.chat.findUnique({ where: { id: chatId } }))?.messages ?? [];
+
+    if (trigger === "submit-message" && incomingMessages.length > 0) {
+      stored.push(incomingMessages[0]!);
+      await db.chat.update({ where: { id: chatId }, data: { messages: stored } });
+    }
+
+    return stored;
+  },
+  onTurnComplete: async ({ chatId, uiMessages }) => {
+    await db.chat.update({ where: { id: chatId }, data: { messages: uiMessages } });
+  },
+  run: async ({ messages, signal }) => {
+    return streamText({ model: anthropic("claude-sonnet-4-5"), messages, abortSignal: signal });
+  },
+});
+```
+
+What you gain:
+
+- **Zero object-store traffic per turn.** No snapshot read, no snapshot write, no replay subscription. `OBJECT_STORE_*` env vars don't have to be set.
+- **Branching, undo, edit, abuse prevention** — patterns that need a backend-side single source of truth work naturally because the customer mediates every read.
+
+What you give up:
+
+- **You own persistence end-to-end.** A bug in `hydrateMessages` that returns the wrong chain corrupts the conversation visible to the model.
+- **OOM-retry needs a `session.out` scan again** because there's no snapshot to short-circuit it. (Same as the pre-snapshot baseline — not a regression, just a missed optimization.)
+
+The runtime's snapshot+replay is the safer default. `hydrateMessages` is the right choice when you already have authoritative storage for messages and want one consistent persistence path.
+
+## When neither is configured
+
+If `hydrateMessages` is not registered **and** no object store is configured, conversations don't survive run boundaries. A continuation boots empty. The runtime logs a warning at agent registration time so you see this at deploy time, not at user-traffic time.
+
+For local development this is sometimes fine — you're not testing continuations. For production it isn't. Configure one of:
+
+- **Object store** (`OBJECT_STORE_*` env vars on your webapp) — easiest, default behavior.
+- **`hydrateMessages` + your own database** — stronger control, suits multi-tenant apps with audit needs.
+
+## Snapshot key & lifecycle
+
+| Field | Value |
+|---|---|
+| Bucket | Whatever `OBJECT_STORE_BASE_URL` points to |
+| Key prefix | `packets/{projectRef}/{envSlug}/` (server-prefixed) |
+| Key suffix | `sessions/{sessionId}/snapshot.json` |
+| Final key | `packets/{projectRef}/{envSlug}/sessions/{sessionId}/snapshot.json` |
+| Size | Tens of KB typical, capped only by object-store limits |
+| Cadence | Overwritten after every successful `onTurnComplete` |
+
+Snapshots accumulate per-session forever unless you set a lifecycle policy on the bucket. A 90-day expiry on `packets/*/sessions/*/snapshot.json` is a reasonable default if your chats don't typically resume after that window. Closed sessions are not auto-cleaned today.
+
+### MinIO and S3-compatible stores
+
+Snapshot read/write reuses the same object-store layer as Trigger.dev's existing large-payload routes. Anything that already works for large payloads — AWS S3, MinIO (self-host or local development), Cloudflare R2, Tigris, Backblaze B2 — works for snapshots too. `OBJECT_STORE_DEFAULT_PROTOCOL` controls the routing (`s3`, `minio`, etc.) and the SDK picks the right driver automatically. No snapshot-specific config.
+
+For local development against `pnpm run docker`, the bundled MinIO container is enough — set `OBJECT_STORE_DEFAULT_PROTOCOL=minio` and the standard MinIO env vars on the webapp, and continuations work end-to-end against a local stack.
+
+## See also
+
+- [Client Protocol](/ai-chat/client-protocol#how-history-is-rebuilt) — the wire-level view of the same model
+- [`hydrateMessages`](/ai-chat/lifecycle-hooks#hydratemessages) — the short-circuit hook
+- [OOM resilience](/ai-chat/patterns/oom-resilience) — how `session.in` cutoffs interact with snapshots
+- [Database persistence](/ai-chat/patterns/database-persistence) — the canonical persistence pattern using `onTurnComplete`
+- [v4.5 upgrade guide](/ai-chat/upgrade-guide#v45-wire-format-change) — when this model landed and what changed
diff --git a/docs/ai-chat/patterns/recovery-boot.mdx b/docs/ai-chat/patterns/recovery-boot.mdx
new file mode 100644
index 00000000000..a2b11efeb14
--- /dev/null
+++ b/docs/ai-chat/patterns/recovery-boot.mdx
@@ -0,0 +1,230 @@
+---
+title: "Recovery boot"
+sidebarTitle: "Recovery boot"
+description: "Recover from cancel-mid-stream, crashes, and OOM kills with full conversational context. The smart default Just Works; the onRecoveryBoot hook is the override path for advanced policies."
+---
+
+import RcBanner from "/snippets/ai-chat-rc-banner.mdx";
+
+<RcBanner />
+
+When a `chat.agent` run dies in the middle of streaming a response — the user cancels, the worker OOMs, or an unhandled exception kills the process — the durable streams hold what was in flight. The next run boots as a continuation, reads both stream tails, and reconstructs a chain that preserves the partial response so any follow-up (`keep going`, `actually do X instead`, a new question) has full context.
+
+The behavior is automatic. The `onRecoveryBoot` hook is opt-in for policies that need something different.
+
+## The scenario
+
+```ts
+// Turn 1 is mid-essay when the user clicks Cancel.
+window.__chat.send("Write me a long essay about espresso");
+// ... assistant has written 3000 characters ...
+window.__chat.stop();                              // OR: server-side cancel_run
+
+// User decides what they want next.
+window.__chat.send("keep going");                  // OR: "what's 7+8?", or anything
+```
+
+The cancelled run never reached `onTurnComplete`. The snapshot is stale or absent. `session.out` has a half-written assistant message. `session.in` has the original user message (the run consumed it but never marked the turn complete) plus the new follow-up.
+
+A naive continuation would either re-run the cancelled essay (the user already chose to stop) or drop everything (no context for the follow-up). Recovery boot handles this without either failure mode.
+
+## The smart default
+
+On a continuation boot, the runtime reads:
+
+- **Snapshot** — settled turns persisted by the last successful `onTurnComplete`.
+- **`session.out` tail past the snapshot cursor** — closed assistant turns plus, optionally, a `partialAssistant` (the trailing message whose stream never received a `finish` chunk). `cleanupAbortedParts` has already stripped streaming-in-progress fragments.
+- **`session.in` tail past the last `turn-complete` cursor** — user messages the dead run hadn't acknowledged.
+
+If both `partialAssistant` and `inFlightUsers` are non-empty, the runtime splices `[firstInFlightUser, partialAssistant]` onto the chain. The remaining in-flight users dispatch as fresh turns. The model sees:
+
+```
+[ ...settledMessages,  // chain through the last completed turn
+  firstInFlightUser,   // the question the dead run was answering
+  partialAssistant,    // the dead run's incomplete response
+  followUpUser ]       // the new turn the customer just sent
+```
+
+Modern instruction-following models prioritize the latest user message. The follow-up determines the response:
+
+| Follow-up | Model behavior |
+|---|---|
+| "keep going" / "continue" / "more" | Continues the partial essay from where it stopped. |
+| "actually, what's 7+8?" | Answers the new question. Prior context doesn't derail it. |
+| "scrap that, do something else" | Abandons the partial work and follows the new direction. |
+
+No customer code needed for any of these.
+
+## When to register `onRecoveryBoot`
+
+The hook fires only when the `session.out` tail contains a `partialAssistant` — the signal that the previous run died mid-stream. In-flight users alone don't fire it (see [Fires when](#fires-when) below). Register it when you need a policy different from "preserve context":
+
+- **Drop the partial entirely.** Your UX means "cancel discards the work — start fresh from the follow-up."
+- **Synthesize tool results.** The partial has tool calls in `input-available` state (for example, a human-in-the-loop (HITL) tool call was still pending when the run died). Return a chain that has fabricated `output-available` results so the model can continue.
+- **Emit a recovery banner.** Write a `data-chat-recovery` UIMessage chunk via `ctx.writer` so the frontend can render "Recovering interrupted response..." before the model speaks.
+- **Persist recovered state.** Use `beforeBoot` to flush the partial to your own database before the next turn starts.
+
+```ts
+import { chat } from "@trigger.dev/sdk/ai";
+
+export const myChat = chat.agent({
+  id: "my-chat",
+  onRecoveryBoot: async ({ partialAssistant, inFlightUsers, writer, cause, previousRunId }) => {
+    writer.write({
+      type: "data-chat-recovery",
+      data: { cause, previousRunId, partialPresent: partialAssistant !== undefined },
+      transient: true,
+    });
+    // Return nothing → fall through to smart default.
+  },
+  run: async ({ messages, signal }) =>
+    streamText({ model, messages, abortSignal: signal }),
+});
+```
+
+## Hook reference
+
+### Fires when
+
+The hook fires once on a continuation boot, AFTER both stream tails have been read, AND only when there's a partial assistant — the mid-stream-died signal:
+
+```ts
+const shouldFire = partialAssistant !== undefined;
+```
+
+In-flight users alone don't fire the hook. Graceful exits like `chat.requestUpgrade()` and `chat.endRun()` may leave an unacknowledged user on `session.in` (the message that triggered the upgrade, the next message after endRun), but no partial — that's a normal continuation, not recovery. The next message just dispatches as turn 1 on the new run via the normal session.in pump.
+
+Skipped scenarios (where the hook does NOT fire):
+
+- A clean continuation after `chat.endRun()` with no buffered follow-up.
+- A fresh chat (no continuation, attempt 1).
+- An OOM retry that booted onto a complete snapshot (no partial on the tail).
+- `chat.requestUpgrade()` graceful exit — predecessor ended cleanly before processing, no partial.
+- An agent with [`hydrateMessages`](/ai-chat/lifecycle-hooks#hydratemessages) registered. Customers using `hydrateMessages` own persistence — recovery decisions live in their own DB query.
+
+### Event shape
+
+```ts
+type RecoveryBootEvent<TUIM extends UIMessage = UIMessage> = {
+  ctx: TaskRunContext;
+  chatId: string;
+  runId: string;
+  previousRunId: string;
+  cause: "cancelled" | "crashed" | "unknown";
+  settledMessages: TUIM[];
+  inFlightUsers: TUIM[];
+  partialAssistant: TUIM | undefined;
+  pendingToolCalls: Array<{
+    toolCallId: string;
+    toolName: string;
+    input: unknown;
+    partIndex: number;
+  }>;
+  writer: ChatWriter;
+};
+```
+
+<Note>
+  `cause` is currently always `"unknown"` — the run engine doesn't yet plumb the
+  real reason into the continuation payload. The enum is forward-looking; don't
+  branch behavior on it for now.
+</Note>
+
+### Return shape
+
+Every field is optional. Returning `undefined` (or nothing) accepts the smart default for every field.
+
+```ts
+type RecoveryBootResult<TUIM extends UIMessage = UIMessage> = {
+  chain?: TUIM[];
+  recoveredTurns?: TUIM[];
+  beforeBoot?: () => Promise<void>;
+};
+```
+
+- **`chain`** — replaces the seed chain. Defaults to `[...settledMessages, firstInFlightUser, partialAssistant]` when both partial and in-flight users exist, otherwise `settledMessages` alone.
+- **`recoveredTurns`** — user messages to dispatch as fresh turns after the chain is restored. Defaults to `inFlightUsers.slice(1)` when the smart default consumed the first user, otherwise `inFlightUsers`.
+- **`beforeBoot`** — runs after the writer flushes and before the first recovered turn fires. Use for blocking persistence (write the partial to your DB so a later turn can reference it). Errors bubble — wrap your own try/catch if you want to soft-fail.
+
+## Examples
+
+### Drop the partial — strict "cancel means discard"
+
+The customer's UX treats cancel as "throw the work away":
+
+```ts
+onRecoveryBoot: async ({ settledMessages, inFlightUsers, partialAssistant }) => {
+  if (!partialAssistant) return;          // No partial → nothing to drop
+  return {
+    chain: settledMessages,                // Settled turns only — leaving `chain` undefined would fall back to the smart default and splice the partial
+    recoveredTurns: inFlightUsers.slice(1) // Still skip the first user (the dead run was answering it)
+  };
+}
+```
+
+### Synthesize tool results for a mid-call interruption
+
+The dead run was processing a tool call when it died. The partial has tool parts in `input-available` state with no `output-available`. Synthesize a result so the model can keep going:
+
+```ts
+onRecoveryBoot: async ({ partialAssistant, pendingToolCalls, settledMessages, inFlightUsers }) => {
+  if (pendingToolCalls.length === 0) return;
+
+  // Rebuild the partial with synthetic outputs for any input-available tool call.
+  const repaired = {
+    ...partialAssistant!,
+    parts: partialAssistant!.parts!.map((part, i) => {
+      const pending = pendingToolCalls.find(p => p.partIndex === i);
+      if (!pending) return part;
+      return {
+        ...part,
+        state: "output-available" as const,
+        output: { interrupted: true, reason: "previous run was cancelled" },
+      };
+    }),
+  };
+
+  return {
+    chain: [...settledMessages, inFlightUsers[0]!, repaired],
+    recoveredTurns: inFlightUsers.slice(1),
+  };
+}
+```
+
+### Persist the partial before the next turn fires
+
+```ts
+onRecoveryBoot: async ({ chatId, partialAssistant }) => {
+  return {
+    beforeBoot: async () => {
+      if (partialAssistant) {
+        await db.partial.create({
+          data: { chatId, partialJson: JSON.stringify(partialAssistant) },
+        });
+      }
+    },
+  };
+}
+```
+
+## Interaction with other features
+
+### `hydrateMessages`
+
+If your agent registers [`hydrateMessages`](/ai-chat/lifecycle-hooks#hydratemessages), the runtime skips snapshot read, `session.out` replay, `session.in` replay, AND `onRecoveryBoot`. Your DB is the source of truth — recovery decisions live in your own query. To detect a cancel-recovery scenario yourself, persist a `runState: "in-progress"` flag in `onTurnStart` and check for it in `hydrateMessages`.
+
+### `chat.requestUpgrade()`
+
+[`chat.requestUpgrade()`](/ai-chat/patterns/version-upgrades) is a graceful exit — the old run doesn't crash, it returns cleanly. The new continuation run boots with a clean `session.out` tail (`partialAssistant` is undefined) and the upgrade-trigger message on `session.in` (one in-flight user). The smart default doesn't splice (it requires both partial AND in-flight users), so the chain is just `settledMessages` and the in-flight user dispatches as a fresh turn. `onRecoveryBoot` does **not** fire here — there's no partial assistant, so this is a normal continuation rather than a recovery. If you want to emit an "upgraded" signal to the UI, use a different hook.
+
+### Hooks throwing
+
+If the body of `onRecoveryBoot` throws (or rejects), the runtime logs a warning and falls back to the smart default — the run does not fail. Wrap your own try/catch if you want stricter handling.
+
+`beforeBoot` is the exception: it's the contract you opted into for blocking persistence, so errors thrown there **bubble** and fail the run rather than dispatch recovered turns against half-persisted state. Wrap it yourself if you want to soft-fail.
+
+## See also
+
+- [OOM resilience](/ai-chat/patterns/oom-resilience) — `oomMachine` opt-in for automatic memory-driven recovery; uses the same recovery boot path.
+- [Persistence and replay](/ai-chat/patterns/persistence-and-replay) — the snapshot + dual-tail replay model that recovery boot sits on top of.
+- [Lifecycle hooks](/ai-chat/lifecycle-hooks) — where `onRecoveryBoot` sits in the broader hook taxonomy.
diff --git a/docs/ai-chat/patterns/skills.mdx b/docs/ai-chat/patterns/skills.mdx
new file mode 100644
index 00000000000..23151399147
--- /dev/null
+++ b/docs/ai-chat/patterns/skills.mdx
@@ -0,0 +1,219 @@
+---
+title: "Agent Skills"
+sidebarTitle: "Agent Skills"
+description: "Ship reusable capabilities (folders with SKILL.md + scripts) that a chat agent discovers and invokes on demand."
+---
+
+import RcBanner from "/snippets/ai-chat-rc-banner.mdx";
+
+<RcBanner />
+
+Agent skills are reusable capabilities you ship as folders — a `SKILL.md` describing when and how to use them, plus optional scripts, references, and assets. The chat agent sees a short description of each skill in its system prompt, loads the full instructions on demand via a `loadSkill` tool, and invokes the bundled scripts via `bash` — all without you wiring anything up manually.
+
+Built on the [AI SDK cookbook pattern](https://ai-sdk.dev/cookbook/guides/agent-skills). Works with any provider (OpenAI, Anthropic, Gemini, etc.) — not tied to Anthropic's server-side skills.
+
+## Why skills?
+
+Compared to regular AI SDK tools:
+
+- **Tools** are typed functions you pre-declare. Great when you know up-front exactly what capability the agent needs.
+- **Skills** are folders the model discovers and reads on demand. Great when the capability is a bundle of instructions + helper scripts that would be awkward to encode as a single tool.
+
+PDFs are the canonical example: you don't want to ask the LLM to parse PDF bytes inline. You want it to `bash scripts/extract.py report.pdf` using a bundled `pdfplumber` wrapper. A skill ships the script, the instructions, and any reference notes together.
+
+Dashboard-editable `SKILL.md` is on the roadmap so a platform team can tighten a skill's description or "when to use" text without a redeploy. Today, skills are SDK-only — defined in your task code and shipped with each deploy.
+
+## Trust model
+
+Skills are **developer-authored code**, not end-user-supplied. The same developer who writes the `chat.agent()` writes the skill bundle. The trust boundary is identical to any `tool.execute` handler the developer writes — scripts run directly in the Trigger.dev worker container, no sandboxing required.
+
+This makes skills different from the Claude Code / end-user model where arbitrary user-provided skills need isolation. Don't accept skill paths from untrusted input.
+
+## Skill folder layout
+
+A skill is a directory under your project (conventionally `trigger/skills/{id}/`):
+
+```
+trigger/skills/time-utils/
+├── SKILL.md              # Required — frontmatter + instructions
+├── scripts/
+│   ├── now.sh
+│   └── add.sh
+├── references/
+│   └── timezones.txt
+└── assets/               # Optional — templates, data files, etc.
+```
+
+### SKILL.md
+
+Frontmatter is YAML-subset — only `name` and `description` are required:
+
+```md
+---
+name: time-utils
+description: Compute and format dates/times in arbitrary timezones. Use when the user asks "what time is it", timezone conversions, or date math.
+---
+
+# Time utilities
+
+## When to use
+
+- The user asks for the current time in a timezone
+- The user wants date math ("3 days from now")
+
+## Scripts
+
+### `scripts/now.sh [TZ]`
+Prints the current time in the given IANA timezone (default `UTC`).
+
+### `scripts/add.sh DAYS [TZ]`
+Prints a date `DAYS` days from now.
+
+## Tips
+- IANA timezone names only (`America/New_York`, not `EST`).
+- See `references/timezones.txt` for a cheat-sheet.
+```
+
+The **description** is what the model sees in its system prompt — write it like you're explaining to the agent when to reach for the skill.
+
+The **body** is loaded on demand via the `loadSkill` tool when the agent decides to use the skill. Write it like documentation for the agent.
+
+## Defining and using a skill
+
+```ts trigger/chat.ts
+import { chat } from "@trigger.dev/sdk/ai";
+import { skills } from "@trigger.dev/sdk";
+import { streamText, stepCountIs } from "ai";
+import { anthropic } from "@ai-sdk/anthropic";
+
+const timeUtilsSkill = skills.define({
+  id: "time-utils",
+  path: "./skills/time-utils",
+});
+
+export const agent = chat.agent({
+  id: "docs-chat",
+  onChatStart: async () => {
+    chat.skills.set([await timeUtilsSkill.local()]);
+  },
+  run: async ({ messages, signal }) => {
+    return streamText({
+      model: anthropic("claude-sonnet-4-5"),
+      messages,
+      abortSignal: signal,
+      ...chat.toStreamTextOptions(),
+      stopWhen: stepCountIs(15),
+    });
+  },
+});
+```
+
+`skills.define({ id, path })` does two things:
+
+1. Registers the skill with the Trigger.dev build system so the CLI **automatically bundles the folder** into your deploy image at `/app/.trigger/skills/{id}/`. No `trigger.config.ts` changes, no build extension — it just works.
+2. Returns a `SkillHandle` you use at runtime.
+
+`skill.local()` reads the bundled `SKILL.md` from disk and returns a `ResolvedSkill` with the parsed frontmatter + body + on-disk path.
+
+`chat.skills.set([...])` stores the resolved skills for the current run. `chat.toStreamTextOptions()` spreads them into `streamText` automatically:
+
+- The frontmatter `description` lands in the system prompt under "Available skills:".
+- Three tools are added: `loadSkill`, `readFile`, `bash` — scoped per skill.
+
+## What gets auto-injected
+
+When you spread `chat.toStreamTextOptions()` with skills set, the AI SDK call receives three tools:
+
+### `loadSkill({ name })`
+
+Returns the full `SKILL.md` body for the named skill. The model calls this first when it decides a skill is relevant, to load the full instructions.
+
+### `readFile({ skill, path })`
+
+Reads a file inside the skill's bundled folder. Paths are relative to the skill's root and are rejected if they attempt to escape via `..` or absolute paths. Output is capped at 1 MB per call.
+
+Use for reference files and templates that the model should read literally:
+
+```
+readFile({ skill: "time-utils", path: "references/timezones.txt" })
+```
+
+### `bash({ skill, command })`
+
+Runs a bash command with `cwd` set to the skill's root. Stdout and stderr are captured and returned (each capped at 64 KB per call, with tail truncation). The turn's abort signal propagates — cancelling the run kills the child process.
+
+Use to invoke the skill's bundled scripts:
+
+```
+bash({ skill: "time-utils", command: "bash scripts/now.sh America/Los_Angeles" })
+```
+
+Script runtime expectations are yours to manage. If your skill uses `extract.py`, your deploy image needs Python — add it via your build config the same way you would for any other task dependency.
+
+## How discovery works in the model
+
+The model sees a short preamble appended to your system prompt:
+
+```
+Available skills (call `loadSkill` to read the full instructions before using one):
+- time-utils: Compute and format dates/times in arbitrary timezones...
+- pdf-processing: Extract text from PDFs, fill forms...
+```
+
+When the user asks something that matches a description, the model calls `loadSkill({ name: "time-utils" })` to load the body, then follows the body's instructions — typically by calling `bash` or `readFile` on the bundled scripts.
+
+This is **progressive disclosure**: each skill costs ~100 tokens up front (its one-line description), and only the ones the model actually uses pay the full context cost.
+
+## Mixing skills with custom tools
+
+If you also define your own AI SDK tools, pass them through `chat.toStreamTextOptions()` so the merge is explicit:
+
+```ts
+return streamText({
+  model: anthropic("claude-sonnet-4-5"),
+  messages,
+  abortSignal: signal,
+  ...chat.toStreamTextOptions({
+    tools: {
+      webFetch,       // your tool
+      deepResearch,   // your tool
+    },
+  }),
+  stopWhen: stepCountIs(15),
+});
+```
+
+Your tools win on name conflicts. (Pick names that don't collide with `loadSkill` / `readFile` / `bash` to keep things predictable.)
+
+## Bundling
+
+Bundling is **built into the CLI** — there's no extension to import. When you run `trigger deploy` or `trigger dev`:
+
+1. esbuild bundles your task code as usual.
+2. The CLI forks the indexer locally against the bundled output and collects every `skills.define({ path })` registration.
+3. Each skill's folder is copied to `{outputPath}/.trigger/skills/{id}/` via a recursive copy.
+4. The existing Dockerfile `COPY` picks up `.trigger/skills/` along with the rest of the bundle — no Dockerfile changes.
+
+If you're running `trigger dev`, the same layout appears in the local dev output directory, so `skill.local()` works the same way.
+
+## Path scoping rules
+
+- `skill.path` always resolves to `${process.cwd()}/.trigger/skills/{id}/` at runtime. Don't hardcode paths elsewhere.
+- `readFile` rejects `..` segments and absolute paths — the tool only exposes files inside the skill's own directory.
+- `bash` runs with `cwd` set to the skill's root. Inside the script, relative paths resolve against the skill directory.
+- Cross-skill access isn't provided — each skill is isolated by design. If two skills need to share data, either duplicate the shared file or consolidate the skills.
+
+## Current limitations
+
+- `skill.resolve()` (backend-managed overrides) is not available yet — use `.local()` for now. Dashboard-editable `SKILL.md` is on the roadmap.
+- No per-skill metrics in the dashboard yet.
+- No Anthropic `/v1/skills` integration — use the portable path today; we're tracking the Anthropic optimization separately.
+
+## Full example
+
+See `references/ai-chat/src/trigger/skills/time-utils/` in the Trigger.dev monorepo for a working skill that bundles two bash scripts and a reference cheat-sheet, wired into a `chat.agent` that answers timezone questions.
+
+## Related
+
+- [AI SDK cookbook — Agent Skills](https://ai-sdk.dev/cookbook/guides/agent-skills) — the userland pattern we build on
+- [Anthropic Agent Skills](https://platform.claude.com/docs/en/agents-and-tools/agent-skills/overview) — Anthropic's codified version (server-side, optional future integration)
diff --git a/docs/ai-chat/patterns/sub-agents.mdx b/docs/ai-chat/patterns/sub-agents.mdx
new file mode 100644
index 00000000000..6fd85d8cd6e
--- /dev/null
+++ b/docs/ai-chat/patterns/sub-agents.mdx
@@ -0,0 +1,376 @@
+---
+title: "Sub-Agents"
+sidebarTitle: "Sub-Agents"
+description: "Delegate work to durable sub-agents from within a parent agent's tool calls, with streaming preliminary results."
+---
+
+import RcBanner from "/snippets/ai-chat-rc-banner.mdx";
+
+<RcBanner />
+
+Sub-agents let a parent agent delegate work to other agents running as durable Trigger.dev tasks. The sub-agent's response streams back through the parent as preliminary tool results, so the frontend sees the sub-agent working inside the parent's tool call card.
+
+This builds on the AI SDK's [async generator tool pattern](https://ai-sdk.dev/docs/agents/subagents) and Trigger.dev's [AgentChat](/ai-chat/server-chat) for server-side agent interaction.
+
+## How it works
+
+1. The parent LLM calls a tool (e.g., `researchAgent`)
+2. The tool's `execute` is an `async function*` (async generator)
+3. Inside, it creates an `AgentChat` and sends a message to the sub-agent
+4. `yield* stream.messages()` streams each accumulated `UIMessage` snapshot as a preliminary tool result
+5. The frontend renders the sub-agent's response building up inside the parent's tool card
+6. `toModelOutput` compresses the full output into a summary for the parent LLM
+
+```
+Parent LLM
+  │
+  ├─ calls researchAgent tool
+  │    │
+  │    ├─ AgentChat triggers sub-agent run
+  │    ├─ sub-agent streams response (text, tool calls, etc.)
+  │    ├─ yield* sends UIMessage snapshots as preliminary results
+  │    └─ toModelOutput compresses for parent LLM
+  │
+  └─ parent LLM reads compressed summary, continues reasoning
+```
+
+## Single-turn sub-agent
+
+The simplest pattern: one tool call, one sub-agent turn, conversation closes.
+
+```ts
+import { tool, stepCountIs } from "ai";
+import { AgentChat } from "@trigger.dev/sdk/chat";
+import { z } from "zod";
+import type { prReviewAgent } from "./trigger/pr-review";
+
+const prReviewTool = tool({
+  description: "Delegate a PR review to the PR review agent.",
+  inputSchema: z.object({
+    prNumber: z.number().describe("The PR number to review"),
+    repo: z.string().describe("The GitHub repo URL"),
+  }),
+  execute: async function* ({ prNumber, repo }, { abortSignal }) {
+    const chat = new AgentChat<typeof prReviewAgent>({
+      agent: "pr-review",
+      id: `review-${prNumber}`,
+      clientData: { userId: "parent-agent", githubUrl: repo },
+    });
+
+    const stream = await chat.sendMessage(`Review PR #${prNumber}`, { abortSignal });
+
+    // Each yield sends a UIMessage snapshot to the frontend
+    yield* stream.messages();
+
+    await chat.close();
+  },
+  // The parent LLM only sees this compressed summary
+  toModelOutput: ({ output: message }) => {
+    const lastText = message?.parts?.findLast(
+      (p: { type: string }) => p.type === "text"
+    ) as { text?: string } | undefined;
+    return { type: "text", value: lastText?.text ?? "Review complete." };
+  },
+});
+```
+
+Use this tool in a parent agent's `streamText` call:
+
+```ts
+import { streamText } from "ai";
+import { anthropic } from "@ai-sdk/anthropic";
+
+const result = streamText({
+  model: anthropic("claude-sonnet-4-6"),
+  tools: { prReview: prReviewTool },
+  prompt: "Review PR #42 on triggerdotdev/trigger.dev",
+  stopWhen: stepCountIs(15),
+});
+```
+
+## Multi-turn sub-agent (LLM-driven)
+
+The parent LLM drives a persistent conversation with a sub-agent across multiple tool calls. Each call with the same `conversationId` hits the same durable agent run.
+
+```ts
+import { tool } from "ai";
+import { AgentChat } from "@trigger.dev/sdk/chat";
+import { z } from "zod";
+
+// Track active sub-agent conversations
+const subAgents = new Map<string, AgentChat>();
+
+const researchTool = tool({
+  description:
+    "Talk to a research agent. Use the same conversationId to continue " +
+    "an existing conversation — the agent remembers full context.",
+  inputSchema: z.object({
+    conversationId: z
+      .string()
+      .describe("Unique ID for this research thread. Reuse to continue."),
+    message: z.string().describe("Your message to the research agent"),
+  }),
+  execute: async function* ({ conversationId, message }, { abortSignal }) {
+    let agent = subAgents.get(conversationId);
+    if (!agent) {
+      agent = new AgentChat({
+        agent: "research-agent",
+        id: conversationId,
+      });
+      subAgents.set(conversationId, agent);
+    }
+
+    const stream = await agent.sendMessage(message, { abortSignal });
+    yield* stream.messages();
+  },
+  toModelOutput: ({ output: message }) => {
+    const lastText = message?.parts?.findLast(
+      (p: { type: string }) => p.type === "text"
+    ) as { text?: string } | undefined;
+    return { type: "text", value: lastText?.text ?? "Done." };
+  },
+});
+```
+
+The parent LLM naturally calls this tool multiple times:
+
+1. `researchAgent({ conversationId: "competitors", message: "Research competitors in AI agents" })` — first call triggers a new sub-agent run
+2. `researchAgent({ conversationId: "competitors", message: "Go deeper on pricing" })` — same run, sub-agent has full context
+3. `researchAgent({ conversationId: "new-topic", message: "..." })` — different ID = different sub-agent
+
+### Cross-turn persistence
+
+Sub-agent conversations persist across **parent turns** because the `Map` lives in the parent's process heap. When the parent suspends and restores via snapshot, the heap is preserved — the Map still has the conversations, the sessions still have the run IDs.
+
+```ts
+export const orchestrator = chat
+  .withClientData({ schema: z.object({ userId: z.string() }) })
+  .customAgent({
+    id: "orchestrator",
+    run: async (payload, { signal: runSignal }) => {
+      // These survive across parent turns via snapshot/restore
+      const subAgents = new Map<string, AgentChat>();
+
+      const researchTool = tool({
+        // ... closes over subAgents Map
+      });
+
+      // Turn loop — subAgents persist across all turns
+      for (let turn = 0; turn < 50; turn++) {
+        // ... streamText with researchTool
+      }
+
+      // Cleanup when parent exits
+      await Promise.all(
+        Array.from(subAgents.values()).map((a) => a.close().catch(() => {}))
+      );
+    },
+  });
+```
+
+## How sub-agents clean up
+
+Sub-agents clean up through three mechanisms:
+
+1. **Explicit close**: Call `chat.close()` or `agent.close()` when done
+2. **Idle timeout**: When the sub-agent's idle timeout expires, it suspends
+3. **Suspend timeout**: When the sub-agent's suspend timeout expires, the run ends
+
+For the multi-turn pattern, the parent should clean up sub-agents when it exits (in `onComplete` for managed agents, or at the end of the loop for custom agents). Without explicit cleanup, sub-agents close on their own via timeouts — no leaked resources or cost while suspended.
+
+## What the frontend sees
+
+Each `yield` from `stream.messages()` sends a complete `UIMessage` containing all the sub-agent's parts accumulated so far. The AI SDK delivers these as `tool-output-available` chunks with `preliminary: true`.
+
+The frontend renders the tool part with:
+- `state: "output-available"` and `preliminary: true` while streaming
+- `state: "output-available"` and `preliminary: false` (or absent) when done
+
+The tool output contains the full `UIMessage` with nested parts — text, the sub-agent's own tool calls and results, reasoning, etc.
+
+### Controlling what the parent LLM sees
+
+`toModelOutput` transforms the tool's output before it enters the parent LLM's context. The full UIMessage streams to the frontend, but the model only sees the compressed version:
+
+```ts
+toModelOutput: ({ output: message }) => {
+  // Extract just the final text — the model doesn't need
+  // to see all the sub-agent's tool calls and intermediate work
+  const lastText = message?.parts?.findLast(
+    (p: { type: string }) => p.type === "text"
+  ) as { text?: string } | undefined;
+  return { type: "text", value: lastText?.text ?? "Done." };
+},
+```
+
+This is important for token efficiency: the sub-agent might use 100K tokens exploring and reasoning, but the parent LLM only consumes the summary.
+
+## ChatStream.messages()
+
+The `messages()` method on `ChatStream` wraps the AI SDK's `readUIMessageStream`. It reads the raw `UIMessageChunk` stream and yields complete `UIMessage` snapshots — each containing all parts received so far.
+
+```ts
+const stream = await chat.sendMessage("Research this topic");
+
+// Each yield is a complete UIMessage with all accumulated parts
+for await (const message of stream.messages()) {
+  console.log(message.parts.length, "parts so far");
+}
+```
+
+For the sub-agent pattern, use `yield*` to delegate all yields to the parent tool's generator:
+
+```ts
+execute: async function* ({ topic }, { abortSignal }) {
+  const stream = await chat.sendMessage(topic, { abortSignal });
+  yield* stream.messages();
+},
+```
+
+<Tip>
+  `stream.messages()` consumes the stream. You can't also call `stream.text()` or iterate over chunks on the same stream. Pick one consumption mode.
+</Tip>
+
+## Combining with chat.agent()
+
+Sub-agent tools work inside both `chat.agent()` (managed) and `chat.customAgent()` (manual lifecycle):
+
+```ts
+// Managed agent with sub-agent tool
+export const myAgent = chat.agent({
+  id: "orchestrator",
+  run: async ({ messages, stopSignal }) => {
+    return streamText({
+      model: anthropic("claude-sonnet-4-6"),
+      messages,
+      tools: { research: researchTool },
+      abortSignal: stopSignal,
+      stopWhen: stepCountIs(15),
+    });
+  },
+});
+```
+
+For `chat.customAgent()`, define the tool and sub-agent Map inside the `run` closure so they survive across turns.
+
+## Streaming progress from a subtask to the parent chat
+
+When a tool invokes a subtask via `triggerAndWait`, the subtask can stream custom data parts directly to the parent chat using `chat.stream.writer({ target: "root" })`. The frontend receives these as `DataUIPart` objects in `message.parts` on the **parent's** message stream:
+
+```ts
+import { chat, ai } from "@trigger.dev/sdk/ai";
+import { schemaTask } from "@trigger.dev/sdk";
+import { streamText, tool, generateId } from "ai";
+import { anthropic } from "@ai-sdk/anthropic";
+import { z } from "zod";
+
+export const researchTask = schemaTask({
+  id: "research",
+  schema: z.object({ query: z.string() }),
+  run: async ({ query }) => {
+    const partId = generateId();
+
+    // Stream a data-* chunk to the root run's chat stream.
+    const { waitUntilComplete } = chat.stream.writer({
+      target: "root",
+      execute: ({ write }) => {
+        write({
+          type: "data-research-status",
+          id: partId,
+          data: { query, status: "in-progress" },
+        });
+      },
+    });
+    await waitUntilComplete();
+
+    const result = await doResearch(query);
+
+    // Update the same part with the final status — same type + id replaces it.
+    const { waitUntilComplete: waitDone } = chat.stream.writer({
+      target: "root",
+      execute: ({ write }) => {
+        write({
+          type: "data-research-status",
+          id: partId,
+          data: { query, status: "done", resultCount: result.length },
+        });
+      },
+    });
+    await waitDone();
+
+    return result;
+  },
+});
+
+const research = tool({
+  description: researchTask.description ?? "",
+  inputSchema: researchTask.schema!,
+  execute: ai.toolExecute(researchTask),
+});
+```
+
+On the frontend, render the custom data part:
+
+```tsx
+{message.parts.map((part, i) => {
+  if (part.type === "data-research-status") {
+    const { query, status, resultCount } = part.data;
+    return (
+      <div key={i}>
+        {status === "done" ? `Found ${resultCount} results` : `Researching "${query}"...`}
+      </div>
+    );
+  }
+  // ...other part types
+})}
+```
+
+The `target` option accepts:
+- `"self"` — current run (default)
+- `"parent"` — parent task's run
+- `"root"` — root task's run (the chat agent)
+- A specific run ID string
+
+## Inside `ai.toolExecute`: accessing tool + chat context
+
+When a subtask runs via `execute: ai.toolExecute(task)`, it can read the parent's tool call ID and chat context from inside the subtask body:
+
+```ts
+import { ai, chat } from "@trigger.dev/sdk/ai";
+import type { myChat } from "./chat";
+
+export const mySubtask = schemaTask({
+  id: "my-subtask",
+  schema: z.object({ query: z.string() }),
+  run: async ({ query }) => {
+    // The AI SDK tool call ID — useful as a stable `data-*` chunk id
+    const toolCallId = ai.toolCallId();
+
+    // Typed chat context — `clientData` is typed off your chat's `clientDataSchema`
+    const { chatId, clientData } = ai.chatContextOrThrow<typeof myChat>();
+
+    const { waitUntilComplete } = chat.stream.writer({
+      target: "root",
+      execute: ({ write }) => {
+        write({
+          type: "data-progress",
+          id: toolCallId,
+          data: { status: "working", query, userId: clientData?.userId },
+        });
+      },
+    });
+    await waitUntilComplete();
+
+    return { result: "done" };
+  },
+});
+```
+
+| Helper | Returns | Description |
+|--------|---------|-------------|
+| `ai.toolCallId()` | `string \| undefined` | The AI SDK tool call ID |
+| `ai.chatContext<typeof myChat>()` | `{ chatId, turn, continuation, clientData } \| undefined` | Chat context with typed `clientData`. Returns `undefined` if not in a chat context. |
+| `ai.chatContextOrThrow<typeof myChat>()` | `{ chatId, turn, continuation, clientData }` | Same as above but throws if not in a chat context |
+| `ai.currentToolOptions()` | `ToolCallExecutionOptions \| undefined` | Full tool execution options |
+
+The subtask body also has read-only access to any [`chat.local`](/ai-chat/chat-local) values initialized in the parent — auto-hydrated from the parent's metadata on first access.
diff --git a/docs/ai-chat/patterns/tool-result-auditing.mdx b/docs/ai-chat/patterns/tool-result-auditing.mdx
new file mode 100644
index 00000000000..c79da0e6808
--- /dev/null
+++ b/docs/ai-chat/patterns/tool-result-auditing.mdx
@@ -0,0 +1,149 @@
+---
+title: "Tool result auditing"
+sidebarTitle: "Tool result auditing"
+description: "Fire side effects exactly once per resolved tool call — audit logs, billing, notifications — using extractNewToolResults inside hydrateMessages, or by walking newUIMessages in onTurnComplete."
+---
+
+import RcBanner from "/snippets/ai-chat-rc-banner.mdx";
+
+<RcBanner />
+
+When a chat agent uses tools (especially [human-in-the-loop](/ai-chat/patterns/human-in-the-loop) tools that wait on `addToolOutput` from the frontend), you often need to fire side effects exactly once per resolved tool call:
+
+- **Audit logs** — record every tool result for compliance.
+- **Billing** — charge per tool invocation.
+- **Notifications** — alert downstream systems when a specific tool resolves.
+- **Search-index updates** — reflect tool outputs into a derived store.
+
+The naive approach — "log every tool part you see" — over-counts. The same assistant message gets re-shown across re-renders, replays, and retries. You want a function of the form **"is this tool result one I haven't already logged?"** That's exactly what [`chat.history.extractNewToolResults`](/ai-chat/backend#chat-history) returns.
+
+## The pattern
+
+```ts
+import { chat } from "@trigger.dev/sdk/ai";
+import { auditLog } from "@/lib/audit";
+
+export const myChat = chat.agent({
+  id: "my-chat",
+  hydrateMessages: async ({ chatId, incomingMessages }) => {
+    for (const msg of incomingMessages) {
+      for (const r of chat.history.extractNewToolResults(msg)) {
+        await auditLog.record({
+          chatId,
+          toolCallId: r.toolCallId,
+          toolName: r.toolName,
+          output: r.output,
+          errorText: r.errorText,
+        });
+      }
+    }
+    return await db.getMessages(chatId);
+  },
+  run: async ({ messages, signal }) => {
+    return streamText({ model: anthropic("claude-sonnet-4-5"), messages, abortSignal: signal });
+  },
+});
+```
+
+The hook fires per turn. `incomingMessages` is the new wire message (0-or-1-length, see [v4.5 wire format change](/ai-chat/upgrade-guide#v45-wire-format-change)). For each new tool result on that message, write one audit row. Then return the canonical chain from your DB.
+
+`extractNewToolResults` compares the message against the current `chat.history` chain and returns only tool parts whose `toolCallId` is **not** already resolved. That's what makes the call exactly-once:
+
+- A re-emitted message (same id, same toolCallId) returns `[]` — no duplicate log.
+- A genuinely new tool result on a known assistant message returns just the new ones.
+- A first-time tool result returns the full set.
+
+## Why `hydrateMessages` is the right hook
+
+The pattern works in any pre-merge callback, but `hydrateMessages` is the canonical spot for two reasons:
+
+1. **It fires before the runtime merges** the incoming message into the accumulator. Once merged, the tool results are already on the chain, and `extractNewToolResults` returns `[]` for them.
+2. **It always fires per turn** — including HITL turns where the user resolved a tool with `addToolOutput`, which is the highest-volume audit event in most apps.
+
+By the time `onTurnComplete` fires, the chain already contains `responseMessage`, so calling `extractNewToolResults(responseMessage)` there returns `[]`. Don't put audit logging there for the resolution path.
+
+## Without `hydrateMessages` — `onTurnComplete` for self-emitted tool calls
+
+If you don't use `hydrateMessages`, the runtime's snapshot+replay path handles persistence. You can still audit the agent's **own** tool executions in `onTurnComplete` — but walk `newUIMessages` directly rather than calling `extractNewToolResults`, which returns `[]` once the turn's messages are already on the chain:
+
+```ts
+onTurnComplete: async ({ chatId, newUIMessages }) => {
+  // The assistant message from this turn is in newUIMessages.
+  for (const msg of newUIMessages) {
+    if (msg.role !== "assistant") continue;
+    for (const part of msg.parts) {
+      if (
+        typeof part.type === "string" &&
+        part.type.startsWith("tool-") &&
+        ((part as any).state === "output-available" ||
+         (part as any).state === "output-error")
+      ) {
+        await auditLog.record({
+          chatId,
+          toolCallId: (part as any).toolCallId,
+          toolName: (part as any).type.slice("tool-".length),
+          output: (part as any).output,
+          errorText: (part as any).errorText,
+        });
+      }
+    }
+  }
+},
+```
+
+`newUIMessages` is just the messages this turn produced — no prior-chain noise. Each tool part shows up exactly once.
+
+This works for tools the agent itself calls (no HITL pause). For HITL flows where the user resolves a tool with `addToolOutput`, the resolution arrives on the **next** turn's wire message, not in `newUIMessages` of the resolving turn — use `hydrateMessages` for those.
+
+## Idempotency at the storage layer
+
+Even with `extractNewToolResults`, transient failures (e.g. an audit-log POST that times out and is retried) can produce duplicates. Make the audit-log writer idempotent on `toolCallId`:
+
+```ts
+await auditLog.upsert({
+  where: { toolCallId: r.toolCallId },
+  create: { /* ... */ },
+  update: { /* timestamp, retry count, etc. */ },
+});
+```
+
+`toolCallId` is unique per tool invocation (assigned by the AI SDK when the model emits the tool call) and stable across retries — perfect for an idempotency key.
+
+## What `extractNewToolResults` returns
+
+```ts
+type ExtractedToolResult = {
+  toolCallId: string;
+  toolName: string;
+  input: unknown;       // The arguments the model passed when calling the tool
+  output?: unknown;     // The tool's return value (output-available state)
+  errorText?: string;   // Error message (output-error state)
+};
+```
+
+Tool parts in `input-available` state (the model called the tool but it hasn't resolved yet) are not returned — only **resolved** results count.
+
+## Combining with HITL
+
+[Human-in-the-loop](/ai-chat/patterns/human-in-the-loop) tools pause the turn waiting for `addToolOutput` from the frontend. When the user submits, the wire message carries an updated assistant message with the tool now in `output-available` state. `extractNewToolResults` against that message returns the just-resolved tool — exactly one audit row per user resolution:
+
+```ts
+hydrateMessages: async ({ chatId, incomingMessages }) => {
+  for (const msg of incomingMessages) {
+    for (const r of chat.history.extractNewToolResults(msg)) {
+      // Fires once per ask_user / approval / similar resolution
+      await auditLog.record({ chatId, /* ... */ });
+    }
+  }
+  return await db.getMessages(chatId);
+}
+```
+
+This is the original motivator for the helper — see the [HITL pattern's net-new-tool-result section](/ai-chat/patterns/human-in-the-loop#acting-once-per-net-new-tool-result).
+
+## See also
+
+- [`chat.history`](/ai-chat/backend#chat-history) — full reference for `extractNewToolResults`, `getPendingToolCalls`, `getResolvedToolCalls`
+- [Human-in-the-loop](/ai-chat/patterns/human-in-the-loop) — the pattern this auditing hook complements
+- [`hydrateMessages`](/ai-chat/lifecycle-hooks#hydratemessages) — where pre-merge auditing lives
+- [Persistence and replay](/ai-chat/patterns/persistence-and-replay) — how the runtime rebuilds chains, and why `extractNewToolResults` works against them
diff --git a/docs/ai-chat/patterns/trusted-edge-signals.mdx b/docs/ai-chat/patterns/trusted-edge-signals.mdx
new file mode 100644
index 00000000000..1dd5f97d3f5
--- /dev/null
+++ b/docs/ai-chat/patterns/trusted-edge-signals.mdx
@@ -0,0 +1,337 @@
+---
+title: "Trusted edge signals"
+sidebarTitle: "Trusted edge signals"
+description: "How to safely deliver server-trusted signals (bot scores, JA4, ASN, reCAPTCHA verdicts) to a chat.agent run via an edge proxy."
+---
+
+import RcBanner from "/snippets/ai-chat-rc-banner.mdx";
+
+<RcBanner />
+
+A common need for chat-style endpoints is to drive agent behavior from **server-trusted signals** that the browser cannot be allowed to declare itself — bot management scores, JA4 fingerprints, ASN, reCAPTCHA verdicts, or any other anti-abuse data only the edge can see. The agent's [`clientData`](/ai-chat/reference#withclientdata) channel is the right delivery mechanism, but `clientData` set in the browser is by definition spoofable. The fix is to populate those values in a trusted edge proxy instead of in the browser.
+
+This page documents the pattern using Cloudflare Workers as the proxy. The same shape applies to any edge layer (custom reverse proxy, Vercel Edge Middleware, AWS Lambda@Edge) — the trust comes from the deployment topology, not from Trigger.dev validating the source.
+
+## Why headers don't work
+
+It's tempting to ask whether `POST /realtime/v1/sessions/{id}/in/append` could carry the signal as an HTTP header. It cannot. The realtime route reads only `Authorization` and `X-Part-Id`; the remaining headers are dropped at the route boundary and the body is persisted to the durable stream as opaque bytes. There is no `headers → run payload` channel.
+
+The trigger.dev wire payload, on the other hand, has a typed per-turn metadata channel ([`ChatTaskWirePayload.metadata`](/ai-chat/client-protocol#chattaskwirepayload)). It already flows from the wire into [`clientData`](/ai-chat/reference#withclientdata) on every hook (`onBoot`, `onChatStart`, `onTurnStart`, `run`, `onTurnComplete`). That field is where signals must land.
+
+## The trust boundary
+
+The pattern has one architectural requirement and one wire-shape convention.
+
+**Topology**: the browser must not be able to reach `trigger.dev` directly. All four chat-related requests (`POST /api/v1/sessions`, `GET /realtime/v1/sessions/{id}/out`, `POST /realtime/v1/sessions/{id}/in/append`, `POST /api/v1/auth/jwt/claims`) flow through your edge proxy. The proxy holds the trust; trigger.dev simply persists whatever the proxy writes.
+
+**Namespace**: pick a key your edge proxy owns exclusively — e.g. `__cf`, `__edge`, `__trust`. The proxy **strips** anything in that key on the way in and **injects** its own value on every request. Nothing else in your system should write that key. This is the convention that converts deployment topology into a guarantee the agent can rely on.
+
+```mermaid
+sequenceDiagram
+  participant Browser
+  participant Edge as Edge Proxy (CF Worker)
+  participant Trigger as trigger.dev API
+  participant Agent as chat.agent run
+
+  Browser->>Edge: POST /api/v1/sessions { triggerConfig.basePayload.metadata: {...} }
+  Edge->>Edge: strip body.triggerConfig.basePayload.metadata.__cf<br/>inject body.triggerConfig.basePayload.metadata.__cf = { botScore, ja4, asn }
+  Edge->>Trigger: POST /api/v1/sessions (rewritten body)
+  Trigger-->>Agent: run boots with payload.metadata.__cf
+  Browser->>Edge: POST /realtime/v1/sessions/{id}/in/append { kind: "message", payload: {...} }
+  Edge->>Edge: strip payload.metadata.__cf<br/>inject payload.metadata.__cf
+  Edge->>Trigger: POST /in/append (rewritten body)
+  Trigger-->>Agent: chat.messages.wait() resolves with payload.metadata.__cf
+```
+
+## Wire payload — the two endpoints to rewrite
+
+The signal needs to land in **two** places. Both bodies are JSON; the edge proxy parses, mutates the namespaced key, and re-serializes.
+
+### `POST /api/v1/sessions` — session create
+
+The browser's session-create call carries the first-turn metadata under `triggerConfig.basePayload.metadata`. The proxy mutates that:
+
+```ts
+// Before
+{
+  "type": "chat.agent",
+  "externalId": "conv-123",
+  "taskIdentifier": "my-agent",
+  "triggerConfig": {
+    "basePayload": {
+      "chatId": "conv-123",
+      "trigger": "preload",
+      "metadata": { "userId": "user-456" }
+    }
+  }
+}
+
+// After
+{
+  "type": "chat.agent",
+  "externalId": "conv-123",
+  "taskIdentifier": "my-agent",
+  "triggerConfig": {
+    "basePayload": {
+      "chatId": "conv-123",
+      "trigger": "preload",
+      "metadata": {
+        "userId": "user-456",
+        "__cf": { "botScore": 95, "ja4": "...", "asn": 13335, "country": "US" }
+      }
+    }
+  }
+}
+```
+
+### `POST /realtime/v1/sessions/{id}/in/append` — every follow-up turn
+
+The body is a JSON-serialized `ChatInputChunk`. The proxy parses it, checks `kind === "message"`, and mutates `payload.metadata`:
+
+```ts
+// Before
+{
+  "kind": "message",
+  "payload": {
+    "message": { "id": "u-2", "role": "user", "parts": [{ "type": "text", "text": "..." }] },
+    "chatId": "conv-123",
+    "trigger": "submit-message",
+    "metadata": { "userId": "user-456" }
+  }
+}
+
+// After
+{
+  "kind": "message",
+  "payload": {
+    "message": { ... },
+    "chatId": "conv-123",
+    "trigger": "submit-message",
+    "metadata": {
+      "userId": "user-456",
+      "__cf": { "botScore": 95, "ja4": "...", "asn": 13335, "country": "US" }
+    }
+  }
+}
+```
+
+Both bodies stay well under the [512 KiB cap on `/in/append`](/ai-chat/client-protocol#step-3-send-messages-stops-and-actions) — a typical trust object is ~200 bytes.
+
+Other paths — `.out` SSE, `/api/v1/auth/jwt/claims`, anything else — pass through the proxy untouched. The SSE stream in particular must not be buffered; preserve the response body as-is.
+
+## Cloudflare Worker reference implementation
+
+A complete worker that proxies all paths to `TRIGGER_API_UPSTREAM` and injects `__cf` on the two body-write endpoints:
+
+```ts
+export interface Env {
+  TRIGGER_API_UPSTREAM: string; // e.g. "https://api.trigger.dev"
+}
+
+type CfTrustData = {
+  botScore: number;
+  ja4: string;
+  asn: number;
+  country: string;
+};
+
+function readCfTrustData(request: Request): CfTrustData {
+  const cf = (request as Request & { cf?: Record<string, unknown> }).cf;
+  const bm = cf?.botManagement as Record<string, unknown> | undefined;
+  return {
+    botScore: (bm?.score as number) ?? 0,
+    ja4: (bm?.ja4 as string) ?? "",
+    asn: (cf?.asn as number) ?? 0,
+    country: (cf?.country as string) ?? "",
+  };
+}
+
+function injectCf(metadata: Record<string, unknown> | undefined, cf: CfTrustData) {
+  // Strip anything the client tried to send under our namespace,
+  // then inject the edge-trusted value. Topology + convention =
+  // trust.
+  const stripped = { ...(metadata ?? {}) };
+  delete stripped.__cf;
+  return { ...stripped, __cf: cf };
+}
+
+function rewriteSessionsCreate(body: string, cf: CfTrustData) {
+  const parsed = JSON.parse(body) as Record<string, unknown>;
+  const tc = (parsed.triggerConfig as Record<string, unknown>) ?? {};
+  const bp = (tc.basePayload as Record<string, unknown>) ?? {};
+  parsed.triggerConfig = {
+    ...tc,
+    basePayload: { ...bp, metadata: injectCf(bp.metadata as Record<string, unknown>, cf) },
+  };
+  return JSON.stringify(parsed);
+}
+
+function rewriteAppend(body: string, cf: CfTrustData) {
+  let parsed: Record<string, unknown>;
+  try {
+    parsed = JSON.parse(body);
+  } catch {
+    return body;
+  }
+  if (parsed.kind !== "message") return body;
+  const payload = (parsed.payload as Record<string, unknown>) ?? {};
+  parsed.payload = { ...payload, metadata: injectCf(payload.metadata as Record<string, unknown>, cf) };
+  return JSON.stringify(parsed);
+}
+
+export default {
+  async fetch(request: Request, env: Env): Promise<Response> {
+    const incoming = new URL(request.url);
+    const target = new URL(incoming.pathname + incoming.search, env.TRIGGER_API_UPSTREAM);
+    const cf = readCfTrustData(request);
+
+    const isSessionsCreate =
+      request.method === "POST" && incoming.pathname === "/api/v1/sessions";
+    const isAppend =
+      request.method === "POST" &&
+      /^\/realtime\/v1\/sessions\/[^/]+\/in\/append$/.test(incoming.pathname);
+
+    let body: BodyInit | null = null;
+    if (request.method !== "GET" && request.method !== "HEAD") {
+      const raw = await request.text();
+      if (isSessionsCreate && raw) body = rewriteSessionsCreate(raw, cf);
+      else if (isAppend && raw) body = rewriteAppend(raw, cf);
+      else body = raw;
+    }
+
+    const headers = new Headers(request.headers);
+    headers.delete("host");
+    headers.delete("content-length");
+
+    return fetch(target.toString(), {
+      method: request.method,
+      headers,
+      body,
+      redirect: "manual",
+    });
+  },
+};
+```
+
+Browser-only deployments also need CORS on the worker — echo `Access-Control-Request-Headers` on preflight and set `Access-Control-Allow-Origin` to your frontend origin. The trigger.dev route itself allows all origins, but the worker becomes the visible cross-origin endpoint to the browser.
+
+### Streaming and latency
+
+The SDK's `baseURL` accepts a function (see [Browser transport configuration](#browser-transport-configuration)), so the recommended setup routes `.in/append` and session-create through the worker but lets `.out` SSE go direct to `api.trigger.dev`. Body-mutation only happens on the POST paths; the SSE stream is read-only, doesn't need rewriting, and routing it direct saves an edge hop on every reconnect.
+
+If you do route `.out` through the proxy (e.g. you want a single origin in front of `api.trigger.dev` and don't care about the extra hop), the template above handles it correctly because the worker returns the upstream `fetch()` response untouched, so its body stays a `ReadableStream`. **Do not read that response with `await response.text()`** anywhere in your fork; doing so converts the streaming SSE response into a buffered read and breaks per-chunk delivery.
+
+[Cloudflare Workers HTTP requests](https://developers.cloudflare.com/workers/platform/limits/) have no wall-clock duration limit while the client stays connected — the 60-second long-poll runs to completion on every plan, including Free. CPU-time limits (10 ms on Free, 30 s default on Paid) only apply to active computation; relaying bytes through `fetch` doesn't burn CPU. The two body-rewrite paths use sub-millisecond CPU for typical message sizes, well under either ceiling.
+
+Network-wise the proxy adds one edge hop: roughly 10–50 ms per request round trip versus talking to `api.trigger.dev` directly. Routing SSE direct via the function-form `baseURL` eliminates that hop on the long-lived path.
+
+## Agent side — declare the namespace in `clientDataSchema`
+
+Mirror the namespace in the agent so every turn lands typed:
+
+```ts
+import { chat } from "@trigger.dev/sdk/ai";
+import { z } from "zod";
+
+export const myAgent = chat
+  .withClientData({
+    schema: z.object({
+      userId: z.string(),
+      __cf: z.object({
+        botScore: z.number(),
+        ja4: z.string(),
+        asn: z.number(),
+        country: z.string(),
+      }),
+    }),
+  })
+  .agent({
+    id: "my-agent",
+    run: async ({ messages, clientData, signal }) => {
+      // Score-based routing. The values arrive from the edge proxy.
+      if (clientData.__cf.botScore < 30) {
+        return streamText({
+          model: anthropic("claude-haiku-4-5"),
+          messages: [{ role: "system", content: "Reject politely; do not engage." }],
+          abortSignal: signal,
+          stopWhen: stepCountIs(15),
+        });
+      }
+
+      return streamText({
+        model: anthropic("claude-sonnet-4-5"),
+        messages,
+        abortSignal: signal,
+        // ...
+        stopWhen: stepCountIs(15),
+      });
+    },
+  });
+```
+
+Because the schema requires `__cf` on every turn, any request that *doesn't* go through the proxy fails at the agent boundary — the turn produces an `[ERROR]` span on the trace and an empty `turn-complete` on the wire (see [the client protocol error-detection note](/ai-chat/client-protocol#step-3-send-messages-stops-and-actions)). That gives you a server-side enforcement check for "did this request actually come through the trusted path?"
+
+## Browser transport configuration
+
+Point the `TriggerChatTransport`'s write paths at the worker, not at `api.trigger.dev`.
+
+`baseURL` accepts a function so you can route `.in/append` through the worker while keeping `.out` SSE direct to `api.trigger.dev`. The append path is where the body-mutation matters; the SSE stream is a read-only one-way channel that doesn't need to be proxied. Routing it direct saves an edge hop on every long-poll.
+
+```tsx
+import { useTriggerChatTransport } from "@trigger.dev/sdk/chat/react";
+
+const WORKER = "https://worker.your-domain.com";
+const DIRECT = "https://api.trigger.dev";
+
+const transport = useTriggerChatTransport({
+  task: "my-agent",
+  baseURL: ({ endpoint }) => (endpoint === "out" ? DIRECT : WORKER),
+  // ... accessToken, startSession, etc.
+  // NOTE: do not set __cf in clientData here. The browser cannot be
+  // trusted to populate it — the worker is the source of truth.
+  clientData: { userId: currentUserId },
+});
+```
+
+If you'd rather route everything through the worker, pass a single string:
+
+```tsx
+baseURL: "https://worker.your-domain.com",
+```
+
+`baseURL` accepts the same string-or-function shape on `chat.createStartSessionAction`, so the Next.js server action that creates the session also flows through the worker — that's how the very first run's `basePayload.metadata.__cf` gets injected before reaching `api.trigger.dev`:
+
+```ts
+// actions.ts — server-only
+import { chat } from "@trigger.dev/sdk/ai";
+
+export const startSession = chat.createStartSessionAction("my-agent", {
+  tokenTTL: "1h",
+  baseURL: ({ endpoint }) =>
+    endpoint === "sessions" ? WORKER : DIRECT,
+});
+```
+
+The session-create endpoint discriminator is `"sessions"` (POST `/api/v1/sessions`) or `"auth"` (POST `/api/v1/auth/jwt/claims`) — distinct from the chat transport's `"in"` / `"out"`. If you want everything proxied, pass a string.
+
+## Threat model
+
+Two important invariants follow from this design:
+
+1. **Direct browser-to-trigger.dev requests that lack the namespaced field cannot succeed**. As long as your agent's `clientDataSchema` requires the namespaced field, any request that doesn't go through the proxy (and therefore arrives without it) fails schema validation and produces an empty turn. This is your gate.
+2. **Anything inside the namespaced key is trusted only as far as the proxy is the sole writer**. If a client could obtain the public access token and bypass the proxy, they could send arbitrary values under `__cf`. The schema would still validate (it only checks shape, not provenance). The mitigation is operational: the public access token must only be served to clients that reach trigger.dev through the proxy. In practice this means your Next.js server actions and your browser are both behind the same edge layer, and the worker is the only fetch destination for `trigger.dev` baked into either of them.
+
+You can harden further with a shared-secret header the worker injects (e.g. `X-Edge-Signature`) and an agent-side check, but in most CDN deployments the deployment topology is already sufficient.
+
+## Recipe summary
+
+1. Pick a namespaced key the edge proxy owns (`__cf`, `__edge`, `__trust`).
+2. Deploy a proxy in front of `trigger.dev` that rewrites POST `/api/v1/sessions` and POST `/realtime/v1/sessions/{id}/in/append` to inject your trusted values under that key.
+3. Declare the namespace in the agent's `clientDataSchema` so missing or malformed signals fail at the agent boundary.
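+4. Point your transport's `baseURL` at the proxy for the write paths (session create and `.in/append`). Never let the browser send those requests to `api.trigger.dev` directly; only the read-only `.out` SSE stream may go direct.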
+
+## See also
+
+- [Client Protocol](/ai-chat/client-protocol) — the full wire shape the proxy is rewriting.
+- [`withClientData`](/ai-chat/reference#withclientdata) — agent-side typed metadata channel.
+- [Large payloads](/ai-chat/patterns/large-payloads) — for when injected signals or hooks need to ship more than the 1 MiB stream cap allows.
diff --git a/docs/ai-chat/patterns/version-upgrades.mdx b/docs/ai-chat/patterns/version-upgrades.mdx
new file mode 100644
index 00000000000..75a29f4febb
--- /dev/null
+++ b/docs/ai-chat/patterns/version-upgrades.mdx
@@ -0,0 +1,172 @@
+---
+title: "Version upgrades"
+sidebarTitle: "Version upgrades"
+description: "Gracefully migrate suspended chat agents to a new deployment using chat.requestUpgrade() and the continuation mechanism."
+---
+
+import RcBanner from "/snippets/ai-chat-rc-banner.mdx";
+
+<RcBanner />
+
+Chat agent runs are pinned to the worker version they started on. When you deploy a new version, suspended runs resume on the **old** code. If your deploy includes breaking changes (new tools, changed schemas, updated API contracts), this can cause issues.
+
+`chat.requestUpgrade()` lets the agent opt out of the current run so that a new run on the latest version takes over the conversation.
+
+## How it works
+
+When `chat.requestUpgrade()` is called in `onTurnStart` or `onValidateMessages`:
+
+1. `run()` is **skipped** — no response is generated on old code
+2. The agent calls the server-side `endAndContinueSession` endpoint, which atomically swaps the Session's `currentRunId` to a freshly-triggered run on the latest deployment (optimistic-claim against `currentRunVersion`)
+3. The new run picks up the conversation and produces the response
+4. The transport's existing SSE subscription to `session.out` keeps receiving chunks across the swap — no client-side reconnect
+
+The new run lives on the **same Session** as the old one. `chatId` is the durable identity; only the underlying `currentRunId` rotates. The audit log records the new run with `reason: "upgrade"`.
+
+When called from inside `run()` or `chat.defer()`, the current turn completes normally first and the run exits afterward. The next message triggers the continuation on the same session.
+
+```mermaid
+sequenceDiagram
+  participant User
+  participant Transport
+  participant RunV1 as Run (v1)
+  participant RunV2 as Run (v2)
+
+  User->>Transport: send message
+  Transport->>RunV1: input stream
+  RunV1->>RunV1: onTurnStart → requestUpgrade()
+  RunV1-->>Transport: trigger:upgrade-required
+  RunV1->>RunV1: exit (run() never called)
+  Transport->>RunV2: trigger new run (continuation, same message)
+  RunV2-->>Transport: response stream
+  Transport-->>User: response (seamless)
+```
+
+## Contract versioning
+
+Define an explicit version for the contract between your frontend and agent. The frontend sends a `protocolVersion` via `clientData`, and the agent declares which versions it supports. When a breaking change ships (new tools, changed data parts, updated response format), bump the version.
+
+This gives you full control — the frontend can be backwards-compatible across multiple agent versions, and the agent only upgrades when it sees a version it doesn't support.
+
+```tsx title="app/components/Chat.tsx"
+import { useTriggerChatTransport } from "@trigger.dev/sdk/chat/react";
+import { useChat } from "@ai-sdk/react";
+
+export function Chat() {
+  const transport = useTriggerChatTransport({
+    task: "my-chat",
+    accessToken: ({ chatId }) => mintChatAccessToken(chatId),
+    startSession: ({ chatId, clientData }) =>
+      startChatSession({ chatId, clientData }),
+    // Bump this when you ship a breaking change to the chat UI or tools
+    clientData: { userId: user.id, protocolVersion: "v2" },
+  });
+
+  const { messages, sendMessage } = useChat({ transport });
+  // ...
+}
+```
+
+On the agent side, declare which versions the current code supports:
+
+```ts
+import { chat } from "@trigger.dev/sdk/ai";
+import { streamText } from "ai";
+import { anthropic } from "@ai-sdk/anthropic";
+
+// The set of frontend protocol versions this agent code supports.
+// When you deploy a breaking change, remove old versions from this set.
+const SUPPORTED_VERSIONS = new Set(["v2", "v3"]);
+
+export const myChat = chat
+  .withClientData({
+    schema: z.object({
+      userId: z.string(),
+      protocolVersion: z.string(),
+    }),
+  })
+  .agent({
+    id: "my-chat",
+    onTurnStart: async ({ clientData }) => {
+      if (clientData?.protocolVersion && !SUPPORTED_VERSIONS.has(clientData.protocolVersion)) {
+        chat.requestUpgrade();
+      }
+    },
+    run: async ({ messages, signal }) => {
+      return streamText({ model: anthropic("claude-sonnet-4-5"), messages, abortSignal: signal });
+    },
+  });
+```
+
+The transport includes `clientData` in every payload — both the initial trigger and subsequent records on the session's `.in` channel — so the agent always has the current value.
+
+This pattern is useful when:
+- Your frontend is backwards-compatible across several agent versions, but occasionally ships breaking changes
+- You want explicit control over when upgrades happen rather than upgrading on every deploy
+- Multiple frontend versions may be active at the same time (e.g., users with cached tabs)
+
+## Auto-detect from build ID (Next.js / Vercel)
+
+For automatic upgrade on every deploy, pass your platform's build ID via `clientData` instead of a manual version. The agent stores the ID from the first message and upgrades when it changes:
+
+```tsx title="app/components/Chat.tsx"
+// Vercel sets this at build time, or use your own build ID
+const APP_VERSION = process.env.NEXT_PUBLIC_VERCEL_DEPLOYMENT_ID
+  ?? process.env.NEXT_PUBLIC_BUILD_ID
+  ?? "dev";
+
+export function Chat() {
+  const transport = useTriggerChatTransport({
+    task: "my-chat",
+    accessToken: ({ chatId }) => mintChatAccessToken(chatId),
+    startSession: ({ chatId, clientData }) =>
+      startChatSession({ chatId, clientData }),
+    clientData: { userId: user.id, appVersion: APP_VERSION },
+  });
+  // ...
+}
+```
+
+```ts title="trigger/chat.ts"
+const initialAppVersion = chat.local<{ version: string }>({ id: "appVersion" });
+
+export const myChat = chat
+  .withClientData({
+    schema: z.object({
+      userId: z.string(),
+      appVersion: z.string(),
+    }),
+  })
+  .agent({
+    id: "my-chat",
+    onBoot: async ({ clientData }) => {
+      initialAppVersion.init({ version: clientData.appVersion });
+    },
+    onTurnStart: async ({ clientData }) => {
+      if (clientData?.appVersion && clientData.appVersion !== initialAppVersion.version) {
+        chat.requestUpgrade();
+      }
+    },
+    run: async ({ messages, signal }) => {
+      return streamText({ model: anthropic("claude-sonnet-4-5"), messages, abortSignal: signal });
+    },
+  });
+```
+
+This upgrades on **every** deploy, not just breaking changes. Good for fast-moving projects where you always want the latest code.
+
+## Other agent types
+
+- **`chat.agent()`** and **`chat.createSession()`** — use `chat.requestUpgrade()` as shown above
+- **`chat.customAgent()`** — you control the turn loop, so just `return` from `run()` when you want to exit
+
+## Interaction with recovery boot
+
+`chat.requestUpgrade()` is a graceful exit — the old run returns cleanly, never writing a partial assistant message. The new continuation run boots with an empty `session.out` tail and the upgrade-trigger message on `session.in`. The trigger message dispatches as turn 1 on the new version via the normal continuation-wait path. [`onRecoveryBoot`](/ai-chat/patterns/recovery-boot) does NOT fire on this path — the hook is reserved for mid-stream interruptions (cancel / crash / OOM) where a partial assistant message exists on the tail.
+
+## See also
+
+- [Lifecycle hooks](/ai-chat/lifecycle-hooks) — where `onTurnStart` and `onChatResume` fit in the turn cycle
+- [Recovery boot](/ai-chat/patterns/recovery-boot) — the sibling hook for mid-stream interruptions (does NOT fire on `requestUpgrade`)
+- [Database persistence](/ai-chat/patterns/database-persistence) — how continuations interact with session state
+- [Client Protocol](/ai-chat/client-protocol#step-4-handle-continuations) — how clients handle continuations at the wire level
diff --git a/docs/ai-chat/pending-messages.mdx b/docs/ai-chat/pending-messages.mdx
new file mode 100644
index 00000000000..80dbdaab2eb
--- /dev/null
+++ b/docs/ai-chat/pending-messages.mdx
@@ -0,0 +1,339 @@
+---
+title: "Pending Messages"
+sidebarTitle: "Pending Messages"
+description: "Inject user messages mid-execution to steer agents between tool-call steps."
+---
+
+import RcBanner from "/snippets/ai-chat-rc-banner.mdx";
+
+<RcBanner />
+
+## Overview
+
+When an AI agent is executing tool calls, users may want to send a message that **steers the agent mid-execution** — adding context, correcting course, or refining the request without waiting for the response to finish.
+
+The `pendingMessages` option enables this by injecting user messages between tool-call steps via the AI SDK's `prepareStep`. Messages that arrive during streaming are queued and injected at the next step boundary. If there are no more step boundaries (single-step response or final text generation), the message becomes the next turn automatically.
+
+## How it works
+
+1. User sends a message while the agent is streaming
+2. The message is sent to the backend via the input stream (`transport.sendPendingMessage`)
+3. The backend queues it in the steering queue
+4. At the next `prepareStep` boundary (between tool-call steps), `shouldInject` is called
+5. If it returns `true`, the message is injected into the LLM's context
+6. A `data-pending-message-injected` stream chunk confirms injection to the frontend
+7. If `prepareStep` never fires (no tool calls), the message becomes the next turn
+
+## Backend: chat.agent
+
+Add `pendingMessages` to your `chat.agent` configuration:
+
+```ts
+import { chat } from "@trigger.dev/sdk/ai";
+import { streamText, stepCountIs } from "ai";
+import { anthropic } from "@ai-sdk/anthropic";
+
+export const myChat = chat.agent({
+  id: "my-chat",
+  pendingMessages: {
+    // Only inject when there are completed steps (tool calls happened)
+    shouldInject: ({ steps }) => steps.length > 0,
+  },
+  run: async ({ messages, signal }) => {
+    return streamText({
+      ...chat.toStreamTextOptions({ registry }),
+      messages,
+      tools: { /* ... */ },
+      abortSignal: signal,
+      stopWhen: stepCountIs(15),
+    });
+  },
+});
+```
+
+The `prepareStep` for injection is automatically included when you spread `chat.toStreamTextOptions()`. If you provide your own `prepareStep` after the spread, it overrides the auto-injected one.
+
+### Options
+
+| Option | Type | Description |
+|--------|------|-------------|
+| `shouldInject` | `(event: PendingMessagesBatchEvent) => boolean` | Decide whether to inject the batch. Called once per step boundary. If absent, no injection happens. |
+| `prepare` | `(event: PendingMessagesBatchEvent) => ModelMessage[]` | Transform the batch before injection. Default: convert each message via `convertToModelMessages`. |
+| `onReceived` | `(event) => void` | Called when a message arrives during streaming (per-message). |
+| `onInjected` | `(event) => void` | Called after a batch is injected. |
+
+### shouldInject
+
+Called once per step boundary with the full batch of pending messages. Return `true` to inject all of them, `false` to skip (they'll be available at the next boundary or become the next turn).
+
+```ts
+pendingMessages: {
+  // Alternatives — keep only ONE `shouldInject` in real code. Always inject:
+  shouldInject: () => true,
+
+  // Only inject after tool calls
+  shouldInject: ({ steps }) => steps.length > 0,
+
+  // Only inject if there's one message
+  shouldInject: ({ messages }) => messages.length === 1,
+},
+```
+
+The event includes:
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `messages` | `UIMessage[]` | All pending messages (batch) |
+| `modelMessages` | `ModelMessage[]` | Current conversation |
+| `steps` | `CompactionStep[]` | Completed steps |
+| `stepNumber` | `number` | Current step (0-indexed) |
+| `chatId` | `string` | Chat session ID |
+| `turn` | `number` | Current turn |
+| `clientData` | `unknown` | Frontend metadata |
+
+### prepare
+
+Transform the batch of pending messages before they're injected into the LLM's context. By default, each UIMessage is converted to ModelMessages individually. Use `prepare` to combine multiple messages or add context:
+
+```ts
+pendingMessages: {
+  shouldInject: ({ steps }) => steps.length > 0,
+  prepare: ({ messages }) => [{
+    role: "user",
+    content: messages.length === 1
+      ? messages[0].parts[0]?.text ?? ""
+      : `The user sent ${messages.length} messages:\n${
+          messages.map((m, i) => `${i + 1}. ${m.parts[0]?.text}`).join("\n")
+        }`,
+  }],
+},
+```
+
+### Stream chunk
+
+When messages are injected, the SDK automatically writes a `data-pending-message-injected` stream chunk containing the message IDs and text. The frontend uses this to:
+- Confirm which messages were injected
+- Remove them from the pending overlay
+- Render them inline at the injection point in the assistant response
+
+A "pending message injected" span also appears in the run trace.
+
+## Backend: chat.createSession
+
+Pass `pendingMessages` to the session options:
+
+```ts
+const session = chat.createSession(payload, {
+  signal,
+  idleTimeoutInSeconds: 60,
+  pendingMessages: {
+    shouldInject: () => true,
+  },
+});
+
+for await (const turn of session) {
+  const result = streamText({
+    model: anthropic("claude-sonnet-4-5"),
+    messages: turn.messages,
+    abortSignal: turn.signal,
+    prepareStep: turn.prepareStep(), // Handles injection + compaction
+    stopWhen: stepCountIs(15),
+  });
+
+  await turn.complete(result);
+}
+```
+
+Use `turn.prepareStep()` to get a prepareStep function that handles both injection and compaction. Users who spread `chat.toStreamTextOptions()` get it automatically.
+
+## Backend: MessageAccumulator (raw task)
+
+Pass `pendingMessages` to the constructor and wire up the message listener manually:
+
+```ts
+const conversation = new chat.MessageAccumulator({
+  pendingMessages: {
+    shouldInject: () => true,
+    prepare: ({ messages }) => [{
+      role: "user",
+      content: `[Steering]: ${messages.map(m => m.parts[0]?.text).join(", ")}`,
+    }],
+  },
+});
+
+for (let turn = 0; turn < 100; turn++) {
+  const messages = await conversation.addIncoming(payload.messages, payload.trigger, turn);
+
+  // Listen for steering messages during streaming
+  const sub = chat.messages.on(async (msg) => {
+    const lastMsg = msg.messages?.[msg.messages.length - 1];
+    if (lastMsg) await conversation.steerAsync(lastMsg);
+  });
+
+  const result = streamText({
+    model: anthropic("claude-sonnet-4-5"),
+    messages,
+    prepareStep: conversation.prepareStep(), // Handles injection + compaction
+    stopWhen: stepCountIs(15),
+  });
+
+  const response = await chat.pipeAndCapture(result);
+  sub.off();
+
+  if (response) await conversation.addResponse(response);
+  await chat.writeTurnComplete();
+}
+```
+
+### MessageAccumulator methods
+
+| Method | Description |
+|--------|-------------|
+| `steer(message, modelMessages?)` | Queue a UIMessage for injection (sync) |
+| `steerAsync(message)` | Queue a UIMessage, converting to model messages automatically |
+| `drainSteering()` | Get and clear unconsumed steering messages |
+| `prepareStep()` | Returns a prepareStep function handling injection + compaction |
+
+## Frontend: usePendingMessages hook
+
+The `usePendingMessages` hook manages all the frontend complexity — tracking pending messages, detecting injections, and handling the turn lifecycle.
+
+```tsx
+import { useChat } from "@ai-sdk/react";
+import { useTriggerChatTransport, usePendingMessages } from "@trigger.dev/sdk/chat/react";
+
+function Chat({ chatId }: { chatId: string }) {
+  const transport = useTriggerChatTransport({
+    task: "my-chat",
+    accessToken: ({ chatId }) => mintChatAccessToken(chatId),
+    startSession: ({ chatId, clientData }) =>
+      startChatSession({ chatId, clientData }),
+  });
+
+  const { messages, setMessages, sendMessage, stop, status } = useChat({
+    id: chatId,
+    transport,
+  });
+
+  const pending = usePendingMessages({
+    transport,
+    chatId,
+    status,
+    messages,
+    setMessages,
+    sendMessage,
+    metadata: { model: "gpt-4o" },
+  });
+
+  return (
+    <div>
+      {/* Render messages */}
+      {messages.map((msg) => (
+        <div key={msg.id}>
+          {msg.role === "assistant" ? (
+            msg.parts.map((part, i) =>
+              pending.isInjectionPoint(part) ? (
+                // Render injected messages inline at the injection point
+                <div key={i}>
+                  {pending.getInjectedMessages(part).map((m) => (
+                    <div key={m.id} className="injected-message">{m.text}</div>
+                  ))}
+                </div>
+              ) : (
+                <Part key={i} part={part} />
+              )
+            )
+          ) : (
+            <UserMessage msg={msg} />
+          )}
+        </div>
+      ))}
+
+      {/* Render pending messages */}
+      {pending.pending.map((msg) => (
+        <div key={msg.id}>
+          <span>{msg.text}</span>
+          <span>{msg.mode === "steering" ? "Steering" : "Queued"}</span>
+          {msg.mode === "queued" && status === "streaming" && (
+            <button onClick={() => pending.promoteToSteering(msg.id)}>
+              Steer instead
+            </button>
+          )}
+        </div>
+      ))}
+
+      {/* Send form */}
+      <form onSubmit={(e) => {
+        e.preventDefault();
+        pending.steer(input); // Steers during streaming, sends normally when ready
+        setInput("");
+      }}>
+        <input value={input} onChange={(e) => setInput(e.target.value)} />
+        <button type="submit">Send</button>
+        {status === "streaming" && (
+          <button type="button" onClick={() => { pending.queue(input); setInput(""); }}>
+            Queue
+          </button>
+        )}
+      </form>
+    </div>
+  );
+}
+```
+
+### Hook API
+
+| Property/Method | Type | Description |
+|----------------|------|-------------|
+| `pending` | `PendingMessage[]` | Current pending messages with `id`, `text`, `mode`, and `injected` status |
+| `steer(text)` | `(text: string) => void` | Send a steering message during streaming, or normal message when ready |
+| `queue(text)` | `(text: string) => void` | Queue for next turn during streaming, or send normally when ready |
+| `promoteToSteering(id)` | `(id: string) => void` | Convert a queued message to steering (sends via input stream immediately) |
+| `isInjectionPoint(part)` | `(part: unknown) => boolean` | Check if an assistant message part is an injection confirmation |
+| `getInjectedMessageIds(part)` | `(part: unknown) => string[]` | Get message IDs from an injection point |
+| `getInjectedMessages(part)` | `(part: unknown) => InjectedMessage[]` | Get messages (id + text) from an injection point |
+
+### PendingMessage
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `id` | `string` | Unique message ID |
+| `text` | `string` | Message text |
+| `mode` | `"steering" \| "queued"` | How the message is being handled |
+| `injected` | `boolean` | Whether the backend confirmed injection |
+
+### Message lifecycle
+
+- **Steering messages** are sent via `transport.sendPendingMessage()` immediately. They appear as purple pending bubbles. If injected, they disappear from the overlay and render inline at the injection point. If not injected (no more step boundaries), they auto-send as the next turn when the response finishes.
+
+- **Queued messages** stay client-side until the turn completes, then auto-send as the next turn via `sendMessage()`. They can be promoted to steering mid-stream by clicking "Steer instead".
+
+- **Promoted messages** are queued messages that were converted to steering. They get sent via input stream immediately and follow the steering lifecycle from that point.
+
+## Transport: sendPendingMessage
+
+The `TriggerChatTransport` exposes a `sendPendingMessage` method for sending messages via the input stream without disrupting the active stream subscription:
+
+```ts
+const sent = await transport.sendPendingMessage(chatId, {
+  id: crypto.randomUUID(),
+  role: "user",
+  parts: [{ type: "text", text: "and compare to vercel" }],
+}, { model: "gpt-4o" });
+```
+
+Unlike `sendMessage()` from useChat, this does NOT:
+- Add the message to useChat's local state
+- Cancel the active stream subscription
+- Start a new response stream
+
+The `usePendingMessages` hook calls this internally — you typically don't need to use it directly.
+
+## Coexistence with compaction
+
+Pending message injection and compaction both use `prepareStep`. When both are configured, the auto-injected `prepareStep` handles them in order:
+
+1. **Compaction** runs first — checks threshold, generates summary if needed
+2. **Injection** runs second — pending messages are appended to either the compacted or original messages
+
+This means injected messages are always included after compaction, ensuring the LLM sees both the compressed history and the new steering input.
diff --git a/docs/ai-chat/quick-start.mdx b/docs/ai-chat/quick-start.mdx
new file mode 100644
index 00000000000..4d1e204dbec
--- /dev/null
+++ b/docs/ai-chat/quick-start.mdx
@@ -0,0 +1,158 @@
+---
+title: "Quick Start"
+sidebarTitle: "Quick Start"
+description: "Get a working AI agent in 3 steps — define an agent, add two server actions, and wire up the frontend."
+---
+
+import RcBanner from "/snippets/ai-chat-rc-banner.mdx";
+
+<RcBanner />
+
+These steps assume you already have a Trigger.dev project with the SDK installed and the CLI authenticated — if you don't, follow [Manual setup](/manual-setup) (or `npx trigger.dev@latest init` in an existing project) first. You should be able to run `pnpm exec trigger dev` from your project root before continuing.
+
+<Steps>
+  <Step title="Define a chat agent">
+    Use `chat.agent` from `@trigger.dev/sdk/ai` to define an agent that handles chat messages. The `run` function receives `ModelMessage[]` (already converted from the frontend's `UIMessage[]`) — pass them directly to `streamText`.
+
+    If you return a `StreamTextResult`, it's **automatically piped** to the frontend.
+
+    ```ts trigger/chat.ts
+    import { chat } from "@trigger.dev/sdk/ai";
+    import { streamText, stepCountIs } from "ai";
+    import { anthropic } from "@ai-sdk/anthropic";
+
+    export const myChat = chat.agent({
+      id: "my-chat",
+      run: async ({ messages, signal }) => {
+        return streamText({
+          // Spread chat.toStreamTextOptions() FIRST — it wires up
+          // prepareStep (compaction, steering, background injection),
+          // the system prompt set via chat.prompt(), and telemetry.
+          // Skipping this is the single most common cause of subtle
+          // bugs (silent broken compaction, missing steering, etc.).
+          ...chat.toStreamTextOptions(),
+          model: anthropic("claude-sonnet-4-5"),
+          messages,
+          abortSignal: signal,
+          stopWhen: stepCountIs(15),
+        });
+      },
+    });
+    ```
+
+    <Warning>
+      **Always spread `chat.toStreamTextOptions()` into your `streamText` call.** It wires up the `prepareStep` callback that drives compaction, mid-turn steering, and background injection — features that silently no-op if the spread is missing. Spread it **first** so any explicit overrides (e.g. a custom `prepareStep`) win.
+    </Warning>
+
+    <Tip>
+      For a **custom** [`UIMessage`](https://sdk.vercel.ai/docs/reference/ai-sdk-core/ui-message) subtype (typed `data-*` parts, tool map, etc.), define the agent with [`chat.withUIMessage<...>().agent({...})`](/ai-chat/types) instead of `chat.agent`.
+    </Tip>
+
+  </Step>
+
+  <Step title="Add two server actions">
+    On your server (e.g. as Next.js server actions), expose two helpers the transport will call: one that creates the chat session, and one that mints a fresh session-scoped access token for refresh.
+
+    ```ts app/actions.ts
+    "use server";
+
+    import { auth } from "@trigger.dev/sdk";
+    import { chat } from "@trigger.dev/sdk/ai";
+
+    // Creates the Session row + triggers the first run, returns the
+    // session PAT. Idempotent on (env, chatId) so concurrent calls
+    // converge to the same session.
+    export const startChatSession = chat.createStartSessionAction("my-chat");
+
+    // Pure mint — fresh session-scoped PAT for an existing session.
+    // The transport calls this on 401/403 to refresh.
+    export async function mintChatAccessToken(chatId: string) {
+      return auth.createPublicToken({
+        scopes: {
+          read: { sessions: chatId },
+          write: { sessions: chatId },
+        },
+        expirationTime: "1h",
+      });
+    }
+    ```
+
+    The browser never holds your environment's secret key — both helpers run on your server, where customer-side authorization (per-user, per-plan, etc.) lives alongside any DB writes you want to pair with session creation.
+
+  </Step>
+
+  <Step title="Use in the frontend">
+    Use the `useTriggerChatTransport` hook from `@trigger.dev/sdk/chat/react` to create a memoized transport instance, then pass it to `useChat`. Wire both server actions into the transport's `accessToken` and `startSession` callbacks.
+
+    The example below uses the Next.js `@/*` path alias for imports from `@/trigger/chat` and `@/app/actions`. If you're not using Next.js (or haven't configured the alias), swap them for relative imports.
+
+    ```tsx app/components/chat.tsx
+    "use client";
+
+    import { useState } from "react";
+    import { useChat } from "@ai-sdk/react";
+    import { useTriggerChatTransport } from "@trigger.dev/sdk/chat/react";
+    import type { myChat } from "@/trigger/chat";
+    import { mintChatAccessToken, startChatSession } from "@/app/actions";
+
+    export function Chat() {
+      const transport = useTriggerChatTransport<typeof myChat>({
+        task: "my-chat",
+        accessToken: ({ chatId }) => mintChatAccessToken(chatId),
+        startSession: ({ chatId, clientData }) =>
+          startChatSession({ chatId, clientData }),
+      });
+
+      const { messages, sendMessage, stop, status } = useChat({ transport });
+      const [input, setInput] = useState("");
+
+      return (
+        <div>
+          {messages.map((m) => (
+            <div key={m.id}>
+              <strong>{m.role}:</strong>
+              {m.parts.map((part, i) =>
+                part.type === "text" ? <span key={i}>{part.text}</span> : null
+              )}
+            </div>
+          ))}
+
+          <form
+            onSubmit={(e) => {
+              e.preventDefault();
+              if (input.trim()) {
+                sendMessage({ text: input });
+                setInput("");
+              }
+            }}
+          >
+            <input
+              value={input}
+              onChange={(e) => setInput(e.target.value)}
+              placeholder="Type a message..."
+            />
+            <button type="submit" disabled={status === "streaming"}>
+              Send
+            </button>
+            {status === "streaming" && (
+              <button type="button" onClick={stop}>
+                Stop
+              </button>
+            )}
+          </form>
+        </div>
+      );
+    }
+    ```
+
+  </Step>
+</Steps>
+
+## Next steps
+
+- [Backend](/ai-chat/backend) — Lifecycle hooks, persistence, session iterator, raw task primitives
+- [Frontend](/ai-chat/frontend) — Session management, client data, reconnection
+- [Types](/ai-chat/types) — `chat.withUIMessage`, `InferChatUIMessage`, and related typing
+- [`chat.local`](/ai-chat/chat-local) — Per-run typed state across hooks, run, tools, subtasks
+- [Sub-agents pattern](/ai-chat/patterns/sub-agents) — Subtask-as-tool, `target: "root"` streaming, `ai.toolExecute` helpers
+- [Background injection](/ai-chat/background-injection) — `chat.inject()` and `chat.defer()` for between-turn work
diff --git a/docs/ai-chat/reference.mdx b/docs/ai-chat/reference.mdx
new file mode 100644
index 00000000000..82cc28610b7
--- /dev/null
+++ b/docs/ai-chat/reference.mdx
@@ -0,0 +1,847 @@
+---
+title: "API Reference"
+sidebarTitle: "API Reference"
+description: "Complete API reference for the AI Agents SDK — backend options, events, frontend transport, and hooks."
+---
+
+import RcBanner from "/snippets/ai-chat-rc-banner.mdx";
+
+<RcBanner />
+
+## Compatibility
+
+| Dependency | Supported | Notes |
+|---|---|---|
+| `@trigger.dev/sdk` | `>=4.5.0-rc.0` | The chat agent surface lives in this SDK release. Install with `@trigger.dev/sdk@rc`. |
+| `ai` (Vercel AI SDK) | `^5.0.0 \|\| ^6.0.0` | Declared as a peer. v6 is what we develop against day to day. |
+| `@ai-sdk/react` | matches your `ai` major | Pulled in by `useChat`. The transport works with whichever React hook ships in the same major as your `ai` version. |
+| `react` | `^18.0 \|\| ^19.0` | Required only if you use `@trigger.dev/sdk/chat/react` (the frontend transport). Server-only consumers can skip React entirely. |
+| Node.js | `>=18.20.0` | The SDK's engine constraint. The chat agent itself works on any version the SDK supports. |
+| Provider packages (`@ai-sdk/openai`, `@ai-sdk/anthropic`, etc.) | versions that target your `ai` major | Pick a provider package whose `ai` peer matches yours. The chat agent doesn't depend on any specific provider — pass whatever model you want into `streamText`. |
+
+The `ai` peer is **optional** — server-only setups that don't call `streamText` (raw `task()` with chat primitives) can skip the AI SDK entirely.
+
+## ChatAgentOptions
+
+Options for `chat.agent()`.
+
+| Option                        | Type                                                        | Default                        | Description                                                                                         |
+| ----------------------------- | ----------------------------------------------------------- | ------------------------------ | --------------------------------------------------------------------------------------------------- |
+| `id`                          | `string`                                                    | required                       | Task identifier                                                                                     |
+| `run`                         | `(payload: ChatTaskRunPayload) => Promise<unknown>`         | required                       | Handler for each turn                                                                               |
+| `clientDataSchema`            | `TaskSchema`                                                | —                              | Schema for validating and typing `clientData`                                                       |
+| `onBoot`                      | `(event: BootEvent) => Promise<void> \| void`               | —                              | Fires once per worker process — initial, preloaded, AND reactive continuation. Use for `chat.local` init and per-process resources. See [onBoot](/ai-chat/lifecycle-hooks#onboot). |
+| `onRecoveryBoot`              | `(event: RecoveryBootEvent) => Promise<RecoveryBootResult \| void> \| RecoveryBootResult \| void` | — | Fires on a continuation boot when the dead predecessor left recovered state (partial assistant or in-flight users). Override the smart default — drop partial, synthesize tool results, emit a recovery banner. See [Recovery boot](/ai-chat/patterns/recovery-boot). |
+| `onPreload`                   | `(event: PreloadEvent) => Promise<void> \| void`            | —                              | Fires on preloaded runs before the first message                                                    |
+| `onChatStart`                 | `(event: ChatStartEvent) => Promise<void> \| void`          | —                              | Fires once per chat, on the very first user message. Does NOT fire on continuation runs or OOM-retries — see [onChatStart](/ai-chat/lifecycle-hooks#onchatstart). |
+| `onValidateMessages`          | `(event: ValidateMessagesEvent) => UIMessage[] \| Promise<UIMessage[]>` | —                | Validate/transform UIMessages before model conversion. See [onValidateMessages](/ai-chat/lifecycle-hooks#onvalidatemessages) |
+| `hydrateMessages`             | `(event: HydrateMessagesEvent) => UIMessage[] \| Promise<UIMessage[]>` | —                 | Load message history from backend, replacing the linear accumulator. See [hydrateMessages](/ai-chat/lifecycle-hooks#hydratemessages) |
+| `actionSchema`                | `TaskSchema`                                                | —                              | Schema for validating custom actions sent via `transport.sendAction()`. See [Actions](/ai-chat/actions) |
+| `onAction`                    | `(event: ActionEvent) => Promise<unknown> \| unknown`       | —                              | Handle custom actions. Actions are not turns — only `hydrateMessages` + `onAction` fire. Return a `StreamTextResult` (or `string` / `UIMessage`) for a model response; return `void` for side-effect-only. See [Actions](/ai-chat/actions) |
+| `onTurnStart`                 | `(event: TurnStartEvent) => Promise<void> \| void`          | —                              | Fires every turn before `run()`                                                                     |
+| `onBeforeTurnComplete`        | `(event: BeforeTurnCompleteEvent) => Promise<void> \| void` | —                              | Fires after response but before stream closes. Includes `writer`.                                   |
+| `onTurnComplete`              | `(event: TurnCompleteEvent) => Promise<void> \| void`       | —                              | Fires after each turn completes (stream closed)                                                     |
+| `onCompacted`                 | `(event: CompactedEvent) => Promise<void> \| void`          | —                              | Fires when compaction occurs. Includes `writer`. See [Compaction](/ai-chat/compaction)              |
+| `compaction`                  | `ChatAgentCompactionOptions`                                | —                              | Automatic context compaction. See [Compaction](/ai-chat/compaction)                                 |
+| `pendingMessages`             | `PendingMessagesOptions`                                    | —                              | Mid-execution message injection. See [Pending Messages](/ai-chat/pending-messages)                  |
+| `prepareMessages`             | `(event: PrepareMessagesEvent) => ModelMessage[]`           | —                              | Transform model messages before use (cache breaks, context injection, etc.)                         |
+| `maxTurns`                    | `number`                                                    | `100`                          | Max conversational turns per run                                                                    |
+| `turnTimeout`                 | `string`                                                    | `"1h"`                         | How long to wait for next message                                                                   |
+| `idleTimeoutInSeconds`        | `number`                                                    | `30`                           | Seconds to stay idle before suspending                                                              |
+| `chatAccessTokenTTL`          | `string`                                                    | `"1h"`                         | How long the scoped access token remains valid                                                      |
+| `preloadIdleTimeoutInSeconds` | `number`                                                    | Same as `idleTimeoutInSeconds` | Idle timeout after `onPreload` fires                                                                |
+| `preloadTimeout`              | `string`                                                    | Same as `turnTimeout`          | Suspend timeout for preloaded runs                                                                  |
+| `uiMessageStreamOptions`      | `ChatUIMessageStreamOptions`                                | —                              | Default options for `toUIMessageStream()`. Per-turn override via `chat.setUIMessageStreamOptions()` |
+| `onChatSuspend`               | `(event: ChatSuspendEvent) => Promise<void> \| void`        | —                              | Fires right before the run suspends. See [onChatSuspend](/ai-chat/lifecycle-hooks#onchatsuspend--onchatresume) |
+| `onChatResume`                | `(event: ChatResumeEvent) => Promise<void> \| void`         | —                              | Fires right after the run resumes from suspension                                                   |
+| `exitAfterPreloadIdle`        | `boolean`                                                   | `false`                        | Exit run after preload idle timeout instead of suspending. See [exitAfterPreloadIdle](/ai-chat/lifecycle-hooks#exitafterpreloadidle) |
+| `oomMachine`                  | `MachinePresetName`                                         | —                              | Fallback machine when an attempt fails with OOM. Setting it enables a single OOM retry on the larger machine. See [OOM resilience](/ai-chat/patterns/oom-resilience) |
+
+Plus most standard [TaskOptions](/tasks/overview) — `queue`, `machine`, `maxDuration`, **`onWait`**, **`onResume`**, **`onComplete`**, and other lifecycle hooks. Generic `retry` is **not** exposed on `chat.agent`; use `oomMachine` for OOM recovery, or drop down to a raw [`task()`](/ai-chat/backend#raw-task-with-primitives) if you need richer retry semantics. Standard hooks use the same parameter shapes as on a normal `task()` (including `ctx`).
+
+## Task context (`ctx`)
+
+All **`chat.agent`** lifecycle events (**`onBoot`**, **`onRecoveryBoot`**, **`onPreload`**, **`onChatStart`**, **`onTurnStart`**, **`onBeforeTurnComplete`**, **`onTurnComplete`**, **`onCompacted`**, **`onChatSuspend`**, **`onChatResume`**) and the object passed to **`run`** include **`ctx`**: the same **`TaskRunContext`** shape as the `ctx` in `task({ run: (payload, { ctx }) => ... })`.
+
+<Note>
+  **`onValidateMessages`** does not include `ctx` — it fires before message accumulation and is designed for pure validation/transformation of incoming messages.
+</Note>
+
+Use **`ctx`** for run metadata, tags, parent links, or any API that needs the full run record. The chat-specific string **`runId`** on events is always **`ctx.run.id`**; both are provided for convenience.
+
+```ts
+import type { TaskRunContext } from "@trigger.dev/sdk";
+// Equivalent alias (same type):
+import type { Context } from "@trigger.dev/sdk";
+```
+
+<Note>
+  Prefer `import type { TaskRunContext } from "@trigger.dev/sdk"` in application code. Do not depend on `@trigger.dev/core` directly.
+</Note>
+
+## ChatTaskRunPayload
+
+The payload passed to the `run` function.
+
+| Field          | Type                                       | Description                                                          |
+| -------------- | ------------------------------------------ | -------------------------------------------------------------------- |
+| `ctx`          | `TaskRunContext`                           | Full task run context — same as the `{ ctx }` passed to a `task()` `run` function |
+| `messages`     | `ModelMessage[]`                           | Model-ready messages — pass directly to `streamText`                 |
+| `chatId`       | `string`                                   | Your conversation ID (the session's `externalId`)                    |
+| `sessionId`    | `string`                                   | Friendly ID of the backing Session (`session_*`). Use with `sessions.open()` for advanced cases. Always set — every `chat.agent` run is bound to a Session. |
+| `trigger`      | `"submit-message" \| "regenerate-message"` | What triggered the request                                           |
+| `messageId`    | `string \| undefined`                      | Message ID (for regenerate)                                          |
+| `clientData`   | Typed by `clientDataSchema`                | Custom data from the frontend (typed when schema is provided)        |
+| `continuation` | `boolean`                                  | Whether this run is continuing an existing chat (previous run ended) |
+| `signal`       | `AbortSignal`                              | Combined stop + cancel signal                                        |
+| `cancelSignal` | `AbortSignal`                              | Cancel-only signal                                                   |
+| `stopSignal`   | `AbortSignal`                              | Stop-only signal (per-turn)                                          |
+| `previousTurnUsage` | `LanguageModelUsage \| undefined`       | Token usage from the previous turn (undefined on turn 0)        |
+| `totalUsage`   | `LanguageModelUsage`                       | Cumulative token usage across completed turns so far              |
+
+## BootEvent
+
+Passed to the `onBoot` callback.
+
+| Field             | Type                        | Description                                                                                          |
+| ----------------- | --------------------------- | ---------------------------------------------------------------------------------------------------- |
+| `ctx`             | `TaskRunContext`            | Full task run context — see [Task context](#task-context-ctx)                                         |
+| `chatId`          | `string`                    | Chat session ID                                                                                      |
+| `runId`           | `string`                    | The Trigger.dev run ID for this run boot                                                             |
+| `chatAccessToken` | `string`                    | Scoped access token for this run                                                                     |
+| `clientData`      | Typed by `clientDataSchema` | Custom data from the frontend                                                                        |
+| `continuation`    | `boolean`                   | `true` when this run is taking over from a prior dead run (cancel / crash / `endRun` / OOM retry)    |
+| `previousRunId`   | `string \| undefined`       | Public id of the prior run when `continuation` is true                                               |
+| `preloaded`       | `boolean`                   | Whether this run was triggered as a preload                                                          |
+
+## RecoveryBootEvent
+
+Passed to the `onRecoveryBoot` callback. See [Recovery boot](/ai-chat/patterns/recovery-boot) for the full guide.
+
+| Field              | Type                                                              | Description                                                                                          |
+| ------------------ | ----------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------- |
+| `ctx`              | `TaskRunContext`                                                  | Full task run context — see [Task context](#task-context-ctx)                                         |
+| `chatId`           | `string`                                                          | Chat session ID                                                                                      |
+| `runId`            | `string`                                                          | The Trigger.dev run ID for this run boot                                                             |
+| `previousRunId`    | `string`                                                          | Public id of the prior run that died                                                                 |
+| `cause`            | `"cancelled" \| "crashed" \| "unknown"`                           | Best-effort cause. Currently always `"unknown"` — the other values are reserved for future use, so don't branch on it yet |
+| `settledMessages`  | `TUIMessage[]`                                                    | Chain persisted by the predecessor's last `onTurnComplete`                                            |
+| `inFlightUsers`    | `TUIMessage[]`                                                    | User messages on `session.in` past the cursor — the message(s) the predecessor never acknowledged    |
+| `partialAssistant` | `TUIMessage \| undefined`                                         | The trailing assistant message whose stream never received `finish`                                  |
+| `pendingToolCalls` | [`RecoveryPendingToolCall[]`](#recoverypendingtoolcall)           | Tool calls in `input-available` state extracted from `partialAssistant`                              |
+| `writer`           | [`ChatWriter`](#chatwriter)                                       | Lazy `session.out` writer — emit a recovery banner / signal here                                     |
+
+## RecoveryBootResult
+
+Return value of `onRecoveryBoot`. Every field is optional — omit to accept the smart default.
+
+| Field            | Type                          | Description                                                                                                                                   |
+| ---------------- | ----------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------- |
+| `chain`          | `TUIMessage[]`                | Replaces the seed chain. Default: `[...settledMessages, firstInFlightUser, partialAssistant]` when both an in-flight user message and a partial assistant are present; `settledMessages` otherwise. |
+| `recoveredTurns` | `TUIMessage[]`                | User messages to dispatch as fresh turns. Default: `inFlightUsers.slice(1)` when smart-default fires; `inFlightUsers` otherwise.              |
+| `beforeBoot`     | `() => Promise<void>`         | Runs after the writer flushes and before the first recovered turn fires. Use for blocking persistence work.                                   |
+
+## RecoveryPendingToolCall
+
+| Field         | Type      | Description                                                  |
+| ------------- | --------- | ------------------------------------------------------------ |
+| `toolCallId`  | `string`  | The AI SDK tool call id                                      |
+| `toolName`    | `string`  | The tool name (the `name` in the `tool-${name}` part type)   |
+| `input`       | `unknown` | The input the model produced for the call                    |
+| `partIndex`   | `number`  | Index into `partialAssistant.parts` for in-place edits       |
+
+## PreloadEvent
+
+Passed to the `onPreload` callback.
+
+| Field             | Type                        | Description                                                    |
+| ----------------- | --------------------------- | -------------------------------------------------------------- |
+| `ctx`             | `TaskRunContext`            | Full task run context — see [Task context](#task-context-ctx)   |
+| `chatId`          | `string`                    | Chat session ID                                                |
+| `runId`           | `string`                    | The Trigger.dev run ID                                         |
+| `chatAccessToken` | `string`                    | Scoped access token for this run                               |
+| `clientData`      | Typed by `clientDataSchema` | Custom data from the frontend                                  |
+| `writer`          | [`ChatWriter`](#chatwriter) | Stream writer for custom chunks. Lazy — no overhead if unused. |
+
+## ChatStartEvent
+
+Passed to the `onChatStart` callback.
+
+| Field             | Type                        | Description                                                    |
+| ----------------- | --------------------------- | -------------------------------------------------------------- |
+| `ctx`             | `TaskRunContext`            | Full task run context — see [Task context](#task-context-ctx)   |
+| `chatId`          | `string`                    | Chat session ID                                                |
+| `messages`        | `ModelMessage[]`            | Initial model-ready messages                                   |
+| `clientData`      | Typed by `clientDataSchema` | Custom data from the frontend                                  |
+| `runId`           | `string`                    | The Trigger.dev run ID                                         |
+| `chatAccessToken` | `string`                    | Scoped access token for this run                               |
+| `continuation`    | `boolean`                   | Whether this run is continuing an existing chat                |
+| `previousRunId`   | `string \| undefined`       | Previous run ID (only when `continuation` is true)             |
+| `preloaded`       | `boolean`                   | Whether this run was preloaded before the first message        |
+| `writer`          | [`ChatWriter`](#chatwriter) | Stream writer for custom chunks. Lazy — no overhead if unused. |
+
+## ValidateMessagesEvent
+
+Passed to the `onValidateMessages` callback.
+
+| Field     | Type                                                            | Description                              |
+| --------- | --------------------------------------------------------------- | ---------------------------------------- |
+| `messages` | `UIMessage[]`                                                  | Incoming UI messages for this turn       |
+| `chatId`  | `string`                                                        | Chat session ID                          |
+| `turn`    | `number`                                                        | Turn number (0-indexed)                  |
+| `trigger` | `"submit-message" \| "regenerate-message" \| "preload" \| "close"` | The trigger type for this turn        |
+
+## HydrateMessagesEvent
+
+Passed to the `hydrateMessages` callback. See [hydrateMessages](/ai-chat/lifecycle-hooks#hydratemessages).
+
+| Field              | Type                                                  | Description                                               |
+| ------------------ | ----------------------------------------------------- | --------------------------------------------------------- |
+| `chatId`           | `string`                                              | Chat session ID                                           |
+| `turn`             | `number`                                              | Turn number (0-indexed)                                   |
+| `trigger`          | `"submit-message" \| "regenerate-message" \| "action"` | The trigger type for this turn                           |
+| `incomingMessages` | `UIMessage[]`                                         | Validated wire messages from the frontend (empty for actions) |
+| `previousMessages` | `UIMessage[]`                                         | Accumulated UI messages before this turn (`[]` on turn 0) |
+| `clientData`       | Typed by `clientDataSchema`                           | Custom data from the frontend                             |
+| `continuation`     | `boolean`                                             | Whether this run is continuing an existing chat           |
+| `previousRunId`    | `string \| undefined`                                 | Previous run ID (only when `continuation` is true)        |
+
+## ActionEvent
+
+Passed to the `onAction` callback. See [Actions](/ai-chat/actions).
+
+| Field        | Type                        | Description                                              |
+| ------------ | --------------------------- | -------------------------------------------------------- |
+| `action`     | Typed by `actionSchema`     | The parsed and validated action payload                  |
+| `chatId`     | `string`                    | Chat session ID                                          |
+| `turn`       | `number`                    | Turn number (0-indexed)                                  |
+| `clientData` | Typed by `clientDataSchema` | Custom data from the frontend                            |
+| `uiMessages` | `UIMessage[]`               | Accumulated UI messages (after hydration, if set)        |
+| `messages`   | `ModelMessage[]`            | Accumulated model messages (after hydration, if set)     |
+
+## TurnStartEvent
+
+Passed to the `onTurnStart` callback.
+
+| Field             | Type                        | Description                                                    |
+| ----------------- | --------------------------- | -------------------------------------------------------------- |
+| `ctx`             | `TaskRunContext`            | Full task run context — see [Task context](#task-context-ctx)   |
+| `chatId`          | `string`                    | Chat session ID                                                |
+| `messages`        | `ModelMessage[]`            | Full accumulated conversation (model format)                   |
+| `uiMessages`      | `UIMessage[]`               | Full accumulated conversation (UI format)                      |
+| `turn`            | `number`                    | Turn number (0-indexed)                                        |
+| `runId`           | `string`                    | The Trigger.dev run ID                                         |
+| `chatAccessToken` | `string`                    | Scoped access token for this run                               |
+| `clientData`      | Typed by `clientDataSchema` | Custom data from the frontend                                  |
+| `continuation`    | `boolean`                   | Whether this run is continuing an existing chat                |
+| `previousRunId`   | `string \| undefined`       | Previous run ID (only when `continuation` is true)             |
+| `preloaded`       | `boolean`                   | Whether this run was preloaded                                 |
+| `writer`          | [`ChatWriter`](#chatwriter) | Stream writer for custom chunks. Lazy — no overhead if unused. |
+
+## TurnCompleteEvent
+
+Passed to the `onTurnComplete` callback.
+
+| Field                | Type                              | Description                                          |
+| -------------------- | --------------------------------- | ---------------------------------------------------- |
+| `ctx`                | `TaskRunContext`                  | Full task run context — see [Task context](#task-context-ctx) |
+| `chatId`             | `string`                          | Chat session ID                                      |
+| `messages`           | `ModelMessage[]`                  | Full accumulated conversation (model format)         |
+| `uiMessages`         | `UIMessage[]`                     | Full accumulated conversation (UI format)            |
+| `newMessages`        | `ModelMessage[]`                  | Only this turn's messages (model format)             |
+| `newUIMessages`      | `UIMessage[]`                     | Only this turn's messages (UI format)                |
+| `responseMessage`    | `UIMessage \| undefined`          | The assistant's response for this turn               |
+| `rawResponseMessage` | `UIMessage \| undefined`          | Raw response before abort cleanup                    |
+| `turn`               | `number`                          | Turn number (0-indexed)                              |
+| `runId`              | `string`                          | The Trigger.dev run ID                               |
+| `chatAccessToken`    | `string`                          | Scoped access token for this run                     |
+| `lastEventId`        | `string \| undefined`             | Stream position for resumption                       |
+| `stopped`            | `boolean`                         | Whether the user stopped generation during this turn |
+| `continuation`       | `boolean`                         | Whether this run is continuing an existing chat      |
+| `usage`              | `LanguageModelUsage \| undefined` | Token usage for this turn                            |
+| `totalUsage`         | `LanguageModelUsage`              | Cumulative token usage across all turns              |
+
+## BeforeTurnCompleteEvent
+
+Passed to the `onBeforeTurnComplete` callback. Same fields as `TurnCompleteEvent` (including **`ctx`**) plus a `writer`.
+
+| Field                            | Type                        | Description                                                                   |
+| -------------------------------- | --------------------------- | ----------------------------------------------------------------------------- |
+| _(all TurnCompleteEvent fields)_ |                             | See [TurnCompleteEvent](#turncompleteevent) (includes `ctx`)                  |
+| `writer`                         | [`ChatWriter`](#chatwriter) | Stream writer — the stream is still open so chunks appear in the current turn |
+
+## ChatSuspendEvent
+
+Passed to the `onChatSuspend` callback. A discriminated union on `phase`.
+
+| Field        | Type                        | Description                                              |
+| ------------ | --------------------------- | -------------------------------------------------------- |
+| `phase`      | `"preload" \| "turn"`       | Whether this is a preload or post-turn suspension        |
+| `ctx`        | `TaskRunContext`            | Full task run context                                    |
+| `chatId`     | `string`                    | Chat session ID                                          |
+| `runId`      | `string`                    | The Trigger.dev run ID                                   |
+| `clientData` | Typed by `clientDataSchema` | Custom data from the frontend                            |
+| `turn`       | `number`                    | Turn number (**`"turn"` phase only**)                    |
+| `messages`   | `ModelMessage[]`            | Accumulated model messages (**`"turn"` phase only**)     |
+| `uiMessages` | `UIMessage[]`               | Accumulated UI messages (**`"turn"` phase only**)        |
+
+## ChatResumeEvent
+
+Passed to the `onChatResume` callback. Same discriminated union shape as `ChatSuspendEvent`.
+
+| Field        | Type                        | Description                                              |
+| ------------ | --------------------------- | -------------------------------------------------------- |
+| `phase`      | `"preload" \| "turn"`       | Whether this is a preload or post-turn resumption        |
+| `ctx`        | `TaskRunContext`            | Full task run context                                    |
+| `chatId`     | `string`                    | Chat session ID                                          |
+| `runId`      | `string`                    | The Trigger.dev run ID                                   |
+| `clientData` | Typed by `clientDataSchema` | Custom data from the frontend                            |
+| `turn`       | `number`                    | Turn number (**`"turn"` phase only**)                    |
+| `messages`   | `ModelMessage[]`            | Accumulated model messages (**`"turn"` phase only**)     |
+| `uiMessages` | `UIMessage[]`               | Accumulated UI messages (**`"turn"` phase only**)        |
+
+## ChatWriter
+
+A stream writer passed to lifecycle callbacks. Write custom `UIMessageChunk` parts (e.g. `data-*` parts) to the chat stream.
+
+The writer is lazy — no stream is opened unless you call `write()` or `merge()`, so there's zero overhead for callbacks that don't use it.
+
+| Method          | Type                                               | Description                                        |
+| --------------- | -------------------------------------------------- | -------------------------------------------------- |
+| `write(part)`   | `(part: UIMessageChunk) => void`                   | Write a single chunk to the chat stream            |
+| `merge(stream)` | `(stream: ReadableStream<UIMessageChunk>) => void` | Merge another stream's chunks into the chat stream |
+
+```ts
+onTurnStart: async ({ writer }) => {
+  // Write a custom data part — render it on the frontend
+  writer.write({ type: "data-status", data: { loading: true } });
+},
+onBeforeTurnComplete: async ({ writer, usage }) => {
+  // Stream is still open — these chunks arrive before the turn ends
+  writer.write({ type: "data-usage", data: { tokens: usage?.totalTokens } });
+},
+```
+
+## ChatAgentCompactionOptions
+
+Options for the `compaction` field on `chat.agent()`. See [Compaction](/ai-chat/compaction) for usage guide.
+
+| Option                 | Type                                                                         | Required | Description                                                                  |
+| ---------------------- | ---------------------------------------------------------------------------- | -------- | ---------------------------------------------------------------------------- |
+| `shouldCompact`        | `(event: ShouldCompactEvent) => boolean \| Promise<boolean>`                 | Yes      | Decide whether to compact. Return `true` to trigger compaction               |
+| `summarize`            | `(event: SummarizeEvent) => Promise<string>`                                 | Yes      | Generate a summary from the current messages                                 |
+| `compactUIMessages`    | `(event: CompactMessagesEvent) => UIMessage[] \| Promise<UIMessage[]>`       | No       | Transform UI messages after compaction. Default: preserve all                |
+| `compactModelMessages` | `(event: CompactMessagesEvent) => ModelMessage[] \| Promise<ModelMessage[]>` | No       | Transform model messages after compaction. Default: replace all with summary |
+
+## CompactMessagesEvent
+
+Passed to `compactUIMessages` and `compactModelMessages` callbacks.
+
+| Field           | Type                 | Description                                          |
+| --------------- | -------------------- | ---------------------------------------------------- |
+| `summary`       | `string`             | The generated summary text                           |
+| `uiMessages`    | `UIMessage[]`        | Current UI messages (full conversation)              |
+| `modelMessages` | `ModelMessage[]`     | Current model messages (full conversation)           |
+| `chatId`        | `string`             | Chat session ID                                      |
+| `turn`          | `number`             | Current turn (0-indexed)                             |
+| `clientData`    | `unknown`            | Custom data from the frontend                        |
+| `source`        | `"inner" \| "outer"` | Whether compaction is between steps or between turns |
+
+## CompactedEvent
+
+Passed to the `onCompacted` callback.
+
+| Field          | Type                        | Description                                       |
+| -------------- | --------------------------- | ------------------------------------------------- |
+| `ctx`          | `TaskRunContext`            | Full task run context — see [Task context](#task-context-ctx) |
+| `summary`      | `string`                    | The generated summary text                        |
+| `messages`     | `ModelMessage[]`            | Messages that were compacted (pre-compaction)     |
+| `messageCount` | `number`                    | Number of messages before compaction              |
+| `usage`        | `LanguageModelUsage`        | Token usage from the triggering step/turn         |
+| `totalTokens`  | `number \| undefined`       | Total token count that triggered compaction       |
+| `inputTokens`  | `number \| undefined`       | Input token count                                 |
+| `outputTokens` | `number \| undefined`       | Output token count                                |
+| `stepNumber`   | `number`                    | Step number (-1 for outer loop)                   |
+| `chatId`       | `string \| undefined`       | Chat session ID                                   |
+| `turn`         | `number \| undefined`       | Current turn                                      |
+| `writer`       | [`ChatWriter`](#chatwriter) | Stream writer for custom chunks during compaction |
+
+## PendingMessagesOptions
+
+Options for the `pendingMessages` field. See [Pending Messages](/ai-chat/pending-messages) for usage guide.
+
+| Option         | Type                                                                              | Required | Description                                                                               |
+| -------------- | --------------------------------------------------------------------------------- | -------- | ----------------------------------------------------------------------------------------- |
+| `shouldInject` | `(event: PendingMessagesBatchEvent) => boolean \| Promise<boolean>`               | No       | Decide whether to inject the batch between tool-call steps. If absent, no injection.      |
+| `prepare`      | `(event: PendingMessagesBatchEvent) => ModelMessage[] \| Promise<ModelMessage[]>` | No       | Transform the batch before injection. Default: convert each via `convertToModelMessages`. |
+| `onReceived`   | `(event: PendingMessageReceivedEvent) => void \| Promise<void>`                   | No       | Called when a message arrives during streaming (per-message).                             |
+| `onInjected`   | `(event: PendingMessagesInjectedEvent) => void \| Promise<void>`                  | No       | Called after a batch is injected via `prepareStep`.                                       |
+
+## PendingMessagesBatchEvent
+
+Passed to `shouldInject` and `prepare` callbacks.
+
+| Field           | Type               | Description                   |
+| --------------- | ------------------ | ----------------------------- |
+| `messages`      | `UIMessage[]`      | All pending messages (batch)  |
+| `modelMessages` | `ModelMessage[]`   | Current conversation          |
+| `steps`         | `CompactionStep[]` | Completed steps so far        |
+| `stepNumber`    | `number`           | Current step (0-indexed)      |
+| `chatId`        | `string`           | Chat session ID               |
+| `turn`          | `number`           | Current turn (0-indexed)      |
+| `clientData`    | `unknown`          | Custom data from the frontend |
+
+## PendingMessagesInjectedEvent
+
+Passed to `onInjected` callback.
+
+| Field                   | Type             | Description                           |
+| ----------------------- | ---------------- | ------------------------------------- |
+| `messages`              | `UIMessage[]`    | All injected UI messages              |
+| `injectedModelMessages` | `ModelMessage[]` | The model messages that were injected |
+| `chatId`                | `string`         | Chat session ID                       |
+| `turn`                  | `number`         | Current turn                          |
+| `stepNumber`            | `number`         | Step where injection occurred         |
+
+## UsePendingMessagesReturn
+
+Return value of `usePendingMessages` hook. See [Pending Messages — Frontend](/ai-chat/pending-messages#frontend-usependingmessages-hook).
+
+| Property/Method         | Type                                   | Description                                                     |
+| ----------------------- | -------------------------------------- | --------------------------------------------------------------- |
+| `pending`               | `PendingMessage[]`                     | Current pending messages with mode and injection status         |
+| `steer`                 | `(text: string) => void`               | Send a steering message (or normal message when not streaming)  |
+| `queue`                 | `(text: string) => void`               | Queue for next turn (or send normally when not streaming)       |
+| `promoteToSteering`     | `(id: string) => void`                 | Convert a queued message to steering                            |
+| `isInjectionPoint`      | `(part: unknown) => boolean`           | Check if an assistant message part is an injection confirmation |
+| `getInjectedMessageIds` | `(part: unknown) => string[]`          | Get message IDs from an injection point                         |
+| `getInjectedMessages`   | `(part: unknown) => InjectedMessage[]` | Get messages (id + text) from an injection point                |
+
+## ChatSessionOptions
+
+Options for `chat.createSession()`.
+
+| Option                 | Type          | Default  | Description                         |
+| ---------------------- | ------------- | -------- | ----------------------------------- |
+| `signal`               | `AbortSignal` | required | Run-level cancel signal             |
+| `idleTimeoutInSeconds` | `number`      | `30`     | Seconds to stay idle between turns  |
+| `timeout`              | `string`      | `"1h"`   | Duration string for suspend timeout |
+| `maxTurns`             | `number`      | `100`    | Max turns before ending             |
+
+## ChatTurn
+
+Each turn yielded by `chat.createSession()`.
+
+| Field          | Type             | Description                                   |
+| -------------- | ---------------- | --------------------------------------------- |
+| `number`       | `number`         | Turn number (0-indexed)                       |
+| `chatId`       | `string`         | Chat session ID                               |
+| `trigger`      | `string`         | What triggered this turn                      |
+| `clientData`   | `unknown`        | Client data from the transport                |
+| `messages`     | `ModelMessage[]` | Full accumulated model messages               |
+| `uiMessages`   | `UIMessage[]`    | Full accumulated UI messages                  |
+| `signal`       | `AbortSignal`    | Combined stop+cancel signal (fresh each turn) |
+| `stopped`      | `boolean`        | Whether the user stopped generation this turn |
+| `continuation` | `boolean`        | Whether this is a continuation run            |
+
+| Method                  | Returns                           | Description                                                  |
+| ----------------------- | --------------------------------- | ------------------------------------------------------------ |
+| `complete(source)`      | `Promise<UIMessage \| undefined>` | Pipe, capture, accumulate, cleanup, and signal turn-complete |
+| `done()`                | `Promise<void>`                   | Signal turn-complete (when you've piped manually)            |
+| `addResponse(response)` | `Promise<void>`                   | Add response to accumulator manually                         |
+
+## chat namespace
+
+All methods available on the `chat` object from `@trigger.dev/sdk/ai`.
+
+| Method                                      | Description                                                                                                                  |
+| ------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------- |
+| `chat.agent(options)`                       | Create a chat agent                                                                                                          |
+| `chat.createSession(payload, options)`      | Create an async iterator for chat turns                                                                                      |
+| `chat.pipe(source, options?)`               | Pipe a stream to the frontend (from anywhere inside a task)                                                                  |
+| `chat.pipeAndCapture(source, options?)`     | Pipe and capture the response `UIMessage`                                                                                    |
+| `chat.writeTurnComplete(options?)`          | Signal the frontend that the current turn is complete                                                                        |
+| `chat.createStopSignal()`                   | Create a managed stop signal wired to the stop input stream                                                                  |
+| `chat.messages`                             | Input stream for incoming messages — use `.waitWithIdleTimeout()`                                                            |
+| `chat.local<T>({ id })`                     | Create a per-run typed local (see [`chat.local`](/ai-chat/chat-local))                             |
+| `chat.createStartSessionAction(taskId, options?)` | Returns a server action that creates a chat Session, triggers the first run, and returns a session-scoped PAT. Idempotent on `(env, externalId)`. |
+| `chat.requestUpgrade()`                     | End the current run after this turn so the next message starts on the latest agent version. Server-orchestrated handoff.    |
+| `chat.setTurnTimeout(duration)`             | Override turn timeout at runtime (e.g. `"2h"`)                                                                               |
+| `chat.setTurnTimeoutInSeconds(seconds)`     | Override turn timeout at runtime (in seconds)                                                                                |
+| `chat.setIdleTimeoutInSeconds(seconds)`     | Override idle timeout at runtime                                                                                             |
+| `chat.setUIMessageStreamOptions(options)`   | Override `toUIMessageStream()` options for the current turn                                                                  |
+| `chat.defer(promise)`                       | Run background work in parallel with streaming, awaited before `onTurnComplete`                                              |
+| `chat.isStopped()`                          | Check if the current turn was stopped by the user                                                                            |
+| `chat.cleanupAbortedParts(message)`         | Remove incomplete parts from a stopped response message                                                                      |
+| `chat.response.write(chunk)`                | Write a data part that streams to the frontend AND persists in `onTurnComplete`'s `responseMessage`                          |
+| `chat.stream`                               | Raw chat output stream — use `.writer()`, `.pipe()`, `.append()`, `.read()`. Chunks are NOT accumulated into the response.   |
+| `chat.history.all()`                        | Read the current accumulated UI messages (returns a copy). See [chat.history](/ai-chat/backend#chat-history)                  |
+| `chat.history.set(messages)`                | Replace all accumulated messages (same as `chat.setMessages()`)                                                              |
+| `chat.history.remove(messageId)`            | Remove a specific message by ID                                                                                              |
+| `chat.history.rollbackTo(messageId)`        | Keep messages up to and including the given ID (undo/rollback)                                                               |
+| `chat.history.replace(messageId, message)`  | Replace a specific message by ID (edit)                                                                                      |
+| `chat.history.slice(start, end?)`           | Keep only messages in the given range                                                                                        |
+| `chat.MessageAccumulator`                   | Class that accumulates conversation messages across turns                                                                    |
+| `chat.withUIMessage(config?)`               | Returns a [ChatBuilder](/ai-chat/types#chatbuilder) with a fixed `UIMessage` subtype. See [Types](/ai-chat/types)            |
+| `chat.withClientData({ schema })`           | Returns a [ChatBuilder](/ai-chat/types#chatbuilder) with a fixed client data schema. See [Types](/ai-chat/types#typed-client-data-with-chatwithclientdata) |
+
+## `chat.withUIMessage`
+
+Returns a [`ChatBuilder`](/ai-chat/types#chatbuilder) with a fixed `UIMessage` subtype. Chain `.withClientData()`, hook methods, and `.agent()`.
+
+```ts
+chat.withUIMessage<TUIM>(config?: ChatWithUIMessageConfig<TUIM>): ChatBuilder<TUIM>;
+```
+
+| Parameter              | Type                               | Description                                                                                                                                           |
+| ---------------------- | ---------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `config.streamOptions` | `ChatUIMessageStreamOptions<TUIM>` | Optional defaults for `toUIMessageStream()`. Shallow-merged with `uiMessageStreamOptions` on the inner `.agent({ ... })` (agent wins on key conflicts). |
+
+Use this when you need [`InferChatUIMessage`](#inferchatuimessage) / typed `data-*` parts / `InferUITools` to line up across backend hooks and `useChat`. Full guide: [Types](/ai-chat/types).
+
+## `chat.withClientData`
+
+Returns a [`ChatBuilder`](/ai-chat/types#chatbuilder) with a fixed client data schema. All hooks and `run` get typed `clientData` without passing `clientDataSchema` in `.agent()` options.
+
+```ts
+chat.withClientData<TSchema>({ schema: TSchema }): ChatBuilder<UIMessage, TSchema>;
+```
+
+| Parameter | Type         | Description                                        |
+| --------- | ------------ | -------------------------------------------------- |
+| `schema`  | `TaskSchema` | Zod, ArkType, Valibot, or any supported schema lib |
+
+Full guide: [Typed client data](/ai-chat/types#typed-client-data-with-chatwithclientdata).
+
+## `ChatWithUIMessageConfig`
+
+| Field           | Type                               | Description                                                           |
+| --------------- | ---------------------------------- | --------------------------------------------------------------------- |
+| `streamOptions` | `ChatUIMessageStreamOptions<TUIM>` | Default `toUIMessageStream()` options for agents created via `.agent()` |
+
+## `InferChatUIMessage`
+
+Type helper: extracts the `UIMessage` subtype from a chat agent's wire payload.
+
+```ts
+import type { InferChatUIMessage } from "@trigger.dev/sdk/ai";
+// Also re-exported from "@trigger.dev/sdk/chat/react" — import it from there when you're already using other React helpers.
+
+type Msg = InferChatUIMessage<typeof myChat>;
+```
+
+Use with `useChat<Msg>({ transport })` when using [`chat.withUIMessage`](/ai-chat/types). For agents defined with plain `chat.agent()` (no custom generic), this resolves to the base `UIMessage`.
+
+## AI helpers (`ai` from `@trigger.dev/sdk/ai`)
+
+| Export                                                                              | Status         | Description                                                                                                                                                                                |
+| ----------------------------------------------------------------------------------- | -------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| `ai.toolExecute(task)`                                                              | **Preferred**  | Returns the `execute` function for AI SDK `tool()`. Runs the task via `triggerAndSubscribe` and attaches tool/chat metadata (same behavior the deprecated wrapper used internally).        |
+| `ai.tool(task, options?)`                                                           | **Deprecated** | Wraps `tool()` / `dynamicTool()` and the same execute path. Migrate to `tool({ ..., execute: ai.toolExecute(task) })`. See [Task-backed AI tools](/tasks/schemaTask#task-backed-ai-tools). |
+| `ai.toolCallId`, `ai.chatContext`, `ai.chatContextOrThrow`, `ai.currentToolOptions` | Supported      | Work for any task-backed tool execute path, including `ai.toolExecute`.                                                                                                                    |
+
+## ChatUIMessageStreamOptions
+
+Options for customizing `toUIMessageStream()`. Set as static defaults via `uiMessageStreamOptions` on `chat.agent()`, or override per-turn via `chat.setUIMessageStreamOptions()`. See [Stream options](/ai-chat/backend#stream-options) for usage examples.
+
+Derived from the AI SDK's `UIMessageStreamOptions` with `onFinish` and `originalMessages` omitted (managed internally — `onFinish` for response capture, `originalMessages` for cross-turn message ID reuse).
+
+| Option              | Type                              | Default              | Description                                                                                                                         |
+| ------------------- | --------------------------------- | -------------------- | ----------------------------------------------------------------------------------------------------------------------------------- |
+| `onError`           | `(error: unknown) => string`      | Raw error message    | Called on LLM errors and tool execution errors. Return a sanitized string — sent as `{ type: "error", errorText }` to the frontend. |
+| `sendReasoning`     | `boolean`                         | `true`               | Send reasoning parts to the client                                                                                                  |
+| `sendSources`       | `boolean`                         | `false`              | Send source parts to the client                                                                                                     |
+| `sendFinish`        | `boolean`                         | `true`               | Send the finish event. Set to `false` when chaining multiple `streamText` calls.                                                    |
+| `sendStart`         | `boolean`                         | `true`               | Send the message start event. Set to `false` when chaining.                                                                         |
+| `messageMetadata`   | `(options: { part }) => metadata` | —                    | Extract message metadata to send to the client. Called on `start` and `finish` events.                                              |
+| `generateMessageId` | `() => string`                    | AI SDK's `generateId` | Custom message ID generator for response messages (e.g. UUID-v7). IDs are shared between frontend and backend via the stream's `start` chunk. |
+
+## TriggerChatTransport options
+
+Options for the frontend transport constructor and `useTriggerChatTransport` hook.
+
+| Option                 | Type                                                                 | Default                     | Description                                                                 |
+| ---------------------- | -------------------------------------------------------------------- | --------------------------- | --------------------------------------------------------------------------- |
+| `task`                 | `string`                                                             | required                    | Task ID the transport's session is bound to. Threaded into `startSession`'s params. |
+| `accessToken`          | `(params: AccessTokenParams) => string \| Promise<string>`           | required                    | Pure refresh — mints a fresh session-scoped PAT. Called on 401/403. See [callback shape](#accesstoken-callback). |
+| `startSession`         | `(params: StartSessionParams<TClientData>) => Promise<StartSessionResult>` | optional                    | Creates the chat Session and returns the session-scoped PAT. Called on `transport.preload(chatId)` and lazily on the first `sendMessage` for any chatId without a cached PAT. See [callback shape](#startsession-callback). |
+| `baseURL`              | `string \| ((ctx: { endpoint: "in" \| "out"; chatId: string }) => string)` | `"https://api.trigger.dev"` | API base URL. String form applies to every endpoint; function form lets you pick per endpoint — e.g. route `.in/append` through a trusted edge proxy while keeping `.out` SSE direct (see [Trusted edge signals](/ai-chat/patterns/trusted-edge-signals)). |
+| `fetch`                | `(url: string, init: RequestInit, ctx: { endpoint: "in" \| "out"; chatId: string }) => Promise<Response>` | —                           | Per-request fetch override. Invoked for both `.in/append` POSTs and the `.out` SSE GET. Use for header injection (tracing), custom retries, or proxy rewrites beyond what `baseURL` can express. |
+| `headers`              | `Record<string, string>`                                             | —                           | Extra headers for API requests                                              |
+| `streamTimeoutSeconds` | `number`                                                             | `120`                       | How long to wait for stream data                                            |
+| `clientData`           | Typed by `clientDataSchema`                                          | —                           | Default client data merged into per-turn `metadata` and threaded through `startSession`'s params (so the first run's `payload.metadata` matches per-turn `metadata`). Live-updated when the option value changes. |
+| `sessions`             | `Record<string, ChatSession>`                                        | —                           | Restore sessions from storage. See [ChatSession](#chatsession).             |
+| `onSessionChange`      | `(chatId, session \| null) => void`                                  | —                           | Fires when session state changes. `session` is the full `ChatSession` or `null` when the run ends. |
+| `multiTab`             | `boolean`                                                            | `false`                     | Enable multi-tab claim coordination via `BroadcastChannel`. See [Frontend → multi-tab](/ai-chat/frontend#multi-tab-coordination). |
+| `watch`                | `boolean`                                                            | `false`                     | Read-only watcher mode — keep the SSE subscription open across `trigger:turn-complete` so a viewer sees turns 2, 3, … through one long-lived stream. |
+| `headStart`            | `string`                                                             | —                           | URL of a [`chat.headStart`](/ai-chat/fast-starts#head-start) route handler. When set, the FIRST message of a brand-new chat POSTs to this URL so step 1's LLM call runs in your warm process while the agent run boots in parallel. Subsequent turns bypass it. |
+
+### `accessToken` callback
+
+The transport invokes `accessToken` whenever it needs a *fresh* session-scoped PAT — on first use when no PAT is cached, or after a 401/403 from any session-PAT-authed request. The callback's job is to **return a token, not to start a run.**
+
+`AccessTokenParams`:
+
+| Field | Type | Description |
+| --- | --- | --- |
+| `chatId` | `string` | The conversation id. |
+
+Customer implementation typically wraps `auth.createPublicToken` server-side:
+
+```ts
+"use server";
+import { auth } from "@trigger.dev/sdk";
+
+export async function mintChatAccessToken(chatId: string) {
+  return auth.createPublicToken({
+    scopes: { read: { sessions: chatId }, write: { sessions: chatId } },
+    expirationTime: "1h",
+  });
+}
+```
+
+```ts
+const transport = useTriggerChatTransport({
+  task: "my-chat",
+  accessToken: ({ chatId }) => mintChatAccessToken(chatId),
+});
+```
+
+### `startSession` callback
+
+The transport invokes `startSession` when it needs to *create* the session — on `transport.preload(chatId)`, and lazily on the first `sendMessage` for any chatId without a cached PAT. Concurrent and repeated calls are deduplicated via an in-flight promise, and the customer's wrapped helper is idempotent on `(env, externalId)`, so two tabs / two `preload` calls converge on the same session.
+
+`StartSessionParams<TClientData>`:
+
+| Field | Type | Description |
+| --- | --- | --- |
+| `taskId` | `string` | The transport's `task` value. |
+| `chatId` | `string` | The conversation id (the session's `externalId`). |
+| `clientData` | `TClientData` | The transport's current `clientData` option. Pass through to `triggerConfig.basePayload.metadata` so the first run's `payload.metadata` matches per-turn `metadata`. |
+
+Customer implementation wraps `chat.createStartSessionAction(taskId)`:
+
+```ts
+"use server";
+import { chat } from "@trigger.dev/sdk/ai";
+
+export const startChatSession = chat.createStartSessionAction("my-chat");
+```
+
+```ts
+const transport = useTriggerChatTransport({
+  task: "my-chat",
+  accessToken: ({ chatId }) => mintChatAccessToken(chatId),
+  startSession: ({ chatId, clientData }) =>
+    startChatSession({ chatId, clientData }),
+});
+```
+
+`startSession` is optional only when the customer fully manages the session lifecycle externally (e.g. by hydrating `sessions: { [chatId]: ... }` and never calling `preload`). Most customers should provide it.
+
+### multiTab
+
+Enable multi-tab coordination. When `true`, only one browser tab can send messages to a given chatId at a time. Other tabs enter read-only mode with real-time message updates via `BroadcastChannel`.
+
+```ts
+const transport = useTriggerChatTransport({
+  task: "my-chat",
+  accessToken,
+  multiTab: true,
+});
+```
+
+No-op when `BroadcastChannel` is unavailable (SSR, Node.js). See [Multi-tab coordination](/ai-chat/frontend#multi-tab-coordination).
+
+### Trigger configuration
+
+Trigger config (machine, queue, tags, maxAttempts, idleTimeoutInSeconds) lives server-side in `chat.createStartSessionAction(taskId, options?)`. The transport doesn't accept these options directly — pass them when wrapping the action:
+
+```ts
+"use server";
+import { chat } from "@trigger.dev/sdk/ai";
+
+export const startChatSession = chat.createStartSessionAction("my-chat", {
+  triggerConfig: {
+    machine: "small-1x",
+    queue: "chat-queue",
+    tags: ["user:123"],
+    maxAttempts: 3,
+    idleTimeoutInSeconds: 60,
+  },
+});
+```
+
+A `chat:{chatId}` tag is automatically added to every run.
+
+For per-call values that vary by chatId (e.g. plan-tier-driven machine), accept extra params on the customer's server action and pass them into `chat.createStartSessionAction(...)`'s options at call time.
+
+### transport.stopGeneration()
+
+Stop the current generation for a chat session. Sends a stop signal to the backend task and closes the active SSE connection.
+
+```ts
+transport.stopGeneration(chatId: string): Promise<boolean>
+```
+
+Returns `true` if the stop signal was sent, `false` if there's no active session. Works for both initial connections and reconnected streams (after page refresh with `resume: true`).
+
+Use alongside `useChat`'s `stop()` for a complete stop experience:
+
+```tsx
+const { stop: aiStop } = useChat({ transport });
+
+const stop = useCallback(() => {
+  transport.stopGeneration(chatId);
+  aiStop();
+}, [transport, chatId, aiStop]);
+```
+
+See [Stop generation](/ai-chat/frontend#stop-generation) for full details.
+
+### transport.sendAction()
+
+Send a custom action to the agent. Actions wake the agent from suspension and fire `onAction`. They are not turns — `run()` and turn lifecycle hooks do not fire. If `onAction` returns a `StreamTextResult`, the response is auto-piped to the frontend.
+
+```ts
+transport.sendAction(chatId: string, action: unknown): Promise<ReadableStream<UIMessageChunk>>
+```
+
+The action payload is validated against the agent's `actionSchema` on the backend.
+
+```tsx
+// Undo button
+<button onClick={() => transport.sendAction(chatId, { type: "undo" })}>
+  Undo
+</button>
+```
+
+See [Actions](/ai-chat/actions) for backend setup and [Sending actions](/ai-chat/frontend#sending-actions) for frontend usage.
+
+### transport.preload()
+
+Eagerly trigger a run before the first message.
+
+```ts
+transport.preload(chatId: string, options?: { idleTimeoutInSeconds?: number }): Promise<void>
+```
+
+No-op if a session already exists for this chatId. See [Preload](/ai-chat/fast-starts#preload) for full details.
+
+## useTriggerChatTransport
+
+React hook that creates and memoizes a `TriggerChatTransport` instance. Import from `@trigger.dev/sdk/chat/react`.
+
+```tsx
+import { useTriggerChatTransport } from "@trigger.dev/sdk/chat/react";
+import type { myChat } from "@/trigger/chat";
+
+const transport = useTriggerChatTransport<typeof myChat>({
+  task: "my-chat",
+  accessToken: ({ chatId }) => mintChatAccessToken(chatId),
+  startSession: ({ chatId, clientData }) =>
+    startChatSession({ chatId, clientData }),
+  sessions: savedSessions,
+  onSessionChange: handleSessionChange,
+});
+```
+
+The transport is created once on first render and reused across re-renders. Pass a type parameter for compile-time validation of the task ID.
+
+## AgentChat options
+
+Options for the server-side chat client constructor. Import `AgentChat` from `@trigger.dev/sdk/chat`.
+
+| Option                 | Type                                                                 | Default                                | Description                                                                 |
+| ---------------------- | -------------------------------------------------------------------- | -------------------------------------- | --------------------------------------------------------------------------- |
+| `agent`                | `string`                                                             | required                               | Task ID of the chat agent to converse with.                                 |
+| `id`                   | `string`                                                             | `crypto.randomUUID()`                  | Conversation ID. Used as the Session `externalId` and for tagging runs.     |
+| `clientData`           | Typed by `clientDataSchema`                                          | —                                      | Client data included in every request. Same shape as the agent's `clientDataSchema`. |
+| `session`              | `ChatSession`                                                        | —                                      | Restore a previous session (pass `lastEventId` to resume SSE).              |
+| `triggerConfig`        | `Partial<SessionTriggerConfig>`                                      | —                                      | Default trigger config used when starting a new session (machine, tags, etc.). |
+| `streamTimeoutSeconds` | `number`                                                             | `120`                                  | SSE timeout in seconds.                                                     |
+| `onTriggered`          | `(event) => void \| Promise<void>`                                   | —                                      | Fires when a new run is triggered for this session.                         |
+| `onTurnComplete`       | `(event) => void \| Promise<void>`                                   | —                                      | Fires when a turn completes. Persist `event.lastEventId` for stream resumption. |
+| `baseURL`              | `string \| (ctx: { endpoint: "in" \| "out"; chatId: string }) => string` | `apiClientManager.baseURL`             | API base URL. String form applies to every endpoint; function form picks per endpoint. Defaults to whatever `@trigger.dev/sdk` was configured with (typically `TRIGGER_API_URL`). |
+| `fetch`                | `(url: string, init: RequestInit, ctx: { endpoint: "in" \| "out"; chatId: string }) => Promise<Response>` | —                                      | Per-request fetch override. Invoked for both `.in/append` POSTs and the `.out` SSE GET. Use for header injection, custom retries, or proxy rewrites. |
+
+## createStartSessionAction options
+
+Second argument to `chat.createStartSessionAction(taskId, options?)`. Controls how the server-mediated session-create call reaches the trigger.dev API.
+
+| Option           | Type                                                                                 | Default                       | Description                                                                 |
+| ---------------- | ------------------------------------------------------------------------------------ | ----------------------------- | --------------------------------------------------------------------------- |
+| `tokenTTL`       | `string \| number \| Date`                                                           | `"1h"`                        | TTL for the session-scoped public access token returned to the browser.     |
+| `triggerConfig`  | `Partial<SessionTriggerConfig>`                                                      | —                             | Default trigger config (machine, tags, queue, etc.). Per-call config shallow-merges on top. |
+| `baseURL`        | `string \| (ctx: { endpoint: "sessions" \| "auth"; chatId: string }) => string`      | `apiClientManager.baseURL`    | API base URL. `endpoint` is `"sessions"` for `POST /api/v1/sessions` or `"auth"` for `POST /api/v1/auth/jwt/claims` (only fires when `tokenTTL` is set). |
+| `fetch`          | `(url: string, init: RequestInit, ctx: { endpoint: "sessions" \| "auth"; chatId: string }) => Promise<Response>` | —                             | Per-request fetch override. Use to route session-create through a trusted edge proxy so `basePayload.metadata` is rewritten before reaching `api.trigger.dev`. |
+
+## useMultiTabChat
+
+React hook for multi-tab message coordination. Import from `@trigger.dev/sdk/chat/react`.
+
+```tsx
+import { useMultiTabChat } from "@trigger.dev/sdk/chat/react";
+
+const { isReadOnly } = useMultiTabChat(transport, chatId, messages, setMessages);
+```
+
+| Parameter | Type | Description |
+|-----------|------|-------------|
+| `transport` | `TriggerChatTransport` | Transport instance with `multiTab: true` |
+| `chatId` | `string` | The chat session ID |
+| `messages` | `UIMessage[]` | Current messages from `useChat` |
+| `setMessages` | `(messages) => void` | Message setter from `useChat` |
+
+**Returns:** `{ isReadOnly: boolean }` — `true` when another tab is actively sending to this chatId.
+
+The hook handles:
+- Tracking read-only state from the transport's `BroadcastChannel` coordinator
+- Broadcasting messages when this tab is the active sender
+- Receiving messages from other tabs and updating via `setMessages`
+
+See [Multi-tab coordination](/ai-chat/frontend#multi-tab-coordination).
+
+## ChatSession
+
+Persistable session state for the frontend `TriggerChatTransport` and the server-side `AgentChat`. The underlying Session row is keyed on `chatId` (durable across runs); the persistable shape is just the SSE resume cursor and a session-scoped access token.
+
+| Field | Type | Description |
+| --- | --- | --- |
+| `publicAccessToken` | `string` | Session-scoped JWT (`read:sessions:{chatId} + write:sessions:{chatId}`). Refreshed automatically on 401/403 via the transport's `accessToken` callback. |
+| `lastEventId` | `string \| undefined` | Last SSE event received on `.out`. Used to resume mid-stream after a disconnect. |
+| `isStreaming` | `boolean \| undefined` | Optional. If persisted, `reconnectToStream` uses it as a fast-path short-circuit. If omitted, the server decides via the session's [`X-Session-Settled`](/ai-chat/client-protocol#x-session-settled-fast-close-on-idle-reconnects) response header. |
+
+## ChatInputChunk
+
+The wire shape for records sent on `.in`. Consumed by `chat.agent` internally — you typically don't write these yourself; `transport.sendMessage`, `transport.stopGeneration`, and `transport.sendAction` all serialize into this shape.
+
+```ts
+type ChatInputChunk<TMessage = UIMessage, TMetadata = unknown> =
+  | { kind: "message"; payload: ChatTaskWirePayload<TMessage, TMetadata> }
+  | { kind: "stop"; message?: string };
+```
+
+| Variant | When | Payload |
+| --- | --- | --- |
+| `kind: "message"` | New message, action, approval response, or close | `payload` is a full `ChatTaskWirePayload` — its `trigger` field (`"submit-message"` / `"action"` / `"close"`) determines the agent's dispatch |
+| `kind: "stop"` | Client aborted the active turn | Optional `message` surfaces in the stop handler |
+
+For the raw wire format, see [Client Protocol — ChatInputChunk](/ai-chat/client-protocol#chatinputchunk).
+
+## Session token scopes
+
+Tokens minted for `TriggerChatTransport` and `AgentChat` are session-scoped — keyed on the chat's `externalId` (the `chatId` you assign).
+
+| Scope | Grants |
+| --- | --- |
+| `read:sessions:<chatId>` | Subscribe to `.out`, HEAD probe the stream, retrieve the session row |
+| `write:sessions:<chatId>` | Append to `.in`, close the session, end-and-continue, update metadata |
+
+Tokens are produced by `auth.createPublicToken({ scopes: { read: { sessions: chatId }, write: { sessions: chatId } } })` (used by your `accessToken` server action) or returned automatically from `chat.createStartSessionAction` / `POST /api/v1/sessions`. Tokens from either source authorize both URL forms (`/sessions/{chatId}/...` and `/sessions/session_*/...`) on every read and write route.
+
+## Related
+
+- [Realtime Streams](/tasks/streams) — How streams work under the hood
+- [Using the Vercel AI SDK](/guides/examples/vercel-ai-sdk) — Basic AI SDK usage with Trigger.dev
+- [Realtime React Hooks](/realtime/react-hooks/overview) — Lower-level realtime hooks
+- [Authentication](/realtime/auth) — Public access tokens and trigger tokens
diff --git a/docs/ai-chat/server-chat.mdx b/docs/ai-chat/server-chat.mdx
new file mode 100644
index 00000000000..c2c5928a640
--- /dev/null
+++ b/docs/ai-chat/server-chat.mdx
@@ -0,0 +1,263 @@
+---
+title: "Server-Side Chat"
+sidebarTitle: "Server-Side Chat"
+description: "Use AgentChat to interact with chat agents from server-side code — tasks, webhooks, scripts, or other agents."
+---
+
+import RcBanner from "/snippets/ai-chat-rc-banner.mdx";
+
+<RcBanner />
+
+`AgentChat` lets you chat with agents from server-side code. It works inside tasks (agent-to-agent), request handlers, webhook processors, and scripts.
+
+```ts
+import { AgentChat } from "@trigger.dev/sdk/chat";
+
+const chat = new AgentChat({ agent: "my-agent" });
+const stream = await chat.sendMessage("Hello!");
+const text = await stream.text();
+await chat.close();
+```
+
+## Type-safe client data
+
+Pass `typeof yourAgent` as a type parameter and `clientData` is automatically typed from the agent's `withClientData` schema:
+
+```ts
+import { AgentChat } from "@trigger.dev/sdk/chat";
+import type { myAgent } from "./trigger/my-agent";
+
+const chat = new AgentChat<typeof myAgent>({
+  agent: "my-agent",
+  clientData: { userId: "user_123" }, // ← typed from agent definition
+});
+```
+
+## Conversation lifecycle
+
+Each `AgentChat` instance represents one conversation. The conversation ID is auto-generated or can be set explicitly:
+
+```ts
+// Auto-generated ID
+const chat = new AgentChat({ agent: "my-agent" });
+
+// Explicit ID — useful for persistence or finding the run later
+const chat = new AgentChat({ agent: "my-agent", id: `review-${prNumber}` });
+```
+
+### Sending messages
+
+`sendMessage()` triggers a new run on the first call, then reuses the same run for subsequent messages via input streams:
+
+```ts
+// First message — triggers a new run
+const stream1 = await chat.sendMessage("Review PR #42");
+const review = await stream1.text();
+
+// Follow-up — same run, agent has full context
+const stream2 = await chat.sendMessage("Can you fix the main bug?");
+const fix = await stream2.text();
+```
+
+### Preloading (optional)
+
+If you want the agent to initialize before the first message (e.g., load data, authenticate), call `preload()`. This is optional — `sendMessage()` triggers the run automatically if needed.
+
+```ts
+await chat.preload();
+// Agent's onPreload hook fires now, before user types anything
+const stream = await chat.sendMessage("Hello");
+```
+
+### Closing
+
+Signal the agent to exit its loop gracefully:
+
+```ts
+await chat.close();
+```
+
+Without `close()`, the agent exits on its own when its idle/suspend timeout expires.
+
+## Reading responses
+
+`sendMessage()` returns a `ChatStream` — a typed wrapper around the response.
+
+### Get the full text
+
+```ts
+const stream = await chat.sendMessage("What is Trigger.dev?");
+const text = await stream.text();
+```
+
+### Get structured results
+
+```ts
+const stream = await chat.sendMessage("Research this topic");
+const { text, toolCalls, toolResults } = await stream.result();
+
+for (const tc of toolCalls) {
+  console.log(`Tool: ${tc.toolName}, Input: ${JSON.stringify(tc.input)}`);
+}
+```
+
+### Stream chunks in real-time
+
+```ts
+const stream = await chat.sendMessage("Write a report");
+
+for await (const chunk of stream) {
+  if (chunk.type === "text-delta") {
+    process.stdout.write(chunk.delta);
+  }
+  if (chunk.type === "tool-input-available") {
+    console.log(`Using tool: ${chunk.toolName}`);
+  }
+}
+```
+
+## Stateless request handlers
+
+In a stateless environment (HTTP handler, serverless function), you need to persist and restore the session across requests.
+
+Each chat is backed by a durable Session row that outlives any single run. `AgentChat` exposes the persistable state via `chat.session` (the SSE resume cursor) and surfaces the current run id via the `onTriggered` callback for telemetry / dashboard linking.
+
+```ts
+import { AgentChat } from "@trigger.dev/sdk/chat";
+
+export async function POST(req: Request) {
+  const { chatId, message } = await req.json();
+  const saved = await db.sessions.find({ chatId });
+
+  const chat = new AgentChat({
+    agent: "my-agent",
+    id: chatId,
+    // Restore from previous request — `lastEventId` is the SSE resume
+    // cursor; the underlying Session is keyed on `chatId` so it's
+    // implicit and durable.
+    session: saved ? { lastEventId: saved.lastEventId } : undefined,
+    // Useful for telemetry / dashboard linking. The `runId` is the
+    // current run, which may change across continuations and upgrades.
+    onTriggered: async ({ runId }) => {
+      await db.sessions.upsert({ chatId, runId });
+    },
+    // Persist after each turn for stream resumption
+    onTurnComplete: async ({ lastEventId }) => {
+      await db.sessions.update({ chatId, lastEventId });
+    },
+  });
+
+  const stream = await chat.sendMessage(message);
+  const text = await stream.text();
+
+  return Response.json({ text });
+}
+```
+
+<Info>
+  The Session row is the run manager — a chat that was active yesterday
+  resumes against the same chatId today, even if the original run has
+  long since exited. `AgentChat` (server-side) and `TriggerChatTransport`
+  (browser) both rely on this: send a new message and the server
+  triggers a fresh continuation run on the same session, carrying the
+  conversation forward without losing history or identity.
+</Info>
+
+## Sub-agent tool pattern
+
+`AgentChat` can be used inside an AI SDK tool to delegate work to a durable sub-agent. The sub-agent's response streams as preliminary tool results:
+
+```ts
+import { tool } from "ai";
+import { AgentChat } from "@trigger.dev/sdk/chat";
+import { z } from "zod";
+
+const researchTool = tool({
+  description: "Delegate research to a specialist agent.",
+  inputSchema: z.object({ topic: z.string() }),
+  execute: async function* ({ topic }, { abortSignal }) {
+    const chat = new AgentChat({ agent: "research-agent" });
+    const stream = await chat.sendMessage(topic, { abortSignal });
+    yield* stream.messages();
+    await chat.close();
+  },
+  toModelOutput: ({ output: message }) => {
+    const lastText = message?.parts?.findLast(
+      (p: { type: string }) => p.type === "text"
+    ) as { text?: string } | undefined;
+    return { type: "text", value: lastText?.text ?? "Done." };
+  },
+});
+```
+
+This supports single-turn delegation, multi-turn LLM-driven conversations with persistent sub-agents, and cross-turn state that survives snapshot/restore.
+
+See the [Sub-Agents guide](/ai-chat/patterns/sub-agents) for the full pattern including multi-turn conversations, cleanup, and what the frontend sees.
+
+## Additional methods
+
+### Steering
+
+Send a message during an active stream without interrupting it:
+
+```ts
+await chat.steer("Focus on security issues specifically");
+```
+
+### Stop generation
+
+Abort the current `streamText` call without ending the run:
+
+```ts
+await chat.stop();
+```
+
+### Raw messages
+
+For full control over the UIMessage shape:
+
+```ts
+const rawStream = await chat.sendRaw([
+  {
+    id: "msg-1",
+    role: "user",
+    parts: [
+      { type: "text", text: "Hello" },
+      { type: "file", url: "https://...", mediaType: "image/png" },
+    ],
+  },
+]);
+```
+
+### Reconnect
+
+Resume a stream subscription after a disconnect:
+
+```ts
+const stream = await chat.reconnect();
+```
+
+## AgentChat options
+
+| Option | Type | Default | Description |
+|---|---|---|---|
+| `agent` | `string` | required | The agent task ID to trigger |
+| `id` | `string` | `crypto.randomUUID()` | Conversation ID for tagging and correlation |
+| `clientData` | typed from agent | `undefined` | Client data included in every request |
+| `session` | `ChatSession` (`{ lastEventId?: string }`) | `undefined` | Restore a previous session's SSE resume cursor. The Session row itself is keyed on `chatId` (durable) — no other state to thread. |
+| `onTriggered` | `(event) => void \| Promise<void>` | `undefined` | Called when a new run is created |
+| `onTurnComplete` | `(event) => void \| Promise<void>` | `undefined` | Called when a turn's stream ends |
+| `streamTimeoutSeconds` | `number` | `120` | SSE timeout in seconds |
+| `triggerConfig` | `SessionTriggerConfig` | `undefined` | Tags, queue, machine, `maxAttempts`, `idleTimeoutInSeconds`, `basePayload` — folded into `sessions.start({...})` |
+| `baseURL` | `string \| (ctx: { endpoint: "in" \| "out"; chatId: string }) => string` | `apiClientManager.baseURL` | API base URL. String form applies to every endpoint; function form picks per endpoint — useful for routing `.in/append` through an edge proxy while keeping `.out` SSE direct. Defaults to whatever `@trigger.dev/sdk` was configured with (typically `TRIGGER_API_URL`). |
+| `fetch` | `(url: string, init: RequestInit, ctx: { endpoint: "in" \| "out"; chatId: string }) => Promise<Response>` | `undefined` | Per-request fetch override. Invoked for both `.in/append` POSTs and the `.out` SSE GET. Use for header injection, custom retries, or proxy rewrites. |
+
+## ChatStream methods
+
+| Method | Returns | Description |
+|---|---|---|
+| `text()` | `Promise<string>` | Consume stream, return accumulated text |
+| `result()` | `Promise<ChatStreamResult>` | Consume stream, return `{ text, toolCalls, toolResults }` |
+| `messages()` | `AsyncGenerator<UIMessage>` | Yield accumulated UIMessage snapshots (sub-agent pattern) |
+| `[Symbol.asyncIterator]()` | `AsyncIterator<UIMessageChunk>` | Iterate over typed stream chunks with `for await` |
+| `.stream` | `ReadableStream<UIMessageChunk>` | Raw stream for AI SDK utilities |
diff --git a/docs/ai-chat/sessions.mdx b/docs/ai-chat/sessions.mdx
new file mode 100644
index 00000000000..9e03279b218
--- /dev/null
+++ b/docs/ai-chat/sessions.mdx
@@ -0,0 +1,263 @@
+---
+title: "Sessions"
+sidebarTitle: "Sessions"
+description: "The durable, task-bound, bi-directional I/O primitive that backs chat.agent — sessions.list / open / start / close plus the SessionHandle (in/out) API."
+---
+
+import RcBanner from "/snippets/ai-chat-rc-banner.mdx";
+
+<RcBanner />
+
+A **Session** is a durable, task-bound, bi-directional I/O channel pair. It outlives any single run: a Session row is keyed on a stable `externalId` (e.g. `chatId`), holds the conversation's identity across run boundaries, and exposes two realtime streams — `.in` (clients → task) and `.out` (task → clients).
+
+`chat.agent` is built on Sessions. You can also use them directly for any pattern that needs durable bi-directional streaming across runs: long-lived agent inboxes, multi-step approval flows, server-to-server pipelines that survive worker restarts.
+
+## When to reach for Sessions directly
+
+`chat.agent` handles 90% of chat-shaped workloads — message accumulation, the turn loop, stop signals, lifecycle hooks. Use the raw `sessions` API when you need any of:
+
+- **Non-chat conversational state**: an agent inbox where each "turn" is a webhook event rather than a UI message.
+- **Server-to-server bi-directional streaming** where an external service produces records the task consumes (and vice-versa) over the same durable channel.
+- **A custom turn loop** where the agent abstraction doesn't fit but you still want session-survival across runs.
+
+For chat use cases, prefer [`chat.agent`](/ai-chat/backend#chat-agent) or [`chat.createSession`](/ai-chat/backend#chat-createsession).
+
+## `sessions` namespace
+
+```ts
+import { sessions } from "@trigger.dev/sdk";
+```
+
+### `sessions.start(body, requestOptions?)`
+
+Atomically create a Session row and trigger its first run. Idempotent on `(env, externalId)` — two concurrent calls with the same `externalId` converge to one session.
+
+```ts
+const { id, runId, publicAccessToken, isCached } = await sessions.start({
+  type: "chat.agent",
+  externalId: chatId,
+  taskIdentifier: "my-chat",
+  triggerConfig: {
+    tags: [`chat:${chatId}`],
+    basePayload: { /* whatever your task's payload shape is */ },
+  },
+});
+```
+
+| Field | Type | Notes |
+|---|---|---|
+| `type` | `string` | Free-form discriminator. `chat.agent` uses `"chat.agent"`. |
+| `externalId` | `string?` | Your stable identity. Cannot start with `session_` (reserved). |
+| `taskIdentifier` | `string` | Task this session triggers runs against. |
+| `triggerConfig` | `SessionTriggerConfig` | Trigger options applied to every run: `tags`, `queue`, `machine`, `maxAttempts`, `idleTimeoutInSeconds`, `basePayload`. |
+| `tags` | `string[]?` | Up to 10 tags on the Session row (separate from `triggerConfig.tags`). |
+| `metadata` | `Record<string, unknown>?` | Arbitrary JSON. |
+| `expiresAt` | `Date?` | Hard retention deadline. |
+
+Returns `CreatedSessionResponseBody`:
+
+| Field | Type | Notes |
+|---|---|---|
+| `id` | `string` | Server-assigned `session_*` friendlyId. |
+| `runId` | `string` | The first run created alongside the session. |
+| `publicAccessToken` | `string` | Session-scoped public access token (`read:sessions:{id} + write:sessions:{id}`). |
+| `isCached` | `boolean` | `true` if the session already existed (idempotent upsert). |
+
+### `sessions.retrieve(idOrExternalId, requestOptions?)`
+
+Retrieve a Session by either its server-assigned `session_*` id or your user-supplied `externalId`. The server disambiguates via the `session_` prefix.
+
+```ts
+const session = await sessions.retrieve(chatId);
+console.log(session.currentRunId, session.tags, session.closedAt);
+```
+
+### `sessions.update(idOrExternalId, body, requestOptions?)`
+
+Mutate `tags`, `metadata`, or `externalId` on an existing Session. Pass `externalId: null` to explicitly clear it.
+
+### `sessions.close(idOrExternalId, body?, requestOptions?)`
+
+Mark a Session as closed. Terminal and idempotent. The optional `reason` is stored on the row.
+
+```ts
+await sessions.close(chatId, { reason: "user signed out" });
+```
+
+### `sessions.list(options?, requestOptions?)`
+
+Cursor-paginated list of Sessions in the current environment. Returns a `CursorPagePromise` you can iterate with `for await`.
+
+```ts
+for await (const s of sessions.list({
+  type: "chat.agent",
+  tag: `user:${userId}`,
+  status: "ACTIVE",
+  limit: 50,
+})) {
+  console.log(s.id, s.externalId, s.createdAt);
+}
+```
+
+| Filter | Type | Notes |
+|---|---|---|
+| `type` | `string \| string[]` | e.g. `"chat.agent"` |
+| `tag` | `string \| string[]` | Matches `triggerConfig.tags` |
+| `taskIdentifier` | `string \| string[]` | Filter by task |
+| `externalId` | `string` | Exact match |
+| `status` | `"ACTIVE" \| "CLOSED" \| "EXPIRED"` | Lifecycle state |
+| `period` / `from` / `to` | window | Time-range filter |
+| `limit` / `after` / `before` | cursor | Pagination (1–100 per page; default 20) |
+
+### `sessions.open(idOrExternalId)`
+
+Open a lightweight `SessionHandle` to the realtime channels. Does **not** hit the network — each handle method calls the corresponding endpoint lazily.
+
+```ts
+const session = sessions.open(chatId);
+await session.out.append({ kind: "message", text: "hello" });
+const next = await session.in.once<MyEvent>({ timeoutMs: 30_000 });
+```
+
+## `SessionHandle`
+
+```ts
+class SessionHandle {
+  readonly id: string;
+  readonly in: SessionInputChannel;
+  readonly out: SessionOutputChannel;
+}
+```
+
+The two channels mirror the producer/consumer pair in `streams.define` (out) and `streams.input` (in), but are **session-scoped** rather than run-scoped — they survive across run boundaries.
+
+## `session.out` — task → clients
+
+The output channel. The task writes; external clients (browser, server action, another task) read via SSE.
+
+### `out.append(value, options?)`
+
+Append a single record. Routes through `writer` internally so SSE consumers see the same parsed-object shape on every event.
+
+### `out.pipe(stream, options?)`
+
+Pipe an `AsyncIterable` or `ReadableStream` directly to S2 (the durable backing store). Returns `{ stream, waitUntilComplete }`.
+
+### `out.writer({ execute, ... })`
+
+Imperative writer. `execute({ write, merge })` runs against an in-memory queue whose records are piped to S2.
+
+```ts
+session.out.writer<MyChunk>({
+  execute: ({ write }) => {
+    write({ type: "text", text: "hi" });
+    write({ type: "text", text: " there" });
+  },
+});
+```
+
+### `out.read(options?)`
+
+Subscribe to SSE records on `.out`. Returns an async-iterable stream with auto-retry and `Last-Event-ID` resume.
+
+```ts
+const stream = await session.out.read<MyChunk>({
+  signal: AbortSignal.timeout(30_000),
+  lastEventId: lastSeenSeqNum,
+});
+for await (const chunk of stream) {
+  // ...
+}
+```
+
+### `out.writeControl(subtype, extraHeaders?)`
+
+Write a Trigger control record. Carries a `trigger-control` header valued with `subtype` (e.g. `turn-complete`, `upgrade-required`); the body is empty. The SDK transport filters control records out of the consumer-facing chunk stream — readers route them via `onControl` instead.
+
+Returns `{ lastEventId }` — useful for trim chains.
+
+### `out.trimTo(earliestSeqNum)`
+
+Append an S2 `trim` command. Records with `seq_num < earliestSeqNum` are eventually deleted. Idempotent and monotonic. `chat.agent` uses this to keep `session.out` bounded to roughly one turn at steady state.
+
+## `session.in` — clients → task
+
+The input channel. External clients call `send`; the task consumes via `on` / `once` / `peek` / `wait` / `waitWithIdleTimeout`.
+
+### `in.send(value, requestOptions?)`
+
+Append a single record. Called from outside the task (browser, server action, another task).
+
+```ts
+const session = sessions.open(chatId);
+await session.in.send({ kind: "user-event", payload: { ... } });
+```
+
+### `in.on(handler)`
+
+Register a handler that fires for every record landing on `.in`. Buffered records flush on attach. Returns `{ off }`.
+
+### `in.once(options?)`
+
+Wait for the next record without suspending the run. Resolves to `{ ok: true, output }` when a record arrives, or `{ ok: false, error }` on timeout. Chain `.unwrap()` to get the data directly.
+
+```ts
+const result = await session.in.once<MyEvent>({ timeoutMs: 5_000 });
+if (result.ok) handle(result.output);
+```
+
+### `in.peek()`
+
+Non-blocking peek at the head of the `.in` buffer.
+
+### `in.wait(options?)`
+
+Suspend the current run until the next record arrives — frees compute while blocked. Only callable from inside `task.run()`.
+
+```ts
+const next = await session.in.wait<MyEvent>({ timeout: "1h" });
+```
+
+### `in.waitWithIdleTimeout({ idleTimeoutInSeconds, timeout, ... })`
+
+Hybrid: stay warm for `idleTimeoutInSeconds`, then suspend via `wait` if nothing arrives. `chat.agent`'s turn loop uses this to balance responsiveness and cost.
+
+```ts
+const next = await session.in.waitWithIdleTimeout<MyEvent>({
+  idleTimeoutInSeconds: 30,
+  timeout: "1h",
+  onSuspend: () => { /* persist before suspending */ },
+  onResume: () => { /* re-hydrate after resume */ },
+});
+```
+
+### `in.lastDispatchedSeqNum()`
+
+The highest S2 `seq_num` this channel has delivered to a consumer. Used by `chat.agent` to persist a resume cursor on each `turn-complete` so the next worker boot subscribes past already-processed records.
+
+## Authorization
+
+Browser and server-side clients use a session-scoped Public Access Token:
+
+```ts
+import { auth } from "@trigger.dev/sdk";
+
+const pat = await auth.createPublicToken({
+  scopes: {
+    read: { sessions: chatId },
+    write: { sessions: chatId },
+  },
+  expirationTime: "1h",
+});
+```
+
+Tokens authorize **both** URL forms: `/sessions/{externalId}/...` and `/sessions/session_*/...`.
+
+For the `chat.agent` transport, `auth.createPublicToken` is wrapped by `accessToken` in `useTriggerChatTransport`; for direct session access from your server, mint a token per request just like any other realtime resource.
+
+## See also
+
+- [How it works](/ai-chat/how-it-works) — How `chat.agent` builds on Sessions.
+- [Backend](/ai-chat/backend) — `chat.agent` / `chat.createSession` / raw `task()` with chat primitives.
+- [Client Protocol](/ai-chat/client-protocol) — The wire-level view of `.in/append` and `.out` SSE.
+- [Persistence and replay](/ai-chat/patterns/persistence-and-replay) — How tails are read at boot.
diff --git a/docs/ai-chat/testing.mdx b/docs/ai-chat/testing.mdx
new file mode 100644
index 00000000000..2a655b261ab
--- /dev/null
+++ b/docs/ai-chat/testing.mdx
@@ -0,0 +1,682 @@
+---
+title: "Testing"
+sidebarTitle: "Testing"
+description: "Drive a chat.agent through real turns in unit tests — no network, no task runtime, no mocking the SDK."
+---
+
+import RcBanner from "/snippets/ai-chat-rc-banner.mdx";
+
+<RcBanner />
+
+## Overview
+
+`@trigger.dev/sdk/ai/test` exports `mockChatAgent`, an offline harness that runs your `chat.agent` definition's `run()` function inside an in-memory task runtime. You send messages, actions, and stop signals through driver methods and assert against the chunks the agent emits.
+
+Under the hood the harness drives the agent's backing Session channels — `.in` receives the records your `sendMessage` / `sendStop` / `sendAction` produce, `.out` captures the chunks the agent emits. The harness API itself is session-agnostic; you don't need to manage `sessionId` in tests.
+
+The harness exercises the real turn loop, lifecycle hooks, validation, hydration, and action routing — only the language model and the surrounding Trigger.dev runtime are replaced. Pair it with [`MockLanguageModelV3`](https://sdk.vercel.ai/docs/reference/ai-sdk-core/mock-language-model-v3) and `simulateReadableStream` from `ai` to control LLM responses.
+
+<Note>
+  Import `@trigger.dev/sdk/ai/test` **before** your agent module. It installs the resource catalog so `chat.agent({ id, ... })` can register tasks during testing.
+</Note>
+
+## Quick start
+
+```ts trigger/my-chat.test.ts
+import { mockChatAgent } from "@trigger.dev/sdk/ai/test";
+
+import { describe, expect, it } from "vitest";
+import { simulateReadableStream } from "ai";
+import { MockLanguageModelV3 } from "ai/test";
+import type { LanguageModelV3StreamPart } from "@ai-sdk/provider";
+import { myChatAgent } from "./my-chat.js";
+
+function modelWithText(text: string) {
+  const chunks: LanguageModelV3StreamPart[] = [
+    { type: "text-start", id: "t1" },
+    { type: "text-delta", id: "t1", delta: text },
+    { type: "text-end", id: "t1" },
+    {
+      type: "finish",
+      finishReason: { unified: "stop", raw: "stop" },
+      usage: {
+        inputTokens: { total: 10, noCache: 10, cacheRead: undefined, cacheWrite: undefined },
+        outputTokens: { total: 10, text: 10, reasoning: undefined },
+      },
+    },
+  ];
+  return new MockLanguageModelV3({
+    doStream: async () => ({ stream: simulateReadableStream({ chunks }) }),
+  });
+}
+
+describe("myChatAgent", () => {
+  it("streams the model's response", async () => {
+    const model = modelWithText("hello world");
+    const harness = mockChatAgent(myChatAgent, {
+      chatId: "test-1",
+      clientData: { model },
+    });
+
+    try {
+      const turn = await harness.sendMessage({
+        id: "u1",
+        role: "user",
+        parts: [{ type: "text", text: "hi" }],
+      });
+
+      const text = turn.chunks
+        .filter((c) => c.type === "text-delta")
+        .map((c) => (c as { delta: string }).delta)
+        .join("");
+      expect(text).toBe("hello world");
+    } finally {
+      await harness.close();
+    }
+  });
+});
+```
+
+The agent reads the mock model from `clientData`:
+
+```ts trigger/my-chat.ts
+import { chat } from "@trigger.dev/sdk/ai";
+import { stepCountIs, streamText, type LanguageModel } from "ai";
+import { z } from "zod";
+
+type ClientData = { model: LanguageModel };
+
+export const myChatAgent = chat
+  .withClientData({
+    schema: z.custom<ClientData>(
+      (v) => !!v && typeof v === "object" && "model" in (v as object)
+    ),
+  })
+  .agent({
+    id: "my-chat",
+    run: async ({ messages, clientData, signal }) => {
+      return streamText({
+        model: clientData?.model ?? "openai/gpt-4o-mini",
+        messages,
+        abortSignal: signal,
+        stopWhen: stepCountIs(15),
+      });
+    },
+  });
+```
+
+## Setup
+
+### Install dev dependencies
+
+The harness itself ships with `@trigger.dev/sdk`. You need a test runner and the AI SDK's mock model utilities:
+
+```bash
+pnpm add -D vitest ai @ai-sdk/provider
+```
+
+`@ai-sdk/provider` is only needed to type the chunk array as `LanguageModelV3StreamPart[]` — drop it if you cast inline.
+
+### Vitest config
+
+A minimal `vitest.config.ts` for a Trigger.dev project:
+
+```ts
+import { defineConfig } from "vitest/config";
+
+export default defineConfig({
+  test: {
+    include: ["src/**/*.test.ts"],
+    environment: "node",
+  },
+});
+```
+
+### Import order
+
+`mockChatAgent` must be imported **first** so the resource catalog is installed before any `chat.agent({ id, ... })` registration runs:
+
+```ts
+// ✅ Correct
+import { mockChatAgent } from "@trigger.dev/sdk/ai/test";
+import { myAgent } from "./my-agent.js";
+
+// ❌ Wrong — agent loads before the catalog exists
+import { myAgent } from "./my-agent.js";
+import { mockChatAgent } from "@trigger.dev/sdk/ai/test";
+```
+
+If the agent isn't registered when `mockChatAgent` runs, you'll get:
+
+```
+mockChatAgent: no task registered with id "my-chat".
+```
+
+## Inject the model via clientData
+
+`MockLanguageModelV3` lives in test code and shouldn't leak into your agent module. Pass it through `clientData` so the agent picks it up at runtime in tests, and falls back to a real model in production:
+
+```ts trigger/agent.ts
+type ClientData = { model?: LanguageModel };
+
+export const agent = chat
+  .withClientData({ schema: z.custom<ClientData>() })
+  .agent({
+    id: "agent",
+    run: async ({ messages, clientData, signal }) => {
+      return streamText({
+        model: clientData?.model ?? anthropic("claude-haiku-4-5"),
+        messages,
+        abortSignal: signal,
+        stopWhen: stepCountIs(15),
+      });
+    },
+  });
+```
+
+```ts agent.test.ts
+const harness = mockChatAgent(agent, {
+  chatId: "test",
+  clientData: { model: mockModel },
+});
+```
+
+## Driving turns
+
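+The harness exposes one method per chat trigger. Each turn-driving method (`sendMessage`, `sendRegenerate`, `sendAction`) waits for the next `trigger:turn-complete` chunk before resolving; `sendStop` is the exception and resolves without waiting for a turn.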
+
+### sendMessage
+
+```ts
+const turn = await harness.sendMessage({
+  id: "u1",
+  role: "user",
+  parts: [{ type: "text", text: "hi" }],
+});
+```
+
+Each call sends exactly one message (the wire format carries at most one message per record); call `sendMessage` again for each additional message.
+
+### sendRegenerate
+
+```ts
+const turn = await harness.sendRegenerate();
+```
+
+Equivalent to the frontend's `useChat().regenerate()` — takes no arguments. The agent trims trailing assistant messages from its own accumulator and re-runs the turn.
+
+### sendAction
+
+Routes a payload through `actionSchema` + `onAction`. Actions are not turns: only `hydrateMessages` and `onAction` fire on the agent side — no turn lifecycle hooks, no `run()`. The returned `turn.rawChunks` contains whatever `onAction` produced (a streamed model response if it returned a `StreamTextResult`, otherwise just `trigger:turn-complete`):
+
+```ts
+const turn = await harness.sendAction({ type: "undo" });
+```
+
+If the action fails schema validation, an `error` chunk appears in `turn.rawChunks`.
+
+### sendStop
+
+Fires a stop signal. Does **not** wait for a turn — the agent's `signal.aborted` becomes `true` and the current turn unwinds:
+
+```ts
+await harness.sendStop("user requested stop");
+```
+
+### close
+
+Sends a `close` trigger, closes the session's `.in` channel, and aborts the run signal so the task exits cleanly. Always call this at the end of every test:
+
+```ts
+afterEach(() => harness.close());
+// or with a try/finally
+try {
+  await harness.sendMessage(...);
+} finally {
+  await harness.close();
+}
+```
+
+## Inspecting output
+
+Each turn returns:
+
+```ts
+type MockChatAgentTurn = {
+  chunks: UIMessageChunk[];   // text-delta, tool-call, etc.
+  rawChunks: unknown[];       // includes control chunks (turn-complete, errors)
+};
+```
+
+The harness also exposes accumulators across all turns:
+
+```ts
+harness.allChunks;     // every UIMessageChunk since creation
+harness.allRawChunks;  // every raw chunk including control frames
+```
+
+A small helper to assemble streamed text:
+
+```ts
+function collectText(chunks: UIMessageChunk[]): string {
+  return chunks
+    .filter((c) => c.type === "text-delta")
+    .map((c) => (c as { delta: string }).delta)
+    .join("");
+}
+```
+
+## Common patterns
+
+### Asserting hook order
+
+```ts
+const events: string[] = [];
+const agent = chat.agent({
+  id: "hook-order",
+  onChatStart: async () => { events.push("onChatStart"); },
+  onTurnStart: async () => { events.push("onTurnStart"); },
+  onBeforeTurnComplete: async () => { events.push("onBeforeTurnComplete"); },
+  onTurnComplete: async () => { events.push("onTurnComplete"); },
+  run: async ({ messages, signal }) => {
+    events.push("run");
+    return streamText({ model, messages, abortSignal: signal });
+  },
+});
+
+const harness = mockChatAgent(agent, { chatId: "t" });
+await harness.sendMessage(userMessage("hi"));
+
+// onTurnComplete fires after the turn-complete chunk is written —
+// give it a tick before asserting.
+await new Promise((r) => setTimeout(r, 20));
+expect(events).toEqual([
+  "onChatStart",
+  "onTurnStart",
+  "run",
+  "onBeforeTurnComplete",
+  "onTurnComplete",
+]);
+await harness.close();
+```
+
+### Testing onValidateMessages
+
+```ts
+const turn = await harness.sendMessage(userMessage("hello blocked-word"));
+
+// The turn completes with an error chunk, not text
+expect(collectText(turn.chunks)).toBe("");
+expect(turn.rawChunks.some((c) =>
+  typeof c === "object" && c !== null &&
+  (c as { type?: string }).type === "trigger:turn-complete"
+)).toBe(true);
+```
+
+### Testing actions and rejection
+
+```ts
+// Valid action
+await harness.sendAction({ type: "undo" });
+
+// Invalid action — schema validation fails, error chunk emitted
+const turn = await harness.sendAction({ type: "not-a-real-action" });
+const errors = turn.rawChunks.filter((c) =>
+  typeof c === "object" && c !== null &&
+  (c as { type?: string }).type === "error"
+);
+expect(errors.length).toBeGreaterThan(0);
+```
+
+### Multi-turn accumulation
+
+The harness preserves chat history across turns, just like the real runtime:
+
+```ts
+const seenLengths: number[] = [];
+const agent = chat.agent({
+  id: "multi-turn",
+  run: async ({ messages, signal }) => {
+    seenLengths.push(messages.length);
+    return streamText({ model, messages, abortSignal: signal });
+  },
+});
+
+const harness = mockChatAgent(agent, { chatId: "t" });
+await harness.sendMessage(userMessage("first"));
+await harness.sendMessage(userMessage("second"));
+await harness.sendMessage(userMessage("third"));
+
+// Turn 1: 1 message; turn 2: user + assistant + user = 3; turn 3: 5
+expect(seenLengths).toEqual([1, 3, 5]);
+```
+
+### Hydrating from a "database"
+
+Use `clientData` to seed a synthetic prior context for `hydrateMessages`:
+
+```ts
+const hydrated = [
+  { id: "h1", role: "user", parts: [{ type: "text", text: "prior question" }] },
+  { id: "h2", role: "assistant", parts: [{ type: "text", text: "prior answer" }] },
+];
+
+const harness = mockChatAgent(agent, {
+  chatId: "test-hydrate",
+  clientData: { model, hydrated: [...hydrated, userMessage("follow up")] },
+});
+
+await harness.sendMessage(userMessage("follow up"));
+
+// Model should have been called with the hydrated context
+expect(model.doStreamCalls[0]!.prompt.length).toBeGreaterThanOrEqual(3);
+```
+
+The agent reads `clientData.hydrated` inside its `hydrateMessages` hook:
+
+```ts
+hydrateMessages: async ({ clientData, incomingMessages }) => {
+  return clientData?.hydrated ?? incomingMessages;
+},
+```
+
+### Testing continuation runs
+
+A continuation run is a new run picking up an existing session after the prior run ended — `chat.endRun`, waitpoint timeout, or `chat.requestUpgrade`. The contract differs from a fresh run in two ways:
+
+- `onChatStart` does **not** fire (it's once-per-chat — fires only on the chat's very first user message ever).
+- The boot payload arrives with `continuation: true` and no `message`. The SDK waits silently on `session.in` until the next user message arrives.
+
+Pass `continuation: true` to drive this path:
+
+```ts
+const onChatStart = vi.fn();
+const onTurnStart = vi.fn();
+
+const agent = chat.agent({
+  id: "my-chat",
+  onChatStart,
+  onTurnStart,
+  run: async ({ messages, signal }) =>
+    streamText({ model, messages, abortSignal: signal }),
+});
+
+const harness = mockChatAgent(agent, {
+  chatId: "test-continuation",
+  // Auto-selects `mode: "continuation"` — boots with `trigger` omitted
+  // and `continuation: true` in the wire payload, exactly as the server
+  // produces it on continuation runs in production.
+  continuation: true,
+  previousRunId: "run_test_prior",
+});
+
+try {
+  // The SDK enters continuation-wait; sendMessage wakes it and drives turn 0.
+  await harness.sendMessage({
+    id: "u1",
+    role: "user",
+    parts: [{ type: "text", text: "where were we?" }],
+  });
+  await new Promise((r) => setTimeout(r, 20));
+
+  expect(onChatStart).not.toHaveBeenCalled();
+  expect(onTurnStart).toHaveBeenCalledTimes(1);
+} finally {
+  await harness.close();
+}
+```
+
+To simulate an **OOM-retry attempt** (also a continuation by contract — same `onChatStart` skip), bump `ctx.attempt.number`:
+
+```ts
+const harness = mockChatAgent(agent, {
+  chatId: "test-oom-retry",
+  taskContext: {
+    ctx: { attempt: { number: 2, startedAt: new Date(0), status: "EXECUTING" } },
+  },
+});
+
+await harness.sendMessage(/* ... */);
+expect(onChatStart).not.toHaveBeenCalled();
+```
+
+### Testing recovery boot
+
+`onRecoveryBoot` fires when the dead predecessor left state behind — a partial assistant on `session.out`, in-flight users on `session.in`, or both. The harness exposes two seeders to drive this state at boot time:
+
+- `harness.seedSessionOutPartial(message)` — pre-seed a trailing partial assistant. The next boot's replay surfaces it as `event.partialAssistant`.
+- `harness.seedSessionInTail(messages)` — pre-seed user messages on the input tail. The next boot's replay surfaces them as `event.inFlightUsers`.
+
+Combined with `continuation: true`, this drives the full recovery boot path:
+
+```ts
+import { mockChatAgent } from "@trigger.dev/sdk/ai/test";
+
+const onRecoveryBoot = vi.fn(async () => {
+  // accept smart default
+});
+
+const agent = chat.agent({
+  id: "my-chat",
+  onRecoveryBoot,
+  run: async ({ messages, signal }) =>
+    streamText({ model, messages, abortSignal: signal }),
+});
+
+const harness = mockChatAgent(agent, {
+  chatId: "test-recovery",
+  continuation: true,
+  previousRunId: "run_prior",
+});
+
+// Predecessor was answering "write an essay" and got cut off mid-stream
+// after producing some text. Customer then sent a follow-up.
+harness.seedSessionOutPartial({
+  id: "a-orphan",
+  role: "assistant",
+  parts: [{ type: "text", text: "Espresso originated in..." }],
+});
+harness.seedSessionInTail([
+  { id: "u-1", role: "user", parts: [{ type: "text", text: "Write an essay about espresso." }] },
+  { id: "u-2", role: "user", parts: [{ type: "text", text: "keep going" }] },
+]);
+
+await new Promise((r) => setTimeout(r, 50));
+
+expect(onRecoveryBoot).toHaveBeenCalledTimes(1);
+const event = onRecoveryBoot.mock.calls[0]![0];
+expect(event.partialAssistant?.id).toBe("a-orphan");
+expect(event.inFlightUsers).toHaveLength(2);
+```
+
+Use `harness.seedSnapshot({ messages: [...] })` alongside these to model a continuation where settled history exists. See the [Recovery boot](/ai-chat/patterns/recovery-boot) pattern for what each field means and what the smart default does with it.
+
+## Testing against a database
+
+Most agents call into a database from `hydrateMessages` or `onTurnComplete` to load history and persist replies. You shouldn't pass database clients through `clientData` — that's wire-data from the browser. Use **`locals` for dependency injection** instead.
+
+`locals` are task-scoped, server-side only, and independent of the wire format. The mock harness exposes a `setupLocals` callback that pre-seeds them before the agent's `run()` starts.
+
+### Define a locals key for the dependency
+
+Create a single key per dependency, exported from your project:
+
+```ts db.ts
+import { locals } from "@trigger.dev/sdk";
+import { PrismaClient } from "@prisma/client";
+
+export type Db = PrismaClient;
+export const dbKey = locals.create<Db>("db");
+
+export function getDb(): Db {
+  // Returns the seeded test instance if present, otherwise lazy-creates prod.
+  return locals.get(dbKey) ?? locals.set(dbKey, new PrismaClient());
+}
+```
+
+### Use the dependency from agent hooks
+
+Hooks read from `locals` instead of constructing clients themselves:
+
+```ts trigger/agent.ts
+import { chat } from "@trigger.dev/sdk/ai";
+import { getDb } from "../db";
+
+export const agent = chat.agent({
+  id: "agent",
+  hydrateMessages: async ({ chatId }) => {
+    const db = getDb();
+    const row = await db.chat.findUnique({ where: { id: chatId } });
+    return (row?.messages as UIMessage[]) ?? [];
+  },
+  onTurnComplete: async ({ chatId, messages }) => {
+    const db = getDb();
+    await db.chat.upsert({
+      where: { id: chatId },
+      create: { id: chatId, messages },
+      update: { messages },
+    });
+  },
+  run: async ({ messages, signal }) => {
+    return streamText({ model: anthropic("claude-sonnet-4-5"), messages, abortSignal: signal });
+  },
+});
+```
+
+### Inject a test database in the harness
+
+`setupLocals` runs *before* the agent starts, so `getDb()` returns the test instance for every hook:
+
+```ts agent.test.ts
+import { mockChatAgent } from "@trigger.dev/sdk/ai/test";
+import { dbKey } from "./db";
+import { agent } from "./trigger/agent";
+
+const harness = mockChatAgent(agent, {
+  chatId: "test-1",
+  setupLocals: ({ set }) => {
+    set(dbKey, testDb); // testDb = your testcontainers Prisma client, sqlite stub, etc.
+  },
+});
+```
+
+### Pick a backing database
+
+You still need to decide what `testDb` actually is:
+
+- **Testcontainers (recommended).** Spin up Postgres in Docker via `@internal/testcontainers` (or `testcontainers` directly), run migrations, hand the resulting `PrismaClient` to `set(dbKey, ...)`. Highest fidelity — catches schema drift, migration bugs, transaction issues.
+- **Embedded SQLite / PGlite.** Fast and no Docker, but a different SQL dialect from production. Fine for hooks that only do simple CRUD; risky for raw SQL or Postgres-specific features.
+- **In-memory fake.** Hand-rolled object with the same interface as your DB module. Fastest, lowest fidelity — works when you only care about whether the agent *called* the right method, not what the DB *did* with it.
+
+### Drizzle, Kysely, etc.
+
+The pattern is the same — replace `PrismaClient` with your client class:
+
+```ts db.ts
+import { locals } from "@trigger.dev/sdk";
+import { drizzle } from "drizzle-orm/node-postgres";
+import { Pool } from "pg";
+export type Db = ReturnType<typeof drizzle>;
+export const dbKey = locals.create<Db>("db");
+
+export function getDb(): Db {
+  return locals.get(dbKey) ?? locals.set(
+    dbKey,
+    drizzle(new Pool({ connectionString: process.env.DATABASE_URL })),
+  );
+}
+```
+
+<Tip>
+  The same `setupLocals` pattern works for any server-side dependency: feature flag clients, Stripe SDK, internal HTTP clients, Sentry. Anything you'd normally inject via constructor parameters in a class-based design.
+</Tip>
+
+## API reference
+
+### mockChatAgent(agent, options?)
+
+```ts
+function mockChatAgent(
+  agent: { id: string },
+  options?: MockChatAgentOptions,
+): MockChatAgentHarness;
+```
+
+#### MockChatAgentOptions
+
+| Option           | Type                                                                  | Default       | Description                                                                                            |
+| ---------------- | --------------------------------------------------------------------- | ------------- | ------------------------------------------------------------------------------------------------------ |
+| `chatId`         | `string`                                                              | `"test-chat"` | Chat session id passed in every wire payload.                                                          |
+| `clientData`     | `unknown`                                                             | `undefined`   | Client-provided data forwarded to `run()` and every hook.                                              |
+| `taskContext`    | `MockTaskContextOptions`                                              | `{}`          | Overrides for the mock `TaskRunContext`. Use `ctx.attempt.number > 1` to simulate an OOM-retry attempt — the agent skips `onChatStart` (same as continuation runs). |
+| `preload`        | `boolean`                                                             | `true`        | Start in preload mode. When `false`, the first `sendMessage()` starts turn 0 directly without preload. Ignored when `mode` is set explicitly.                       |
+| `mode`           | `"preload" \| "submit-message" \| "handover-prepare" \| "continuation"` | derived       | Initial boot trigger. Defaults to `"preload"` (or `"submit-message"` when `preload: false`, or `"continuation"` when `continuation: true`). See [Boot modes](#boot-modes) below. |
+| `continuation`   | `boolean`                                                             | `false`       | Boot as a continuation run (a new run on an existing session). Auto-selects `mode: "continuation"` if `mode` is not set — boots with `trigger` omitted and `continuation: true` in the payload, exercising the SDK's continuation-wait branch. `onChatStart` does NOT fire on continuation runs. |
+| `previousRunId`  | `string`                                                              | `undefined`   | Set `payload.previousRunId` on the initial wire payload. Typically paired with `continuation: true`.   |
+| `snapshot`       | `ChatSnapshotV1`                                                      | `undefined`   | Pre-seed the snapshot the agent reads at run boot (replaces the real S3 GET). Use to drive resume scenarios with prior history. See [Persistence and replay](/ai-chat/patterns/persistence-and-replay) for the production snapshot model. |
+| `setupLocals`    | `({ set }) => void \| Promise<void>`                                   | `undefined`   | Callback invoked before `run()` starts. Use `set(key, value)` to inject server-side dependencies (DB clients, service stubs) that the agent reads via `locals.get()`. |
+
+##### Boot modes
+
+The harness's initial wire payload depends on `mode`:
+
+| Mode                  | Wire payload                                                       | Use when                                                                       |
+| --------------------- | ------------------------------------------------------------------ | ------------------------------------------------------------------------------ |
+| `"preload"`           | `{ trigger: "preload" }`                                           | Simulating a `transport.preload(chatId)` warm-up. Fires `onPreload`, waits for the first `sendMessage()`. |
+| `"submit-message"`    | `{ trigger: "submit-message" }`                                    | Skipping preload — `sendMessage()` drives turn 0 directly.                     |
+| `"continuation"`      | `{ continuation: true }` (no `trigger`)                            | A new run picking up an existing session after the prior run ended (`chat.endRun`, waitpoint timeout, `chat.requestUpgrade`). Mirrors the boot payload the server's `ensureRunForSession` / `swapSessionRun` produce. The SDK enters its continuation-wait branch — `onPreload` and `onChatStart` do NOT fire. |
+| `"handover-prepare"`  | `{ trigger: "handover-prepare" }`                                  | Driving the `chat.handover` wait path. Use `sendHandover()` / `sendHandoverSkip()` to dispatch the handover signal. |
+
+#### MockChatAgentHarness
+
+| Member                                | Description                                                                                            |
+| ------------------------------------- | ------------------------------------------------------------------------------------------------------ |
+| `chatId`                              | The chat session id used by this harness.                                                              |
+| `sendMessage(message)`                | Send a single user message (or tool-approval-responded assistant message). Slim wire: at most ONE message per record. Returns the chunks produced during the resulting turn. |
+| `sendRegenerate()`                    | Send a regenerate-message trigger (no body — slim wire). The agent trims trailing assistant messages from its accumulator and re-runs. |
+| `sendHeadStart({ messages })`         | Drive the head-start path: sends `trigger: "handover-prepare"` with `headStartMessages` carrying the first-turn UIMessage history. Used only at the very first turn before any snapshot exists. |
+| `sendHandover({ partialAssistantMessage, isFinal?, messageId? })` | Dispatch a `handover` signal — only meaningful when started with `mode: "handover-prepare"`. The agent picks up partial assistant messages and continues the turn. |
+| `sendHandoverSkip()`                  | Dispatch a `handover-skip` signal — only meaningful when started with `mode: "handover-prepare"`. The agent exits cleanly without firing turn hooks. |
+| `sendAction(action)`                  | Route a custom action through `actionSchema` + `onAction`.                                             |
+| `sendStop(message?)`                  | Fire a stop signal. Does not wait for the turn — the run's `signal.aborted` becomes `true`.            |
+| `seedSnapshot(snapshot)`              | Pre-seed the snapshot read for the next boot. Effective on the next run boot only.                     |
+| `seedSessionOutTail(chunks?)`         | Pre-seed `session.out` chunks for the next boot's replay. Reduces to settled assistant turns.          |
+| `seedSessionOutPartial(message?)`     | Pre-seed a trailing partial assistant for the next boot's replay. Surfaces as `event.partialAssistant` in `onRecoveryBoot`. |
+| `seedSessionInTail(messages)`         | Pre-seed user messages on `session.in` for the next boot. Surfaces as `event.inFlightUsers` in `onRecoveryBoot`. |
+| `getSnapshot()`                       | The most recently written snapshot, or `undefined` if no snapshot was written.                         |
+| `close()`                             | Send a `close` trigger, abort the signal, wait for `run()` to return. Always call at end of test.      |
+| `allChunks`                           | Every `UIMessageChunk` emitted since the harness was created.                                          |
+| `allRawChunks`                        | Every raw chunk emitted since creation, including control chunks (`trigger:turn-complete`, errors).    |
+
+### runInMockTaskContext
+
+`mockChatAgent` is a higher-level wrapper around `runInMockTaskContext`, re-exported from `@trigger.dev/sdk/ai/test` so you don't need to depend on `@trigger.dev/core` directly. Use it when you need to drive a non-chat task offline:
+
+```ts
+import { runInMockTaskContext } from "@trigger.dev/sdk/ai/test";
+
+await runInMockTaskContext(
+  async ({ inputs, outputs, ctx }) => {
+    setTimeout(() => {
+      inputs.send("chat-messages", { messages: [], chatId: "c1" });
+    }, 0);
+
+    await myTask.fns.run(payload, {
+      ctx,
+      signal: new AbortController().signal,
+    });
+
+    expect(outputs.chunks("chat")).toContainEqual(
+      expect.objectContaining({ type: "text-delta", delta: "hi" }),
+    );
+  },
+  { ctx: { run: { id: "run_abc" } } },
+);
+```
+
+## Limitations
+
+- **No network.** The mock task context replaces realtime streams, run metadata, lifecycle managers, and the runtime. Anything that bypasses these (raw `fetch`, direct DB clients) runs against the real network.
+- **Single agent per process.** The resource catalog is process-global; tests within a file are sequential by default. If you parallelize across files, vitest runs each file in its own worker, which avoids registry collisions.
+- **Time-sensitive hooks.** `onTurnComplete` runs *after* the `turn-complete` chunk is written, so `sendMessage()` resolves before that hook finishes. Add a brief `await new Promise((r) => setTimeout(r, 20))` if you need to assert on hook side-effects.
+- **No real LLM.** The harness does not call providers — you must inject `MockLanguageModelV3` (or another mock) yourself.
diff --git a/docs/ai-chat/types.mdx b/docs/ai-chat/types.mdx
new file mode 100644
index 00000000000..0afbc7b1c8d
--- /dev/null
+++ b/docs/ai-chat/types.mdx
@@ -0,0 +1,236 @@
+---
+title: "Types"
+sidebarTitle: "Types"
+description: "TypeScript types for AI Agents, UI messages, and the frontend transport."
+---
+
+import RcBanner from "/snippets/ai-chat-rc-banner.mdx";
+
+<RcBanner />
+
+TypeScript patterns for [AI Chat](/ai-chat/overview). This page covers how to pin a custom AI SDK [`UIMessage`](https://sdk.vercel.ai/docs/reference/ai-sdk-core/ui-message) subtype with `chat.withUIMessage`, fix a typed `clientData` schema with `chat.withClientData`, chain builder-level hooks, and align types on the client.
+
+## Custom `UIMessage` with `chat.withUIMessage`
+
+`chat.agent()` types the wire payload with the base AI SDK `UIMessage`. That is enough for many apps.
+
+When you add **custom `data-*` parts** (via `chat.stream` / `writer`) or a **typed tool map** (e.g. `InferUITools<typeof tools>`), you want a **narrower** `UIMessage` generic so that:
+
+- `onTurnStart`, `onTurnComplete`, and similar hooks expose correctly typed `uiMessages`
+- Stream options like `sendReasoning` align with your message shape
+- The frontend can treat `useChat` messages as the same subtype end-to-end
+
+`chat.withUIMessage<YourUIMessage>(config?)` returns a [ChatBuilder](#chatbuilder) where `.agent(...)` accepts the **same options as** [`chat.agent()`](/ai-chat/backend#chat-agent) but fixes `YourUIMessage` as the UI message type for that chat agent.
+
+### Defining a `UIMessage` subtype
+
+Build the type from AI SDK helpers and your tools object:
+
+```ts
+import type { InferUITools, UIDataTypes, UIMessage } from "ai";
+import { tool, stepCountIs } from "ai";
+import { z } from "zod";
+
+const myTools = {
+  lookup: tool({
+    description: "Look up a record",
+    inputSchema: z.object({ id: z.string() }),
+    execute: async ({ id }) => ({ id, label: "example" }),
+  }),
+};
+
+type MyChatTools = InferUITools<typeof myTools>;
+
+type MyChatDataTypes = UIDataTypes & {
+  "turn-status": { status: "preparing" | "streaming" | "done" };
+};
+
+export type MyChatUIMessage = UIMessage<unknown, MyChatDataTypes, MyChatTools>;
+```
+
+Task-backed tools should use AI SDK [`tool()`](https://sdk.vercel.ai/docs/ai-sdk-core/tools-and-tool-calling) with `execute: ai.toolExecute(schemaTask)` where needed — see [Task-backed AI tools](/tasks/schemaTask#task-backed-ai-tools).
+
+### Backend: `chat.withUIMessage(...).agent(...)`
+
+Call `withUIMessage` **once**, then chain `.agent({ ... })` instead of `chat.agent({ ... })`. You can also chain `.withClientData()` and hook methods before `.agent()`:
+
+```ts
+import { chat } from "@trigger.dev/sdk/ai";
+import { stepCountIs, streamText, tool } from "ai";
+import { anthropic } from "@ai-sdk/anthropic";
+import { z } from "zod";
+import type { MyChatUIMessage } from "./my-chat-types";
+
+const myTools = {
+  lookup: tool({
+    description: "Look up a record",
+    inputSchema: z.object({ id: z.string() }),
+    execute: async ({ id }) => ({ id, label: "example" }),
+  }),
+};
+
+export const myChat = chat
+  .withUIMessage<MyChatUIMessage>({
+    streamOptions: {
+      sendReasoning: true,
+      onError: (error) =>
+        error instanceof Error ? error.message : "Something went wrong.",
+    },
+  })
+  .withClientData({
+    schema: z.object({ userId: z.string() }),
+  })
+  .agent({
+    id: "my-chat",
+    onTurnStart: async ({ uiMessages, writer }) => {
+      // uiMessages is MyChatUIMessage[] — custom data parts are typed
+      writer.write({
+        type: "data-turn-status",
+        data: { status: "preparing" },
+      });
+    },
+    run: async ({ messages, signal }) => {
+      return streamText({
+        model: anthropic("claude-sonnet-4-5"),
+        messages,
+        tools: myTools,
+        abortSignal: signal,
+        stopWhen: stepCountIs(15),
+      });
+    },
+  });
+```
+
+### Default stream options
+
+The optional `streamOptions` object becomes the **default** [`uiMessageStreamOptions`](/ai-chat/reference#chatagentoptions) for `toUIMessageStream()`.
+
+If you also set `uiMessageStreamOptions` on the inner `.agent({ ... })`, the two objects are **shallow-merged** — keys on the **agent** win on conflicts. Per-turn overrides via [`chat.setUIMessageStreamOptions()`](/ai-chat/backend#stream-options) still apply on top.
+
+### Frontend: `InferChatUIMessage`
+
+Import the helper type and pass it to `useChat` so `messages` and render logic match the backend:
+
+```tsx
+import { useChat } from "@ai-sdk/react";
+import { useTriggerChatTransport, type InferChatUIMessage } from "@trigger.dev/sdk/chat/react";
+import type { myChat } from "./myChat";
+
+type Msg = InferChatUIMessage<typeof myChat>;
+
+export function Chat() {
+  const transport = useTriggerChatTransport<typeof myChat>({
+    task: "my-chat",
+    accessToken: ({ chatId }) => mintChatAccessToken(chatId),
+    startSession: ({ chatId, clientData }) =>
+      startChatSession({ chatId, clientData }),
+  });
+
+  const { messages } = useChat<Msg>({ transport });
+
+  return messages.map((m) => (
+    <div key={m.id}>{/* m.parts narrowed for your UIMessage subtype */}</div>
+  ));
+}
+```
+
+You can also import `InferChatUIMessage` from `@trigger.dev/sdk/ai` in non-React modules.
+
+## Typed client data with `chat.withClientData`
+
+`chat.withClientData({ schema })` returns a [ChatBuilder](#chatbuilder) that fixes the client data schema. All hooks and `run` receive typed `clientData` without needing `clientDataSchema` in `.agent()` options.
+
+```ts
+import { chat } from "@trigger.dev/sdk/ai";
+import { z } from "zod";
+
+export const myChat = chat
+  .withClientData({
+    schema: z.object({ userId: z.string(), model: z.string().optional() }),
+  })
+  .agent({
+    id: "my-chat",
+    onPreload: async ({ clientData }) => {
+      // clientData is typed as { userId: string; model?: string }
+      await initUser(clientData.userId);
+    },
+    run: async ({ messages, clientData, signal }) => {
+      return streamText({
+        model: getModel(clientData.model),
+        messages,
+        abortSignal: signal,
+        stopWhen: stepCountIs(15),
+      });
+    },
+  });
+```
+
+## ChatBuilder
+
+Both `chat.withUIMessage()` and `chat.withClientData()` return a **ChatBuilder** — a chainable object that accumulates configuration before creating the agent with `.agent()`.
+
+Builder methods can be chained in any order:
+
+```ts
+export const myChat = chat
+  .withUIMessage<MyChatUIMessage>({
+    streamOptions: { sendReasoning: true },
+  })
+  .withClientData({
+    schema: z.object({ userId: z.string() }),
+  })
+  .onChatSuspend(async ({ ctx }) => {
+    await disposeCodeSandbox(ctx.run.id);
+  })
+  .onChatResume(async ({ ctx }) => {
+    warmCache(ctx.run.id);
+  })
+  .agent({
+    id: "my-chat",
+    run: async ({ messages, signal }) => {
+      return streamText({ model: anthropic("claude-sonnet-4-5"), messages, abortSignal: signal });
+    },
+  });
+```
+
+### Builder-level hooks
+
+All [lifecycle hooks](/ai-chat/lifecycle-hooks) can be set on the builder: `onPreload`, `onChatStart`, `onTurnStart`, `onBeforeTurnComplete`, `onTurnComplete`, `onCompacted`, `onChatSuspend`, `onChatResume`.
+
+Builder hooks and task-level hooks **coexist**. When both are defined for the same event, the builder hook runs first, then the task hook:
+
+```ts
+chat
+  .withUIMessage<MyChatUIMessage>()
+  .onPreload(async (event) => {
+    // Runs first — shared setup across tasks using this builder
+    await initializeSharedState(event.chatId);
+  })
+  .agent({
+    id: "my-chat",
+    onPreload: async (event) => {
+      // Runs second — task-specific logic
+      await createChatRecord(event.chatId);
+    },
+    run: async ({ messages, signal }) => {
+      return streamText({ model: anthropic("claude-sonnet-4-5"), messages, abortSignal: signal });
+    },
+  });
+```
+
+<Tip>
+  Set types first (`.withUIMessage()`, `.withClientData()`), then hooks. Hook parameters are typed based on the builder's current generics — so hooks registered after `.withClientData()` get typed `clientData`.
+</Tip>
+
+### When plain `chat.agent()` is enough
+
+If you do not rely on custom `UIMessage` generics (only default text, reasoning, and built-in tool UI types), **`chat.agent()` alone is fine** — no need for `withUIMessage`.
+
+## See also
+
+- [Backend — `chat.agent()`](/ai-chat/backend#chat-agent)
+- [Lifecycle hooks](/ai-chat/lifecycle-hooks)
+- [Frontend — transport & `useChat`](/ai-chat/frontend)
+- [API reference — `chat.withUIMessage`](/ai-chat/reference#chat-withuimessage)
+- [API reference — `chat.withClientData`](/ai-chat/reference#chat-withclientdata)
+- [Task-backed AI tools — `ai.toolExecute`](/tasks/schemaTask#task-backed-ai-tools)
diff --git a/docs/ai-chat/upgrade-guide.mdx b/docs/ai-chat/upgrade-guide.mdx
new file mode 100644
index 00000000000..fce47c0aa4b
--- /dev/null
+++ b/docs/ai-chat/upgrade-guide.mdx
@@ -0,0 +1,515 @@
+---
+title: "Upgrade Guide: prerelease → Sessions-as-run-manager"
+sidebarTitle: "Sessions Upgrade Guide"
+description: "Migrating chat.agent code from the prerelease API to the Sessions-as-run-manager release."
+---
+
+import RcBanner from "/snippets/ai-chat-rc-banner.mdx";
+
+<RcBanner />
+
+This guide is for customers who tried `chat.agent` during the prerelease period.
+The public surface of `chat.agent({...})`, `useTriggerChatTransport`,
+`AgentChat`, `chat.defer`, and `chat.history` is largely
+unchanged — but the transport's auth callbacks and the server-side helpers
+that feed them were reshaped, so most prerelease apps need a small wiring
+update.
+
+## TL;DR
+
+<CodeGroup>
+
+```ts before.ts
+// Single accessToken callback, dispatches on purpose
+accessToken: async ({ chatId, purpose }) => {
+  if (purpose === "trigger") {
+    return chat.createAccessToken<typeof myChat>("my-chat");
+  }
+  // purpose === "preload" — same call, same trigger token
+  return chat.createAccessToken<typeof myChat>("my-chat");
+};
+```
+
+```ts after.ts
+// Two callbacks: pure refresh + server action that creates the session
+accessToken: ({ chatId }) => mintChatAccessToken(chatId),
+startSession: ({ chatId, clientData }) =>
+      startChatSession({ chatId, clientData }),
+```
+
+</CodeGroup>
+
+What changed:
+
+- `accessToken` is now a **pure session-PAT mint** — called only on 401/403
+  to refresh. It must return a token scoped to the session, not a
+  `trigger:tasks` JWT.
+- `startSession` is a **new callback** that wraps a server action calling
+  `chat.createStartSessionAction(taskId)`. The transport invokes it on
+  `transport.preload(chatId)` and lazily on the first `sendMessage` for
+  any chatId without a cached PAT.
+- `ChatSession` persistable state drops `runId` — store only
+  `{publicAccessToken, lastEventId?}`.
+- Per-call options on `transport.preload(chatId, ...)` are gone. Trigger
+  config (machine, idleTimeoutInSeconds, tags, queue, maxAttempts) lives
+  server-side in `chat.createStartSessionAction(taskId, options)`.
+
+<Note>
+  The architectural shift is that `chat.agent` no longer rolls its own
+  per-run streams. It runs on top of a durable **Session** row that owns
+  its current run, persists across run lifecycles, and orchestrates
+  upgrades server-side. The customer-facing surface is similar; the wire
+  path beneath it changed completely.
+</Note>
+
+## Step 1: Replace your access-token server action with two server actions
+
+The old pattern was a single helper that minted a trigger token:
+
+```ts app/actions.ts (before)
+"use server";
+
+import { chat } from "@trigger.dev/sdk/ai";
+import type { myChat } from "@/trigger/chat";
+
+export const getChatToken = () =>
+  chat.createAccessToken<typeof myChat>("my-chat");
+```
+
+Replace with two helpers — one for session creation, one for PAT refresh:
+
+```ts app/actions.ts (after)
+"use server";
+
+import { auth } from "@trigger.dev/sdk";
+import { chat } from "@trigger.dev/sdk/ai";
+
+// Server-side wrapper for session creation. Idempotent on (env, chatId).
+// The customer's server is the only entry point that creates Session rows;
+// the browser never holds a `trigger:tasks` JWT.
+export const startChatSession = chat.createStartSessionAction("my-chat");
+
+// Pure session-PAT mint for the transport's 401/403 retry path.
+export async function mintChatAccessToken(chatId: string) {
+  return auth.createPublicToken({
+    scopes: {
+      read: { sessions: chatId },
+      write: { sessions: chatId },
+    },
+    expirationTime: "1h",
+  });
+}
+```
+
+`chat.createStartSessionAction(taskId)` returns a server action that:
+
+1. Creates the Session row for `chatId` (idempotent on the
+   `(env, externalId)` unique pair).
+2. Triggers the agent task's first run with
+   `basePayload: {messages: [], trigger: "preload"}` defaults plus any
+   overrides you pass.
+3. Returns `{sessionId, runId, publicAccessToken}` to the browser.
+
+## Step 2: Update the transport wiring
+
+The transport now takes two callbacks instead of one:
+
+```tsx app/components/chat.tsx (after)
+"use client";
+
+import { useChat } from "@ai-sdk/react";
+import { useTriggerChatTransport } from "@trigger.dev/sdk/chat/react";
+import type { myChat } from "@/trigger/chat";
+import { mintChatAccessToken, startChatSession } from "@/app/actions";
+
+export function Chat() {
+  const transport = useTriggerChatTransport<typeof myChat>({
+    task: "my-chat",
+    accessToken: ({ chatId }) => mintChatAccessToken(chatId),
+    startSession: ({ chatId, clientData }) =>
+      startChatSession({ chatId, clientData }),
+  });
+
+  const { messages, sendMessage, status } = useChat({ transport });
+  // ...
+}
+```
+
+The transport calls them in two distinct flows:
+
+| Trigger | Callback fired |
+|---|---|
+| `transport.preload(chatId)` | `startSession` |
+| First `sendMessage` for a chatId with no cached PAT | `startSession` (auto) |
+| Any 401/403 from `.in/append`, `.out` SSE, or `end-and-continue` | `accessToken` |
+| Page hydrates with `sessions: { [chatId]: ... }` | Neither (uses hydrated PAT) |
+
+`startSession` is deduped via an in-flight promise — concurrent
+`preload` + `sendMessage` calls converge to one server action invocation.
+
+## Step 3: Drop transport-level trigger config
+
+The prerelease transport accepted `triggerConfig`, `triggerOptions`, and
+per-call options on `preload`. All of that moved server-side:
+
+```ts before
+const transport = useTriggerChatTransport({
+  task: "my-chat",
+  accessToken: getChatToken,
+  triggerConfig: { basePayload: { /* ... */ } },
+  triggerOptions: { tags: [...], machine: "small-1x", maxAttempts: 3 },
+});
+
+transport.preload(chatId, { idleTimeoutInSeconds: 60, metadata: { ... } });
+```
+
+```ts after
+// Trigger config now lives in chat.createStartSessionAction
+export const startChatSession = chat.createStartSessionAction("my-chat", {
+  triggerConfig: {
+    machine: "small-1x",
+    maxAttempts: 3,
+    tags: ["my-tag"],
+    idleTimeoutInSeconds: 60,
+  },
+});
+
+// Browser side
+const transport = useTriggerChatTransport<typeof myChat>({
+  task: "my-chat",
+  accessToken: ({ chatId }) => mintChatAccessToken(chatId),
+  startSession: ({ chatId, clientData }) =>
+      startChatSession({ chatId, clientData }),
+});
+
+transport.preload(chatId);  // no second arg
+```
+
+For metadata that varies per chat, use `clientData` on the transport (see
+the next step) — it's typed and threaded through `startSession` automatically.
+
+## Step 4: Use `clientData` for typed payload metadata
+
+If your agent uses `withClientData({schema})`, the transport's `clientData`
+option is now the canonical place to set it. The same value:
+
+- Is passed to your `startSession` callback as `params.clientData`, where
+  you forward it to the server action created by
+  `chat.createStartSessionAction` (as in the example below). The action
+  folds it into `triggerConfig.basePayload.metadata`, so the agent's first
+  run sees it in `payload.metadata` (visible to `onPreload` / `onChatStart`).
+- Merges into per-turn `metadata` on every `.in/append` chunk
+  (visible to `onTurnStart` / inside `run` via `turn.clientData`).
+
+```tsx
+const transport = useTriggerChatTransport<typeof myChat>({
+  task: "my-chat",
+  accessToken: ({ chatId }) => mintChatAccessToken(chatId),
+  startSession: ({ chatId, clientData }) =>
+      startChatSession({ chatId, clientData }),
+  clientData: {
+    userId: currentUser.id,
+    plan: currentUser.plan,
+  },
+});
+```
+
+The `clientData` value is live-updated when the option changes (the hook
+calls `setClientData` under the hood), so dynamic values work without
+reconstructing the transport.
+
+<Tip>
+  Server-side authorization can still override or augment the
+  browser-claimed `clientData` inside `startSession` — never trust the
+  browser's identity claim. A typical pattern: the server action looks up
+  the user from the request session, then merges the trusted server fields
+  on top of `params.clientData`.
+</Tip>
+
+## Step 5: Update your `ChatSession` persistence
+
+If you persist session state across page loads, drop the `runId` field:
+
+```ts before
+type ChatSession = {
+  runId: string;
+  publicAccessToken: string;
+  lastEventId?: string;
+};
+```
+
+```ts after
+type ChatSession = {
+  publicAccessToken: string;
+  lastEventId?: string;
+};
+```
+
+If your DB has a `runId` column, you can drop it (the transport doesn't
+read it) or keep it for telemetry. The current run ID lives on the
+Session row server-side now.
+
+Hydration on page reload is unchanged:
+
+```tsx
+const transport = useTriggerChatTransport<typeof myChat>({
+  // ...
+  sessions: persistedSession
+    ? { [chatId]: persistedSession }
+    : {},
+});
+```
+
+## `chat.requestUpgrade()`: same call, faster handoff
+
+Calling `chat.requestUpgrade()` inside `onTurnStart` /
+`onValidateMessages` still ends the current run so the next message starts
+on the latest version. What changed is the mechanism:
+
+- **Before:** the agent emitted a `trigger:upgrade-required` chunk on
+  `.out`; the transport consumed it browser-side and triggered a new run.
+- **After:** the agent calls `endAndContinueSession` server-to-server;
+  the webapp triggers a new run and atomically swaps `Session.currentRunId`
+  via optimistic locking. The browser's existing SSE subscription keeps
+  receiving chunks across the swap — no transport-side bookkeeping.
+
+The new run is recorded in a `SessionRun` audit row with
+`reason: "upgrade"` for dashboard provenance.
+
+## Hitting raw URLs
+
+If your code talks to the realtime API directly instead of going through
+the SDK, the URL shapes changed:
+
+| Before | After |
+|---|---|
+| `GET /realtime/v1/streams/{runId}/chat` | `GET /realtime/v1/sessions/{chatId}/out` |
+| `POST /realtime/v1/streams/{runId}/{target}/chat-messages/append` | `POST /realtime/v1/sessions/{chatId}/in/append` (body: `{kind: "message", payload}`) |
+| `POST /realtime/v1/streams/{runId}/{target}/chat-stop/append` | `POST /realtime/v1/sessions/{chatId}/in/append` (body: `{kind: "stop"}`) |
+
+The session-scoped PAT
+(`read:sessions:{chatId}` + `write:sessions:{chatId}`) authorizes both the
+externalId form (`/sessions/my-chat-id/...`) and the friendlyId form
+(`/sessions/session_abc.../...`). The transport always uses the
+externalId form; the friendlyId form is available for dashboard tooling
+and direct API consumers.
+
+## What didn't change
+
+- `chat.agent({...})` definition — `id`, `idleTimeoutInSeconds`,
+  `clientDataSchema`, `actionSchema`, `hydrateMessages`, `onPreload`,
+  `onChatStart`, `onValidateMessages`, `onTurnStart`, `onTurnComplete`,
+  `onChatSuspend`, `run`. All callbacks have the same signature and
+  fire at the same lifecycle points in the Sessions release (v4.5 later
+  narrows when `onChatStart` fires — see "`onChatStart` is now
+  once-per-chat" below).
+- `onAction` is still defined the same way, but its semantics changed
+  in the [May 6 prerelease](/ai-chat/changelog) — actions are no longer
+  turns, and `onAction` returning a `StreamTextResult` produces a model
+  response.
+- `chat.customAgent({...})` and the `chat.createSession(payload, ...)`
+  helper for building a session loop manually inside a custom agent.
+- `chat.defer` (deferred work) and `chat.history` (imperative history
+  mutations from inside `onAction`).
+- `AgentChat` (server-side chat client) — `agent`, `id`, `clientData`,
+  `session`, `onTriggered`, `onTurnComplete`, `sendMessage`, `text()`.
+- `useTriggerChatTransport` React semantics (created once, kept in a
+  ref, callbacks updated under the hood).
+- Multi-tab coordination (`multiTab: true`),
+  [pending messages / steering](/ai-chat/pending-messages),
+  [background injection](/ai-chat/background-injection),
+  [compaction](/ai-chat/compaction).
+- Per-turn `metadata` flowing through
+  `sendMessage({ text }, { metadata })` to `turn.metadata` server-side.
+
+## Verifying the migration
+
+After updating, the smoke check is the same as before: send a message,
+confirm the assistant streams a response, reload mid-stream, confirm
+resume.
+
+A few new things worth verifying once you've cut over:
+
+- **Eager preload.** Trigger a preload from your UI (or call
+  `transport.preload(id)` programmatically) — your `startSession` callback
+  should fire, and a Session row and first run should be created before
+  you send a message.
+- **Idle-timeout continuation.** Wait past the agent's
+  `idleTimeoutInSeconds` so the run exits, then send another message —
+  the transport's `.in/append` should boot a new run on the same
+  Session, with a `SessionRun` row of `reason: "continuation"`.
+- **PAT refresh.** Force a stale PAT in your DB (corrupt the signature)
+  and reload — the first request should 401, your `accessToken`
+  callback should fire, and the retry should succeed.
+
+If any of those misfire, check that:
+
+- Your `accessToken` callback returns a token minted via
+  `auth.createPublicToken({ scopes: { read: { sessions: chatId }, write: { sessions: chatId } } })`, **not**
+  `chat.createAccessToken` or `auth.createTriggerPublicToken`. The
+  transport rejects trigger tokens now.
+- Your `startSession` callback returns
+  `{publicAccessToken: string}` — the result of
+  `chat.createStartSessionAction(taskId)({chatId, ...})` already has
+  this shape.
+- You haven't left a stale `getStartToken` option on the transport;
+  it's not part of `TriggerChatTransportOptions` anymore.
+
+## v4.5 wire format change
+
+A second migration lands on top of the Sessions release. v4.5 removes the full-history wire payload — clients now ship at most one new `UIMessage` per `.in/append`, and the agent rebuilds prior history from a durable JSON snapshot in object storage plus a replay of the `session.out` tail.
+
+If you use the built-in `TriggerChatTransport` / `AgentChat` and don't reach into the wire shape directly, **most apps need no changes** — the change is below the customer-facing surface. Customers who built custom transports, hit `/realtime/v1/sessions/{id}/in/append` directly, or rely on specific behaviors of `hydrateMessages` / `onChatStart` should read this section.
+
+### Why the change
+
+Long chats with heavy tool results were hitting the realtime API's 512 KiB body cap on `/in/append` once the accumulated `UIMessage[]` history (which the wire shipped in full on every send) crossed the limit. The 413 surfaced as a CORS error in browsers and stalled chats around turn 10–30 with tool use.
+
+The wire is now **delta-only**: each `.in/append` carries at most one new `UIMessage`. The agent rebuilds prior history at run boot. The 512 KiB ceiling is no longer a practical constraint — typical payloads are a few KB regardless of chat length.
+
+### Object-store configuration
+
+Snapshot read/write uses Trigger.dev's existing object-store infrastructure — the same presigned-URL routes used for large payloads. Set the standard `OBJECT_STORE_*` env vars on your webapp deployment if you haven't already; MinIO and S3-compatible stores work via `OBJECT_STORE_DEFAULT_PROTOCOL`.
+
+| Env var | Purpose |
+|---|---|
+| `OBJECT_STORE_BASE_URL` | Endpoint URL (S3, MinIO, R2, etc.) |
+| `OBJECT_STORE_ACCESS_KEY_ID` | Access key |
+| `OBJECT_STORE_SECRET_ACCESS_KEY` | Secret key |
+| `OBJECT_STORE_DEFAULT_PROTOCOL` | `s3` (default), `minio`, etc. |
+
+Snapshots are written under `packets/{projectRef}/{envSlug}/sessions/{sessionId}/snapshot.json`. Each snapshot is small (typically tens of KB) and overwritten every turn — no append-only growth.
+
+<Warning>
+  **No object store + no `hydrateMessages` = conversations don't survive run boundaries.** With neither piece of state, a continuation boots empty and the agent can't reconstruct prior turns. Either configure an object store or register `hydrateMessages`. The runtime logs a warning at agent registration time when both are missing.
+</Warning>
+
+### Custom transports
+
+If you've built your own transport (Slack bot, CLI, native app) against the [Client Protocol](/ai-chat/client-protocol), the `ChatTaskWirePayload` shape changed:
+
+```ts before
+type ChatTaskWirePayload = {
+  messages: UIMessage[];        // full history
+  chatId: string;
+  trigger: "submit-message" | "regenerate-message" | "preload" | "close" | "action";
+  // ...
+};
+```
+
+```ts after
+type ChatTaskWirePayload = {
+  message?: UIMessage;          // singular, optional
+  headStartMessages?: UIMessage[];  // chat.headStart only, "handover-prepare"
+  chatId: string;
+  trigger:
+    | "submit-message"
+    | "regenerate-message"
+    | "preload"
+    | "close"
+    | "action"
+    | "handover-prepare";
+  // ...
+};
+```
+
+What to send per trigger:
+
+| Trigger | What to put in the payload |
+|---|---|
+| `submit-message` | The new user message (or a tool-approval-responded assistant message) in `message` |
+| `regenerate-message` | No `message` — the agent trims its own tail |
+| `preload` / `close` / `action` | No `message` |
+| `handover-prepare` (head-start only) | Full prior history in `headStartMessages` (route handler — not on `/in/append`) |
+
+The full wire breakdown is in the rewritten [Client Protocol](/ai-chat/client-protocol).
+
+### `hydrateMessages` consumers
+
+The hook signature is unchanged. Two behavior changes are worth knowing:
+
+1. **`incomingMessages` is now consistently 0-or-1-length.** Previously some triggers (`regenerate-message`, continuation) shipped full history; now all triggers ship at most one. If you assumed `incomingMessages` could contain multiple messages and acted on them as a batch, the loop now runs zero or one times. Patterns like the one below work the same — they just iterate fewer messages:
+
+```ts
+hydrateMessages: async ({ chatId, incomingMessages }) => {
+  for (const msg of incomingMessages) {  // 0-or-1 iterations
+    for (const r of chat.history.extractNewToolResults(msg)) {
+      await auditLog.record({ id: r.toolCallId, output: r.output });
+    }
+  }
+  return await db.getMessages(chatId);
+}
+```
+
+2. **Registering `hydrateMessages` short-circuits snapshot+replay.** The runtime trusts your hook to be the source of truth, so it doesn't read or write the JSON snapshot or replay `session.out`. Zero object-store traffic. Trade-off: you own persistence end-to-end.
+
+### `onChatStart` is now once-per-chat
+
+`onChatStart` no longer fires on continuation runs (post-`endRun`, post-waitpoint-timeout, post-`chat.requestUpgrade`, post-cancel, post-crash) or on OOM-retry attempts. It fires **exactly once per chat**, on the very first user message of the chat's lifetime. The `continuation` and `previousRunId` fields on `ChatStartEvent` are now `@deprecated` (always `false` / `undefined` when the hook fires).
+
+This makes once-per-chat setup code (create the Chat DB row, mint chat-scoped resources) safe to write without continuation gates. Drop any `if (continuation) return;` checks from `onChatStart`:
+
+```ts before
+onChatStart: async ({ continuation, chatId, clientData }) => {
+  if (continuation) return;           // ❌ no longer needed — fires only on first message ever
+  await db.chat.create({ /* ... */ });
+}
+```
+
+```ts after
+onChatStart: async ({ chatId, clientData }) => {
+  await db.chat.create({ /* ... */ });  // ✅ guaranteed first-message-of-chat
+}
+```
+
+If you need per-turn setup that **does** run on continuations, move it to [`onTurnStart`](/ai-chat/lifecycle-hooks#onturnstart) — that hook still fires on every turn, including the first turn of a continuation run.
+
+### Move `chat.local` init from `onChatStart` to `onBoot`
+
+Because `onChatStart` no longer fires on continuation runs, **`chat.local`** state initialized there will be missing when a continuation run starts — `run()` then crashes with `"chat.local can only be modified after initialization"`. The fix is to move per-process initialization to the new [`onBoot`](/ai-chat/lifecycle-hooks#onboot) hook, which fires once per worker boot (initial, preloaded, AND continuation):
+
+```ts before
+const userContext = chat.local<{ name: string; plan: string }>({ id: "userContext" });
+
+onChatStart: async ({ clientData }) => {
+  const user = await db.user.findUnique({ where: { id: clientData.userId } });
+  userContext.init({ name: user.name, plan: user.plan }); // ❌ never runs on continuation
+}
+```
+
+```ts after
+const userContext = chat.local<{ name: string; plan: string }>({ id: "userContext" });
+
+onBoot: async ({ clientData }) => {
+  const user = await db.user.findUnique({ where: { id: clientData.userId } });
+  userContext.init({ name: user.name, plan: user.plan }); // ✅ runs on every fresh worker
+}
+```
+
+Anything else that's per-process (DB connection pools, sandbox handles, in-memory caches) belongs in `onBoot` for the same reason. Branch on `continuation` inside `onBoot` if you need to re-load state from your DB on takeover.
+
+### Client-side `setMessages` doesn't round-trip
+
+The new wire makes one thing explicit that was implicit before: **mutating `useChat()`'s messages on the client doesn't change the agent's history.** Full-history mutations were silently overwritten by the wire's accumulator before this release; now they aren't even shipped.
+
+For history compaction, summarization, or branch-swap, mutate the agent's accumulator inside `onTurnStart` using [`chat.setMessages()`](/ai-chat/backend) or [`chat.history.set()`](/ai-chat/backend#chat-history). The client's `useChat` will reconcile against the next `session.out` payload.
+
+### Verifying the v4.5 migration
+
+After updating, the smoke check is the same as for v4.4:
+
+- Send a message, confirm the assistant streams a response.
+- Reload mid-stream, confirm resume.
+- Send 30+ turns with tool calls and confirm `.in/append` body sizes stay under ~5 KB the entire time. (Pre-change baseline: payloads grew past 512 KiB around turn 10–30.)
+- Idle out a run, send another message — the new run reads the snapshot, replays the tail, and continues seamlessly.
+
+If continuations boot empty:
+
+- Confirm `OBJECT_STORE_*` env vars are set on the webapp.
+- Confirm the bucket key `packets/{projectRef}/{envSlug}/sessions/{sessionId}/snapshot.json` exists after a successful turn.
+- Or — register `hydrateMessages` and let your DB be the source of truth.
+
+## Reference
+
+- [TriggerChatTransport options](/ai-chat/reference#triggerchattransport-options)
+- [`chat.createStartSessionAction`](/ai-chat/reference)
+- [Backend setup](/ai-chat/backend)
+- [Frontend setup](/ai-chat/frontend)
+- [Client Protocol](/ai-chat/client-protocol) — wire format reference
+- [Persistence and replay](/ai-chat/patterns/persistence-and-replay) — snapshot model end-to-end
diff --git a/docs/ai/prompts.mdx b/docs/ai/prompts.mdx
new file mode 100644
index 00000000000..556eb074942
--- /dev/null
+++ b/docs/ai/prompts.mdx
@@ -0,0 +1,430 @@
+---
+title: "Prompts"
+sidebarTitle: "Prompts"
+description: "Define prompt templates as code, version them on deploy, and override from the dashboard without redeploying."
+---
+
+import RcBanner from "/snippets/ai-chat-rc-banner.mdx";
+
+<RcBanner />
+
+## Overview
+
+AI Prompts let you define prompt templates in your codebase alongside your tasks. When you deploy, Trigger.dev automatically versions your prompts. You can then:
+
+- View all prompt versions in the dashboard
+- Create **overrides** to change the prompt text or model without redeploying
+- Track every generation that used each prompt version
+- See token usage, cost, and latency metrics per prompt
+- Manage prompts programmatically via SDK methods
+
+## Defining a prompt
+
+Use `prompts.define()` to create a prompt with typed variables:
+
+```ts
+import { prompts } from "@trigger.dev/sdk";
+import { z } from "zod";
+
+export const supportPrompt = prompts.define({
+  id: "customer-support",
+  description: "System prompt for customer support interactions",
+  model: "gpt-4o",
+  config: { temperature: 0.7 },
+  variables: z.object({
+    customerName: z.string(),
+    plan: z.string(),
+    issue: z.string(),
+  }),
+  content: `You are a support agent for Acme SaaS.
+
+## Customer context
+
+- **Name:** {{customerName}}
+- **Plan:** {{plan}}
+- **Issue:** {{issue}}
+
+Respond to the customer's issue. Be concise and helpful.`,
+});
+```
+
+### Options
+
+| Option | Type | Required | Description |
+|--------|------|----------|-------------|
+| `id` | `string` | Yes | Unique identifier (becomes the prompt slug) |
+| `description` | `string` | No | Shown in the dashboard |
+| `model` | `string` | No | Default model (e.g. `"gpt-4o"`, `"claude-sonnet-4-6"`) |
+| `config` | `object` | No | Default config (temperature, maxTokens, etc.) |
+| `variables` | Zod/ArkType schema | No | Schema for template variables (enables validation and dashboard UI) |
+| `content` | `string` | Yes | The prompt template with `{{variable}}` placeholders |
+
+### Template syntax
+
+Templates use Mustache-style placeholders:
+
+- `{{variableName}}` — replaced with the variable value
+- `{{#conditionalVar}}...{{/conditionalVar}}` — content only included if the variable is truthy
+
+```ts
+export const prompt = prompts.define({
+  id: "summarizer",
+  model: "gpt-4o-mini",
+  variables: z.object({
+    text: z.string(),
+    maxSentences: z.string().optional(),
+  }),
+  content: `Summarize the following text{{#maxSentences}} in {{maxSentences}} sentences or fewer{{/maxSentences}}:
+
+{{text}}`,
+});
+```
+
+## Resolving a prompt
+
+### Via prompt handle
+
+Call `.resolve()` on the handle returned by `define()`:
+
+```ts
+const resolved = await supportPrompt.resolve({
+  customerName: "Alice",
+  plan: "Pro",
+  issue: "Cannot access billing dashboard",
+});
+
+console.log(resolved.text);    // The compiled prompt with variables filled in
+console.log(resolved.version); // e.g. 3
+console.log(resolved.model);   // "gpt-4o"
+console.log(resolved.labels);  // ["current"] or ["override"]
+```
+
+### Via standalone prompts.resolve()
+
+Resolve any prompt by slug without needing the handle at runtime. Pass the handle's type (`typeof supportPrompt`) as a type parameter for full type safety:
+
+```ts
+import { prompts } from "@trigger.dev/sdk";
+import type { supportPrompt } from "./prompts";
+
+// Fully typesafe — ID and variables are checked at compile time
+const resolved = await prompts.resolve<typeof supportPrompt>("customer-support", {
+  customerName: "Alice",
+  plan: "Pro",
+  issue: "Cannot access billing dashboard",
+});
+```
+
+Without the generic, the function still works but accepts any string slug and `Record<string, unknown>` variables.
+
+### Resolve options
+
+You can resolve a specific version or label:
+
+```ts
+// Resolve a specific version
+const v2 = await supportPrompt.resolve(variables, { version: 2 });
+
+// Resolve by label
+const current = await supportPrompt.resolve(variables, { label: "current" });
+```
+
+By default, `resolve()` returns the **override** version if one is active, otherwise the **current** (latest deployed) version.
+
+<Note>
+  Both `promptHandle.resolve()` and `prompts.resolve()` call the Trigger.dev API when a client is configured. During local dev with `trigger dev`, this means you'll always get the server version (including overrides).
+</Note>
+
+## Using with the AI SDK
+
+The resolved prompt integrates with the [Vercel AI SDK](https://ai-sdk.dev) via `toAISDKTelemetry()`. This links AI generation spans to the prompt in the dashboard.
+
+### generateText
+
+```ts
+import { task } from "@trigger.dev/sdk";
+import { generateText, stepCountIs } from "ai";
+import { openai } from "@ai-sdk/openai";
+
+export const supportTask = task({
+  id: "handle-support",
+  run: async (payload) => {
+    const resolved = await supportPrompt.resolve({
+      customerName: payload.name,
+      plan: payload.plan,
+      issue: payload.issue,
+    });
+
+    const result = await generateText({
+      model: openai(resolved.model ?? "gpt-4o"),
+      system: resolved.text,
+      prompt: payload.issue,
+      ...resolved.toAISDKTelemetry(),
+    });
+
+    return { response: result.text };
+  },
+});
+```
+
+### streamText
+
+```ts
+import { streamText, stepCountIs } from "ai";
+
+export const streamTask = task({
+  id: "stream-support",
+  run: async (payload) => {
+    const resolved = await supportPrompt.resolve({
+      customerName: payload.name,
+      plan: payload.plan,
+      issue: payload.issue,
+    });
+
+    const result = streamText({
+      model: openai(resolved.model ?? "gpt-4o"),
+      system: resolved.text,
+      prompt: payload.issue,
+      ...resolved.toAISDKTelemetry(),
+      stopWhen: stepCountIs(15),
+    });
+
+    let fullText = "";
+    for await (const chunk of result.textStream) {
+      fullText += chunk;
+    }
+
+    return { response: fullText };
+  },
+});
+```
+
+### Custom telemetry metadata
+
+Pass additional metadata to `toAISDKTelemetry()` that will appear on the generation span:
+
+```ts
+const result = await generateText({
+  model: anthropic("claude-sonnet-4-5"),
+  prompt: resolved.text,
+  ...resolved.toAISDKTelemetry({
+    "task.type": "summarization",
+    "customer.tier": "enterprise",
+  }),
+});
+```
+
+## Using with chat.agent()
+
+Prompts integrate with `chat.agent()` via `chat.prompt` — a run-scoped store for the resolved prompt. Store a prompt once in a lifecycle hook, then access it anywhere during the run.
+
+### chat.prompt.set() and chat.prompt()
+
+```ts
+import { chat } from "@trigger.dev/sdk/ai";
+import { prompts } from "@trigger.dev/sdk";
+import { streamText, createProviderRegistry, stepCountIs } from "ai";
+import { anthropic } from "@ai-sdk/anthropic";
+
+const registry = createProviderRegistry({ anthropic });
+
+const systemPrompt = prompts.define({
+  id: "my-chat-system",
+  model: "anthropic:claude-sonnet-4-5",
+  config: { temperature: 0.7 },
+  variables: z.object({ name: z.string() }),
+  content: `You are a helpful assistant for {{name}}.`,
+});
+
+export const myChat = chat.agent({
+  id: "my-chat",
+  onChatStart: async ({ clientData }) => {
+    const resolved = await systemPrompt.resolve({ name: clientData.name });
+    chat.prompt.set(resolved);
+  },
+  run: async ({ messages, signal }) => {
+    return streamText({
+      ...chat.toStreamTextOptions({ registry }),
+      messages,
+      abortSignal: signal,
+      stopWhen: stepCountIs(15),
+    });
+  },
+});
+```
+
+### chat.toStreamTextOptions()
+
+Returns an options object ready to spread into `streamText()`. When a prompt is stored via `chat.prompt.set()`, it includes:
+
+- `system` — the compiled prompt text
+- `model` — resolved via the `registry` when provided
+- `temperature`, `maxTokens`, etc. — from the prompt's `config`
+- `experimental_telemetry` — links generations to the prompt in the dashboard
+
+```ts
+// With registry — model is resolved automatically
+const options = chat.toStreamTextOptions({ registry });
+// { system: "...", model: LanguageModel, temperature: 0.7, experimental_telemetry: { ... } }
+
+// Without registry — model is not included
+const options = chat.toStreamTextOptions();
+// { system: "...", temperature: 0.7, experimental_telemetry: { ... } }
+```
+
+<Tip>
+  When you provide a `registry` and the prompt has a `model` string (e.g. `"anthropic:claude-sonnet-4-5"`), the model is resolved via `registry.languageModel()` and included in the returned options. This means `streamText` uses the prompt's model by default — no manual model selection needed.
+</Tip>
+
+### Reading the prompt
+
+Access the stored prompt from anywhere in the run:
+
+```ts
+run: async ({ messages, signal }) => {
+  const prompt = chat.prompt(); // Throws if not set
+  console.log(prompt.text);     // The compiled prompt
+  console.log(prompt.model);    // "anthropic:claude-sonnet-4-5"
+  console.log(prompt.version);  // 3
+
+  return streamText({
+    ...chat.toStreamTextOptions({ registry }),
+    messages,
+    abortSignal: signal,
+    stopWhen: stepCountIs(15),
+  });
+},
+```
+
+You can also set a plain string if you don't need the full prompt system:
+
+```ts
+chat.prompt.set("You are a helpful assistant.");
+```
+
+## Prompt management SDK
+
+The `prompts` namespace includes methods for managing prompts programmatically. These work both inside tasks and outside (e.g. scripts, API handlers) as long as an API client is configured.
+
+### List prompts
+
+```ts
+const allPrompts = await prompts.list();
+```
+
+### List versions
+
+```ts
+const versions = await prompts.versions("customer-support");
+```
+
+### Create an override
+
+Create a new override that takes priority over the deployed version:
+
+```ts
+const result = await prompts.createOverride("customer-support", {
+  textContent: "New prompt template: Hello {{customerName}}!",
+  model: "gpt-4o-mini",
+  commitMessage: "Shorter prompt",
+});
+```
+
+### Update an override
+
+```ts
+await prompts.updateOverride("customer-support", {
+  textContent: "Updated template: Hi {{customerName}}!",
+  model: "gpt-4o",
+});
+```
+
+### Remove an override
+
+Remove the active override, reverting to the deployed version:
+
+```ts
+await prompts.removeOverride("customer-support");
+```
+
+### Promote a version
+
+```ts
+await prompts.promote("customer-support", 2);
+```
+
+### All management methods
+
+| Method | Description |
+|--------|-------------|
+| `prompts.list()` | List all prompts in the current environment |
+| `prompts.versions(slug)` | List all versions for a prompt |
+| `prompts.resolve(slug, variables?, options?)` | Resolve a prompt by slug |
+| `prompts.promote(slug, version)` | Promote a version to current |
+| `prompts.createOverride(slug, body)` | Create an override |
+| `prompts.updateOverride(slug, body)` | Update the active override |
+| `prompts.removeOverride(slug)` | Remove the active override |
+| `prompts.reactivateOverride(slug, version)` | Reactivate a removed override |
+
+## Overrides
+
+Overrides let you change a prompt's template or model from the dashboard or SDK without redeploying your code. When an override is active, `resolve()` returns the override version instead of the deployed version.
+
+### How overrides work
+
+- Overrides take priority over the deployed ("current") version
+- Only one override can be active at a time
+- Creating a new override replaces the previous one
+- Removing an override reverts to the deployed version
+- Overrides are environment-scoped (dev, staging, production are independent)
+
+### Creating an override (dashboard)
+
+1. Go to the prompt detail page
+2. Click **Create Override**
+3. Edit the template text and/or model
+4. Add an optional commit message
+5. Click **Create override**
+
+### Version resolution order
+
+When `resolve()` is called, versions are resolved in this order:
+
+1. **Specific version** — if `{ version: N }` is passed
+2. **Override** — if an override is active in this environment
+3. **Label** — if `{ label: "..." }` is passed (defaults to `"current"`)
+4. **Current** — the latest deployed version with the "current" label
+
+## Dashboard
+
+### Prompts list
+
+The prompts list page shows all prompts in the current environment with the current or override version, default model, and a usage sparkline.
+
+### Prompt detail
+
+Click a prompt to see:
+
+- **Template panel** — the prompt template for the selected version
+- **Details tab** — slug, description, model, config, source file, and variable schema
+- **Versions tab** — all versions with labels, source, and commit messages
+- **Generations tab** — every AI generation that used this prompt, with live polling
+- **Metrics tab** — token usage, cost, and latency charts
+
+### AI span inspectors
+
+When you use `toAISDKTelemetry()`, AI generation spans in the run trace get a custom inspector showing:
+
+- **Overview** — model, provider, token usage, cost, input/output preview
+- **Messages** — the full message thread
+- **Tools** — tool definitions and tool call details
+- **Prompt** — the linked prompt's metadata, input variables, and template content
+
+## Type utilities
+
+```ts
+import type { PromptHandle, PromptIdentifier, PromptVariables } from "@trigger.dev/sdk";
+
+type Id = PromptIdentifier<typeof supportPrompt>;   // "customer-support"
+type Vars = PromptVariables<typeof supportPrompt>;   // { customerName: string; plan: string; issue: string }
+```
diff --git a/docs/docs.json b/docs/docs.json
index 7263daee638..ee670bca2da 100644
--- a/docs/docs.json
+++ b/docs/docs.json
@@ -10,7 +10,11 @@
   },
   "favicon": "/images/favicon.png",
   "contextual": {
-    "options": ["copy", "view", "claude"]
+    "options": [
+      "copy",
+      "view",
+      "claude"
+    ]
   },
   "navigation": {
     "dropdowns": [
@@ -36,7 +40,11 @@
             "pages": [
               {
                 "group": "Tasks",
-                "pages": ["tasks/overview", "tasks/schemaTask", "tasks/scheduled"]
+                "pages": [
+                  "tasks/overview",
+                  "tasks/schemaTask",
+                  "tasks/scheduled"
+                ]
               },
               "triggering",
               "runs",
@@ -49,7 +57,10 @@
               "building-with-ai",
               {
                 "group": "MCP Server",
-                "pages": ["mcp-introduction", "mcp-tools"]
+                "pages": [
+                  "mcp-introduction",
+                  "mcp-tools"
+                ]
               },
               "skills",
               "mcp-agent-rules"
@@ -63,7 +74,12 @@
               "errors-retrying",
               {
                 "group": "Wait",
-                "pages": ["wait", "wait-for", "wait-until", "wait-for-token"]
+                "pages": [
+                  "wait",
+                  "wait-for",
+                  "wait-until",
+                  "wait-for-token"
+                ]
               },
               "queue-concurrency",
               "versioning",
@@ -80,6 +96,67 @@
               "hidden-tasks"
             ]
           },
+          {
+            "group": "Agents",
+            "pages": [
+              "ai-chat/overview",
+              "ai-chat/quick-start",
+              {
+                "group": "Building agents",
+                "pages": [
+                  "ai-chat/how-it-works",
+                  "ai-chat/backend",
+                  "ai-chat/lifecycle-hooks",
+                  "ai-chat/frontend",
+                  "ai-chat/server-chat",
+                  "ai-chat/sessions",
+                  "ai-chat/chat-local",
+                  "ai-chat/types"
+                ]
+              },
+              {
+                "group": "Features",
+                "pages": [
+                  "ai/prompts",
+                  "ai-chat/fast-starts",
+                  "ai-chat/compaction",
+                  "ai-chat/pending-messages",
+                  "ai-chat/background-injection",
+                  "ai-chat/actions",
+                  "ai-chat/error-handling"
+                ]
+              },
+              {
+                "group": "Patterns",
+                "pages": [
+                  "ai-chat/patterns/sub-agents",
+                  "ai-chat/patterns/version-upgrades",
+                  "ai-chat/patterns/database-persistence",
+                  "ai-chat/patterns/persistence-and-replay",
+                  "ai-chat/patterns/branching-conversations",
+                  "ai-chat/patterns/code-sandbox",
+                  "ai-chat/patterns/human-in-the-loop",
+                  "ai-chat/patterns/tool-result-auditing",
+                  "ai-chat/patterns/large-payloads",
+                  "ai-chat/patterns/skills",
+                  "ai-chat/patterns/oom-resilience",
+                  "ai-chat/patterns/recovery-boot",
+                  "ai-chat/patterns/trusted-edge-signals"
+                ]
+              },
+              {
+                "group": "Reference",
+                "pages": [
+                  "ai-chat/reference",
+                  "ai-chat/client-protocol",
+                  "ai-chat/testing",
+                  "ai-chat/mcp",
+                  "ai-chat/upgrade-guide",
+                  "ai-chat/changelog"
+                ]
+              }
+            ]
+          },
           {
             "group": "Configuration",
             "pages": [
@@ -113,7 +190,9 @@
           },
           {
             "group": "Development",
-            "pages": ["cli-dev"]
+            "pages": [
+              "cli-dev"
+            ]
           },
           {
             "group": "Deployment",
@@ -125,7 +204,10 @@
               "deployment/atomic-deployment",
               {
                 "group": "Deployment integrations",
-                "pages": ["github-integration", "vercel-integration"]
+                "pages": [
+                  "github-integration",
+                  "vercel-integration"
+                ]
               }
             ]
           },
@@ -189,11 +271,19 @@
           },
           {
             "group": "Observability",
-            "pages": ["observability/query", "observability/dashboards"]
+            "pages": [
+              "observability/query",
+              "observability/dashboards"
+            ]
           },
           {
             "group": "Using the Dashboard",
-            "pages": ["run-tests", "troubleshooting-alerts", "replaying", "bulk-actions"]
+            "pages": [
+              "run-tests",
+              "troubleshooting-alerts",
+              "replaying",
+              "bulk-actions"
+            ]
           },
           {
             "group": "Troubleshooting",
@@ -215,18 +305,30 @@
               "self-hosting/kubernetes",
               {
                 "group": "Environment variables",
-                "pages": ["self-hosting/env/webapp", "self-hosting/env/supervisor"]
+                "pages": [
+                  "self-hosting/env/webapp",
+                  "self-hosting/env/supervisor"
+                ]
               },
               "open-source-self-hosting"
             ]
           },
           {
             "group": "Open source",
-            "pages": ["open-source-contributing", "github-repo", "changelog", "roadmap"]
+            "pages": [
+              "open-source-contributing",
+              "github-repo",
+              "changelog",
+              "roadmap"
+            ]
           },
           {
             "group": "Help",
-            "pages": ["community", "help-slack", "help-email"]
+            "pages": [
+              "community",
+              "help-slack",
+              "help-email"
+            ]
           }
         ]
       },
@@ -332,7 +434,11 @@
           },
           {
             "group": "Query API",
-            "pages": ["management/query/execute", "management/query/schema", "management/query/dashboards"]
+            "pages": [
+              "management/query/execute",
+              "management/query/schema",
+              "management/query/dashboards"
+            ]
           }
         ]
       },
@@ -343,7 +449,9 @@
         "groups": [
           {
             "group": "Introduction",
-            "pages": ["guides/introduction"]
+            "pages": [
+              "guides/introduction"
+            ]
           },
           {
             "group": "Frameworks",
@@ -366,7 +474,6 @@
                 },
                 "pages": [
                   "guides/ai-agents/overview",
-
                   "guides/ai-agents/generate-translate-copy",
                   "guides/ai-agents/route-question",
                   "guides/ai-agents/respond-and-check-content",
@@ -410,7 +517,10 @@
           },
           {
             "group": "Migration guides",
-            "pages": ["migration-mergent", "migration-n8n"]
+            "pages": [
+              "migration-mergent",
+              "migration-n8n"
+            ]
           },
           {
             "group": "Use cases",
@@ -501,7 +611,10 @@
     "href": "https://trigger.dev"
   },
   "api": {
-    "openapi": ["openapi.yml", "v3-openapi.yaml"],
+    "openapi": [
+      "openapi.yml",
+      "v3-openapi.yaml"
+    ],
     "playground": {
       "display": "simple"
     }
@@ -743,6 +856,10 @@
     {
       "source": "/insights/metrics",
       "destination": "/observability/dashboards"
+    },
+    {
+      "source": "/guides/ai-chat",
+      "destination": "/ai-chat/overview"
     }
   ]
 }
diff --git a/docs/mcp-tools.mdx b/docs/mcp-tools.mdx
index 037d7e887ef..9a920274fba 100644
--- a/docs/mcp-tools.mdx
+++ b/docs/mcp-tools.mdx
@@ -218,3 +218,56 @@ Check the status of the dev server and view recent output. Shows whether it is s
 <Callout type="warning">
   The deploy and list_preview_branches tools are not available when the MCP server is running with the `--dev-only` flag. The `--readonly` flag hides deploy, trigger_task, and cancel_run.
 </Callout>
+
+## Agent Chat Tools
+
+These tools let you have conversations with [chat agents](/ai-chat/overview) directly from your AI coding tool. See the [Agent MCP guide](/ai-chat/mcp) for a walkthrough.
+
+### list_agents
+
+List all chat agents registered in the current worker. Agents are tasks created with `chat.agent()` or `chat.customAgent()`.
+
+**Example usage:**
+- `"What agents are available?"`
+- `"List my chat agents"`
+
+### start_agent_chat
+
+Start a conversation with a chat agent. Returns a chat ID for use with `send_agent_message`. Optionally preloads the agent so it initializes before the first message.
+
+**Parameters:**
+- `agentId` (required) — The agent task slug (e.g., `"support-agent"`)
+- `chatId` (optional) — A custom conversation ID. Auto-generated if omitted
+- `clientData` (optional) — Client data to include with every message (e.g., `{ userId: "user_123" }`). Must match the agent's `clientDataSchema` if one is defined
+- `preload` (optional, default: `true`) — Whether to preload the agent before the first message
+
+**Example usage:**
+- `"Start a chat with the support agent"`
+- `"Talk to the pr-review agent with userId abc"`
+
+### send_agent_message
+
+Send a message to an active agent chat and get the full response back. The agent remembers full context from previous messages in the same chat.
+
+**Parameters:**
+- `chatId` (required) — The chat ID from `start_agent_chat`
+- `message` (required) — The message text to send
+
+**Example usage:**
+- `"Tell the agent to review the latest PR"`
+- `"Ask it what tools it has available"`
+
+### close_agent_chat
+
+Close an agent chat conversation. The agent exits its loop gracefully. Without this, the agent will close on its own when its idle timeout expires.
+
+**Parameters:**
+- `chatId` (required) — The chat ID to close
+
+**Example usage:**
+- `"Close the chat"`
+- `"End the conversation"`
+
+<Callout type="warning">
+  The `start_agent_chat`, `send_agent_message`, and `close_agent_chat` tools are write operations and are not available in readonly mode.
+</Callout>
diff --git a/docs/migrating-from-v3.mdx b/docs/migrating-from-v3.mdx
index 5530d66b62d..c820b25a1de 100644
--- a/docs/migrating-from-v3.mdx
+++ b/docs/migrating-from-v3.mdx
@@ -34,7 +34,7 @@ We're retiring Trigger.dev v3. **New v3 deploys will stop working from 1 April 2
 | [Hidden tasks](/hidden-tasks)                                        | Create tasks that are not exported from your trigger files but can still be executed.                                                                                                                      |
 | [Middleware & locals](#middleware-and-locals)                        | The middleware system runs at the top level, executing before and after all lifecycle hooks. The locals API allows sharing data between middleware and hooks.                                              |
 | [useWaitToken](/realtime/react-hooks/use-wait-token)                 | Use the useWaitToken hook to complete a wait token from a React component.                                                                                                                                 |
-| [ai.tool](/tasks/schemaTask#ai-tool)                                 | Create an AI tool from an existing `schemaTask` to use with the Vercel [AI SDK](https://vercel.com/docs/ai-sdk).                                                                                           |
+| [Task-backed AI tools](/tasks/schemaTask#task-backed-ai-tools)       | Use `schemaTask` with AI SDK `tool()` and `ai.toolExecute()` (legacy `ai.tool` is deprecated).                                                                                                              |
 
 ## Node.js support
 
@@ -165,7 +165,7 @@ export const myAiTask = schemaTask({
 });
 ```
 
-We've replaced the `toolTask` function with the `ai.tool` function, which creates an AI tool from an existing `schemaTask`. See the [ai.tool](/tasks/schemaTask#ai-tool) page for more details.
+We've replaced the `toolTask` function with `schemaTask` plus AI SDK `tool()` and `ai.toolExecute()` (the older `ai.tool()` wrapper is deprecated). See [Task-backed AI tools](/tasks/schemaTask#task-backed-ai-tools).
 
 ## Breaking changes
 
diff --git a/docs/realtime/backend/input-streams.mdx b/docs/realtime/backend/input-streams.mdx
index 1224e24244e..65e3bb494b7 100644
--- a/docs/realtime/backend/input-streams.mdx
+++ b/docs/realtime/backend/input-streams.mdx
@@ -11,6 +11,10 @@ The Input Streams API allows you to send data into running Trigger.dev tasks fro
   Streams](/tasks/streams#input-streams) in the Streams doc.
 </Note>
 
+<Tip>
+  Input streams are keyed by `runId` — they're the right primitive for sending data to a specific live run. If you need a bidirectional channel that survives run boundaries (e.g. a chat that resumes tomorrow, an agent coordinated across many runs), look at [`chat.agent`](/ai-chat/overview): it's built on a durable Session row that owns its runs and exposes the same consumer-side API (`on` / `once` / `wait` / `waitWithIdleTimeout`) on its `.in` channel.
+</Tip>
+
 ## Sending data to a running task
 
 ### Using defined input streams (Recommended)
diff --git a/docs/realtime/backend/streams.mdx b/docs/realtime/backend/streams.mdx
index 8a273ea5a9f..b644e5dab10 100644
--- a/docs/realtime/backend/streams.mdx
+++ b/docs/realtime/backend/streams.mdx
@@ -10,6 +10,10 @@ description: "Read AI/LLM output, file chunks, and other streaming data from you
   To emit streams from your tasks, see [Streaming data from tasks](/tasks/streams). For React components, see [Streaming in React](/realtime/react-hooks/streams).
 </Note>
 
+<Tip>
+  Run-scoped streams are the right primitive for ephemeral I/O that lives inside a single run's lifetime. For durable, long-lived channels that outlive a run, see [`chat.agent`](/ai-chat/overview): it's built on a Session row that owns the chat's runs and exposes bidirectional `.in` / `.out` channels addressed by a durable id.
+</Tip>
+
 ## Reading streams
 
 ### Using defined streams (Recommended)
diff --git a/docs/snippets/ai-chat-rc-banner.mdx b/docs/snippets/ai-chat-rc-banner.mdx
new file mode 100644
index 00000000000..18a9dbcfe57
--- /dev/null
+++ b/docs/snippets/ai-chat-rc-banner.mdx
@@ -0,0 +1,3 @@
+<Warning>
+  The AI Agents and Prompts surface ships as part of the **v4.5 release candidate**. Install with `@trigger.dev/sdk@rc` (or pin `4.5.0-rc.0` or later) to use these features — they aren't yet on the latest stable, and APIs may still change before the 4.5.0 GA. See [supported AI SDK versions](/ai-chat/reference#compatibility) and the [AI chat changelog](/ai-chat/changelog) for details.
+</Warning>
diff --git a/docs/snippets/migrate-v4-using-ai.mdx b/docs/snippets/migrate-v4-using-ai.mdx
index fa749ed7231..aa5393c158d 100644
--- a/docs/snippets/migrate-v4-using-ai.mdx
+++ b/docs/snippets/migrate-v4-using-ai.mdx
@@ -56,7 +56,7 @@ const myTask = task({
   },
 });
 
-We’ve deprecated the `toolTask` function and replaced it with the `ai.tool` function, which creates an AI tool from an existing `schemaTask`. This is the old version:
+We’ve deprecated the `toolTask` function. Use `schemaTask` plus AI SDK `tool()` with `execute: ai.toolExecute(task)` (the `ai.tool()` wrapper is deprecated). This is the old version:
 
 import { toolTask, schemaTask } from "@trigger.dev/sdk";
 import { z } from "zod";
@@ -85,9 +85,11 @@ export const myAiTask = schemaTask({
 
 This is the new version:
 
-import { schemaTask, ai } from "@trigger.dev/sdk";
+import { schemaTask } from "@trigger.dev/sdk";
+import { ai } from "@trigger.dev/sdk/ai";
 import { z } from "zod";
-import { generateText } from "ai";
+import { generateText, tool } from "ai";
+import { openai } from "@ai-sdk/openai";
 
 // Convert toolTask to schemaTask with a schema
 const myToolTask = schemaTask({
@@ -99,8 +101,11 @@ const myToolTask = schemaTask({
   run: async (payload, { ctx }) => {},
 });
 
-// Create an AI tool from the schemaTask
-const myTool = ai.tool(myToolTask);
+const myTool = tool({
+  description: myToolTask.description ?? "",
+  inputSchema: myToolTask.schema!,
+  execute: ai.toolExecute(myToolTask),
+});
 
 export const myAiTask = schemaTask({
   id: "my-ai-task",
@@ -112,7 +117,7 @@ export const myAiTask = schemaTask({
       prompt: payload.text,
       model: openai("gpt-4o"),
       tools: {
-        myTool, // Use the ai.tool created from schemaTask
+        myTool,
       },
     });
   },
diff --git a/docs/tasks/schemaTask.mdx b/docs/tasks/schemaTask.mdx
index 3692d1d7035..a551fb1af49 100644
--- a/docs/tasks/schemaTask.mdx
+++ b/docs/tasks/schemaTask.mdx
@@ -76,51 +76,63 @@ await myTask.trigger({ age: 30, dob: "2020-01-01" }); // this is valid
 await myTask.trigger({ name: "Alice", age: 30, dob: "2020-01-01" }); // this is also valid
 ```
 
-## `ai.tool`
+## Task-backed AI tools
 
-The `ai.tool` function allows you to create an AI tool from an existing `schemaTask` to use with the Vercel [AI SDK](https://vercel.com/docs/ai-sdk):
+Use a `schemaTask` as the implementation of a Vercel [AI SDK](https://vercel.com/docs/ai-sdk) tool: the model calls the tool, and Trigger runs your task as a **subtask** with tool-call metadata, optional [chat context](/ai-chat/patterns/sub-agents), and the same payload validation as a normal trigger.
+
+### Recommended: `ai.toolExecute` with `tool()`
+
+Prefer building the tool with the AI SDK’s [`tool()`](https://sdk.vercel.ai/docs/ai-sdk-core/tools-and-tool-calling) and passing **`execute: ai.toolExecute(yourTask)`**. You keep full control of `description`, `inputSchema`, and AI-SDK-only options (for example `experimental_toToolResultContent`), and your types follow the `ai` version installed in **your** app.
 
 ```ts
 import { ai } from "@trigger.dev/sdk/ai";
 import { schemaTask } from "@trigger.dev/sdk";
+import { tool, generateText } from "ai";
+import { openai } from "@ai-sdk/openai";
 import { z } from "zod";
-import { generateText } from "ai";
 
 const myToolTask = schemaTask({
   id: "my-tool-task",
   schema: z.object({
     foo: z.string(),
   }),
-  run: async (payload: any, { ctx }) => {},
+  run: async ({ foo }) => {
+    return { bar: foo.toUpperCase() };
+  },
 });
 
-const myTool = ai.tool(myToolTask);
+const myTool = tool({
+  description: myToolTask.description ?? "",
+  inputSchema: myToolTask.schema!,
+  execute: ai.toolExecute(myToolTask),
+});
 
 export const myAiTask = schemaTask({
   id: "my-ai-task",
   schema: z.object({
     text: z.string(),
   }),
-  run: async (payload, { ctx }) => {
-    const { text } = await generateText({
-      prompt: payload.text,
+  run: async ({ text }) => {
+    const { text: reply } = await generateText({
+      prompt: text,
       model: openai("gpt-4o"),
       tools: {
         myTool,
       },
     });
+    return reply;
   },
 });
 ```
 
-You can also pass the `experimental_toToolResultContent` option to the `ai.tool` function to customize the content of the tool result:
+`experimental_toToolResultContent` and other tool-level options belong on **`tool({ ... })`**, not on `ai.toolExecute`:
 
 ```ts
 import { openai } from "@ai-sdk/openai";
 import { Sandbox } from "@e2b/code-interpreter";
 import { ai } from "@trigger.dev/sdk/ai";
 import { schemaTask } from "@trigger.dev/sdk";
-import { generateObject } from "ai";
+import { generateObject, tool } from "ai";
 import { z } from "zod";
 
 const chartTask = schemaTask({
@@ -135,56 +147,37 @@ const chartTask = schemaTask({
       schema: z.object({
         code: z.string().describe("The Python code to execute"),
       }),
-      system: `
-        You are a helpful assistant that can generate Python code to be executed in a sandbox, using matplotlib.pyplot.
-
-        For example: 
-        
-        import matplotlib.pyplot as plt
-        plt.plot([1, 2, 3, 4])
-        plt.ylabel('some numbers')
-        plt.show()
-        
-        Make sure the code ends with plt.show()
-      `,
+      system: `You are a helpful assistant that generates matplotlib code. End with plt.show().`,
       prompt: input,
     });
 
     const sandbox = await Sandbox.create();
-
     const execution = await sandbox.runCode(code.object.code);
-
     const firstResult = execution.results[0];
 
     if (firstResult.png) {
-      return {
-        chart: firstResult.png,
-      };
-    } else {
-      throw new Error("No chart generated");
+      return { chart: firstResult.png };
     }
+    throw new Error("No chart generated");
   },
 });
 
-// This is useful if you want to return an image from the tool
-export const chartTool = ai.tool(chartTask, {
-  experimental_toToolResultContent: (result) => {
-    return [
-      {
-        type: "image",
-        data: result.chart,
-        mimeType: "image/png",
-      },
-    ];
-  },
+export const chartTool = tool({
+  description: chartTask.description ?? "",
+  inputSchema: chartTask.schema!,
+  execute: ai.toolExecute(chartTask),
+  experimental_toToolResultContent: (result) => [
+    { type: "image", data: result.chart, mimeType: "image/png" },
+  ],
 });
 ```
 
-You can access the current tool execution options inside the task run function using the `ai.currentToolOptions()` function:
+Inside the task run, you can read tool execution context with **`ai.currentToolOptions()`** (and helpers like `ai.toolCallId()`, `ai.chatContext()` when running inside a [`chat.agent`](/ai-chat/overview)):
 
 ```ts
 import { ai } from "@trigger.dev/sdk/ai";
 import { schemaTask } from "@trigger.dev/sdk";
+import { tool } from "ai";
 import { z } from "zod";
 
 const myToolTask = schemaTask({
@@ -192,22 +185,49 @@ const myToolTask = schemaTask({
   schema: z.object({
     foo: z.string(),
   }),
-  run: async (payload, { ctx }) => {
+  run: async ({ foo }) => {
     const toolOptions = ai.currentToolOptions();
     console.log(toolOptions);
+    return { foo };
   },
 });
 
-export const myAiTask = ai.tool(myToolTask);
+export const myTool = tool({
+  description: myToolTask.description ?? "",
+  inputSchema: myToolTask.schema!,
+  execute: ai.toolExecute(myToolTask),
+});
 ```
 
-See the [AI SDK tool execution options docs](https://sdk.vercel.ai/docs/ai-sdk-core/tools-and-tool-calling#tool-execution-options) for more details on the tool execution options.
+See the [AI SDK tool execution options](https://sdk.vercel.ai/docs/ai-sdk-core/tools-and-tool-calling#tool-execution-options) docs for the fields these options contain.
 
 <Note>
-  `ai.tool` is compatible with `schemaTask`'s defined with Zod and ArkType schemas, or any schemas
-  that implement a `.toJsonSchema()` function.
+  `ai.toolExecute` works with `schemaTask` definitions that use Zod, ArkType, or any schema that provides a JSON schema via `.toJsonSchema()` (same coverage as the legacy `ai.tool` wrapper).
 </Note>
 
+### Deprecated: `ai.tool`
+
+The **`ai.tool(task, options?)`** helper is **deprecated**. It constructs an AI SDK `Tool` for you (using `tool()` for Zod-like schemas and `dynamicTool()` otherwise) and may be removed in a future major version. New code should use **`tool({ ..., execute: ai.toolExecute(task) })`** as shown above.
+
+### Legacy `ai.tool` example (deprecated)
+
+```ts
+import { ai } from "@trigger.dev/sdk/ai";
+import { schemaTask } from "@trigger.dev/sdk";
+import { z } from "zod";
+import { generateText } from "ai";
+import { openai } from "@ai-sdk/openai";
+
+const myToolTask = schemaTask({
+  id: "my-tool-task",
+  schema: z.object({ foo: z.string() }),
+  run: async ({ foo }) => ({ foo }),
+});
+
+// Deprecated — prefer tool({ execute: ai.toolExecute(myToolTask), ... })
+const myTool = ai.tool(myToolTask);
+```
+
 ## Supported schema types
 
 ### Zod
diff --git a/hosting/k8s/helm/Chart.yaml b/hosting/k8s/helm/Chart.yaml
index 529daddce08..40c535f6854 100644
--- a/hosting/k8s/helm/Chart.yaml
+++ b/hosting/k8s/helm/Chart.yaml
@@ -2,8 +2,8 @@ apiVersion: v2
 name: trigger
 description: The official Trigger.dev Helm chart
 type: application
-version: 4.4.6
-appVersion: v4.4.6
+version: 4.5.0-rc.1
+appVersion: v4.5.0-rc.1
 home: https://trigger.dev
 sources:
   - https://github.com/triggerdotdev/trigger.dev
diff --git a/internal-packages/database/prisma/migrations/20260520120000_add_environment_variable_value_environment_id_idx/migration.sql b/internal-packages/database/prisma/migrations/20260520120000_add_environment_variable_value_environment_id_idx/migration.sql
new file mode 100644
index 00000000000..9aaf4d930c6
--- /dev/null
+++ b/internal-packages/database/prisma/migrations/20260520120000_add_environment_variable_value_environment_id_idx/migration.sql
@@ -0,0 +1,3 @@
+-- CreateIndex
+CREATE INDEX CONCURRENTLY IF NOT EXISTS "EnvironmentVariableValue_environmentId_idx"
+  ON "EnvironmentVariableValue"("environmentId");
diff --git a/internal-packages/database/prisma/migrations/20260520170000_add_session_chat_snapshot_storage_path/migration.sql b/internal-packages/database/prisma/migrations/20260520170000_add_session_chat_snapshot_storage_path/migration.sql
new file mode 100644
index 00000000000..c61c7ac5726
--- /dev/null
+++ b/internal-packages/database/prisma/migrations/20260520170000_add_session_chat_snapshot_storage_path/migration.sql
@@ -0,0 +1 @@
+ALTER TABLE "Session" ADD COLUMN IF NOT EXISTS "chatSnapshotStoragePath" TEXT;
diff --git a/internal-packages/database/prisma/schema.prisma b/internal-packages/database/prisma/schema.prisma
index 7e32a96d805..2dc3e9db56e 100644
--- a/internal-packages/database/prisma/schema.prisma
+++ b/internal-packages/database/prisma/schema.prisma
@@ -830,6 +830,11 @@ model Session {
   /// (OSS, or pre-backfill); reads fall back to the global basin.
   streamBasinName String?
 
+  /// Storage URI (with protocol prefix) for this session's chat.agent
+  /// snapshot blob. Set on first snapshot write. Null = pre-column session,
+  /// fall back to computed default path.
+  chatSnapshotStoragePath String?
+
   runs SessionRun[]
 
   /// Idempotency: `(env, externalId)` uniquely identifies a session.
@@ -2020,6 +2025,7 @@ model EnvironmentVariableValue {
   lastUpdatedBy Json?
 
   @@unique([variableId, environmentId])
+  @@index([environmentId])
 }
 
 model Checkpoint {
diff --git a/packages/build/CHANGELOG.md b/packages/build/CHANGELOG.md
index 742c66c83ef..c6b4ee220e2 100644
--- a/packages/build/CHANGELOG.md
+++ b/packages/build/CHANGELOG.md
@@ -1,5 +1,29 @@
 # @trigger.dev/build
 
+## 4.5.0-rc.1
+
+### Patch Changes
+
+- Updated dependencies:
+  - `@trigger.dev/core@4.5.0-rc.1`
+
+## 4.5.0-rc.0
+
+### Patch Changes
+
+- Add Agent Skills for `chat.agent`. Drop a folder with a `SKILL.md` and any helper scripts/references next to your task code, register it with `skills.define({ id, path })`, and the CLI bundles it into the deploy image automatically — no `trigger.config.ts` changes. The agent gets a one-line summary in its system prompt and discovers full instructions on demand via `loadSkill`, with `bash` and `readFile` tools scoped per-skill (path-traversal guards, output caps, abort-signal propagation). ([#3543](https://github.com/triggerdotdev/trigger.dev/pull/3543))
+
+  ```ts
+  const pdfSkill = skills.define({ id: "pdf-extract", path: "./skills/pdf-extract" });
+
+  chat.skills.set([await pdfSkill.local()]);
+  ```
+
+  Built on the [AI SDK cookbook pattern](https://ai-sdk.dev/cookbook/guides/agent-skills) — portable across providers. SDK + CLI only for now; dashboard-editable `SKILL.md` text is on the roadmap.
+
+- Updated dependencies:
+  - `@trigger.dev/core@4.5.0-rc.0`
+
 ## 4.4.6
 
 ### Patch Changes
diff --git a/packages/build/package.json b/packages/build/package.json
index 8d7bf6daf3f..5fa1932e8a3 100644
--- a/packages/build/package.json
+++ b/packages/build/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@trigger.dev/build",
-  "version": "4.4.6",
+  "version": "4.5.0-rc.1",
   "description": "trigger.dev build extensions",
   "license": "MIT",
   "publishConfig": {
@@ -31,8 +31,7 @@
       "./extensions/typescript": "./src/extensions/typescript.ts",
       "./extensions/puppeteer": "./src/extensions/puppeteer.ts",
       "./extensions/playwright": "./src/extensions/playwright.ts",
-      "./extensions/lightpanda": "./src/extensions/lightpanda.ts",
-      "./extensions/secureExec": "./src/extensions/secureExec.ts"
+      "./extensions/lightpanda": "./src/extensions/lightpanda.ts"
     },
     "sourceDialects": [
       "@triggerdotdev/source"
@@ -66,9 +65,6 @@
       ],
       "extensions/lightpanda": [
         "dist/commonjs/extensions/lightpanda.d.ts"
-      ],
-      "extensions/secureExec": [
-        "dist/commonjs/extensions/secureExec.d.ts"
       ]
     }
   },
@@ -82,7 +78,7 @@
   },
   "dependencies": {
     "@prisma/config": "^6.10.0",
-    "@trigger.dev/core": "workspace:4.4.6",
+    "@trigger.dev/core": "workspace:4.5.0-rc.1",
     "mlly": "^1.7.1",
     "pkg-types": "^1.1.3",
     "resolve": "^1.22.8",
@@ -211,17 +207,6 @@
         "types": "./dist/commonjs/extensions/lightpanda.d.ts",
         "default": "./dist/commonjs/extensions/lightpanda.js"
       }
-    },
-    "./extensions/secureExec": {
-      "import": {
-        "@triggerdotdev/source": "./src/extensions/secureExec.ts",
-        "types": "./dist/esm/extensions/secureExec.d.ts",
-        "default": "./dist/esm/extensions/secureExec.js"
-      },
-      "require": {
-        "types": "./dist/commonjs/extensions/secureExec.d.ts",
-        "default": "./dist/commonjs/extensions/secureExec.js"
-      }
     }
   },
   "main": "./dist/commonjs/index.js",
diff --git a/packages/build/src/extensions/secureExec.ts b/packages/build/src/extensions/secureExec.ts
deleted file mode 100644
index 808bc666501..00000000000
--- a/packages/build/src/extensions/secureExec.ts
+++ /dev/null
@@ -1,172 +0,0 @@
-import { BuildTarget } from "@trigger.dev/core/v3";
-import { BuildManifest } from "@trigger.dev/core/v3/schemas";
-import { BuildContext, BuildExtension } from "@trigger.dev/core/v3/build";
-import { dirname, resolve, join } from "node:path";
-import { readFileSync } from "node:fs";
-import { createRequire } from "node:module";
-import { readPackageJSON } from "pkg-types";
-
-export type SecureExecOptions = {
-  /**
-   * Packages available inside the sandbox at runtime.
-   *
-   * These are `require()`'d inside the V8 isolate at runtime — the bundler
-   * never sees them statically. They are marked external and installed as
-   * deploy dependencies.
-   *
-   * @example
-   * ```ts
-   * secureExec({ packages: ["jszip", "lodash"] })
-   * ```
-   */
-  packages?: string[];
-};
-
-/**
- * Build extension for [secure-exec](https://secureexec.dev) — run untrusted
- * JavaScript/TypeScript in V8 isolates with configurable permissions.
- *
- * Handles the esbuild workarounds needed for secure-exec's runtime
- * `require.resolve` calls, native binaries, and module-scope resolution.
- *
- * @example
- * ```ts
- * import { secureExec } from "@trigger.dev/build/extensions/secureExec";
- *
- * export default defineConfig({
- *   build: {
- *     extensions: [secureExec()],
- *   },
- * });
- * ```
- */
-export function secureExec(options?: SecureExecOptions): BuildExtension {
-  return new SecureExecExtension(options ?? {});
-}
-
-class SecureExecExtension implements BuildExtension {
-  public readonly name = "SecureExecExtension";
-
-  private userPackages: string[];
-
-  constructor(options: SecureExecOptions) {
-    this.userPackages = options.packages ?? [];
-  }
-
-  externalsForTarget(_target: BuildTarget) {
-    return [
-      // esbuild must not be bundled — it locates its native binary via a
-      // relative path from its JS API entry point. secure-exec uses esbuild
-      // at runtime to bundle polyfills for sandbox code.
-      "esbuild",
-      // User-specified packages are require()'d inside the V8 sandbox at
-      // runtime — the bundler never sees them statically.
-      ...this.userPackages,
-    ];
-  }
-
-  onBuildStart(context: BuildContext) {
-    context.logger.debug(`Adding ${this.name} esbuild plugins`);
-
-    // Plugin 1: Replace node-stdlib-browser with pre-resolved paths.
-    //
-    // Trigger's ESM shim anchors require.resolve() to the chunk path, so
-    // node-stdlib-browser's runtime require.resolve("./mock/empty.js") breaks.
-    // Fix: load the real node-stdlib-browser at build time (where require.resolve
-    // works), capture the resolved path map, and inline it as a static export.
-    const workingDir = context.workingDir;
-    context.registerPlugin({
-      name: "secure-exec-stdlib-resolver",
-      setup(build) {
-        build.onResolve({ filter: /^node-stdlib-browser$/ }, () => ({
-          path: "node-stdlib-browser",
-          namespace: "secure-exec-nsb-resolved",
-        }));
-        build.onLoad({ filter: /.*/, namespace: "secure-exec-nsb-resolved" }, () => {
-          const buildRequire = createRequire(join(workingDir, "package.json"));
-          const resolved = buildRequire("node-stdlib-browser");
-          return {
-            contents: `export default ${JSON.stringify(resolved)};`,
-            loader: "js",
-          };
-        });
-      },
-    });
-
-    // Plugin 2: Inline bridge.js at build time.
-    //
-    // bridge-loader.js in @secure-exec/node(js) uses __dirname and
-    // require.resolve("@secure-exec/core") at module scope to locate
-    // dist/bridge.js on disk. This fails in Trigger's bundled output.
-    // Fix: read bridge.js content at build time and inline it as a
-    // string literal so no runtime filesystem resolution is needed.
-    //
-    context.registerPlugin({
-      name: "secure-exec-bridge-inline",
-      setup(build) {
-        build.onLoad(
-          { filter: /[\\/]@secure-exec[\\/]node[\\/]dist[\\/]bridge-loader\.js$/ },
-          (args) => {
-            try {
-              const buildRequire = createRequire(args.path);
-              const coreEntry = buildRequire.resolve("@secure-exec/core");
-              const coreRoot = resolve(dirname(coreEntry), "..");
-              const bridgeCode = readFileSync(join(coreRoot, "dist", "bridge.js"), "utf8");
-
-              return {
-                contents: [
-                  `import { getIsolateRuntimeSource } from "@secure-exec/core";`,
-                  `const bridgeCodeCache = ${JSON.stringify(bridgeCode)};`,
-                  `export function getRawBridgeCode() { return bridgeCodeCache; }`,
-                  `export function getBridgeAttachCode() { return getIsolateRuntimeSource("bridgeAttach"); }`,
-                ].join("\n"),
-                loader: "js",
-              };
-            } catch {
-              // If we can't inline the bridge, let the normal loader handle it.
-              return undefined;
-            }
-          }
-        );
-      },
-    });
-  }
-
-  async onBuildComplete(context: BuildContext, _manifest: BuildManifest) {
-    if (context.target === "dev") {
-      return;
-    }
-
-    context.logger.debug(`Adding ${this.name} deploy dependencies`);
-
-    const dependencies: Record<string, string> = {};
-
-    // Resolve versions for user-specified sandbox packages
-    for (const pkg of this.userPackages) {
-      try {
-        const modulePath = await context.resolvePath(pkg);
-        if (!modulePath) {
-          dependencies[pkg] = "latest";
-          continue;
-        }
-
-        const packageJSON = await readPackageJSON(dirname(modulePath));
-        dependencies[pkg] = packageJSON.version ?? "latest";
-      } catch {
-        context.logger.warn(
-          `Could not resolve version for sandbox package ${pkg}, defaulting to latest`
-        );
-        dependencies[pkg] = "latest";
-      }
-    }
-
-    context.addLayer({
-      id: "secureExec",
-      dependencies,
-      image: {
-        // isolated-vm requires native compilation tools
-        pkgs: ["python3", "make", "g++"],
-      },
-    });
-  }
-}
diff --git a/packages/cli-v3/CHANGELOG.md b/packages/cli-v3/CHANGELOG.md
index c0a0c29fd18..8a4c454e4b8 100644
--- a/packages/cli-v3/CHANGELOG.md
+++ b/packages/cli-v3/CHANGELOG.md
@@ -1,5 +1,38 @@
 # trigger.dev
 
+## 4.5.0-rc.1
+
+### Patch Changes
+
+- Fix `chat.agent` skills silently missing in `trigger dev` for projects whose task files read `process.env` at module top level (e.g. a third-party SDK client initialized at import). Skill folders now bundle into `.trigger/skills/` reliably regardless of which env vars are set when the CLI launches. ([#3690](https://github.com/triggerdotdev/trigger.dev/pull/3690))
+- Fix `COULD_NOT_FIND_EXECUTOR` when a task's definition is loaded via `await import(...)` from inside another task's `run()`. The runtime workers now register such tasks with a sentinel file context, and the catalog logs a one-time warning per task id. ([#3688](https://github.com/triggerdotdev/trigger.dev/pull/3688))
+- Updated dependencies:
+  - `@trigger.dev/core@4.5.0-rc.1`
+  - `@trigger.dev/build@4.5.0-rc.1`
+  - `@trigger.dev/schema-to-json@4.5.0-rc.1`
+
+## 4.5.0-rc.0
+
+### Patch Changes
+
+- Add Agent Skills for `chat.agent`. Drop a folder with a `SKILL.md` and any helper scripts/references next to your task code, register it with `skills.define({ id, path })`, and the CLI bundles it into the deploy image automatically — no `trigger.config.ts` changes. The agent gets a one-line summary in its system prompt and discovers full instructions on demand via `loadSkill`, with `bash` and `readFile` tools scoped per-skill (path-traversal guards, output caps, abort-signal propagation). ([#3543](https://github.com/triggerdotdev/trigger.dev/pull/3543))
+
+  ```ts
+  const pdfSkill = skills.define({ id: "pdf-extract", path: "./skills/pdf-extract" });
+
+  chat.skills.set([await pdfSkill.local()]);
+  ```
+
+  Built on the [AI SDK cookbook pattern](https://ai-sdk.dev/cookbook/guides/agent-skills) — portable across providers. SDK + CLI only for now; dashboard-editable `SKILL.md` text is on the roadmap.
+
+- Add `TRIGGER_BUILD_SKIP_REWRITE_TIMESTAMP=1` escape hatch for local self-hosted builds whose buildx driver doesn't support `rewrite-timestamp` alongside push (e.g. orbstack's default `docker` driver). ([#3618](https://github.com/triggerdotdev/trigger.dev/pull/3618))
+- The CLI MCP server's agent-chat tools (`start_agent_chat`, `send_agent_message`, `close_agent_chat`) now run on the new Sessions primitive, so AI assistants driving a `chat.agent` get the same idempotent-by-`chatId`, durable-across-runs behavior the browser transport gets. Required PAT scopes go from `write:inputStreams` to `read:sessions` + `write:sessions`. ([#3546](https://github.com/triggerdotdev/trigger.dev/pull/3546))
+- MCP `list_runs` tool: add a `region` filter input and surface each run's executing region in the formatted summary. ([#3612](https://github.com/triggerdotdev/trigger.dev/pull/3612))
+- Updated dependencies:
+  - `@trigger.dev/core@4.5.0-rc.0`
+  - `@trigger.dev/build@4.5.0-rc.0`
+  - `@trigger.dev/schema-to-json@4.5.0-rc.0`
+
 ## 4.4.6
 
 ### Patch Changes
diff --git a/packages/cli-v3/package.json b/packages/cli-v3/package.json
index 326104a624d..0ba8907d8e2 100644
--- a/packages/cli-v3/package.json
+++ b/packages/cli-v3/package.json
@@ -1,6 +1,6 @@
 {
   "name": "trigger.dev",
-  "version": "4.4.6",
+  "version": "4.5.0-rc.1",
   "description": "A Command-Line Interface for Trigger.dev projects",
   "type": "module",
   "license": "MIT",
@@ -95,9 +95,9 @@
     "@opentelemetry/sdk-trace-node": "2.0.1",
     "@opentelemetry/semantic-conventions": "1.36.0",
     "@s2-dev/streamstore": "^0.22.5",
-    "@trigger.dev/build": "workspace:4.4.6",
-    "@trigger.dev/core": "workspace:4.4.6",
-    "@trigger.dev/schema-to-json": "workspace:4.4.6",
+    "@trigger.dev/build": "workspace:4.5.0-rc.1",
+    "@trigger.dev/core": "workspace:4.5.0-rc.1",
+    "@trigger.dev/schema-to-json": "workspace:4.5.0-rc.1",
     "ansi-escapes": "^7.0.0",
     "braces": "^3.0.3",
     "c12": "^1.11.1",
diff --git a/packages/cli-v3/src/build/bundleSkills.ts b/packages/cli-v3/src/build/bundleSkills.ts
index 65ad9834abe..8533d254c72 100644
--- a/packages/cli-v3/src/build/bundleSkills.ts
+++ b/packages/cli-v3/src/build/bundleSkills.ts
@@ -1,6 +1,5 @@
-import { createHash } from "node:crypto";
 import { readFile } from "node:fs/promises";
-import { dirname, isAbsolute, join, resolve as resolvePath } from "node:path";
+import { isAbsolute, join, resolve as resolvePath } from "node:path";
 import type { BuildManifest, SkillManifest } from "@trigger.dev/core/v3/schemas";
 import { copyDirectoryRecursive } from "@trigger.dev/build/internal";
 import { indexWorkerManifest } from "../indexing/indexWorkerManifest.js";
@@ -21,13 +20,84 @@ export type BundleSkillsResult = {
   skills: SkillManifest[];
 };
 
+export type CopySkillFoldersOptions = {
+  /** Skill manifests to validate and copy. */
+  skills: SkillManifest[];
+  /** Root where `{destinationRoot}/{id}/` folders will be created. */
+  destinationRoot: string;
+  /** Used to resolve relative `filePath` references in skill manifests. */
+  workingDir: string;
+  /** Only `debug` is used. `BuildLogger` and the cli `logger` both satisfy this shape. */
+  logger: { debug: (...args: unknown[]) => void };
+};
+
+/**
+ * Copy each skill's source folder to `{destinationRoot}/{id}/`. Validates
+ * that `SKILL.md` exists and that its frontmatter declares `name` and
+ * `description`. Pure file IO — no indexer subprocess, no env handling.
+ * Used by the dev path (driven by the main worker indexer's skills list)
+ * and indirectly by the deploy path (via `bundleSkills` which discovers
+ * skills via its own indexer pass first, then delegates here).
+ * @returns A copy of `skills` sorted by `id` (empty input yields `[]`).
+ * @throws Error naming the skill when `SKILL.md` is unreadable or invalid.
+ */
+export async function copySkillFolders(
+  options: CopySkillFoldersOptions
+): Promise<SkillManifest[]> {
+  const { skills, destinationRoot, workingDir, logger } = options;
+
+  for (const skill of skills) {
+    const callerDir = skill.filePath
+      ? resolvePath(workingDir, skill.filePath, "..")
+      : workingDir;
+    const sourcePath = isAbsolute(skill.sourcePath)
+      ? skill.sourcePath
+      : resolvePath(callerDir, skill.sourcePath);
+    const skillMdPath = join(sourcePath, "SKILL.md");
+
+    let skillMd: string;
+    try {
+      skillMd = await readFile(skillMdPath, "utf8");
+    } catch {
+      throw new Error(
+        `Skill "${skill.id}": SKILL.md not found at ${skillMdPath}. ` +
+          `Registered via skills.define({ id: "${skill.id}", path: "${skill.sourcePath}" }) ` +
+          `at ${skill.filePath}.`
+      );
+    }
+
+    // Check the keys inside the frontmatter only; body text must not satisfy them.
+    const frontmatter = /^---\r?\n([\s\S]*?)\r?\n---/.exec(skillMd)?.[1];
+    if (frontmatter === undefined) {
+      throw new Error(
+        `Skill "${skill.id}": SKILL.md at ${skillMdPath} is missing a frontmatter block.`
+      );
+    }
+    if (!/\bname:\s*\S/.test(frontmatter) || !/\bdescription:\s*\S/.test(frontmatter)) {
+      throw new Error(
+        `Skill "${skill.id}": SKILL.md at ${skillMdPath} frontmatter must include both \`name\` and \`description\`.`
+      );
+    }
+
+    const skillDest = join(destinationRoot, skill.id);
+    logger.debug(`[copySkillFolders] Copying ${sourcePath} → ${skillDest}`);
+    await copyDirectoryRecursive(sourcePath, skillDest);
+  }
+
+  return [...skills].sort((a, b) => a.id.localeCompare(b.id));
+}
+
 /**
  * Built-in skill bundler — not an extension. Runs the indexer locally
- * against the bundled worker output to discover `ai.defineSkill(...)`
+ * against the bundled worker output to discover `skills.define(...)`
  * registrations, validates each skill's `SKILL.md`, and copies the
  * folder into `{outputPath}/.trigger/skills/{id}/` so the deploy image
  * picks it up via the existing Dockerfile `COPY`.
  *
+ * Used by the deploy path. The dev path uses `copySkillFolders` directly,
+ * driven by the main worker indexer that already runs in `BackgroundWorker.initialize` —
+ * no duplicate indexer pass needed there.
+ *
  * No `trigger.config.ts` changes required — discovery is side-effect
  * based, same mechanism as task/prompt registration.
  */
@@ -71,65 +141,20 @@ export async function bundleSkills(
     return { buildManifest, skills: [] };
   }
 
-  // Destination layout differs between dev and deploy:
-  // - Dev:    the worker runs with cwd = workingDir, so skills must live at
-  //           {workingDir}/.trigger/skills/{id}/ for skill.local() to find them.
-  // - Deploy: the Dockerfile COPY picks up everything under outputPath into
-  //           /app, so we target {outputPath}/.trigger/skills/{id}/ and the
-  //           container's cwd (/app) resolves correctly.
-  const destinationRoot =
-    buildManifest.target === "dev"
-      ? join(workingDir, ".trigger", "skills")
-      : join(buildManifest.outputPath, ".trigger", "skills");
+  // Deploy target: the Dockerfile COPY picks up everything under outputPath
+  // into /app, so we target {outputPath}/.trigger/skills/{id}/ and the
+  // container's cwd (/app) resolves correctly.
+  const destinationRoot = join(buildManifest.outputPath, ".trigger", "skills");
 
-  for (const skill of skills) {
-    // Resolve the skill's source folder relative to the file that called
-    // `skills.define(...)`. Absolute paths are honored as-is.
-    const callerDir = skill.filePath
-      ? dirname(resolvePath(workingDir, skill.filePath))
-      : workingDir;
-    const sourcePath = isAbsolute(skill.sourcePath)
-      ? skill.sourcePath
-      : resolvePath(callerDir, skill.sourcePath);
-    const skillMdPath = join(sourcePath, "SKILL.md");
-
-    let skillMd: string;
-    try {
-      skillMd = await readFile(skillMdPath, "utf8");
-    } catch {
-      throw new Error(
-        `Skill "${skill.id}": SKILL.md not found at ${skillMdPath}. ` +
-          `Registered via ai.defineSkill({ id: "${skill.id}", path: "${skill.sourcePath}" }) ` +
-          `at ${skill.filePath}.`
-      );
-    }
-
-    if (!/^---\r?\n[\s\S]*?\r?\n---/.test(skillMd)) {
-      throw new Error(
-        `Skill "${skill.id}": SKILL.md at ${skillMdPath} is missing a frontmatter block.`
-      );
-    }
-    if (!/\bname:\s*\S/.test(skillMd) || !/\bdescription:\s*\S/.test(skillMd)) {
-      throw new Error(
-        `Skill "${skill.id}": SKILL.md at ${skillMdPath} frontmatter must include both \`name\` and \`description\`.`
-      );
-    }
-
-    const skillDest = join(destinationRoot, skill.id);
-    logger.debug(`[bundleSkills] Copying ${sourcePath} → ${skillDest}`);
-    await copyDirectoryRecursive(sourcePath, skillDest);
-  }
-
-  // Sort by id for deterministic manifest output
-  skills = [...skills].sort((a, b) => a.id.localeCompare(b.id));
-
-  // Content hash is derived from each SKILL.md's content for cache invalidation
-  // downstream (dashboard persistence in Phase 2). Not used in Phase 1.
-  void createHash;
-  void dirname;
+  const sortedSkills = await copySkillFolders({
+    skills,
+    destinationRoot,
+    workingDir,
+    logger,
+  });
 
   return {
-    buildManifest: { ...buildManifest, skills },
-    skills,
+    buildManifest: { ...buildManifest, skills: sortedSkills },
+    skills: sortedSkills,
   };
 }
diff --git a/packages/cli-v3/src/dev/devSession.ts b/packages/cli-v3/src/dev/devSession.ts
index 2d6645cd50c..ed6290a1b86 100644
--- a/packages/cli-v3/src/dev/devSession.ts
+++ b/packages/cli-v3/src/dev/devSession.ts
@@ -9,7 +9,6 @@ import {
   logBuildFailure,
   logBuildWarnings,
 } from "../build/bundle.js";
-import { bundleSkills } from "../build/bundleSkills.js";
 import {
   createBuildContext,
   notifyExtensionOnBuildComplete,
@@ -119,25 +118,12 @@ export async function startDevSession({
       bundle.metafile
     );
 
-    // Built-in skill bundling — copies registered skill folders into
-    // `.trigger/skills/{id}/` so `skill.local()` works at dev runtime.
-    try {
-      const buildManifestPath = join(
-        workerDir?.path ?? destination.path,
-        "build.json"
-      );
-      await writeJSONFile(buildManifestPath, buildManifest);
-      const skillsResult = await bundleSkills({
-        buildManifest,
-        buildManifestPath,
-        workingDir: rawConfig.workingDir,
-        env: process.env,
-        logger: buildContext.logger,
-      });
-      buildManifest = skillsResult.buildManifest;
-    } catch (err) {
-      logger.warn("Skill bundling failed during dev rebuild", err);
-    }
+    // Skill folder copying happens after the main worker indexer runs in
+    // `BackgroundWorker.initialize` — that pass already discovers skills
+    // via the resource catalog and reports them on `workerManifest.skills`,
+    // so we don't need a duplicate indexer here (which historically ran
+    // with a bare `process.env` and silently dropped skills on projects
+    // whose task files read CLI-injected vars at module top level).
 
     buildManifest = await notifyExtensionOnBuildComplete(buildContext, buildManifest);
 
diff --git a/packages/cli-v3/src/dev/devSupervisor.ts b/packages/cli-v3/src/dev/devSupervisor.ts
index 59b2d2a473b..0d972384c40 100644
--- a/packages/cli-v3/src/dev/devSupervisor.ts
+++ b/packages/cli-v3/src/dev/devSupervisor.ts
@@ -18,6 +18,7 @@ import { eventBus } from "../utilities/eventBus.js";
 import { logger } from "../utilities/logger.js";
 import { resolveSourceFiles } from "../utilities/sourceFiles.js";
 import { BackgroundWorker } from "./backgroundWorker.js";
+import { copySkillFolders } from "../build/bundleSkills.js";
 import { WorkerRuntime } from "./workerRuntime.js";
 import { chalkTask, cliLink, prettyError } from "../utilities/cliOutput.js";
 import { DevRunController } from "../entryPoints/dev-run-controller.js";
@@ -331,6 +332,25 @@ class DevSupervisor implements WorkerRuntime {
       throw new Error("Could not initialize worker");
     }
 
+    // Copy registered skill folders into `${workingDir}/.trigger/skills/{id}/`
+    // so `skill.local()` can read them at runtime. The main indexer already
+    // discovered skills; we just do the file IO here.
+    const discoveredSkills = backgroundWorker.manifest.skills ?? [];
+    if (discoveredSkills.length > 0) {
+      try {
+        await copySkillFolders({
+          skills: discoveredSkills,
+          destinationRoot: join(this.options.config.workingDir, ".trigger", "skills"),
+          workingDir: this.options.config.workingDir,
+          logger,
+        });
+      } catch (err) {
+        prettyError("Skill bundling failed", (err as Error).message);
+        stop();
+        return;
+      }
+    }
+
     const validationIssue = validateWorkerManifest(backgroundWorker.manifest);
 
     if (validationIssue) {
diff --git a/packages/cli-v3/src/entryPoints/dev-run-worker.ts b/packages/cli-v3/src/entryPoints/dev-run-worker.ts
index 067076a7b99..b7f621954c9 100644
--- a/packages/cli-v3/src/entryPoints/dev-run-worker.ts
+++ b/packages/cli-v3/src/entryPoints/dev-run-worker.ts
@@ -47,6 +47,7 @@ import {
   SharedRuntimeManager,
   OtelTaskLogger,
   populateEnv,
+  NO_FILE_CONTEXT,
   StandardLifecycleHooksManager,
   StandardLocalsManager,
   StandardMetadataManager,
@@ -501,8 +502,8 @@ const zodIpc = new ZodIpcConnection({
               async () => {
                 const beforeImport = performance.now();
                 resourceCatalog.setCurrentFileContext(
-                  taskManifest.entryPoint,
-                  taskManifest.filePath
+                  taskManifest.filePath,
+                  taskManifest.entryPoint
                 );
 
                 // Load init file if it exists
@@ -610,6 +611,12 @@ const zodIpc = new ZodIpcConnection({
 
           const signal = AbortSignal.any([_cancelController.signal, timeoutController.signal]);
 
+          // Sentinel context so `task()` calls firing during run / lifecycle
+          // hooks (e.g. via `await import(...)` of a module containing a task
+          // definition) register normally instead of being silently dropped.
+          // Cleared in the surrounding finally below.
+          resourceCatalog.setCurrentFileContext(NO_FILE_CONTEXT, NO_FILE_CONTEXT);
+
           const { result } = await executor.execute(execution, ctx, signal);
 
           if (_isRunning && !_isCancelled) {
@@ -628,6 +635,7 @@ const zodIpc = new ZodIpcConnection({
           }
         } finally {
           standardHeartbeatsManager.stopHeartbeat();
+          resourceCatalog.clearCurrentFileContext();
 
           _execution = undefined;
           _isRunning = false;
diff --git a/packages/cli-v3/src/entryPoints/managed-run-worker.ts b/packages/cli-v3/src/entryPoints/managed-run-worker.ts
index 3fc27dd8ab9..ed8fc9be5e7 100644
--- a/packages/cli-v3/src/entryPoints/managed-run-worker.ts
+++ b/packages/cli-v3/src/entryPoints/managed-run-worker.ts
@@ -47,6 +47,7 @@ import {
   OtelTaskLogger,
   populateEnv,
   ProdUsageManager,
+  NO_FILE_CONTEXT,
   StandardLifecycleHooksManager,
   StandardLocalsManager,
   StandardMetadataManager,
@@ -490,8 +491,8 @@ const zodIpc = new ZodIpcConnection({
               async () => {
                 const beforeImport = performance.now();
                 resourceCatalog.setCurrentFileContext(
-                  taskManifest.entryPoint,
-                  taskManifest.filePath
+                  taskManifest.filePath,
+                  taskManifest.entryPoint
                 );
 
                 // Load init file if it exists
@@ -595,6 +596,12 @@ const zodIpc = new ZodIpcConnection({
 
           const signal = AbortSignal.any([_cancelController.signal, timeoutController.signal]);
 
+          // Sentinel context so `task()` calls firing during run / lifecycle
+          // hooks (e.g. via `await import(...)` of a module containing a task
+          // definition) register normally instead of being silently dropped.
+          // Cleared in the surrounding finally below.
+          resourceCatalog.setCurrentFileContext(NO_FILE_CONTEXT, NO_FILE_CONTEXT);
+
           const { result } = await executor.execute(execution, ctx, signal);
 
           if (_isRunning && !_isCancelled) {
@@ -613,6 +620,7 @@ const zodIpc = new ZodIpcConnection({
           }
         } finally {
           standardHeartbeatsManager.stopHeartbeat();
+          resourceCatalog.clearCurrentFileContext();
 
           _execution = undefined;
           _isRunning = false;
diff --git a/packages/core/CHANGELOG.md b/packages/core/CHANGELOG.md
index 56b6d757ed7..33ff1b005cf 100644
--- a/packages/core/CHANGELOG.md
+++ b/packages/core/CHANGELOG.md
@@ -1,5 +1,96 @@
 # internal-platform
 
+## 4.5.0-rc.1
+
+### Patch Changes
+
+- Fix `COULD_NOT_FIND_EXECUTOR` when a task's definition is loaded via `await import(...)` from inside another task's `run()`. The runtime workers now register such tasks with a sentinel file context, and the catalog logs a one-time warning per task id. ([#3688](https://github.com/triggerdotdev/trigger.dev/pull/3688))
+
+## 4.5.0-rc.0
+
+### Patch Changes
+
+- Add Agent Skills for `chat.agent`. Drop a folder with a `SKILL.md` and any helper scripts/references next to your task code, register it with `skills.define({ id, path })`, and the CLI bundles it into the deploy image automatically — no `trigger.config.ts` changes. The agent gets a one-line summary in its system prompt and discovers full instructions on demand via `loadSkill`, with `bash` and `readFile` tools scoped per-skill (path-traversal guards, output caps, abort-signal propagation). ([#3543](https://github.com/triggerdotdev/trigger.dev/pull/3543))
+
+  ```ts
+  const pdfSkill = skills.define({ id: "pdf-extract", path: "./skills/pdf-extract" });
+
+  chat.skills.set([await pdfSkill.local()]);
+  ```
+
+  Built on the [AI SDK cookbook pattern](https://ai-sdk.dev/cookbook/guides/agent-skills) — portable across providers. SDK + CLI only for now; dashboard-editable `SKILL.md` text is on the roadmap.
+
+- Reject overlong `idempotencyKey` values at the API boundary so they no longer trip an internal size limit on the underlying unique index and surface as a generic 500. Inputs are capped at 2048 characters — well above what `idempotencyKeys.create()` produces (a 64-character hash) and above any realistic raw key. Applies to `tasks.trigger`, `tasks.batchTrigger`, `batch.create` (Phase 1 streaming batches), `wait.createToken`, `wait.forDuration`, and the input/session stream waitpoint endpoints. Over-limit requests now return a structured 400 instead. ([#3560](https://github.com/triggerdotdev/trigger.dev/pull/3560))
+- **AI Agents** — run AI SDK chat completions as durable Trigger.dev agents instead of fragile API routes. Define an agent in one function, point `useChat` at it from React, and the conversation survives page refreshes, network blips, and process restarts. ([#3543](https://github.com/triggerdotdev/trigger.dev/pull/3543))
+
+  ```ts
+  import { chat } from "@trigger.dev/sdk/ai";
+  import { streamText } from "ai";
+  import { openai } from "@ai-sdk/openai";
+
+  export const myChat = chat.agent({
+    id: "my-chat",
+    run: async ({ messages, signal }) =>
+      streamText({ model: openai("gpt-4o"), messages, abortSignal: signal }),
+  });
+  ```
+
+  ```tsx
+  import { useChat } from "@ai-sdk/react";
+  import { useTriggerChatTransport } from "@trigger.dev/sdk/chat/react";
+
+  const transport = useTriggerChatTransport({ task: "my-chat", accessToken, startSession });
+  const { messages, sendMessage } = useChat({ transport });
+  ```
+
+  **What you get:**
+
+  - **AI SDK `useChat` integration** — a custom [`ChatTransport`](https://sdk.vercel.ai/docs/ai-sdk-ui/transport) (`useTriggerChatTransport`) plugs straight into Vercel AI SDK's `useChat` hook. Text streaming, tool calls, reasoning, and `data-*` parts all work natively over Trigger.dev's realtime streams. No custom API routes needed.
+  - **First-turn fast path (`chat.headStart`)** — opt-in handler that runs the first turn's `streamText` step in your warm server process while the agent run boots in parallel, cutting cold-start TTFC by roughly half (measured 2801ms → 1218ms on `claude-sonnet-4-6`). The agent owns step 2+ (tool execution, persistence, hooks) so heavy deps stay where they belong. Web Fetch handler works natively in Next.js, Hono, SvelteKit, Remix, Workers, etc.; bridge to Express/Fastify/Koa via `chat.toNodeListener`. New `@trigger.dev/sdk/chat-server` subpath.
+  - **Multi-turn durability via Sessions** — every chat is backed by a durable Session that outlives any individual run. Conversations resume across page refreshes, idle timeout, crashes, and deploys; `resume: true` reconnects via `lastEventId` so clients only see new chunks. `sessions.list` enumerates chats for inbox-style UIs.
+  - **Auto-accumulated history, delta-only wire** — the backend accumulates the full conversation across turns; clients only ship the new message each turn. Long chats never hit the 512 KiB body cap. Register `hydrateMessages` to be the source of truth yourself.
+  - **Lifecycle hooks** — `onPreload`, `onChatStart`, `onValidateMessages`, `hydrateMessages`, `onTurnStart`, `onBeforeTurnComplete`, `onTurnComplete`, `onChatSuspend`, `onChatResume` — for persistence, validation, and post-turn work.
+  - **Stop generation** — client-driven `transport.stopGeneration(chatId)` aborts mid-stream; the run stays alive for the next message, partial response is captured, and aborted parts (stuck `partial-call` tools, in-progress reasoning) are auto-cleaned.
+  - **Tool approvals (HITL)** — tools with `needsApproval: true` pause until the user approves or denies via `addToolApprovalResponse`. The runtime reconciles the updated assistant message by ID and continues `streamText`.
+  - **Steering and background injection** — `pendingMessages` injects user messages between tool-call steps so users can steer the agent mid-execution; `chat.inject()` + `chat.defer()` adds context from background work (self-review, RAG, safety checks) between turns.
+  - **Actions** — non-turn frontend commands (undo, rollback, regenerate, edit) sent via `transport.sendAction`. Fire `hydrateMessages` + `onAction` only — no turn hooks, no `run()`. `onAction` can return a `StreamTextResult` for a model response, or `void` for side-effect-only.
+  - **Typed state primitives** — `chat.local<T>` for per-run state accessible from hooks, `run()`, tools, and subtasks (auto-serialized through `ai.toolExecute`); `chat.store` for typed shared data between agent and client; `chat.history` for reading and mutating the message chain; `clientDataSchema` for typed `clientData` in every hook.
+  - **`chat.toStreamTextOptions()`** — one spread into `streamText` wires up versioned system [Prompts](https://trigger.dev/docs/ai/prompts), model resolution, telemetry metadata, compaction, steering, and background injection.
+  - **Multi-tab coordination** — `multiTab: true` + `useMultiTabChat` prevents duplicate sends and syncs state across browser tabs via `BroadcastChannel`. Non-active tabs go read-only with live updates.
+  - **Network resilience** — built-in indefinite retry with bounded backoff, reconnect on `online` / tab refocus / bfcache restore, `Last-Event-ID` mid-stream resume. No app code needed.
+
+  See [/docs/ai-chat](https://trigger.dev/docs/ai-chat/overview) for the full surface — quick start, three backend approaches (`chat.agent`, `chat.createSession`, raw task), persistence and code-sandbox patterns, type-level guides, and API reference.
+
+- Stamp `gen_ai.conversation.id` (the chat id) on every span and metric emitted from inside a `chat.task` or `chat.agent` run. Lets you filter dashboard spans, runs, and metrics by the chat conversation that produced them — independent of the run boundary, so multi-run chats correlate cleanly. No code changes required on the user side. ([#3543](https://github.com/triggerdotdev/trigger.dev/pull/3543))
+- Fix `LocalsKey<T>` type incompatibility across dual-package builds. The phantom value-type brand no longer uses a module-level `unique symbol`, so a single TypeScript compilation that resolves the type from both the ESM and CJS outputs (which can happen under certain pnpm hoisting layouts) no longer sees two structurally-incompatible variants of the same type. ([#3626](https://github.com/triggerdotdev/trigger.dev/pull/3626))
+- Unit-test `chat.agent` definitions offline with `mockChatAgent` from `@trigger.dev/sdk/ai/test`. Drives a real agent's turn loop in-process — no network, no task runtime — so you can send messages, actions, and stop signals via driver methods, inspect captured output chunks, and verify hooks fire. Pairs with `MockLanguageModelV3` from `ai/test` for model mocking. `setupLocals` lets you pre-seed `locals` (DB clients, service stubs) before `run()` starts. ([#3543](https://github.com/triggerdotdev/trigger.dev/pull/3543))
+
+  The broader `runInMockTaskContext` harness it's built on lives at `@trigger.dev/core/v3/test` — useful for unit-testing any task code, not just chat.
+
+- Retry `TASK_PROCESS_SIGSEGV` task crashes under the user's retry policy instead of failing the run on the first segfault. SIGSEGV in Node tasks is frequently non-deterministic (native addon races, JIT/GC interaction, near-OOM in native code, host issues), so retrying on a fresh process often succeeds. The retry is gated by the task's existing `retry` config + `maxAttempts` — same path `TASK_PROCESS_SIGTERM` and uncaught exceptions already use — so tasks without a retry policy still fail fast. ([#3552](https://github.com/triggerdotdev/trigger.dev/pull/3552))
+- Add `region` to the runs list / retrieve API: filter runs by region (`runs.list({ region: "..." })` / `filter[region]=<masterQueue>`) and read each run's executing region from the new `region` field on the response. ([#3612](https://github.com/triggerdotdev/trigger.dev/pull/3612))
+- **Sessions** — a durable, run-aware stream channel keyed on a stable `externalId`. A Session is the unit of state that owns a multi-run conversation: messages flow through `.in`, responses through `.out`, both survive run boundaries. Sessions back the new `chat.agent` runtime, and you can build on them directly for any pattern that needs durable bi-directional streaming across runs. ([#3542](https://github.com/triggerdotdev/trigger.dev/pull/3542))
+
+  ```ts
+  import { sessions, tasks } from "@trigger.dev/sdk";
+
+  // Trigger a task and subscribe to its session output in one call
+  const { runId, stream } = await tasks.triggerAndSubscribe("my-task", payload, {
+    externalId: "user-456",
+  });
+
+  for await (const chunk of stream) {
+    // ...
+  }
+
+  // Enumerate existing sessions (powers inbox-style UIs without a separate index)
+  for await (const s of sessions.list({ type: "chat.agent", tag: "user:user-456" })) {
+    console.log(s.id, s.externalId, s.createdAt, s.closedAt);
+  }
+  ```
+
+  See [/docs/ai-chat/overview](https://trigger.dev/docs/ai-chat/overview) for the full surface — Sessions powers the durable, resumable chat runtime described there.
+
 ## 4.4.6
 
 ### Patch Changes
diff --git a/packages/core/package.json b/packages/core/package.json
index 1235e0576b3..a1489754ff0 100644
--- a/packages/core/package.json
+++ b/packages/core/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@trigger.dev/core",
-  "version": "4.4.6",
+  "version": "4.5.0-rc.1",
   "description": "Core code used across the Trigger.dev SDK and platform",
   "license": "MIT",
   "publishConfig": {
diff --git a/packages/core/src/v3/apiClient/index.ts b/packages/core/src/v3/apiClient/index.ts
index 64472a349ba..4bbeca8bd31 100644
--- a/packages/core/src/v3/apiClient/index.ts
+++ b/packages/core/src/v3/apiClient/index.ts
@@ -602,6 +602,32 @@ export class ApiClient {
     );
   }
 
+  /** Presigned PUT URL for a `chat.agent` session snapshot. */
+  createChatSnapshotUploadUrl(sessionId: string, requestOptions?: ZodFetchOptions) {
+    return zodfetch(
+      CreateUploadPayloadUrlResponseBody,
+      `${this.baseUrl}/api/v1/sessions/${encodeURIComponent(sessionId)}/snapshot-url`,
+      {
+        method: "PUT",
+        headers: this.#getHeaders(false),
+      },
+      mergeRequestOptions(this.defaultRequestOptions, requestOptions)
+    );
+  }
+
+  /** Presigned GET URL for a `chat.agent` session snapshot. */
+  getChatSnapshotUrl(sessionId: string, requestOptions?: ZodFetchOptions) {
+    return zodfetch(
+      CreateUploadPayloadUrlResponseBody,
+      `${this.baseUrl}/api/v1/sessions/${encodeURIComponent(sessionId)}/snapshot-url`,
+      {
+        method: "GET",
+        headers: this.#getHeaders(false),
+      },
+      mergeRequestOptions(this.defaultRequestOptions, requestOptions)
+    );
+  }
+
   retrieveRun(runId: string, requestOptions?: ZodFetchOptions) {
     return zodfetch(
       RetrieveRunResponse,
diff --git a/packages/core/src/v3/apiClient/runStream.ts b/packages/core/src/v3/apiClient/runStream.ts
index 217b7a51082..4b60bb410fa 100644
--- a/packages/core/src/v3/apiClient/runStream.ts
+++ b/packages/core/src/v3/apiClient/runStream.ts
@@ -233,9 +233,15 @@ export class SSEStreamSubscription implements StreamSubscription {
       // reset the timer naturally.
       stallTimeoutMs?: number;
       // HTTP statuses that should NOT be retried — fail the stream
-      // permanently. `404` (stream gone) and `410` (session closed)
-      // are sensible defaults; tune per-caller for other 4xx.
+      // permanently. Defaults cover the permanent client-error set:
+      // `400` (bad request), `404` (stream gone), `409` (conflict),
+      // `410` (session closed), `422` (unprocessable). Tune per-caller
+      // for other 4xx.
       nonRetryableStatuses?: readonly number[];
+      // Optional fetch override. Used by transports that need to route
+      // the SSE connect through a custom path (proxy, custom headers,
+      // tracing). Defaults to global `fetch`.
+      fetchClient?: typeof fetch;
     }
   ) {
     this.lastEventId = options.lastEventId;
@@ -245,7 +251,9 @@ export class SSEStreamSubscription implements StreamSubscription {
     this.retryJitter = options.retryJitter ?? 0.5;
     this.fetchTimeoutMs = options.fetchTimeoutMs ?? 30_000;
     this.stallTimeoutMs = options.stallTimeoutMs ?? 0;
-    this.nonRetryableStatuses = new Set(options.nonRetryableStatuses ?? [404, 410]);
+    this.nonRetryableStatuses = new Set(
+      options.nonRetryableStatuses ?? [400, 404, 409, 410, 422]
+    );
   }
 
   /**
@@ -331,7 +339,8 @@ export class SSEStreamSubscription implements StreamSubscription {
         headers["Timeout-Seconds"] = this.options.timeoutInSeconds.toString();
       }
 
-      const response = await fetch(this.url, {
+      const fetchClient = this.options.fetchClient ?? fetch;
+      const response = await fetchClient(this.url, {
         headers,
         signal: this.internalAbort.signal,
       });
diff --git a/packages/core/src/v3/realtime-streams-api.ts b/packages/core/src/v3/realtime-streams-api.ts
index d9cd9ecfb45..728399bea6e 100644
--- a/packages/core/src/v3/realtime-streams-api.ts
+++ b/packages/core/src/v3/realtime-streams-api.ts
@@ -6,7 +6,10 @@ export const realtimeStreams = RealtimeStreamsAPI.getInstance();
 
 export * from "./realtimeStreams/types.js";
 export { SessionStreamInstance } from "./realtimeStreams/sessionStreamInstance.js";
-export type { SessionStreamInstanceOptions } from "./realtimeStreams/sessionStreamInstance.js";
+export type {
+  SessionStreamInstanceOptions,
+  InitializeSessionStreamResponseLike,
+} from "./realtimeStreams/sessionStreamInstance.js";
 export {
   trimSessionStream,
   writeSessionControlRecord,
diff --git a/packages/core/src/v3/realtimeStreams/index.ts b/packages/core/src/v3/realtimeStreams/index.ts
index b1c20735808..71854888ee5 100644
--- a/packages/core/src/v3/realtimeStreams/index.ts
+++ b/packages/core/src/v3/realtimeStreams/index.ts
@@ -10,7 +10,10 @@ import {
 // `SessionOutputChannel.pipe` / `.writer` can construct it without reaching
 // into the core package's internals.
 export { SessionStreamInstance } from "./sessionStreamInstance.js";
-export type { SessionStreamInstanceOptions } from "./sessionStreamInstance.js";
+export type {
+  SessionStreamInstanceOptions,
+  InitializeSessionStreamResponseLike,
+} from "./sessionStreamInstance.js";
 export {
   trimSessionStream,
   writeSessionControlRecord,
diff --git a/packages/core/src/v3/realtimeStreams/manager.test.ts b/packages/core/src/v3/realtimeStreams/manager.test.ts
new file mode 100644
index 00000000000..179754bc752
--- /dev/null
+++ b/packages/core/src/v3/realtimeStreams/manager.test.ts
@@ -0,0 +1,147 @@
+import { describe, expect, it, vi } from "vitest";
+import type { ApiClient } from "../apiClient/index.js";
+import { StandardRealtimeStreamsManager } from "./manager.js";
+
+// The cache sits behind a private method so `pipe()` callers never have to
+// thread cache concerns. Tests reach it through a structural type cast to
+// keep the assertions tight on cache contracts and avoid spinning up real
+// `StreamsWriterV1`/`StreamsWriterV2` infrastructure (HTTP requests, S2
+// connections) for what is purely an in-memory dedup check.
+type GetCached = (
+  runId: string,
+  key: string,
+  requestOptions?: undefined
+) => Promise<{ version: string; headers?: Record<string, string> }>;
+
+function getCached(manager: StandardRealtimeStreamsManager, runId: string, key: string) {
+  return (manager as unknown as { getCachedCreateStream: GetCached }).getCachedCreateStream(
+    runId,
+    key
+  );
+}
+
+function makeApiClient(impl: () => Promise<{ version: string; headers?: Record<string, string> }>) {
+  const spy = vi.fn(impl);
+  const client = { createStream: spy } as unknown as ApiClient;
+  return { client, spy };
+}
+
+describe("StandardRealtimeStreamsManager createStream cache", () => {
+  it("dedupes repeated calls for the same (runId, key)", async () => {
+    const { client, spy } = makeApiClient(async () => ({ version: "v1", headers: {} }));
+    const manager = new StandardRealtimeStreamsManager(client, "http://localhost");
+
+    const p1 = getCached(manager, "run-1", "chat");
+    const p2 = getCached(manager, "run-1", "chat");
+
+    expect(p1).toBe(p2);
+    expect(spy).toHaveBeenCalledTimes(1);
+    await Promise.all([p1, p2]);
+    expect(spy).toHaveBeenCalledTimes(1);
+  });
+
+  it("issues a separate PUT for each distinct stream key on the same run", async () => {
+    const { client, spy } = makeApiClient(async () => ({ version: "v1", headers: {} }));
+    const manager = new StandardRealtimeStreamsManager(client, "http://localhost");
+
+    await Promise.all([
+      getCached(manager, "run-1", "chat"),
+      getCached(manager, "run-1", "tool-output"),
+    ]);
+
+    expect(spy).toHaveBeenCalledTimes(2);
+    expect(spy).toHaveBeenNthCalledWith(1, "run-1", "self", "chat", undefined);
+    expect(spy).toHaveBeenNthCalledWith(2, "run-1", "self", "tool-output", undefined);
+  });
+
+  it("issues a separate PUT for each distinct run, even with the same key", async () => {
+    const { client, spy } = makeApiClient(async () => ({ version: "v1", headers: {} }));
+    const manager = new StandardRealtimeStreamsManager(client, "http://localhost");
+
+    await Promise.all([
+      getCached(manager, "run-1", "chat"),
+      getCached(manager, "run-2", "chat"),
+    ]);
+
+    expect(spy).toHaveBeenCalledTimes(2);
+  });
+
+  it("evicts on failure so the next call retries instead of returning a poisoned entry", async () => {
+    const spy = vi
+      .fn()
+      .mockRejectedValueOnce(new Error("boom"))
+      .mockResolvedValueOnce({ version: "v1", headers: {} });
+    const client = { createStream: spy } as unknown as ApiClient;
+    const manager = new StandardRealtimeStreamsManager(client, "http://localhost");
+
+    await expect(getCached(manager, "run-1", "chat")).rejects.toThrow("boom");
+
+    const retried = await getCached(manager, "run-1", "chat");
+
+    expect(retried).toEqual({ version: "v1", headers: {} });
+    expect(spy).toHaveBeenCalledTimes(2);
+  });
+
+  it("reset() clears cached entries so the next call re-PUTs", async () => {
+    const { client, spy } = makeApiClient(async () => ({ version: "v1", headers: {} }));
+    const manager = new StandardRealtimeStreamsManager(client, "http://localhost");
+
+    await getCached(manager, "run-1", "chat");
+    expect(spy).toHaveBeenCalledTimes(1);
+
+    manager.reset();
+
+    await getCached(manager, "run-1", "chat");
+    expect(spy).toHaveBeenCalledTimes(2);
+  });
+
+  it("evictCreateStreamIfStale clears the matching entry so the next call re-PUTs", async () => {
+    const { client, spy } = makeApiClient(async () => ({ version: "v1", headers: {} }));
+    const manager = new StandardRealtimeStreamsManager(client, "http://localhost");
+
+    // Prime the cache and capture which promise was stored.
+    const cachedPromise = getCached(manager, "run-1", "chat");
+    await cachedPromise;
+    expect(spy).toHaveBeenCalledTimes(1);
+
+    // Simulate the reactive invalidation path that `pipe()` runs when a
+    // writer's `wait()` rejects.
+    (
+      manager as unknown as {
+        evictCreateStreamIfStale: (
+          runId: string,
+          key: string,
+          expected: Promise<unknown>
+        ) => void;
+      }
+    ).evictCreateStreamIfStale("run-1", "chat", cachedPromise);
+
+    await getCached(manager, "run-1", "chat");
+    expect(spy).toHaveBeenCalledTimes(2);
+  });
+
+  it("evictCreateStreamIfStale is a no-op when the cache holds a different promise", async () => {
+    const { client, spy } = makeApiClient(async () => ({ version: "v1", headers: {} }));
+    const manager = new StandardRealtimeStreamsManager(client, "http://localhost");
+
+    const original = getCached(manager, "run-1", "chat");
+    await original;
+
+    // A promise other than the cached one (e.g. a stale handle held after a
+    // concurrent caller already refreshed the cache) shouldn't evict anything.
+    const stalePromise = Promise.resolve({ version: "v1", headers: {} });
+    (
+      manager as unknown as {
+        evictCreateStreamIfStale: (
+          runId: string,
+          key: string,
+          expected: Promise<unknown>
+        ) => void;
+      }
+    ).evictCreateStreamIfStale("run-1", "chat", stalePromise);
+
+    // Cache should still hold the original entry; next call is a hit.
+    await getCached(manager, "run-1", "chat");
+    expect(spy).toHaveBeenCalledTimes(1);
+  });
+});
diff --git a/packages/core/src/v3/realtimeStreams/manager.ts b/packages/core/src/v3/realtimeStreams/manager.ts
index beda3535fb4..f4d915acc3f 100644
--- a/packages/core/src/v3/realtimeStreams/manager.ts
+++ b/packages/core/src/v3/realtimeStreams/manager.ts
@@ -1,7 +1,8 @@
 import { ApiClient } from "../apiClient/index.js";
 import { ensureAsyncIterable, ensureReadableStream } from "../streams/asyncIterableStream.js";
+import { AnyZodFetchOptions } from "../zodfetch.js";
 import { taskContext } from "../task-context-api.js";
-import { StreamInstance } from "./streamInstance.js";
+import { CreateStreamResponseLike, StreamInstance } from "./streamInstance.js";
 import {
   RealtimeStreamInstance,
   RealtimeStreamOperationOptions,
@@ -21,8 +22,60 @@ export class StandardRealtimeStreamsManager implements RealtimeStreamsManager {
     abortController: AbortController;
   }>();
 
+  // Cache of in-flight / resolved `createStream` responses, keyed by
+  // `${runId}:${key}`. S2 v2 access tokens are scoped to the org basin
+  // (default 1-day TTL server-side) so reusing them across repeated
+  // `pipe()` calls for the same `(runId, key)` is safe, and avoids the
+  // per-call PUT that pushes `streamId` onto `TaskRun.realtimeStreams`,
+  // which under chat-agent-style hot-loop writers caused row-lock
+  // contention on the writer DB.
+  private createStreamCache = new Map<string, Promise<CreateStreamResponseLike>>();
+
   reset(): void {
     this.activeStreams.clear();
+    this.createStreamCache.clear();
+  }
+
+  private getCachedCreateStream(
+    runId: string,
+    key: string,
+    requestOptions: AnyZodFetchOptions | undefined
+  ): Promise<CreateStreamResponseLike> {
+    const cacheKey = `${runId}:${key}`;
+    const cached = this.createStreamCache.get(cacheKey);
+    if (cached) {
+      return cached;
+    }
+
+    const promise = this.apiClient.createStream(runId, "self", key, requestOptions);
+    this.createStreamCache.set(cacheKey, promise);
+    // Evict on failure so the next call retries instead of returning a
+    // poisoned cache entry forever.
+    promise.catch((err) => {
+      if (this.createStreamCache.get(cacheKey) === promise) {
+        this.createStreamCache.delete(cacheKey);
+      }
+    });
+    return promise;
+  }
+
+  /**
+   * Reactive invalidation: a writer's `wait()` rejecting can mean the
+   * cached S2 credentials have gone stale (expired token, revoked
+   * access, basin retired), so evict the cached `createStream` response
+   * for `(runId, key)` and let the next `pipe()` re-PUT to mint fresh
+   * credentials. Compare by identity so a fresh promise installed by a
+   * concurrent caller isn't accidentally cleared.
+   */
+  private evictCreateStreamIfStale(
+    runId: string,
+    key: string,
+    expected: Promise<CreateStreamResponseLike>
+  ): void {
+    const cacheKey = `${runId}:${key}`;
+    if (this.createStreamCache.get(cacheKey) === expected) {
+      this.createStreamCache.delete(cacheKey);
+    }
   }
 
   public pipe<T>(
@@ -48,6 +101,15 @@ export class StandardRealtimeStreamsManager implements RealtimeStreamsManager {
       ? AbortSignal.any?.([options.signal, abortController.signal]) ?? abortController.signal
       : abortController.signal;
 
+    // Capture which cached promise this writer uses so reactive
+    // invalidation below evicts only if the cache still holds it (a
+    // concurrent caller may have already refreshed it).
+    const activeCreatePromise = this.getCachedCreateStream(
+      runId,
+      key,
+      options?.requestOptions
+    );
+
     const streamInstance = new StreamInstance({
       apiClient: this.apiClient,
       baseUrl: this.baseUrl,
@@ -58,14 +120,29 @@ export class StandardRealtimeStreamsManager implements RealtimeStreamsManager {
       requestOptions: options?.requestOptions,
       target: options?.target,
       debug: this.debug,
+      createStream: () => activeCreatePromise,
     });
 
     // Register this stream
     const streamInfo = { wait: () => streamInstance.wait(), abortController };
     this.activeStreams.add(streamInfo);
 
-    // Clean up when stream completes
-    streamInstance.wait().finally(() => this.activeStreams.delete(streamInfo));
+    // Single internal chain that handles activeStreams cleanup AND
+    // reactive invalidation. On rejection we evict the cached
+    // `createStream` entry so the next pipe() for the same `(runId, key)`
+    // re-PUTs and recovers (e.g. when a cached S2 access token expired
+    // mid-process). Customer awaiters still observe the rejection via
+    // the returned `wait()`; this chain just keeps the cleanup path
+    // from surfacing as unhandled.
+    streamInstance.wait().then(
+      () => {
+        this.activeStreams.delete(streamInfo);
+      },
+      (err) => {
+        this.evictCreateStreamIfStale(runId, key, activeCreatePromise);
+        this.activeStreams.delete(streamInfo);
+      }
+    );
 
     return {
       wait: () => streamInstance.wait(),
diff --git a/packages/core/src/v3/realtimeStreams/sessionStreamInstance.ts b/packages/core/src/v3/realtimeStreams/sessionStreamInstance.ts
index 11eb7290edc..73bec591d9e 100644
--- a/packages/core/src/v3/realtimeStreams/sessionStreamInstance.ts
+++ b/packages/core/src/v3/realtimeStreams/sessionStreamInstance.ts
@@ -4,6 +4,10 @@ import { AnyZodFetchOptions } from "../zodfetch.js";
 import { StreamsWriterV2 } from "./streamsWriterV2.js";
 import { StreamsWriter, StreamWriteResult } from "./types.js";
 
+export type InitializeSessionStreamResponseLike = {
+  headers?: Record<string, string>;
+};
+
 export type SessionStreamInstanceOptions<T> = {
   apiClient: ApiClient;
   baseUrl: string;
@@ -13,6 +17,14 @@ export type SessionStreamInstanceOptions<T> = {
   signal?: AbortSignal;
   requestOptions?: AnyZodFetchOptions;
   debug?: boolean;
+  /**
+   * Optional override for the initialize-session-stream call. Defaults to
+   * `apiClient.initializeSessionStream(sessionId, io, requestOptions)`. The
+   * channel passes a cached version so repeated `pipe()` / `writer()`
+   * calls for the same `(sessionId, io)` share a single PUT instead of
+   * hammering the server on every chunk.
+   */
+  initializeSession?: () => Promise<InitializeSessionStreamResponseLike>;
 };
 
 /**
@@ -31,11 +43,16 @@ export class SessionStreamInstance<T> implements StreamsWriter {
   }
 
   private async initializeWriter(): Promise<StreamsWriterV2<T>> {
-    const response = await this.options.apiClient.initializeSessionStream(
-      this.options.sessionId,
-      this.options.io,
-      this.options?.requestOptions
-    );
+    const initializeFn =
+      this.options.initializeSession ??
+      (() =>
+        this.options.apiClient.initializeSessionStream(
+          this.options.sessionId,
+          this.options.io,
+          this.options?.requestOptions
+        ));
+
+    const response = await initializeFn();
 
     const headers = response.headers ?? {};
     const accessToken = headers["x-s2-access-token"];
diff --git a/packages/core/src/v3/realtimeStreams/streamInstance.ts b/packages/core/src/v3/realtimeStreams/streamInstance.ts
index 07ee0158bfb..e5cd3f84aea 100644
--- a/packages/core/src/v3/realtimeStreams/streamInstance.ts
+++ b/packages/core/src/v3/realtimeStreams/streamInstance.ts
@@ -5,6 +5,11 @@ import { StreamsWriterV1 } from "./streamsWriterV1.js";
 import { StreamsWriterV2 } from "./streamsWriterV2.js";
 import { StreamsWriter, StreamWriteResult } from "./types.js";
 
+export type CreateStreamResponseLike = {
+  version: string;
+  headers?: Record<string, string>;
+};
+
 export type StreamInstanceOptions<T> = {
   apiClient: ApiClient;
   baseUrl: string;
@@ -15,6 +20,14 @@ export type StreamInstanceOptions<T> = {
   requestOptions?: AnyZodFetchOptions;
   target?: "self" | "parent" | "root" | string;
   debug?: boolean;
+  /**
+   * Optional override for the create-stream call. Defaults to
+   * `apiClient.createStream(runId, "self", key, requestOptions)`. The
+   * manager passes a cached version so repeated `pipe()` calls for the
+   * same `(runId, key)` share a single PUT instead of hammering the
+   * server on every chunk.
+   */
+  createStream?: () => Promise<CreateStreamResponseLike>;
 };
 
 type StreamsWriterInstance<T> = StreamsWriterV1<T> | StreamsWriterV2<T>;
@@ -27,12 +40,17 @@ export class StreamInstance<T> implements StreamsWriter {
   }
 
   private async initializeWriter(): Promise<StreamsWriterInstance<T>> {
-    const { version, headers } = await this.options.apiClient.createStream(
-      this.options.runId,
-      "self",
-      this.options.key,
-      this.options?.requestOptions
-    );
+    const createStreamFn =
+      this.options.createStream ??
+      (() =>
+        this.options.apiClient.createStream(
+          this.options.runId,
+          "self",
+          this.options.key,
+          this.options?.requestOptions
+        ));
+
+    const { version, headers } = await createStreamFn();
 
     const parsedResponse = parseCreateStreamResponse(version, headers);
 
diff --git a/packages/core/src/v3/resource-catalog/standardResourceCatalog.ts b/packages/core/src/v3/resource-catalog/standardResourceCatalog.ts
index 0a67a4fd9a4..6333706317f 100644
--- a/packages/core/src/v3/resource-catalog/standardResourceCatalog.ts
+++ b/packages/core/src/v3/resource-catalog/standardResourceCatalog.ts
@@ -12,6 +12,18 @@ import {
 import { PromptMetadataWithFunctions, TaskMetadataWithFunctions, TaskSchema } from "../types/index.js";
 import { ResourceCatalog } from "./catalog.js";
 
+/**
+ * Sentinel file-context value the runtime workers set around task execution
+ * (via `TaskExecutor.execute`) so that `task()` calls firing during a run —
+ * e.g. as a side effect of `await import(...)` of a module containing a
+ * task definition — register normally instead of hitting the silent-drop
+ * guard in `registerTaskMetadata`. The catalog uses this exact string to
+ * detect "registered during execution" and emit a one-time warning per
+ * task id. The indexer never sets this context, so its behavior is
+ * unchanged.
+ */
+export const NO_FILE_CONTEXT = "<no-context>";
+
 export class StandardResourceCatalog implements ResourceCatalog {
   private _taskSchemas: Map<string, TaskSchema> = new Map();
   private _taskMetadata: Map<string, TaskMetadata> = new Map();
@@ -25,6 +37,7 @@ export class StandardResourceCatalog implements ResourceCatalog {
   private _queueMetadata: Map<string, QueueManifest> = new Map();
   private _skillMetadata: Map<string, SkillMetadata> = new Map();
   private _skillFileMetadata: Map<string, TaskFileMetadata> = new Map();
+  private _sentinelContextWarned: Set<string> = new Set();
 
   setCurrentFileContext(filePath: string, entryPoint: string) {
     this._currentFileContext = { filePath, entryPoint };
@@ -77,6 +90,20 @@ export class StandardResourceCatalog implements ResourceCatalog {
       return;
     }
 
+    // When the current context is the sentinel set by TaskExecutor around a
+    // run, the task() call fired during execution — most commonly via a
+    // dynamic import inside another task's run(). Warn once per task id so
+    // the pattern stays visible.
+    if (
+      this._currentFileContext.filePath === NO_FILE_CONTEXT &&
+      !this._sentinelContextWarned.has(task.id)
+    ) {
+      this._sentinelContextWarned.add(task.id);
+      console.warn(
+        `[trigger.dev] task "${task.id}" was registered via dynamic import during another task's run(); move to a static import if you notice any issues.`
+      );
+    }
+
     this._taskFileMetadata.set(task.id, {
       ...this._currentFileContext,
     });
diff --git a/packages/core/src/v3/schemas/api.ts b/packages/core/src/v3/schemas/api.ts
index 6cb746762c0..e86e503de47 100644
--- a/packages/core/src/v3/schemas/api.ts
+++ b/packages/core/src/v3/schemas/api.ts
@@ -1995,14 +1995,21 @@ export type SendInputStreamResponseBody = z.infer<typeof SendInputStreamResponse
  * Response body for `GET /realtime/v1/sessions/:id/:io/records`. A non-SSE,
  * `wait=0` drain of a session channel — used at run boot for snapshot
  * replay where the SSE long-poll tax (~1s on empty streams) was the
- * dominant cost. The shape mirrors the webapp's internal `StreamRecord`
- * type (`apps/webapp/app/services/realtime/types.ts`); each record's
- * `data` is a JSON-encoded chunk body that callers parse client-side.
+ * dominant cost.
+ *
+ * `data` is the parsed chunk body (the SDK writer puts the chunk object
+ * directly into the S2 record envelope; the route unwraps the envelope
+ * and forwards the inner object as-is). Callers use it directly — no
+ * additional JSON.parse step. Schema is `z.unknown()` because chunk
+ * shape varies by `chunk.type` (the AI SDK's `UIMessageChunk`
+ * discriminated union plus Trigger control records); consumers
+ * already runtime-check on the discriminator and tolerate malformed
+ * records by skipping them.
  */
 export const ReadSessionStreamRecordsResponseBody = z.object({
   records: z.array(
     z.object({
-      data: z.string(),
+      data: z.unknown(),
       id: z.string(),
       seqNum: z.number(),
     })
diff --git a/packages/core/src/v3/workers/index.ts b/packages/core/src/v3/workers/index.ts
index 8ac06930328..14515cd0d25 100644
--- a/packages/core/src/v3/workers/index.ts
+++ b/packages/core/src/v3/workers/index.ts
@@ -10,7 +10,10 @@ export {
   recordSpanException,
   carrierFromContext,
 } from "../otel/index.js";
-export { StandardResourceCatalog } from "../resource-catalog/standardResourceCatalog.js";
+export {
+  StandardResourceCatalog,
+  NO_FILE_CONTEXT,
+} from "../resource-catalog/standardResourceCatalog.js";
 export {
   TaskContextSpanProcessor,
   TaskContextLogProcessor,
diff --git a/packages/core/test/fixtures/dynamic-task-module.mjs b/packages/core/test/fixtures/dynamic-task-module.mjs
new file mode 100644
index 00000000000..5d2f6719593
--- /dev/null
+++ b/packages/core/test/fixtures/dynamic-task-module.mjs
@@ -0,0 +1,19 @@
+// Fixture mimicking a task entrypoint file: top-level code calls into the
+// catalog (the same way `task()` / `schemaTask()` does via
+// `registerTaskMetadata`).
+//
+// Loaded via `await import()` from inside a test that simulates the worker
+// running a task. The point is to exercise top-level evaluation through Node's
+// ESM module loader so the module-cache semantics are real.
+
+const register = globalThis.__catalogRegisterTaskMetadata;
+if (typeof register === "function") {
+  register({
+    id: "lazy-task",
+    fns: {
+      run: async () => "ok",
+    },
+  });
+}
+
+export const lazyTask = { id: "lazy-task" };
diff --git a/packages/core/test/resourceCatalog.test.ts b/packages/core/test/resourceCatalog.test.ts
new file mode 100644
index 00000000000..fa7270f669c
--- /dev/null
+++ b/packages/core/test/resourceCatalog.test.ts
@@ -0,0 +1,154 @@
+import { afterEach, describe, expect, it, vi } from "vitest";
+import {
+  NO_FILE_CONTEXT,
+  StandardResourceCatalog,
+} from "../src/v3/resource-catalog/standardResourceCatalog.js";
+
+// Regression tests for COULD_NOT_FIND_EXECUTOR on warm worker processes when
+// a task's `task()` / `schemaTask()` call is evaluated during another task's
+// execution (e.g. as a side effect of `await import(...)` of a module that
+// contains a task definition).
+//
+// Production throw site:
+//   - managed-run-worker.ts:566 (post-wrap)
+//   - dev-run-worker.ts:578 (post-wrap)
+// Pre-fix symptom: `resourceCatalog.getTask(execution.task.id)` returned
+// undefined even after the worker re-imported the task entrypoint.
+//
+// Pre-fix mechanism: `registerTaskMetadata` silently returned when
+// `_currentFileContext` was unset. Any `task()` call firing during a
+// running task's run() / lifecycle hooks (directly, or transitively via a
+// dynamic import) hit the silent guard. Node's ESM module cache then
+// prevented recovery — the worker's setContext + re-import fallback didn't
+// re-evaluate the module body, so the `task()` call never fired again.
+//
+// Post-fix: the runtime workers wrap their `executor.execute(...)` call with
+// `setCurrentFileContext(NO_FILE_CONTEXT, NO_FILE_CONTEXT)` so any `task()`
+// call firing during execution registers normally with sentinel file
+// metadata. The catalog detects the sentinel and emits a one-time warning
+// per task id to keep the bundle-shape pattern visible. The indexer never
+// sets this sentinel context — its behavior is unchanged.
+
+describe("StandardResourceCatalog — runtime registration via sentinel context", () => {
+  afterEach(() => {
+    delete (globalThis as { __catalogRegisterTaskMetadata?: unknown })
+      .__catalogRegisterTaskMetadata;
+    vi.restoreAllMocks();
+  });
+
+  it("silently drops registration when no context is set (indexer's invariant)", () => {
+    const warn = vi.spyOn(console, "warn").mockImplementation(() => {});
+    const catalog = new StandardResourceCatalog();
+
+    catalog.registerTaskMetadata({
+      id: "no-context-task",
+      fns: { run: async () => "ok" },
+    });
+
+    expect(catalog.getTask("no-context-task")).toBeUndefined();
+    expect(warn).not.toHaveBeenCalled();
+  });
+
+  it(
+    "registers normally and warns once when the sentinel context is set " +
+      "(simulates the worker's executor wrap)",
+    () => {
+      const warn = vi.spyOn(console, "warn").mockImplementation(() => {});
+      const catalog = new StandardResourceCatalog();
+
+      catalog.setCurrentFileContext(NO_FILE_CONTEXT, NO_FILE_CONTEXT);
+      catalog.registerTaskMetadata({
+        id: "lazy-task",
+        fns: { run: async () => "ok" },
+      });
+      catalog.clearCurrentFileContext();
+
+      const registered = catalog.getTask("lazy-task");
+      expect(registered).toBeDefined();
+      expect(registered?.id).toBe("lazy-task");
+      expect(registered?.filePath).toBe(NO_FILE_CONTEXT);
+      expect(registered?.entryPoint).toBe(NO_FILE_CONTEXT);
+      expect(warn).toHaveBeenCalledTimes(1);
+      expect(warn.mock.calls[0]?.[0]).toContain("lazy-task");
+    }
+  );
+
+  it(
+    "warm-start path: a task whose top-level definition fires during a " +
+      "dynamic import inside the sentinel wrap remains findable; the " +
+      "worker's setContext + re-import fallback (managed-run-worker.ts:482) " +
+      "is not needed",
+    async () => {
+      vi.spyOn(console, "warn").mockImplementation(() => {});
+      const catalog = new StandardResourceCatalog();
+
+      (globalThis as { __catalogRegisterTaskMetadata?: unknown })
+        .__catalogRegisterTaskMetadata = (
+        task: Parameters<StandardResourceCatalog["registerTaskMetadata"]>[0]
+      ) => {
+        catalog.registerTaskMetadata(task);
+      };
+
+      // Simulate the worker wrap: setContext(NO_FILE_CONTEXT) → run user code
+      // (which does a dynamic import) → clearContext.
+      catalog.setCurrentFileContext(NO_FILE_CONTEXT, NO_FILE_CONTEXT);
+      await import("./fixtures/dynamic-task-module.mjs");
+      catalog.clearCurrentFileContext();
+
+      const registered = catalog.getTask("lazy-task");
+      expect(registered).toBeDefined();
+      expect(registered?.filePath).toBe(NO_FILE_CONTEXT);
+    }
+  );
+
+  it("warns at most once per task id under the sentinel context", () => {
+    const warn = vi.spyOn(console, "warn").mockImplementation(() => {});
+    const catalog = new StandardResourceCatalog();
+
+    catalog.setCurrentFileContext(NO_FILE_CONTEXT, NO_FILE_CONTEXT);
+
+    const register = (id: string) =>
+      catalog.registerTaskMetadata({
+        id,
+        fns: { run: async () => "ok" },
+      });
+
+    register("task-a");
+    register("task-a");
+    register("task-a");
+    expect(warn).toHaveBeenCalledTimes(1);
+
+    register("task-b");
+    expect(warn).toHaveBeenCalledTimes(2);
+
+    catalog.clearCurrentFileContext();
+  });
+
+  it(
+    "control: real file context registers without firing the sentinel warning",
+    async () => {
+      const warn = vi.spyOn(console, "warn").mockImplementation(() => {});
+      const catalog = new StandardResourceCatalog();
+
+      (globalThis as { __catalogRegisterTaskMetadata?: unknown })
+        .__catalogRegisterTaskMetadata = (
+        task: Parameters<StandardResourceCatalog["registerTaskMetadata"]>[0]
+      ) => {
+        catalog.registerTaskMetadata(task);
+      };
+
+      catalog.setCurrentFileContext(
+        "/app/dist/lazy-task.entry.mjs",
+        "src/tasks/lazy-task.ts"
+      );
+      await import("./fixtures/dynamic-task-module.mjs?control");
+      catalog.clearCurrentFileContext();
+
+      const task = catalog.getTask("lazy-task");
+      expect(task).toBeDefined();
+      expect(task?.filePath).toBe("/app/dist/lazy-task.entry.mjs");
+      expect(task?.entryPoint).toBe("src/tasks/lazy-task.ts");
+      expect(warn).not.toHaveBeenCalled();
+    }
+  );
+});
diff --git a/packages/plugins/CHANGELOG.md b/packages/plugins/CHANGELOG.md
index ed4841c95e1..7b8cb1ceb5d 100644
--- a/packages/plugins/CHANGELOG.md
+++ b/packages/plugins/CHANGELOG.md
@@ -1,5 +1,20 @@
 # @trigger.dev/plugins
 
+## 4.5.0-rc.1
+
+### Patch Changes
+
+- Updated dependencies:
+  - `@trigger.dev/core@4.5.0-rc.1`
+
+## 4.5.0-rc.0
+
+### Patch Changes
+
+- Add the public interfaces for a plugin system, initially consolidating the authentication and authorization interfaces. ([#3499](https://github.com/triggerdotdev/trigger.dev/pull/3499))
+- Updated dependencies:
+  - `@trigger.dev/core@4.5.0-rc.0`
+
 ## 0.0.0-prerelease-20260506134321
 
 ### Patch Changes
diff --git a/packages/plugins/package.json b/packages/plugins/package.json
index 7b52afb7b87..7ae7348869a 100644
--- a/packages/plugins/package.json
+++ b/packages/plugins/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@trigger.dev/plugins",
-  "version": "4.4.6",
+  "version": "4.5.0-rc.1",
   "description": "Plugin contracts and interfaces for Trigger.dev",
   "license": "MIT",
   "publishConfig": {
diff --git a/packages/python/CHANGELOG.md b/packages/python/CHANGELOG.md
index 357e7dc1cd3..bdee0d00245 100644
--- a/packages/python/CHANGELOG.md
+++ b/packages/python/CHANGELOG.md
@@ -1,5 +1,23 @@
 # @trigger.dev/python
 
+## 4.5.0-rc.1
+
+### Patch Changes
+
+- Updated dependencies:
+  - `@trigger.dev/core@4.5.0-rc.1`
+  - `@trigger.dev/build@4.5.0-rc.1`
+  - `@trigger.dev/sdk@4.5.0-rc.1`
+
+## 4.5.0-rc.0
+
+### Patch Changes
+
+- Updated dependencies:
+  - `@trigger.dev/sdk@4.5.0-rc.0`
+  - `@trigger.dev/core@4.5.0-rc.0`
+  - `@trigger.dev/build@4.5.0-rc.0`
+
 ## 4.4.6
 
 ### Patch Changes
diff --git a/packages/python/package.json b/packages/python/package.json
index be93677702d..9e262b6bec9 100644
--- a/packages/python/package.json
+++ b/packages/python/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@trigger.dev/python",
-  "version": "4.4.6",
+  "version": "4.5.0-rc.1",
   "description": "Python runtime and build extension for Trigger.dev",
   "license": "MIT",
   "publishConfig": {
@@ -45,7 +45,7 @@
     "check-exports": "attw --pack ."
   },
   "dependencies": {
-    "@trigger.dev/core": "workspace:4.4.6",
+    "@trigger.dev/core": "workspace:4.5.0-rc.1",
     "tinyexec": "^0.3.2"
   },
   "devDependencies": {
@@ -56,12 +56,12 @@
     "tsx": "4.17.0",
     "esbuild": "^0.23.0",
     "@arethetypeswrong/cli": "^0.15.4",
-    "@trigger.dev/build": "workspace:4.4.6",
-    "@trigger.dev/sdk": "workspace:4.4.6"
+    "@trigger.dev/build": "workspace:4.5.0-rc.1",
+    "@trigger.dev/sdk": "workspace:4.5.0-rc.1"
   },
   "peerDependencies": {
-    "@trigger.dev/sdk": "workspace:^4.4.6",
-    "@trigger.dev/build": "workspace:^4.4.6"
+    "@trigger.dev/sdk": "workspace:^4.5.0-rc.1",
+    "@trigger.dev/build": "workspace:^4.5.0-rc.1"
   },
   "engines": {
     "node": ">=18.20.0"
diff --git a/packages/react-hooks/CHANGELOG.md b/packages/react-hooks/CHANGELOG.md
index fcbc0bb7be6..6f8b063552f 100644
--- a/packages/react-hooks/CHANGELOG.md
+++ b/packages/react-hooks/CHANGELOG.md
@@ -1,5 +1,19 @@
 # @trigger.dev/react-hooks
 
+## 4.5.0-rc.1
+
+### Patch Changes
+
+- Updated dependencies:
+  - `@trigger.dev/core@4.5.0-rc.1`
+
+## 4.5.0-rc.0
+
+### Patch Changes
+
+- Updated dependencies:
+  - `@trigger.dev/core@4.5.0-rc.0`
+
 ## 4.4.6
 
 ### Patch Changes
diff --git a/packages/react-hooks/package.json b/packages/react-hooks/package.json
index 96a4a90ed8a..6a32e4521d0 100644
--- a/packages/react-hooks/package.json
+++ b/packages/react-hooks/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@trigger.dev/react-hooks",
-  "version": "4.4.6",
+  "version": "4.5.0-rc.1",
   "description": "trigger.dev react hooks",
   "license": "MIT",
   "publishConfig": {
@@ -37,7 +37,7 @@
     "check-exports": "attw --pack ."
   },
   "dependencies": {
-    "@trigger.dev/core": "workspace:^4.4.6",
+    "@trigger.dev/core": "workspace:^4.5.0-rc.1",
     "swr": "^2.2.5"
   },
   "devDependencies": {
diff --git a/packages/redis-worker/CHANGELOG.md b/packages/redis-worker/CHANGELOG.md
index 5bad65ed478..cb6957b5ea2 100644
--- a/packages/redis-worker/CHANGELOG.md
+++ b/packages/redis-worker/CHANGELOG.md
@@ -1,5 +1,25 @@
 # @trigger.dev/redis-worker
 
+## 4.5.0-rc.1
+
+### Patch Changes
+
+- Updated dependencies:
+  - `@trigger.dev/core@4.5.0-rc.1`
+
+## 4.5.0-rc.0
+
+### Patch Changes
+
+- Add MollifierBuffer and MollifierDrainer primitives for trigger burst smoothing. ([#3614](https://github.com/triggerdotdev/trigger.dev/pull/3614))
+
+  MollifierBuffer (`accept`, `pop`, `ack`, `requeue`, `fail`, `evaluateTrip`) is a per-env FIFO over Redis with atomic Lua transitions for status tracking. `evaluateTrip` is a sliding-window trip evaluator the webapp gate uses to detect per-env trigger bursts.
+
+  MollifierDrainer pops entries through a polling loop with a user-supplied handler. The loop survives transient Redis errors via capped exponential backoff (up to 5s), and per-env pop failures don't poison the rest of the batch — one env's blip is logged and counted as failed for that tick. Rotation is two-level: orgs at the top, envs within each org. The buffer maintains `mollifier:orgs` and `mollifier:org-envs:${orgId}` atomically with per-env queues, so the drainer walks orgs → envs directly without an in-memory cache. The `maxOrgsPerTick` option (default 500) caps how many orgs are scheduled per tick; for each picked org, one env is popped (rotating round-robin within the org). An org with N envs gets the same per-tick scheduling slot as an org with 1 env, so tenant-level drainage throughput is determined by org count rather than env count.
+
+- Updated dependencies:
+  - `@trigger.dev/core@4.5.0-rc.0`
+
 ## 4.4.6
 
 ### Patch Changes
diff --git a/packages/redis-worker/package.json b/packages/redis-worker/package.json
index df8bd8a6b40..318b25a43da 100644
--- a/packages/redis-worker/package.json
+++ b/packages/redis-worker/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@trigger.dev/redis-worker",
-  "version": "4.4.6",
+  "version": "4.5.0-rc.1",
   "description": "Redis worker for trigger.dev",
   "license": "MIT",
   "publishConfig": {
@@ -23,7 +23,7 @@
     "test": "vitest --sequence.concurrent=false --no-file-parallelism"
   },
   "dependencies": {
-    "@trigger.dev/core": "workspace:4.4.6",
+    "@trigger.dev/core": "workspace:4.5.0-rc.1",
     "lodash.omit": "^4.5.0",
     "nanoid": "^5.0.7",
     "p-limit": "^6.2.0",
diff --git a/packages/redis-worker/src/index.ts b/packages/redis-worker/src/index.ts
index 1c5147ea48d..e5e3db32f12 100644
--- a/packages/redis-worker/src/index.ts
+++ b/packages/redis-worker/src/index.ts
@@ -4,3 +4,4 @@ export * from "./utils.js";
 
 // Fair Queue System
 export * from "./fair-queue/index.js";
+export * from "./mollifier/index.js";
diff --git a/packages/redis-worker/src/mollifier/buffer.test.ts b/packages/redis-worker/src/mollifier/buffer.test.ts
new file mode 100644
index 00000000000..c8f7b95c97a
--- /dev/null
+++ b/packages/redis-worker/src/mollifier/buffer.test.ts
@@ -0,0 +1,1027 @@
+import { describe, expect, it } from "vitest";
+import { BufferEntrySchema, serialiseSnapshot, deserialiseSnapshot } from "./schemas.js";
+import { redisTest } from "@internal/testcontainers";
+import { Logger } from "@trigger.dev/core/logger";
+import { MollifierBuffer } from "./buffer.js";
+
+describe("schemas", () => {
+  it("serialiseSnapshot then deserialiseSnapshot is identity for plain objects", () => {
+    const snapshot = { taskId: "my-task", payload: { foo: 42, bar: "baz" } };
+    const round = deserialiseSnapshot(serialiseSnapshot(snapshot));
+    expect(round).toEqual(snapshot);
+  });
+
+  it("BufferEntrySchema parses a complete entry", () => {
+    const raw = {
+      runId: "run_abc",
+      envId: "env_1",
+      orgId: "org_1",
+      payload: serialiseSnapshot({ taskId: "t" }),
+      status: "QUEUED",
+      attempts: "0",
+      createdAt: "2026-05-11T10:00:00.000Z",
+    };
+    const parsed = BufferEntrySchema.parse(raw);
+    expect(parsed.runId).toBe("run_abc");
+    expect(parsed.status).toBe("QUEUED");
+    expect(parsed.attempts).toBe(0);
+    expect(parsed.createdAt).toBeInstanceOf(Date);
+  });
+
+  it("BufferEntrySchema parses a FAILED entry with lastError", () => {
+    const raw = {
+      runId: "run_abc",
+      envId: "env_1",
+      orgId: "org_1",
+      payload: serialiseSnapshot({}),
+      status: "FAILED",
+      attempts: "3",
+      createdAt: "2026-05-11T10:00:00.000Z",
+      lastError: JSON.stringify({ code: "P2024", message: "connection lost" }),
+    };
+    const parsed = BufferEntrySchema.parse(raw);
+    expect(parsed.lastError).toEqual({ code: "P2024", message: "connection lost" });
+  });
+});
+
+describe("MollifierBuffer construction", () => {
+  redisTest("constructs and closes cleanly", { timeout: 20_000 }, async ({ redisContainer }) => {
+    const buffer = new MollifierBuffer({
+      redisOptions: {
+        host: redisContainer.getHost(),
+        port: redisContainer.getPort(),
+        password: redisContainer.getPassword(),
+      },
+      entryTtlSeconds: 600,
+      logger: new Logger("test", "log"),
+    });
+
+    await buffer.close();
+  });
+});
+
+describe("MollifierBuffer.accept", () => {
+  redisTest("accept writes entry, enqueues, and tracks env", { timeout: 20_000 }, async ({ redisContainer }) => {
+    const buffer = new MollifierBuffer({
+      redisOptions: {
+        host: redisContainer.getHost(),
+        port: redisContainer.getPort(),
+        password: redisContainer.getPassword(),
+      },
+      entryTtlSeconds: 600,
+      logger: new Logger("test", "log"),
+    });
+
+    try {
+      await buffer.accept({
+        runId: "run_1",
+        envId: "env_a",
+        orgId: "org_1",
+        payload: serialiseSnapshot({ taskId: "t" }),
+      });
+
+      const entry = await buffer.getEntry("run_1");
+      expect(entry).not.toBeNull();
+      expect(entry!.runId).toBe("run_1");
+      expect(entry!.envId).toBe("env_a");
+      expect(entry!.orgId).toBe("org_1");
+      expect(entry!.status).toBe("QUEUED");
+      expect(entry!.attempts).toBe(0);
+      expect(entry!.createdAt).toBeInstanceOf(Date);
+
+      const envs = await buffer.listEnvsForOrg("org_1");
+      expect(envs).toContain("env_a");
+    } finally {
+      await buffer.close();
+    }
+  });
+});
+
+describe("MollifierBuffer.pop", () => {
+  redisTest("pop returns next QUEUED entry and transitions to DRAINING", { timeout: 20_000 }, async ({ redisContainer }) => {
+    const buffer = new MollifierBuffer({
+      redisOptions: {
+        host: redisContainer.getHost(),
+        port: redisContainer.getPort(),
+        password: redisContainer.getPassword(),
+      },
+      entryTtlSeconds: 600,
+      logger: new Logger("test", "log"),
+    });
+
+    try {
+      await buffer.accept({ runId: "run_1", envId: "env_a", orgId: "org_1", payload: "{}" });
+      await buffer.accept({ runId: "run_2", envId: "env_a", orgId: "org_1", payload: "{}" });
+
+      const popped = await buffer.pop("env_a");
+      expect(popped).not.toBeNull();
+      expect(popped!.runId).toBe("run_1");
+      expect(popped!.status).toBe("DRAINING");
+
+      const stored = await buffer.getEntry("run_1");
+      expect(stored!.status).toBe("DRAINING");
+    } finally {
+      await buffer.close();
+    }
+  });
+
+  redisTest("pop returns null when env queue is empty", { timeout: 20_000 }, async ({ redisContainer }) => {
+    const buffer = new MollifierBuffer({
+      redisOptions: {
+        host: redisContainer.getHost(),
+        port: redisContainer.getPort(),
+        password: redisContainer.getPassword(),
+      },
+      entryTtlSeconds: 600,
+      logger: new Logger("test", "log"),
+    });
+
+    try {
+      const popped = await buffer.pop("env_nonexistent");
+      expect(popped).toBeNull();
+    } finally {
+      await buffer.close();
+    }
+  });
+
+  redisTest("atomic RPOP across two parallel pops on the same env", { timeout: 20_000 }, async ({ redisContainer }) => {
+    const buffer = new MollifierBuffer({
+      redisOptions: {
+        host: redisContainer.getHost(),
+        port: redisContainer.getPort(),
+        password: redisContainer.getPassword(),
+      },
+      entryTtlSeconds: 600,
+      logger: new Logger("test", "log"),
+    });
+
+    try {
+      await buffer.accept({ runId: "only", envId: "env_a", orgId: "org_1", payload: "{}" });
+
+      const [a, b] = await Promise.all([buffer.pop("env_a"), buffer.pop("env_a")]);
+      const winners = [a, b].filter((x) => x !== null);
+      expect(winners).toHaveLength(1);
+      expect(winners[0]!.runId).toBe("only");
+    } finally {
+      await buffer.close();
+    }
+  });
+});
+
+describe("MollifierBuffer.ack", () => {
+  redisTest("ack deletes the entry", { timeout: 20_000 }, async ({ redisContainer }) => {
+    const buffer = new MollifierBuffer({
+      redisOptions: {
+        host: redisContainer.getHost(),
+        port: redisContainer.getPort(),
+        password: redisContainer.getPassword(),
+      },
+      entryTtlSeconds: 600,
+      logger: new Logger("test", "log"),
+    });
+
+    try {
+      await buffer.accept({ runId: "run_x", envId: "env_a", orgId: "org_1", payload: "{}" });
+      await buffer.pop("env_a");
+      await buffer.ack("run_x");
+
+      const after = await buffer.getEntry("run_x");
+      expect(after).toBeNull();
+    } finally {
+      await buffer.close();
+    }
+  });
+});
+
+describe("MollifierBuffer.pop orphan handling", () => {
+  redisTest(
+    "pop skips orphan queue references (runId in queue but entry hash expired)",
+    { timeout: 20_000 },
+    async ({ redisContainer }) => {
+      const buffer = new MollifierBuffer({
+        redisOptions: {
+          host: redisContainer.getHost(),
+          port: redisContainer.getPort(),
+          password: redisContainer.getPassword(),
+        },
+        entryTtlSeconds: 600,
+        logger: new Logger("test", "log"),
+      });
+
+      try {
+        // Simulate a TTL-expired orphan: queue ref exists, entry hash does not.
+        await buffer["redis"].lpush("mollifier:queue:env_a", "run_orphan");
+
+        const popped = await buffer.pop("env_a");
+        expect(popped).toBeNull();
+
+        // Critical: no partial hash was created for the orphan.
+        const raw = await buffer["redis"].hgetall("mollifier:entries:run_orphan");
+        expect(Object.keys(raw)).toHaveLength(0);
+
+        // Queue is drained — the loop pops orphans until empty.
+        const qLen = await buffer["redis"].llen("mollifier:queue:env_a");
+        expect(qLen).toBe(0);
+      } finally {
+        await buffer.close();
+      }
+    },
+  );
+
+  redisTest(
+    "pop skips orphans then returns the first valid entry behind them",
+    { timeout: 20_000 },
+    async ({ redisContainer }) => {
+      const buffer = new MollifierBuffer({
+        redisOptions: {
+          host: redisContainer.getHost(),
+          port: redisContainer.getPort(),
+          password: redisContainer.getPassword(),
+        },
+        entryTtlSeconds: 600,
+        logger: new Logger("test", "log"),
+      });
+
+      try {
+        // Layout (oldest-first, since RPOP takes from tail): orphan, valid, orphan.
+        // LPUSH adds at the head and RPOP removes from the tail, so the queue is
+        // FIFO: entries pop in push order — orphan_b, then valid, then orphan_a.
+        await buffer["redis"].lpush("mollifier:queue:env_a", "orphan_b");
+        await buffer.accept({ runId: "valid", envId: "env_a", orgId: "org_1", payload: "{}" });
+        await buffer["redis"].lpush("mollifier:queue:env_a", "orphan_a");
+
+        const popped = await buffer.pop("env_a");
+        expect(popped).not.toBeNull();
+        expect(popped!.runId).toBe("valid");
+        expect(popped!.status).toBe("DRAINING");
+
+        // The trailing orphan_a is still in the queue (single pop call).
+        const remaining = await buffer["redis"].llen("mollifier:queue:env_a");
+        expect(remaining).toBe(1);
+
+        // A second pop drains the trailing orphan_a. The queue is now
+        // empty. NOTE: the pop's no-runId branch can't read orgId from
+        // a popped entry (it never got one), so it doesn't prune the
+        // org-envs SET. env_a remains in `mollifier:org-envs:org_1` as
+        // a stale entry until the next accept-or-success-pop cycle
+        // recovers it. This is the deliberate trade-off documented in
+        // popAndMarkDraining's Lua.
+        const second = await buffer.pop("env_a");
+        expect(second).toBeNull();
+      } finally {
+        await buffer.close();
+      }
+    },
+  );
+});
+
+describe("MollifierBuffer.requeue", () => { // requeue of a popped entry: status, attempts and queue membership
+  redisTest("requeue increments attempts, restores QUEUED, re-LPUSHes", { timeout: 20_000 }, async ({ redisContainer }) => {
+    const buffer = new MollifierBuffer({
+      redisOptions: {
+        host: redisContainer.getHost(),
+        port: redisContainer.getPort(),
+        password: redisContainer.getPassword(),
+      },
+      entryTtlSeconds: 600,
+      logger: new Logger("test", "log"),
+    });
+
+    try {
+      await buffer.accept({ runId: "run_r", envId: "env_a", orgId: "org_1", payload: "{}" });
+      await buffer.pop("env_a"); // takes run_r off the queue
+      await buffer.requeue("run_r"); // should flip it back to QUEUED and bump attempts from 0 to 1
+
+      const entry = await buffer.getEntry("run_r");
+      expect(entry!.status).toBe("QUEUED");
+      expect(entry!.attempts).toBe(1);
+
+      const popped = await buffer.pop("env_a"); // proves the runId was pushed back onto the env queue
+      expect(popped!.runId).toBe("run_r");
+    } finally {
+      await buffer.close();
+    }
+  });
+});
+
+describe("MollifierBuffer.fail", () => { // fail(): FAILED transition for live entries, no-op for missing ones
+  redisTest("fail transitions to FAILED and stores lastError", { timeout: 20_000 }, async ({ redisContainer }) => {
+    const buffer = new MollifierBuffer({
+      redisOptions: {
+        host: redisContainer.getHost(),
+        port: redisContainer.getPort(),
+        password: redisContainer.getPassword(),
+      },
+      entryTtlSeconds: 600,
+      logger: new Logger("test", "log"),
+    });
+
+    try {
+      await buffer.accept({ runId: "run_f", envId: "env_a", orgId: "org_1", payload: "{}" });
+      await buffer.pop("env_a"); // entry is popped before it is failed
+      const failed = await buffer.fail("run_f", { code: "VALIDATION", message: "boom" });
+      expect(failed).toBe(true); // true = the entry existed and was transitioned
+
+      const entry = await buffer.getEntry("run_f");
+      expect(entry!.status).toBe("FAILED");
+      expect(entry!.lastError).toEqual({ code: "VALIDATION", message: "boom" }); // error object round-trips structurally
+    } finally {
+      await buffer.close();
+    }
+  });
+
+  redisTest(
+    "fail on missing entry is a no-op (returns false; no partial hash created)",
+    { timeout: 20_000 },
+    async ({ redisContainer }) => {
+      const buffer = new MollifierBuffer({
+        redisOptions: {
+          host: redisContainer.getHost(),
+          port: redisContainer.getPort(),
+          password: redisContainer.getPassword(),
+        },
+        entryTtlSeconds: 600,
+        logger: new Logger("test", "log"),
+      });
+
+      try {
+        const result = await buffer.fail("run_ghost", { code: "VALIDATION", message: "boom" }); // run_ghost was never accepted
+        expect(result).toBe(false);
+
+        // Critical: no partial entry hash was created.
+        const stored = await buffer.getEntry("run_ghost");
+        expect(stored).toBeNull();
+        const raw = await buffer["redis"].hgetall("mollifier:entries:run_ghost"); // bypass schema parsing: check the raw hash too
+        expect(Object.keys(raw)).toHaveLength(0);
+      } finally {
+        await buffer.close();
+      }
+    },
+  );
+});
+
+describe("MollifierBuffer TTL", () => { // accept() must put an expiry on the entry hash
+  redisTest("entry has TTL applied on accept", { timeout: 20_000 }, async ({ redisContainer }) => {
+    const buffer = new MollifierBuffer({
+      redisOptions: {
+        host: redisContainer.getHost(),
+        port: redisContainer.getPort(),
+        password: redisContainer.getPassword(),
+      },
+      entryTtlSeconds: 600, // upper bound asserted below
+      logger: new Logger("test", "log"),
+    });
+
+    try {
+      await buffer.accept({ runId: "run_t", envId: "env_a", orgId: "org_1", payload: "{}" });
+
+      const ttl = await buffer.getEntryTtlSeconds("run_t");
+      expect(ttl).toBeGreaterThan(0); // a non-positive reply would mean no TTL or no key
+      expect(ttl).toBeLessThanOrEqual(600); // never more than the configured entryTtlSeconds
+    } finally {
+      await buffer.close();
+    }
+  });
+});
+
+describe("MollifierBuffer payload encoding", () => { // payload must survive accept → Lua → pop byte-for-byte
+  redisTest(
+    "pop round-trips payloads with quotes, backslashes, control chars, unicode",
+    { timeout: 20_000 },
+    async ({ redisContainer }) => {
+      const buffer = new MollifierBuffer({
+        redisOptions: {
+          host: redisContainer.getHost(),
+          port: redisContainer.getPort(),
+          password: redisContainer.getPassword(),
+        },
+        entryTtlSeconds: 600,
+        logger: new Logger("test", "log"),
+      });
+
+      const tricky = { // each field targets a character class that naive string escaping tends to mangle
+        quotes: 'a"b\'c',
+        backslash: "x\\y\\z",
+        newlines: "line1\nline2\r\nline3",
+        tab: "col1\tcol2",
+        unicode: "héllo 🦀 世界",
+        lineSep: "before\u2028after\u2029end", // explicit escapes: U+2028/U+2029 are invisible (or lost) when written literally
+        nested: { arr: ["a", "b", 1, true, null], n: 3.14 },
+      };
+      const payload = serialiseSnapshot(tricky);
+
+      try {
+        await buffer.accept({ runId: "tricky", envId: "env_a", orgId: "org_1", payload });
+
+        const popped = await buffer.pop("env_a");
+        expect(popped).not.toBeNull();
+        expect(popped!.payload).toBe(payload); // exact string equality, not just structural equality
+
+        const decoded = JSON.parse(popped!.payload);
+        expect(decoded).toEqual(tricky); // and it still decodes to the original object
+      } finally {
+        await buffer.close();
+      }
+    },
+  );
+});
+
+describe("MollifierBuffer.requeue on missing entry", () => { // requeue of an unknown runId must leave Redis untouched
+  redisTest(
+    "requeue on a non-existent runId is a no-op (Lua returns 0; no queue push)",
+    { timeout: 20_000 },
+    async ({ redisContainer }) => {
+      const buffer = new MollifierBuffer({
+        redisOptions: {
+          host: redisContainer.getHost(),
+          port: redisContainer.getPort(),
+          password: redisContainer.getPassword(),
+        },
+        entryTtlSeconds: 600,
+        logger: new Logger("test", "log"),
+      });
+
+      try {
+        await buffer.requeue("run_does_not_exist"); // must resolve without throwing
+
+        // Critical: no queue keys were created from this no-op requeue.
+        const queueKeys = await buffer["redis"].keys("mollifier:queue:*"); // assumes each test starts with an empty Redis — confirm fixture
+        expect(queueKeys).toHaveLength(0);
+        const envs = await buffer.listEnvsForOrg("org_1");
+        expect(envs).toHaveLength(0); // and no org-envs membership was added either
+      } finally {
+        await buffer.close();
+      }
+    },
+  );
+});
+
+describe("MollifierBuffer.requeue ordering", () => { // a retried entry goes to the back of its env queue
+  redisTest(
+    "requeued entry is popped AFTER other queued entries on the same env (FIFO retry)",
+    { timeout: 20_000 },
+    async ({ redisContainer }) => {
+      const buffer = new MollifierBuffer({
+        redisOptions: {
+          host: redisContainer.getHost(),
+          port: redisContainer.getPort(),
+          password: redisContainer.getPassword(),
+        },
+        entryTtlSeconds: 600,
+        logger: new Logger("test", "log"),
+      });
+
+      try {
+        await buffer.accept({ runId: "a", envId: "env_a", orgId: "org_1", payload: "{}" }); // accepted first → popped first
+        await buffer.accept({ runId: "b", envId: "env_a", orgId: "org_1", payload: "{}" });
+        await buffer.accept({ runId: "c", envId: "env_a", orgId: "org_1", payload: "{}" });
+
+        const first = await buffer.pop("env_a");
+        expect(first!.runId).toBe("a");
+
+        await buffer.requeue("a"); // the requeue script LPUSHes, i.e. the same end of the list accept uses
+
+        const next = await buffer.pop("env_a");
+        expect(next!.runId).toBe("b");
+        const after = await buffer.pop("env_a");
+        expect(after!.runId).toBe("c");
+        const last = await buffer.pop("env_a");
+        expect(last!.runId).toBe("a"); // the retried entry comes out only after b and c
+      } finally {
+        await buffer.close();
+      }
+    },
+  );
+});
+
+describe("MollifierBuffer.evaluateTrip", () => {
+  const tripOptions = {
+    windowMs: 200,
+    threshold: 5,
+    holdMs: 100,
+  };
+
+  redisTest("under threshold: not tripped, count increments", { timeout: 20_000 }, async ({ redisContainer }) => {
+    const buffer = new MollifierBuffer({
+      redisOptions: {
+        host: redisContainer.getHost(),
+        port: redisContainer.getPort(),
+        password: redisContainer.getPassword(),
+      },
+      entryTtlSeconds: 600,
+      logger: new Logger("test", "log"),
+    });
+
+    try {
+      const r1 = await buffer.evaluateTrip("env_a", tripOptions); // tripOptions: windowMs 200, threshold 5, holdMs 100
+      expect(r1).toEqual({ tripped: false, count: 1 });
+
+      const r2 = await buffer.evaluateTrip("env_a", tripOptions); // same window, so the counter keeps accumulating
+      expect(r2).toEqual({ tripped: false, count: 2 });
+    } finally {
+      await buffer.close();
+    }
+  });
+
+  redisTest("crossing threshold sets the tripped marker", { timeout: 20_000 }, async ({ redisContainer }) => {
+    const buffer = new MollifierBuffer({
+      redisOptions: {
+        host: redisContainer.getHost(),
+        port: redisContainer.getPort(),
+        password: redisContainer.getPassword(),
+      },
+      entryTtlSeconds: 600,
+      logger: new Logger("test", "log"),
+    });
+
+    try {
+      for (let i = 0; i < 5; i++) { // calls 1..5: count reaches the threshold (5) but does not exceed it
+        const r = await buffer.evaluateTrip("env_a", tripOptions);
+        expect(r.tripped).toBe(false);
+      }
+
+      const after = await buffer.evaluateTrip("env_a", tripOptions); // 6th call: count > threshold
+      expect(after).toEqual({ tripped: true, count: 6 });
+
+      const sticky = await buffer.evaluateTrip("env_a", tripOptions); // an immediate follow-up call still reports tripped
+      expect(sticky.tripped).toBe(true);
+    } finally {
+      await buffer.close();
+    }
+  });
+
+  redisTest("hold-down marker expires after holdMs and env resets", { timeout: 20_000 }, async ({ redisContainer }) => {
+    const buffer = new MollifierBuffer({
+      redisOptions: {
+        host: redisContainer.getHost(),
+        port: redisContainer.getPort(),
+        password: redisContainer.getPassword(),
+      },
+      entryTtlSeconds: 600,
+      logger: new Logger("test", "log"),
+    });
+
+    try {
+      const fastWindow = { windowMs: 100, threshold: 2, holdMs: 100 }; // short timers keep the test fast
+      await buffer.evaluateTrip("env_a", fastWindow);
+      await buffer.evaluateTrip("env_a", fastWindow);
+      const tripped = await buffer.evaluateTrip("env_a", fastWindow); // 3rd call exceeds threshold 2 (all three must land within 100ms)
+      expect(tripped.tripped).toBe(true);
+
+      // Wait past windowMs AND holdMs so both rate counter and tripped marker expire
+      await new Promise((r) => setTimeout(r, 220));
+
+      const recovered = await buffer.evaluateTrip("env_a", fastWindow);
+      expect(recovered).toEqual({ tripped: false, count: 1 }); // fresh counter, marker gone
+    } finally {
+      await buffer.close();
+    }
+  });
+
+  redisTest("env isolation: tripping env_a does not affect env_b", { timeout: 20_000 }, async ({ redisContainer }) => {
+    const buffer = new MollifierBuffer({
+      redisOptions: {
+        host: redisContainer.getHost(),
+        port: redisContainer.getPort(),
+        password: redisContainer.getPassword(),
+      },
+      entryTtlSeconds: 600,
+      logger: new Logger("test", "log"),
+    });
+
+    try {
+      for (let i = 0; i < 6; i++) { // six calls push env_a past tripOptions.threshold (5)
+        await buffer.evaluateTrip("env_a", tripOptions);
+      }
+      const aTripped = await buffer.evaluateTrip("env_a", tripOptions);
+      expect(aTripped.tripped).toBe(true);
+
+      const b = await buffer.evaluateTrip("env_b", tripOptions); // rate and tripped keys are per-envId
+      expect(b).toEqual({ tripped: false, count: 1 });
+    } finally {
+      await buffer.close();
+    }
+  });
+
+  redisTest("window expires and counter resets when no traffic", { timeout: 20_000 }, async ({ redisContainer }) => {
+    const buffer = new MollifierBuffer({
+      redisOptions: {
+        host: redisContainer.getHost(),
+        port: redisContainer.getPort(),
+        password: redisContainer.getPassword(),
+      },
+      entryTtlSeconds: 600,
+      logger: new Logger("test", "log"),
+    });
+
+    try {
+      const fastWindow = { windowMs: 100, threshold: 100, holdMs: 100 }; // threshold far above the call count: never trips
+      await buffer.evaluateTrip("env_x", fastWindow);
+      const second = await buffer.evaluateTrip("env_x", fastWindow);
+      expect(second.count).toBe(2); // pin the pre-state: both calls accumulated in one fresh window
+
+      await new Promise((r) => setTimeout(r, 150)); // idle for longer than windowMs
+      const fresh = await buffer.evaluateTrip("env_x", fastWindow);
+      expect(fresh.count).toBe(1); // counter restarted rather than continuing from 2
+    } finally {
+      await buffer.close();
+    }
+  });
+
+  redisTest(
+    "tripped marker outlives the rate counter window",
+    { timeout: 20_000 },
+    async ({ redisContainer }) => {
+      const buffer = new MollifierBuffer({
+        redisOptions: {
+          host: redisContainer.getHost(),
+          port: redisContainer.getPort(),
+          password: redisContainer.getPassword(),
+        },
+        entryTtlSeconds: 600,
+        logger: new Logger("test", "log"),
+      });
+
+      try {
+        const opts = { windowMs: 50, threshold: 2, holdMs: 1000 }; // holdMs much longer than windowMs on purpose
+        await buffer.evaluateTrip("env_a", opts);
+        await buffer.evaluateTrip("env_a", opts);
+        const tripped = await buffer.evaluateTrip("env_a", opts); // 3rd call exceeds threshold 2
+        expect(tripped.tripped).toBe(true);
+
+        // Wait past windowMs (rate counter expires) but well inside holdMs (marker persists).
+        await new Promise((r) => setTimeout(r, 120));
+
+        const after = await buffer.evaluateTrip("env_a", opts);
+        expect(after.tripped).toBe(true); // still tripped although the count alone no longer exceeds the threshold
+        expect(after.count).toBeLessThanOrEqual(2); // counter restarted, so the trip can only come from the marker
+      } finally {
+        await buffer.close();
+      }
+    },
+  );
+
+  redisTest(
+    "INCR is atomic under 100 concurrent calls (no lost increments)",
+    { timeout: 20_000 },
+    async ({ redisContainer }) => {
+      const buffer = new MollifierBuffer({
+        redisOptions: {
+          host: redisContainer.getHost(),
+          port: redisContainer.getPort(),
+          password: redisContainer.getPassword(),
+        },
+        entryTtlSeconds: 600,
+        logger: new Logger("test", "log"),
+      });
+
+      try {
+        // Wide window so all 100 calls land in the same window. High threshold
+        // so trip semantics don't interfere with the count assertion.
+        const opts = { windowMs: 5000, threshold: 1_000_000, holdMs: 100 };
+        const results = await Promise.all( // all 100 calls are issued before any reply is awaited
+          Array.from({ length: 100 }, () => buffer.evaluateTrip("env_atomic", opts)),
+        );
+
+        // Every return value is unique (no two callers saw the same INCR result).
+        const counts = results.map((r) => r.count).sort((a, b) => a - b); // numeric sort; default sort is lexicographic
+        expect(counts).toEqual(Array.from({ length: 100 }, (_, i) => i + 1)); // exactly 1..100, no gaps or repeats
+
+        // No call tripped (we set threshold absurdly high).
+        expect(results.every((r) => !r.tripped)).toBe(true);
+      } finally {
+        await buffer.close();
+      }
+    },
+  );
+});
+
+describe("MollifierBuffer entry lifecycle invariants", () => { // TTL retention and org-envs membership across state changes
+  redisTest(
+    "entry TTL is preserved across pop (DRAINING entries don't lose their TTL)",
+    { timeout: 20_000 },
+    async ({ redisContainer }) => {
+      const buffer = new MollifierBuffer({
+        redisOptions: {
+          host: redisContainer.getHost(),
+          port: redisContainer.getPort(),
+          password: redisContainer.getPassword(),
+        },
+        entryTtlSeconds: 600,
+        logger: new Logger("test", "log"),
+      });
+
+      try {
+        await buffer.accept({ runId: "run_ttl", envId: "env_a", orgId: "org_1", payload: "{}" });
+        const beforeTtl = await buffer.getEntryTtlSeconds("run_ttl");
+        expect(beforeTtl).toBeGreaterThan(0);
+
+        await buffer.pop("env_a"); // the pop rewrites fields on the entry hash; that must not clear the expiry
+        const afterTtl = await buffer.getEntryTtlSeconds("run_ttl");
+
+        // TTL must still be present (>0). Redis returns -1 if the key has no
+        // TTL — that's the leak shape we're guarding against.
+        expect(afterTtl).toBeGreaterThan(0);
+        expect(afterTtl).toBeLessThanOrEqual(beforeTtl); // and pop must not extend it either
+      } finally {
+        await buffer.close();
+      }
+    },
+  );
+
+  redisTest(
+    "envs set membership tracks queue+DRAINING presence across the full lifecycle",
+    { timeout: 20_000 },
+    async ({ redisContainer }) => {
+      const buffer = new MollifierBuffer({
+        redisOptions: {
+          host: redisContainer.getHost(),
+          port: redisContainer.getPort(),
+          password: redisContainer.getPassword(),
+        },
+        entryTtlSeconds: 600,
+        logger: new Logger("test", "log"),
+      });
+
+      try {
+        // Empty start
+        expect(await buffer.listEnvsForOrg("org_1")).not.toContain("env_lc");
+
+        // accept → SADD
+        await buffer.accept({ runId: "r1", envId: "env_lc", orgId: "org_1", payload: "{}" });
+        expect(await buffer.listEnvsForOrg("org_1")).toContain("env_lc");
+
+        // second accept (different runId) → still SADD (idempotent)
+        await buffer.accept({ runId: "r2", envId: "env_lc", orgId: "org_1", payload: "{}" });
+        expect(await buffer.listEnvsForOrg("org_1")).toContain("env_lc");
+
+        // pop r1 → queue still has r2 → env stays
+        await buffer.pop("env_lc");
+        expect(await buffer.listEnvsForOrg("org_1")).toContain("env_lc");
+
+        // ack r1 → no queue change, env still tracked (r2 still queued)
+        await buffer.ack("r1");
+        expect(await buffer.listEnvsForOrg("org_1")).toContain("env_lc");
+
+        // pop r2 → queue empties → SREM
+        await buffer.pop("env_lc");
+        expect(await buffer.listEnvsForOrg("org_1")).not.toContain("env_lc");
+
+        // requeue r2 → SADD back
+        await buffer.requeue("r2");
+        expect(await buffer.listEnvsForOrg("org_1")).toContain("env_lc");
+
+        // pop r2 again (queue empties → SREM), then fail r2 → FAILED entry must not re-track the env
+        await buffer.pop("env_lc");
+        await buffer.fail("r2", { code: "X", message: "boom" });
+        const afterFailEnvs = await buffer.listEnvsForOrg("org_1");
+        // Queue is empty, env was SREM'd by the pop above.
+        expect(afterFailEnvs).not.toContain("env_lc");
+      } finally {
+        await buffer.close();
+      }
+    },
+  );
+});
+
+describe("MollifierBuffer.accept idempotency", () => { // accept() is refused while any entry for the runId exists
+  redisTest(
+    "duplicate runId is refused; queue not double-LPUSHed; existing entry not overwritten",
+    { timeout: 20_000 },
+    async ({ redisContainer }) => {
+      const buffer = new MollifierBuffer({
+        redisOptions: {
+          host: redisContainer.getHost(),
+          port: redisContainer.getPort(),
+          password: redisContainer.getPassword(),
+        },
+        entryTtlSeconds: 600,
+        logger: new Logger("test", "log"),
+      });
+
+      try {
+        const first = await buffer.accept({
+          runId: "run_dup",
+          envId: "env_a",
+          orgId: "org_1",
+          payload: serialiseSnapshot({ first: true }),
+        });
+        const second = await buffer.accept({ // same runId, different payload, so an overwrite would be detectable
+          runId: "run_dup",
+          envId: "env_a",
+          orgId: "org_1",
+          payload: serialiseSnapshot({ first: false }),
+        });
+
+        expect(first).toBe(true);
+        expect(second).toBe(false);
+
+        // First payload preserved; second was a no-op.
+        const stored = await buffer.getEntry("run_dup");
+        expect(stored).not.toBeNull();
+        const decoded = JSON.parse(stored!.payload);
+        expect(decoded).toEqual({ first: true });
+
+        // Exactly one queue entry, not two.
+        const popped1 = await buffer.pop("env_a");
+        expect(popped1).not.toBeNull();
+        expect(popped1!.runId).toBe("run_dup");
+        const popped2 = await buffer.pop("env_a");
+        expect(popped2).toBeNull();
+      } finally {
+        await buffer.close();
+      }
+    },
+  );
+
+  redisTest(
+    "accept refused while existing entry is DRAINING",
+    { timeout: 20_000 },
+    async ({ redisContainer }) => {
+      const buffer = new MollifierBuffer({
+        redisOptions: {
+          host: redisContainer.getHost(),
+          port: redisContainer.getPort(),
+          password: redisContainer.getPassword(),
+        },
+        entryTtlSeconds: 600,
+        logger: new Logger("test", "log"),
+      });
+
+      try {
+        await buffer.accept({ runId: "run_dr", envId: "env_a", orgId: "org_1", payload: "{}" });
+        await buffer.pop("env_a"); // now DRAINING
+        const stored = await buffer.getEntry("run_dr");
+        expect(stored!.status).toBe("DRAINING");
+
+        const dup = await buffer.accept({ runId: "run_dr", envId: "env_a", orgId: "org_1", payload: "{}" });
+        expect(dup).toBe(false); // the entry hash still exists, so accept is refused
+
+        const afterDup = await buffer.getEntry("run_dr");
+        expect(afterDup!.status).toBe("DRAINING"); // unchanged
+      } finally {
+        await buffer.close();
+      }
+    },
+  );
+
+  redisTest(
+    "accept refused while existing entry is FAILED",
+    { timeout: 20_000 },
+    async ({ redisContainer }) => {
+      const buffer = new MollifierBuffer({
+        redisOptions: {
+          host: redisContainer.getHost(),
+          port: redisContainer.getPort(),
+          password: redisContainer.getPassword(),
+        },
+        entryTtlSeconds: 600,
+        logger: new Logger("test", "log"),
+      });
+
+      try {
+        await buffer.accept({ runId: "run_fl", envId: "env_a", orgId: "org_1", payload: "{}" });
+        await buffer.pop("env_a");
+        await buffer.fail("run_fl", { code: "VALIDATION", message: "boom" });
+        const stored = await buffer.getEntry("run_fl");
+        expect(stored!.status).toBe("FAILED");
+
+        const dup = await buffer.accept({ runId: "run_fl", envId: "env_a", orgId: "org_1", payload: "{}" });
+        expect(dup).toBe(false); // a FAILED entry still occupies the runId
+
+        const afterDup = await buffer.getEntry("run_fl");
+        expect(afterDup!.status).toBe("FAILED"); // unchanged
+        expect(afterDup!.lastError).toEqual({ code: "VALIDATION", message: "boom" }); // failure details not clobbered
+      } finally {
+        await buffer.close();
+      }
+    },
+  );
+
+  redisTest(
+    "re-accept after ack works (terminal entry can be re-accepted)",
+    { timeout: 20_000 },
+    async ({ redisContainer }) => {
+      const buffer = new MollifierBuffer({
+        redisOptions: {
+          host: redisContainer.getHost(),
+          port: redisContainer.getPort(),
+          password: redisContainer.getPassword(),
+        },
+        entryTtlSeconds: 600,
+        logger: new Logger("test", "log"),
+      });
+
+      try {
+        const first = await buffer.accept({
+          runId: "run_x",
+          envId: "env_a",
+          orgId: "org_1",
+          payload: "{}",
+        });
+        await buffer.pop("env_a");
+        await buffer.ack("run_x"); // ack deletes the entry hash
+
+        // Entry is gone — re-accept should succeed.
+        const reAccept = await buffer.accept({
+          runId: "run_x",
+          envId: "env_a",
+          orgId: "org_1",
+          payload: "{}",
+        });
+
+        expect(first).toBe(true);
+        expect(reAccept).toBe(true);
+      } finally {
+        await buffer.close();
+      }
+    },
+  );
+});
+
+describe("MollifierBuffer envs set lifecycle", () => { // org-envs membership as observed through listEnvsForOrg
+  redisTest(
+    "pop SREMs envId when it drains the queue",
+    { timeout: 20_000 },
+    async ({ redisContainer }) => {
+      const buffer = new MollifierBuffer({
+        redisOptions: {
+          host: redisContainer.getHost(),
+          port: redisContainer.getPort(),
+          password: redisContainer.getPassword(),
+        },
+        entryTtlSeconds: 600,
+        logger: new Logger("test", "log"),
+      });
+
+      try {
+        await buffer.accept({ runId: "r1", envId: "env_a", orgId: "org_1", payload: "{}" });
+        expect(await buffer.listEnvsForOrg("org_1")).toContain("env_a");
+
+        await buffer.pop("env_a"); // the only queued item, so this pop empties the queue
+        expect(await buffer.listEnvsForOrg("org_1")).not.toContain("env_a");
+      } finally {
+        await buffer.close();
+      }
+    },
+  );
+
+  redisTest(
+    "pop keeps envId in set while items remain; SREMs only on the draining pop",
+    { timeout: 20_000 },
+    async ({ redisContainer }) => {
+      const buffer = new MollifierBuffer({
+        redisOptions: {
+          host: redisContainer.getHost(),
+          port: redisContainer.getPort(),
+          password: redisContainer.getPassword(),
+        },
+        entryTtlSeconds: 600,
+        logger: new Logger("test", "log"),
+      });
+
+      try {
+        await buffer.accept({ runId: "r1", envId: "env_a", orgId: "org_1", payload: "{}" });
+        await buffer.accept({ runId: "r2", envId: "env_a", orgId: "org_1", payload: "{}" });
+        expect(await buffer.listEnvsForOrg("org_1")).toContain("env_a");
+
+        await buffer.pop("env_a"); // one item still queued afterwards
+        expect(await buffer.listEnvsForOrg("org_1")).toContain("env_a");
+
+        await buffer.pop("env_a"); // last item: the queue is now empty
+        expect(await buffer.listEnvsForOrg("org_1")).not.toContain("env_a");
+      } finally {
+        await buffer.close();
+      }
+    },
+  );
+
+  redisTest(
+    "requeue re-SADDs the envId if pop had previously cleaned it",
+    { timeout: 20_000 },
+    async ({ redisContainer }) => {
+      const buffer = new MollifierBuffer({
+        redisOptions: {
+          host: redisContainer.getHost(),
+          port: redisContainer.getPort(),
+          password: redisContainer.getPassword(),
+        },
+        entryTtlSeconds: 600,
+        logger: new Logger("test", "log"),
+      });
+
+      try {
+        await buffer.accept({ runId: "r1", envId: "env_a", orgId: "org_1", payload: "{}" });
+        await buffer.pop("env_a");
+        // Queue drained → env_a SREM'd.
+        expect(await buffer.listEnvsForOrg("org_1")).not.toContain("env_a");
+
+        await buffer.requeue("r1"); // the entry hash still exists, so the script can read its envId/orgId
+        // requeue must put env_a back so the drainer notices the retry.
+        expect(await buffer.listEnvsForOrg("org_1")).toContain("env_a");
+      } finally {
+        await buffer.close();
+      }
+    },
+  );
+});
diff --git a/packages/redis-worker/src/mollifier/buffer.ts b/packages/redis-worker/src/mollifier/buffer.ts
new file mode 100644
index 00000000000..f739e3ff362
--- /dev/null
+++ b/packages/redis-worker/src/mollifier/buffer.ts
@@ -0,0 +1,399 @@
+import {
+  createRedisClient,
+  type Callback,
+  type Redis,
+  type RedisOptions,
+  type Result,
+} from "@internal/redis";
+import { Logger } from "@trigger.dev/core/logger";
+import { BufferEntry, BufferEntrySchema } from "./schemas.js";
+
+export type MollifierBufferOptions = { // constructor options for MollifierBuffer
+  redisOptions: RedisOptions; // spread into the options passed to createRedisClient
+  entryTtlSeconds: number; // handed to the accept script, which EXPIREs each entry hash with it
+  logger?: Logger; // defaults to new Logger("MollifierBuffer", "debug") when omitted
+};
+
+export class MollifierBuffer {
+  private readonly redis: Redis;
+  private readonly entryTtlSeconds: number;
+  private readonly logger: Logger;
+
+  constructor(options: MollifierBufferOptions) { // creates the Redis client and registers the Lua-backed commands
+    this.entryTtlSeconds = options.entryTtlSeconds;
+    this.logger = options.logger ?? new Logger("MollifierBuffer", "debug");
+
+    this.redis = createRedisClient(
+      {
+        ...options.redisOptions, // spread first: the two settings below override caller-supplied values
+        retryStrategy(times) {
+          const delay = Math.min(times * 50, 1000); // grows by 50 per attempt, capped at 1000 (presumably ms — confirm against the client)
+          return delay;
+        },
+        maxRetriesPerRequest: 20,
+      },
+      {
+        onError: (error) => { // client errors are logged here, not rethrown
+          this.logger.error("MollifierBuffer redis client error:", { error });
+        },
+      },
+    );
+    this.#registerCommands(); // must run before any method below calls a custom command
+  }
+
+  // Buffers a run for later draining. Resolves true when the entry was newly
+  // written and false when an entry for this runId already exists (the script
+  // returns 0 without touching state), so callers can count duplicate accepts.
+  async accept(input: {
+    runId: string;
+    envId: string;
+    orgId: string;
+    payload: string;
+  }): Promise<boolean> {
+    const { runId, envId, orgId, payload } = input;
+    const createdAt = new Date().toISOString();
+    const ttlArg = String(this.entryTtlSeconds);
+
+    const written = await this.redis.acceptMollifierEntry(
+      `mollifier:entries:${runId}`, // KEYS[1]: entry hash
+      `mollifier:queue:${envId}`, // KEYS[2]: per-env queue list
+      "mollifier:orgs", // KEYS[3]: set of tracked orgIds
+      runId,
+      envId,
+      orgId,
+      payload,
+      createdAt,
+      ttlArg,
+      "mollifier:org-envs:", // prefix; the script appends orgId to build the per-org env set key
+    );
+    return written === 1;
+  }
+
+  // Pops the next entry for `envId` via the popAndMarkDraining script and
+  // validates the reply. Resolves null for an empty reply, unparseable JSON,
+  // or a value that fails BufferEntrySchema (the latter two are logged).
+  async pop(envId: string): Promise<BufferEntry | null> {
+    const reply = (await this.redis.popAndMarkDraining(
+      `mollifier:queue:${envId}`,
+      "mollifier:orgs",
+      "mollifier:entries:",
+      envId,
+      "mollifier:org-envs:",
+    )) as string | null;
+    if (!reply) return null;
+
+    let decoded: unknown;
+    try {
+      decoded = JSON.parse(reply);
+    } catch {
+      this.logger.error("MollifierBuffer.pop: failed to parse script result", { envId });
+      return null;
+    }
+
+    const validation = BufferEntrySchema.safeParse(decoded);
+    if (validation.success) return validation.data;
+
+    // Parsed as JSON but not a valid entry: log the schema errors and drop it.
+    this.logger.error("MollifierBuffer.pop: invalid entry shape", {
+      envId,
+      errors: validation.error.flatten(),
+    });
+    return null;
+  }
+
+  async getEntry(runId: string): Promise<BufferEntry | null> {
+    const fields = await this.redis.hgetall(`mollifier:entries:${runId}`);
+    const isEmpty = !fields || Object.keys(fields).length === 0;
+    if (isEmpty) return null; // no fields stored under this runId
+
+    const validation = BufferEntrySchema.safeParse(fields);
+    if (validation.success) return validation.data;
+    // Hash exists but does not match the schema: log it and report null.
+    this.logger.error("MollifierBuffer.getEntry: invalid entry shape", {
+      runId,
+      errors: validation.error.flatten(),
+    });
+    return null;
+  }
+
+  // Drainer walks these two methods to schedule pops with org-level
+  // fairness: one env per org per tick. The Lua scripts maintain both
+  // sets alongside the per-env queues (SADD on accept and requeue). Members
+  // can be stale, e.g. after a pop that only found orphans — treat as hints.
+  async listOrgs(): Promise<string[]> {
+    return this.redis.smembers("mollifier:orgs"); // raw set members; order is not meaningful
+  }
+
+  async listEnvsForOrg(orgId: string): Promise<string[]> { // envIds currently tracked for one org
+    return this.redis.smembers(`mollifier:org-envs:${orgId}`); // same prefix the accept/requeue scripts SADD into
+  }
+
+  async ack(runId: string): Promise<void> { // acknowledges a run by deleting its entry hash
+    await this.redis.del(`mollifier:entries:${runId}`); // only the hash is removed; queue and org sets are not touched here
+  }
+
+  async requeue(runId: string): Promise<void> { // puts an existing entry back on its env queue for another attempt
+    await this.redis.requeueMollifierEntry( // script returns 0 when the entry has no envId (missing entry); the reply is ignored
+      `mollifier:entries:${runId}`, // KEYS[1]: entry hash; envId, orgId and attempts are read from it
+      "mollifier:orgs", // KEYS[2]: org set, re-SADDed when the entry has an orgId
+      "mollifier:queue:", // ARGV[1]: prefix; the script appends the entry's envId (key is not declared in KEYS)
+      runId, // ARGV[2]: value LPUSHed onto the queue
+      "mollifier:org-envs:", // ARGV[3]: prefix; the script appends the entry's orgId
+    );
+  }
+
+  // Marks an entry FAILED with the given error. Resolves true when the
+  // transition happened and false when the entry no longer exists (TTL
+  // expired between pop and fail), so callers can skip ghost entries.
+  async fail(runId: string, error: { code: string; message: string }): Promise<boolean> {
+    const entryKey = `mollifier:entries:${runId}`;
+    const serialisedError = JSON.stringify(error);
+
+    const transitioned = await this.redis.failMollifierEntry(entryKey, serialisedError);
+    return transitioned === 1;
+  }
+
+  async getEntryTtlSeconds(runId: string): Promise<number> { // remaining lifetime of the entry hash, in seconds
+    return this.redis.ttl(`mollifier:entries:${runId}`); // raw TTL reply: Redis uses negative values for "no expiry" / "no such key"
+  }
+
+  async evaluateTrip(
+    envId: string,
+    options: { windowMs: number; threshold: number; holdMs: number },
+  ): Promise<{ tripped: boolean; count: number }> {
+    const { windowMs, threshold, holdMs } = options;
+    // The script reply is read as a two-element array: [count, trippedFlag].
+    const [count, trippedFlag] = (await this.redis.mollifierEvaluateTrip(
+      `mollifier:rate:${envId}`,
+      `mollifier:tripped:${envId}`,
+      String(windowMs),
+      String(threshold),
+      String(holdMs),
+    )) as [number, number];
+
+    return { count, tripped: trippedFlag === 1 };
+  }
+
+  async close(): Promise<void> { // shuts down this buffer's Redis client; tests call it in `finally`
+    await this.redis.quit(); // resolves once the client's quit() has completed
+  }
+
+  #registerCommands() { // registers the five Lua scripts below as custom commands on this.redis
+    this.redis.defineCommand("acceptMollifierEntry", { // replies 0 untouched if the entry exists; else writes hash + TTL, enqueues runId, replies 1
+      numberOfKeys: 3, // KEYS[1..3] = entryKey, queueKey, orgsKey
+      lua: `
+        local entryKey = KEYS[1]
+        local queueKey = KEYS[2]
+        local orgsKey = KEYS[3]
+        local runId = ARGV[1]
+        local envId = ARGV[2]
+        local orgId = ARGV[3]
+        local payload = ARGV[4]
+        local createdAt = ARGV[5]
+        local ttlSeconds = tonumber(ARGV[6])
+        local orgEnvsPrefix = ARGV[7]
+
+        -- Idempotent: refuse if an entry for this runId already exists in any
+        -- state. Caller-side dedup is also enforced via API idempotency keys,
+        -- but the buffer must not double-enqueue if a caller retries.
+        if redis.call('EXISTS', entryKey) == 1 then
+          return 0
+        end
+
+        redis.call('HSET', entryKey,
+          'runId', runId,
+          'envId', envId,
+          'orgId', orgId,
+          'payload', payload,
+          'status', 'QUEUED',
+          'attempts', '0',
+          'createdAt', createdAt)
+        redis.call('EXPIRE', entryKey, ttlSeconds)
+        redis.call('LPUSH', queueKey, runId)
+        -- Org-level membership: maintained atomically with the per-env
+        -- queue so the drainer can walk orgs → envs-for-org and
+        -- schedule one env per org per tick. SADDs are idempotent if the
+        -- org/env are already tracked.
+        redis.call('SADD', orgsKey, orgId)
+        redis.call('SADD', orgEnvsPrefix .. orgId, envId)
+        return 1
+      `,
+    });
+
+    this.redis.defineCommand("requeueMollifierEntry", { // replies 0 if the entry has no envId; else attempts+1, status QUEUED, re-push, replies 1
+      numberOfKeys: 2, // KEYS[1..2] = entryKey, orgsKey
+      lua: `
+        local entryKey = KEYS[1]
+        local orgsKey = KEYS[2]
+        local queuePrefix = ARGV[1]
+        local runId = ARGV[2]
+        local orgEnvsPrefix = ARGV[3]
+
+        local envId = redis.call('HGET', entryKey, 'envId')
+        local orgId = redis.call('HGET', entryKey, 'orgId')
+        if not envId then
+          return 0
+        end
+
+        local currentAttempts = redis.call('HGET', entryKey, 'attempts')
+        local nextAttempts = tonumber(currentAttempts or '0') + 1
+
+        redis.call('HSET', entryKey, 'status', 'QUEUED', 'attempts', tostring(nextAttempts))
+        redis.call('LPUSH', queuePrefix .. envId, runId)
+        -- Re-track the org/env: pop may have SREM'd them when the queue
+        -- last emptied. SADDs are idempotent if the values are still
+        -- present.
+        if orgId then
+          redis.call('SADD', orgsKey, orgId)
+          redis.call('SADD', orgEnvsPrefix .. orgId, envId)
+        end
+        return 1
+      `,
+    });
+
+    this.redis.defineCommand("popAndMarkDraining", { // pops the next runId whose entry still exists, marks it DRAINING, replies the hash as JSON (nil if queue empty)
+      numberOfKeys: 2, // KEYS[1..2] = queueKey, orgsKey
+      lua: `
+        local queueKey = KEYS[1]
+        local orgsKey = KEYS[2]
+        local entryPrefix = ARGV[1]
+        local envId = ARGV[2]
+        local orgEnvsPrefix = ARGV[3]
+
+        -- Helper: prune org-level membership when an env's queue empties.
+        -- Called only from the success branch where we know orgId from the
+        -- popped entry. The no-runId branch below can't reach this because
+        -- it has no entry to read orgId from — accept any stale org-envs
+        -- entries that result (bounded by env count, recovered next accept).
+        local function pruneOrgMembership(orgId)
+          if not orgId then return end
+          local orgEnvsKey = orgEnvsPrefix .. orgId
+          redis.call('SREM', orgEnvsKey, envId)
+          if redis.call('SCARD', orgEnvsKey) == 0 then
+            redis.call('SREM', orgsKey, orgId)
+          end
+        end
+
+        -- Loop to skip orphan queue references — runIds whose entry hash has
+        -- expired (TTL hit). HSET on a missing key would CREATE a partial
+        -- hash without a TTL, leaking memory. The loop is bounded by queue
+        -- length; entire Lua script remains atomic.
+        while true do
+          local runId = redis.call('RPOP', queueKey)
+          if not runId then
+            -- Queue is empty AND we have no entry to read orgId from, so
+            -- skip org-level cleanup. Stale org-envs entries are bounded
+            -- by env count and recovered on the next accept.
+            return nil
+          end
+
+          local entryKey = entryPrefix .. runId
+          if redis.call('EXISTS', entryKey) == 1 then
+            redis.call('HSET', entryKey, 'status', 'DRAINING')
+            local raw = redis.call('HGETALL', entryKey)
+            local result = {}
+            for i = 1, #raw, 2 do
+              result[raw[i]] = raw[i + 1]
+            end
+            -- Prune org-level membership if this pop drained the queue.
+            -- Atomic with the RPOP above — a concurrent accept AFTER this
+            -- script will SADD both back along with its LPUSH.
+            if redis.call('LLEN', queueKey) == 0 then
+              pruneOrgMembership(result['orgId'])
+            end
+            return cjson.encode(result)
+          end
+          -- Orphan queue reference: entry TTL expired while runId was queued.
+          -- Discard the reference and loop to the next.
+        end
+      `,
+    });
+
+    this.redis.defineCommand("failMollifierEntry", { // replies 0 if the entry is gone; else sets status FAILED + lastError and replies 1
+      numberOfKeys: 1, // KEYS[1] = entryKey
+      lua: `
+        local entryKey = KEYS[1]
+        local errorPayload = ARGV[1]
+
+        -- Guard: never create a partial entry. If the hash expired between
+        -- pop and fail, the run is gone — nothing to mark FAILED.
+        if redis.call('EXISTS', entryKey) == 0 then
+          return 0
+        end
+
+        redis.call('HSET', entryKey, 'status', 'FAILED', 'lastError', errorPayload)
+        return 1
+      `,
+    });
+
+    this.redis.defineCommand("mollifierEvaluateTrip", { // INCRs the rate counter, (re)sets the tripped flag while count > threshold, replies {count, tripped}
+      numberOfKeys: 2, // KEYS[1..2] = rateKey, trippedKey
+      lua: `
+        local rateKey = KEYS[1]
+        local trippedKey = KEYS[2]
+        local windowMs = tonumber(ARGV[1])
+        local threshold = tonumber(ARGV[2])
+        local holdMs = tonumber(ARGV[3])
+
+        local count = redis.call('INCR', rateKey)
+        if count == 1 then
+          redis.call('PEXPIRE', rateKey, windowMs)
+        end
+
+        if count > threshold then
+          redis.call('PSETEX', trippedKey, holdMs, '1')
+        end
+
+        local tripped = redis.call('EXISTS', trippedKey)
+        return {count, tripped}
+      `,
+    });
+  }
+}
+
+declare module "@internal/redis" { // typings for the custom commands registered through defineCommand
+  interface RedisCommander<Context> {
+    acceptMollifierEntry( // first 3 args are KEYS (entryKey, queueKey, orgsKey); the rest are ARGV in script order
+      entryKey: string,
+      queueKey: string,
+      orgsKey: string,
+      runId: string,
+      envId: string,
+      orgId: string,
+      payload: string,
+      createdAt: string,
+      ttlSeconds: string,
+      orgEnvsPrefix: string,
+      callback?: Callback<number>,
+    ): Result<number, Context>;
+    popAndMarkDraining( // first 2 args are KEYS (queueKey, orgsKey); result is the entry hash as JSON text, or null
+      queueKey: string,
+      orgsKey: string,
+      entryPrefix: string,
+      envId: string,
+      orgEnvsPrefix: string,
+      callback?: Callback<string | null>,
+    ): Result<string | null, Context>;
+    requeueMollifierEntry( // first 2 args are KEYS (entryKey, orgsKey); the rest are ARGV in script order
+      entryKey: string,
+      orgsKey: string,
+      queuePrefix: string,
+      runId: string,
+      orgEnvsPrefix: string,
+      callback?: Callback<number>,
+    ): Result<number, Context>;
+    failMollifierEntry( // first arg is the single KEY (entryKey); errorPayload is ARGV[1]
+      entryKey: string,
+      errorPayload: string,
+      callback?: Callback<number>,
+    ): Result<number, Context>;
+    mollifierEvaluateTrip( // first 2 args are KEYS (rateKey, trippedKey); result is [count, tripped]
+      rateKey: string,
+      trippedKey: string,
+      windowMs: string,
+      threshold: string,
+      holdMs: string,
+      callback?: Callback<[number, number]>,
+    ): Result<[number, number], Context>;
+  }
+}
diff --git a/packages/redis-worker/src/mollifier/drainer.test.ts b/packages/redis-worker/src/mollifier/drainer.test.ts
new file mode 100644
index 00000000000..c8f68977f69
--- /dev/null
+++ b/packages/redis-worker/src/mollifier/drainer.test.ts
@@ -0,0 +1,1322 @@
+import { redisTest } from "@internal/testcontainers";
+import { describe, expect, it } from "vitest";
+import { Logger } from "@trigger.dev/core/logger";
+import { MollifierBuffer } from "./buffer.js";
+import { MollifierDrainer } from "./drainer.js";
+import { serialiseSnapshot } from "./schemas.js";
+
+const noopOptions = { // spread into the MollifierBuffer constructors of the redisTest cases below
+  entryTtlSeconds: 600,
+  logger: new Logger("test", "log"),
+};
+
+// Stub plumbing for the Redis-free unit tests: inert defaults, overridable.
+type StubBuffer = Partial<MollifierBuffer> & { [K in keyof MollifierBuffer]?: any };
+
+function makeStubBuffer(overrides: StubBuffer): MollifierBuffer {
+  const defaults: StubBuffer = {
+    listOrgs: async () => [],
+    listEnvsForOrg: async () => [],
+    pop: async () => null,
+    ack: async () => {},
+    requeue: async () => {},
+    fail: async () => true,
+    getEntry: async () => null,
+    close: async () => {},
+  };
+  return Object.assign({}, defaults, overrides) as unknown as MollifierBuffer;
+}
+
+// Test shortcut for cases where org grouping is irrelevant: every env id
+// doubles as its own org id. `listOrgs` hands back `envs` as-is, and
+// `listEnvsForOrg(id)` yields `[id]` for a known env, otherwise `[]`.
+// Spread the result into makeStubBuffer next to the test's `pop` stub.
+function eachEnvAsOwnOrg(envs: string[]): Partial<StubBuffer> {
+  const listOrgs = async () => envs;
+  // Membership is checked at call time against the caller's array.
+  const listEnvsForOrg = async (orgId: string) => (envs.includes(orgId) ? [orgId] : []);
+  return { listOrgs, listEnvsForOrg };
+}
+
+describe("MollifierDrainer.runOnce", () => {
+  redisTest("drains one queued entry through the handler and acks", { timeout: 20_000 }, async ({ redisContainer }) => {
+    const buffer = new MollifierBuffer({
+      redisOptions: {
+        host: redisContainer.getHost(),
+        port: redisContainer.getPort(),
+        password: redisContainer.getPassword(),
+      },
+      ...noopOptions,
+    });
+
+    const handlerCalls: Array<{ runId: string; envId: string; orgId: string; payload: unknown }> =
+      [];
+    const handler = async (input: {
+      runId: string;
+      envId: string;
+      orgId: string;
+      payload: unknown;
+    }) => {
+      handlerCalls.push(input);
+    };
+    const drainer = new MollifierDrainer({
+      buffer,
+      handler,
+      concurrency: 5,
+      maxAttempts: 3,
+      isRetryable: () => false,
+      logger: new Logger("test-drainer", "log"),
+    });
+
+    try {
+      await buffer.accept({
+        runId: "run_1",
+        envId: "env_a",
+        orgId: "org_1",
+        payload: serialiseSnapshot({ foo: 1 }),
+      });
+
+      const result = await drainer.runOnce();
+      expect(result.drained).toBe(1);
+      expect(result.failed).toBe(0);
+      expect(handlerCalls).toHaveLength(1);
+      expect(handlerCalls[0]).toMatchObject({
+        runId: "run_1",
+        envId: "env_a",
+        orgId: "org_1",
+        payload: { foo: 1 },
+      });
+
+      const entry = await buffer.getEntry("run_1");
+      expect(entry).toBeNull();
+    } finally {
+      await buffer.close();
+    }
+  });
+
+  redisTest("runOnce with no entries does nothing", { timeout: 20_000 }, async ({ redisContainer }) => {
+    const buffer = new MollifierBuffer({
+      redisOptions: {
+        host: redisContainer.getHost(),
+        port: redisContainer.getPort(),
+        password: redisContainer.getPassword(),
+      },
+      ...noopOptions,
+    });
+
+    let handlerCalls = 0;
+    const handler = async () => {
+      handlerCalls++;
+    };
+    const drainer = new MollifierDrainer({
+      buffer,
+      handler,
+      concurrency: 5,
+      maxAttempts: 3,
+      isRetryable: () => false,
+      logger: new Logger("test-drainer", "log"),
+    });
+
+    try {
+      const result = await drainer.runOnce();
+      expect(result.drained).toBe(0);
+      expect(result.failed).toBe(0);
+      expect(handlerCalls).toBe(0);
+    } finally {
+      await buffer.close();
+    }
+  });
+});
+
+describe("MollifierDrainer error handling", () => {
+  redisTest("retryable error requeues and increments attempts", { timeout: 20_000 }, async ({ redisContainer }) => {
+    const buffer = new MollifierBuffer({
+      redisOptions: {
+        host: redisContainer.getHost(),
+        port: redisContainer.getPort(),
+        password: redisContainer.getPassword(),
+      },
+      ...noopOptions,
+    });
+
+    let calls = 0;
+    const handler = async () => { // always throws, so every attempt takes the error path
+      calls++;
+      throw new Error("transient");
+    };
+
+    const drainer = new MollifierDrainer({
+      buffer,
+      handler,
+      concurrency: 1,
+      maxAttempts: 3,
+      isRetryable: () => true, // classify every handler error as retryable
+      logger: new Logger("test-drainer", "log"),
+    });
+
+    try {
+      await buffer.accept({ runId: "run_r", envId: "env_a", orgId: "org_1", payload: "{}" });
+
+      await drainer.runOnce();
+      const after1 = await buffer.getEntry("run_r");
+      expect(after1!.status).toBe("QUEUED"); // first failure: entry goes back on the queue
+      expect(after1!.attempts).toBe(1);
+
+      await drainer.runOnce();
+      const after2 = await buffer.getEntry("run_r");
+      expect(after2!.status).toBe("QUEUED");
+      expect(after2!.attempts).toBe(2);
+
+      await drainer.runOnce();
+      const after3 = await buffer.getEntry("run_r");
+      expect(after3!.status).toBe("FAILED"); // with maxAttempts: 3 the third failure must end in FAILED, not another requeue
+      expect(calls).toBe(3);
+    } finally {
+      await buffer.close();
+    }
+  });
+
+  redisTest("non-retryable error transitions directly to FAILED", { timeout: 20_000 }, async ({ redisContainer }) => {
+    const buffer = new MollifierBuffer({
+      redisOptions: {
+        host: redisContainer.getHost(),
+        port: redisContainer.getPort(),
+        password: redisContainer.getPassword(),
+      },
+      ...noopOptions,
+    });
+
+    const handler = async () => {
+      throw new Error("validation failure");
+    };
+
+    const drainer = new MollifierDrainer({
+      buffer,
+      handler,
+      concurrency: 1,
+      maxAttempts: 3,
+      isRetryable: () => false,
+      logger: new Logger("test-drainer", "log"),
+    });
+
+    try {
+      await buffer.accept({ runId: "run_nr", envId: "env_a", orgId: "org_1", payload: "{}" });
+
+      await drainer.runOnce();
+
+      const entry = await buffer.getEntry("run_nr");
+      expect(entry!.status).toBe("FAILED");
+      expect(entry!.lastError).toEqual({ code: "Error", message: "validation failure" });
+    } finally {
+      await buffer.close();
+    }
+  });
+
+  redisTest(
+    "multi-org round-robin: drains one item per org per runOnce",
+    { timeout: 20_000 },
+    async ({ redisContainer }) => {
+      const buffer = new MollifierBuffer({
+        redisOptions: {
+          host: redisContainer.getHost(),
+          port: redisContainer.getPort(),
+          password: redisContainer.getPassword(),
+        },
+        ...noopOptions,
+      });
+
+      const handled: string[] = [];
+      const handler = async (input: { runId: string }) => {
+        handled.push(input.runId);
+      };
+
+      const drainer = new MollifierDrainer({
+        buffer,
+        handler,
+        concurrency: 10,
+        maxAttempts: 3,
+        isRetryable: () => false,
+        logger: new Logger("test-drainer", "log"),
+      });
+
+      try {
+        // org_A has two envs (env_a, env_b) → drainer picks one per tick
+        // via the per-org env cursor. org_B has one env (env_c) → it's
+        // always picked when org_B is in the slice.
+        await buffer.accept({ runId: "a1", envId: "env_a", orgId: "org_A", payload: "{}" });
+        await buffer.accept({ runId: "b1", envId: "env_b", orgId: "org_A", payload: "{}" });
+        await buffer.accept({ runId: "c1", envId: "env_c", orgId: "org_B", payload: "{}" });
+
+        // Tick 1: 2 orgs in slice → 2 pops, one from org_A's rotating env
+        // pick and one from org_B's only env.
+        const r1 = await drainer.runOnce();
+        expect(r1.drained).toBe(2);
+        expect(handled).toContain("c1");
+        // Org_A contributed exactly one of {a1, b1}.
+        const orgADrainedTick1 = handled.filter((h) => h === "a1" || h === "b1");
+        expect(orgADrainedTick1).toHaveLength(1);
+
+        handled.length = 0;
+        // Tick 2: org_B's queue is empty (only had 1 entry, drained tick 1).
+        // listOrgs returns [org_A] only. Drain the remaining org_A env.
+        const r2 = await drainer.runOnce();
+        expect(r2.drained).toBe(1);
+        expect(handled).toHaveLength(1);
+        expect(["a1", "b1"]).toContain(handled[0]);
+      } finally {
+        await buffer.close();
+      }
+    },
+  );
+});
+
+// Transient Redis errors used to permanently kill the loop because
+// `processOneFromEnv` didn't catch `buffer.pop()` rejections — the error
+// bubbled through `Promise.all` → `runOnce` → `loop`'s outer catch and
+// left `isRunning = false`. These tests use a stubbed buffer (no Redis
+// container) so we can deterministically inject failures from `listOrgs`,
+// `pop`, `requeue`, and `fail` without racing against a real client.
+describe("MollifierDrainer resilience to transient buffer errors", () => {
+  it("survives a transient listOrgs failure and resumes draining", async () => {
+    let listCalls = 0;
+    const popped: string[] = [];
+    const buffer = makeStubBuffer({
+      listOrgs: async () => {
+        listCalls += 1;
+        if (listCalls === 1) {
+          throw new Error("simulated redis blip");
+        }
+        return ["env_a"];
+      },
+      listEnvsForOrg: async (orgId: string) => (orgId === "env_a" ? ["env_a"] : []),
+      pop: async () => {
+        const runId = `run_${popped.length + 1}`;
+        if (popped.length >= 2) return null;
+        popped.push(runId);
+        return {
+          runId,
+          envId: "env_a",
+          orgId: "org_1",
+          payload: "{}",
+          attempts: 0,
+          createdAt: new Date(),
+        } as any;
+      },
+    });
+
+    const handled: string[] = [];
+    const drainer = new MollifierDrainer({
+      buffer,
+      handler: async (input) => {
+        handled.push(input.runId);
+      },
+      concurrency: 1,
+      maxAttempts: 3,
+      isRetryable: () => false,
+      pollIntervalMs: 20,
+      logger: new Logger("test-drainer", "log"),
+    });
+
+    drainer.start();
+    const deadline = Date.now() + 3_000;
+    while (handled.length < 2 && Date.now() < deadline) {
+      await new Promise((r) => setTimeout(r, 20));
+    }
+    await drainer.stop({ timeoutMs: 1_000 });
+
+    expect(handled).toEqual(["run_1", "run_2"]);
+    expect(listCalls).toBeGreaterThan(1);
+  });
+
+  it("a pop failure for one env doesn't poison the rest of the batch", async () => {
+    const buffer = makeStubBuffer({
+      ...eachEnvAsOwnOrg(["bad", "good"]),
+      pop: async (envId: string) => {
+        if (envId === "bad") {
+          throw new Error("simulated pop failure on bad env");
+        }
+        return {
+          runId: "run_good",
+          envId: "good",
+          orgId: "org_1",
+          payload: "{}",
+          attempts: 0,
+          createdAt: new Date(),
+        } as any;
+      },
+    });
+
+    const handled: string[] = [];
+    const drainer = new MollifierDrainer({
+      buffer,
+      handler: async (input) => {
+        handled.push(input.runId);
+      },
+      concurrency: 5,
+      maxAttempts: 3,
+      isRetryable: () => false,
+      logger: new Logger("test-drainer", "log"),
+    });
+
+    const result = await drainer.runOnce();
+    expect(result.drained).toBe(1);
+    expect(result.failed).toBe(1);
+    expect(handled).toEqual(["run_good"]);
+  });
+
+  it("a requeue failure during retry recovery doesn't poison the rest of the batch", async () => {
+    // Regression: handler throws a retryable error → processEntry calls
+    // buffer.requeue() inside its catch block. If requeue() itself throws
+    // (Redis blip during error recovery), the rejection used to escape
+    // processOneFromEnv unwrapped and reject the runOnce Promise.all,
+    // dropping handler results from sibling envs in the same tick.
+    const handled: string[] = [];
+    const buffer = makeStubBuffer({
+      ...eachEnvAsOwnOrg(["bad", "good"]),
+      pop: async (envId: string) =>
+        ({
+          runId: envId === "bad" ? "run_bad" : "run_good",
+          envId,
+          orgId: "org_1",
+          payload: "{}",
+          attempts: 0,
+          createdAt: new Date(),
+        }) as any,
+      requeue: async () => {
+        throw new Error("simulated requeue failure");
+      },
+    });
+
+    const drainer = new MollifierDrainer({
+      buffer,
+      handler: async (input) => {
+        handled.push(input.runId);
+        if (input.runId === "run_bad") throw new Error("transient");
+      },
+      concurrency: 5,
+      maxAttempts: 3,
+      isRetryable: () => true,
+      logger: new Logger("test-drainer", "log"),
+    });
+
+    const result = await drainer.runOnce();
+    // Two envs scheduled, one handler succeeded (drained), one handler threw
+    // and its recovery requeue threw too — counted as failed, batch not poisoned.
+    expect(result.drained).toBe(1);
+    expect(result.failed).toBe(1);
+    expect(new Set(handled)).toEqual(new Set(["run_bad", "run_good"]));
+  });
+
+  it("a fail() throw during terminal recovery doesn't poison the rest of the batch", async () => {
+    // Regression: handler throws a non-retryable error → processEntry calls
+    // buffer.fail() inside its catch block. If fail() itself throws, the
+    // rejection used to escape unwrapped and reject runOnce's Promise.all.
+    const handled: string[] = [];
+    const buffer = makeStubBuffer({
+      ...eachEnvAsOwnOrg(["bad", "good"]),
+      pop: async (envId: string) =>
+        ({
+          runId: envId === "bad" ? "run_bad" : "run_good",
+          envId,
+          orgId: "org_1",
+          payload: "{}",
+          attempts: 0,
+          createdAt: new Date(),
+        }) as any,
+      fail: async () => {
+        throw new Error("simulated fail() failure");
+      },
+    });
+
+    const drainer = new MollifierDrainer({
+      buffer,
+      handler: async (input) => {
+        handled.push(input.runId);
+        if (input.runId === "run_bad") throw new Error("terminal");
+      },
+      concurrency: 5,
+      maxAttempts: 3,
+      isRetryable: () => false,
+      logger: new Logger("test-drainer", "log"),
+    });
+
+    const result = await drainer.runOnce();
+    expect(result.drained).toBe(1);
+    expect(result.failed).toBe(1);
+    expect(new Set(handled)).toEqual(new Set(["run_bad", "run_good"]));
+  });
+});
+
+describe("MollifierDrainer per-tick org cap", () => {
+  // Bounding fan-out prevents one runOnce from queuing thousands of
+  // processOneFromEnv jobs when the org set is unexpectedly large.
+  // These tests use a stub buffer so we can drive the org/env counts
+  // deterministically without provisioning a real Redis with thousands
+  // of envs.
+
+  it("processes at most maxOrgsPerTick envs per runOnce", async () => {
+    // 20 single-env orgs are available, but the per-tick cap below is 5.
+    const orgCap = 5;
+    const envIds = Array.from({ length: 20 }, (_, index) => `env_${index}`);
+    const popCalls: string[] = [];
+    const stub = makeStubBuffer({
+      ...eachEnvAsOwnOrg(envIds),
+      pop: async (envId: string) => {
+        popCalls.push(envId);
+        return null; // empty queue — runOnce records this as "empty"
+      },
+    });
+    const drainer = new MollifierDrainer({
+      buffer: stub,
+      handler: async () => {},
+      concurrency: 5,
+      maxAttempts: 3,
+      isRetryable: () => false,
+      maxOrgsPerTick: orgCap,
+      logger: new Logger("test-drainer", "log"),
+    });
+    await drainer.runOnce();
+    expect(popCalls).toHaveLength(orgCap);
+  });
+
+  it("covers the full env set across `envs.length` ticks when sliced", async () => {
+    const allEnvs = Array.from({ length: 12 }, (_, i) => `env_${i}`);
+    const popped: string[] = [];
+    const buffer = makeStubBuffer({
+      ...eachEnvAsOwnOrg(allEnvs),
+      pop: async (envId: string) => {
+        popped.push(envId);
+        return null;
+      },
+    });
+
+    const drainer = new MollifierDrainer({
+      buffer,
+      handler: async () => {},
+      concurrency: 4,
+      maxAttempts: 3,
+      isRetryable: () => false,
+      maxOrgsPerTick: 4,
+      logger: new Logger("test-drainer", "log"),
+    });
+
+    // Cursor advances by 1 each tick. Over envs.length ticks every env
+    // appears in exactly `sliceSize` of them (slices overlap — intentional,
+    // see the head-of-line fairness test below).
+    for (let i = 0; i < allEnvs.length; i++) {
+      await drainer.runOnce();
+    }
+
+    expect(new Set(popped)).toEqual(new Set(allEnvs));
+    expect(popped).toHaveLength(allEnvs.length * 4); // envs.length × sliceSize
+    const perEnvCounts = popped.reduce<Record<string, number>>((acc, e) => {
+      acc[e] = (acc[e] ?? 0) + 1;
+      return acc;
+    }, {});
+    for (const env of allEnvs) {
+      expect(perEnvCounts[env]).toBe(4);
+    }
+  });
+
+  it("preserves head-of-line fairness when sliced: every env reaches every slice position", async () => {
+    // Regression test for the bias that advance-by-sliceSize would
+    // reintroduce. With fixed disjoint slices, env_0 would always be at
+    // position 0 (first into pLimit) and env_(sliceSize-1) would always
+    // be last. Advance-by-1 spreads each env across every slot.
+    const allEnvs = Array.from({ length: 8 }, (_, i) => `env_${i}`);
+    const sliceSize = 4;
+    const positionsByEnv = new Map<string, Set<number>>();
+    for (const env of allEnvs) positionsByEnv.set(env, new Set());
+
+    let currentTick: string[] = [];
+    const popOrderBuffer = makeStubBuffer({
+      ...eachEnvAsOwnOrg(allEnvs),
+      pop: async (envId: string) => {
+        currentTick.push(envId);
+        return null;
+      },
+    });
+
+    const drainer = new MollifierDrainer({
+      buffer: popOrderBuffer,
+      handler: async () => {},
+      // Concurrency >= sliceSize so pLimit doesn't reorder — pop call order
+      // matches the slice's scheduling order (i.e. the env's slot position).
+      concurrency: sliceSize,
+      maxAttempts: 3,
+      isRetryable: () => false,
+      maxOrgsPerTick: sliceSize,
+      logger: new Logger("test-drainer", "log"),
+    });
+
+    for (let tick = 0; tick < allEnvs.length; tick++) {
+      currentTick = [];
+      await drainer.runOnce();
+      currentTick.forEach((env, position) => {
+        positionsByEnv.get(env)!.add(position);
+      });
+    }
+
+    // Each env should have occupied every slot 0..sliceSize-1 across the
+    // cycle. If we'd regressed to advance-by-sliceSize, env_0 would only
+    // ever be at position 0 and env_3 only at position 3.
+    for (const env of allEnvs) {
+      const positions = positionsByEnv.get(env)!;
+      expect(positions.size).toBe(sliceSize);
+      for (let p = 0; p < sliceSize; p++) {
+        expect(positions.has(p)).toBe(true);
+      }
+    }
+  });
+
+  it("takes all envs and rotates by 1 when the set fits within the cap", async () => {
+    const allEnvs = ["env_a", "env_b", "env_c"];
+    const popsPerTick: string[][] = [];
+    let tick: string[] = [];
+    const buffer = makeStubBuffer({
+      ...eachEnvAsOwnOrg(allEnvs),
+      pop: async (envId: string) => {
+        tick.push(envId);
+        return null;
+      },
+    });
+
+    const drainer = new MollifierDrainer({
+      buffer,
+      handler: async () => {},
+      concurrency: 3,
+      maxAttempts: 3,
+      isRetryable: () => false,
+      maxOrgsPerTick: 100, // way above n
+      logger: new Logger("test-drainer", "log"),
+    });
+
+    for (let i = 0; i < 3; i++) {
+      tick = [];
+      await drainer.runOnce();
+      popsPerTick.push(tick);
+    }
+
+    // Every tick covers every env (because cap > n), but the head-of-line
+    // env rotates by 1 each tick — preserves the original fairness behaviour.
+    for (const popped of popsPerTick) {
+      expect(new Set(popped)).toEqual(new Set(allEnvs));
+    }
+    const [tick0, tick1, tick2] = popsPerTick;
+    expect(tick0?.[0]).not.toEqual(tick1?.[0]);
+    expect(tick1?.[0]).not.toEqual(tick2?.[0]);
+  });
+
+  it("a light env is not starved behind heavy envs", async () => {
+    // The buffer's atomic pop script prunes an env from its org's
+    // org-envs set the moment a pop drains that env's queue, so a heavy
+    // env with thousands of pending entries stays listed and a light env
+    // with a single entry only stays until that one entry pops (the stub
+    // below mimics this via `activeEnvs`). Combined with the advance-by-1
+    // cursor, this means the light env can't be parked behind heavy envs
+    // indefinitely — it gets popped within at most
+    // `envs.length - sliceSize + 1` ticks however deep the heavy queues are.
+    const heavy = Array.from({ length: 6 }, (_, i) => `env_heavy_${i}`);
+    const light = "env_light";
+    const queues = new Map<string, string[]>();
+    for (const h of heavy) {
+      queues.set(
+        h,
+        Array.from({ length: 100 }, (_, i) => `${h}_run_${i}`),
+      );
+    }
+    queues.set(light, [`${light}_run_0`]);
+
+    const activeEnvs = () =>
+      [...queues.keys()].filter((k) => (queues.get(k)?.length ?? 0) > 0);
+    const buffer = makeStubBuffer({
+      listOrgs: async () => activeEnvs(),
+      listEnvsForOrg: async (orgId: string) =>
+        activeEnvs().includes(orgId) ? [orgId] : [],
+      pop: async (envId: string) => {
+        const q = queues.get(envId);
+        if (!q || q.length === 0) return null;
+        const runId = q.shift()!;
+        return {
+          runId,
+          envId,
+          orgId: "org_1",
+          payload: "{}",
+          status: "DRAINING",
+          attempts: 0,
+          createdAt: new Date(),
+        } as any;
+      },
+    });
+
+    const drainer = new MollifierDrainer({
+      buffer,
+      handler: async () => {},
+      concurrency: 4,
+      maxAttempts: 3,
+      isRetryable: () => false,
+      maxOrgsPerTick: 4, // < 7 envs so we exercise slicing
+      logger: new Logger("test-drainer", "log"),
+    });
+
+    // 7 envs, sliceSize=4 → worst-case wait for env_light is 4 ticks
+    // (it appears in the slice in exactly 4 of every 7 ticks). Run 7 to
+    // give the upper bound a wide margin.
+    const ticksUntilLightDrained = await (async () => {
+      for (let tick = 1; tick <= 7; tick++) {
+        await drainer.runOnce();
+        if ((queues.get(light)?.length ?? 0) === 0) return tick;
+      }
+      return Infinity;
+    })();
+
+    expect(ticksUntilLightDrained).toBeLessThanOrEqual(4);
+    // Sanity: heavy envs are being worked on (not starved themselves) but
+    // are far from drained — confirms we measured the right property.
+    for (const h of heavy) {
+      const remaining = queues.get(h)!.length;
+      expect(remaining).toBeGreaterThan(0);
+      expect(remaining).toBeLessThan(100);
+    }
+  });
+
+  it("a light org is not starved behind a heavy org with many envs", async () => {
+    // Org-level no-starvation: org_B's single entry drains on the first
+    // tick because the drainer walks orgs at the top level. Org_A
+    // having many envs doesn't give it extra rotation slots.
+    const orgAEnvs = Array.from({ length: 6 }, (_, i) => `env_orgA_${i}`);
+    const orgBEnv = "env_orgB_only";
+    const envOrg = new Map<string, string>();
+    for (const e of orgAEnvs) envOrg.set(e, "org_A");
+    envOrg.set(orgBEnv, "org_B");
+    const queues = new Map<string, Array<{ runId: string; orgId: string }>>();
+    for (const e of orgAEnvs) {
+      queues.set(
+        e,
+        Array.from({ length: 100 }, (_, i) => ({
+          runId: `${e}_run_${i}`,
+          orgId: "org_A",
+        })),
+      );
+    }
+    queues.set(orgBEnv, [{ runId: `${orgBEnv}_run_0`, orgId: "org_B" }]);
+
+    const drainedByOrg: Record<string, number> = { org_A: 0, org_B: 0 };
+    const buffer = makeStubBuffer({
+      listOrgs: async () => { // orgs that still own at least one non-empty env queue
+        const orgs = new Set<string>();
+        for (const [envId, items] of queues.entries()) {
+          if (items.length > 0) orgs.add(envOrg.get(envId)!);
+        }
+        return [...orgs];
+      },
+      listEnvsForOrg: async (orgId: string) => { // only this org's envs with entries left
+        const envs: string[] = [];
+        for (const [envId, items] of queues.entries()) {
+          if (items.length > 0 && envOrg.get(envId) === orgId) envs.push(envId);
+        }
+        return envs;
+      },
+      pop: async (envId: string) => {
+        const q = queues.get(envId);
+        if (!q || q.length === 0) return null;
+        const entry = q.shift()!;
+        return {
+          runId: entry.runId,
+          envId,
+          orgId: entry.orgId,
+          payload: "{}",
+          status: "DRAINING",
+          attempts: 0,
+          createdAt: new Date(),
+        } as any;
+      },
+    });
+
+    const drainer = new MollifierDrainer({
+      buffer,
+      handler: async (input) => {
+        drainedByOrg[input.orgId] = (drainedByOrg[input.orgId] ?? 0) + 1;
+      },
+      concurrency: 4,
+      maxAttempts: 3,
+      isRetryable: () => false,
+      maxOrgsPerTick: 4,
+      logger: new Logger("test-drainer", "log"),
+    });
+
+    // Only 2 orgs in play (fewer than maxOrgsPerTick) → both are in every
+    // tick's slice, so org_B's single env is popped on tick 1.
+    const ticksUntilOrgBDrained = await (async () => {
+      for (let tick = 1; tick <= 7; tick++) {
+        await drainer.runOnce();
+        if ((drainedByOrg["org_B"] ?? 0) > 0) return tick;
+      }
+      return Infinity; // org_B never drained within the tick budget
+    })();
+
+    expect(ticksUntilOrgBDrained).toBe(1);
+    // Sanity: org_A is being drained too (not starved itself) but its many
+    // envs are far from empty.
+    expect(drainedByOrg["org_A"]).toBeGreaterThan(0);
+    for (const e of orgAEnvs) {
+      expect(queues.get(e)!.length).toBeGreaterThan(0);
+    }
+  });
+
+  it("a heavy org with many envs gets ~1 slot per tick, not N slots", async () => {
+    // Hierarchical rotation property: an org with N envs gets the SAME
+    // per-tick scheduling slot as an org with 1 env, instead of N slots
+    // (which is what per-env rotation would give). Sustained-run drainage
+    // rate is therefore determined by org count, not env count.
+    //
+    // Org_A: 6 envs × 100 entries (a noisy tenant).
+    // Org_B: 1 env × 100 entries (a quiet tenant).
+    // Per-env rotation would drain org_A 6× faster than org_B. The org-
+    // level walk via listOrgs → listEnvsForOrg drains them at ~1:1 over
+    // a sustained window.
+    const orgAEnvs = Array.from({ length: 6 }, (_, i) => `env_orgA_${i}`);
+    const orgBEnv = "env_orgB_only";
+    const envOrg = new Map<string, string>();
+    for (const e of orgAEnvs) envOrg.set(e, "org_A");
+    envOrg.set(orgBEnv, "org_B");
+    const queues = new Map<string, Array<{ runId: string; orgId: string }>>();
+    for (const e of orgAEnvs) {
+      queues.set(
+        e,
+        Array.from({ length: 100 }, (_, i) => ({
+          runId: `${e}_run_${i}`,
+          orgId: "org_A",
+        })),
+      );
+    }
+    queues.set(
+      orgBEnv,
+      Array.from({ length: 100 }, (_, i) => ({
+        runId: `${orgBEnv}_run_${i}`,
+        orgId: "org_B",
+      })),
+    );
+
+    const drainedByOrg: Record<string, number> = { org_A: 0, org_B: 0 };
+    const buffer = makeStubBuffer({
+      listOrgs: async () => { // orgs that still own at least one non-empty env queue
+        const orgs = new Set<string>();
+        for (const [envId, items] of queues.entries()) {
+          if (items.length > 0) orgs.add(envOrg.get(envId)!);
+        }
+        return [...orgs];
+      },
+      listEnvsForOrg: async (orgId: string) => { // only this org's envs with entries left
+        const envs: string[] = [];
+        for (const [envId, items] of queues.entries()) {
+          if (items.length > 0 && envOrg.get(envId) === orgId) envs.push(envId);
+        }
+        return envs;
+      },
+      pop: async (envId: string) => {
+        const q = queues.get(envId);
+        if (!q || q.length === 0) return null;
+        const entry = q.shift()!;
+        return {
+          runId: entry.runId,
+          envId,
+          orgId: entry.orgId,
+          payload: "{}",
+          status: "DRAINING",
+          attempts: 0,
+          createdAt: new Date(),
+        } as any;
+      },
+    });
+
+    const drainer = new MollifierDrainer({
+      buffer,
+      handler: async (input) => {
+        drainedByOrg[input.orgId] = (drainedByOrg[input.orgId] ?? 0) + 1;
+      },
+      concurrency: 10,
+      maxAttempts: 3,
+      isRetryable: () => false,
+      maxOrgsPerTick: 100, // unsliced — every org gets a slot every tick
+      logger: new Logger("test-drainer", "log"),
+    });
+
+    for (let i = 0; i < 20; i++) { // 20 ticks; each tick pops at most one entry per org
+      await drainer.runOnce();
+    }
+
+    // Per-env rotation would make drainedByOrg.org_A ~6× drainedByOrg.org_B.
+    // Hierarchical rotation pops once per org per tick, so the ratio is ~1.
+    expect(drainedByOrg["org_A"]).toBeGreaterThan(0);
+    expect(drainedByOrg["org_B"]).toBeGreaterThan(0);
+    const ratio = drainedByOrg["org_A"]! / drainedByOrg["org_B"]!;
+    expect(ratio).toBeGreaterThan(0.7);
+    expect(ratio).toBeLessThan(1.5);
+  });
+
+  it("within an org, envs are rotated round-robin across ticks", async () => {
+    // An org with N envs picks one env per tick, cycling through its
+    // envs via the per-org env cursor. Inner cursor advances by 1 per
+    // visit to the org (analogous to head-of-line fairness within a
+    // slice, but at the env-within-org layer).
+    const orgEnvs = ["env_x", "env_y", "env_z"];
+    const orgId = "org_solo";
+    const queues = new Map<string, number>(); // envId → entries remaining
+    for (const e of orgEnvs) queues.set(e, 100);
+
+    const poppedSequence: string[] = [];
+    const buffer = makeStubBuffer({
+      listOrgs: async () => {
+        const anyEnvActive = [...queues.values()].some((n) => n > 0);
+        return anyEnvActive ? [orgId] : [];
+      },
+      listEnvsForOrg: async (org: string) =>
+        org === orgId
+          ? [...queues.keys()].filter((k) => (queues.get(k) ?? 0) > 0)
+          : [],
+      pop: async (envId: string) => {
+        const remaining = queues.get(envId) ?? 0;
+        if (remaining === 0) return null;
+        queues.set(envId, remaining - 1);
+        poppedSequence.push(envId);
+        return {
+          runId: `${envId}_${remaining}`,
+          envId,
+          orgId,
+          payload: "{}",
+          status: "DRAINING",
+          attempts: 0,
+          createdAt: new Date(),
+        } as any;
+      },
+    });
+
+    const drainer = new MollifierDrainer({
+      buffer,
+      handler: async () => {},
+      concurrency: 1,
+      maxAttempts: 3,
+      isRetryable: () => false,
+      maxOrgsPerTick: 100,
+      logger: new Logger("test-drainer", "log"),
+    });
+
+    // 6 ticks × 1 env per tick = 6 pops, cycling x, y, z, x, y, z. Only the
+    // per-env totals are asserted below: each env is picked exactly twice.
+    for (let i = 0; i < 6; i++) {
+      await drainer.runOnce();
+    }
+
+    expect(poppedSequence).toHaveLength(6);
+    const counts = poppedSequence.reduce<Record<string, number>>((acc, e) => {
+      acc[e] = (acc[e] ?? 0) + 1;
+      return acc;
+    }, {});
+    for (const env of orgEnvs) {
+      expect(counts[env]).toBe(2);
+    }
+  });
+});
+
+describe("MollifierDrainer additional coverage", () => {
+
+  it("a malformed payload is treated as a non-retryable handler error and goes terminal", async () => {
+    // The deserialise call lives inside processEntry's try, so a JSON parse
+    // failure is caught by the same handler-error branch. With
+    // isRetryable returning false, the entry transitions directly to FAILED —
+    // the handler is never invoked because the throw happens before the
+    // handler call.
+    let handlerCalled = false;
+    const failedEntries: Array<{ runId: string; error: { code: string; message: string } }> = [];
+    const buffer = makeStubBuffer({
+      ...eachEnvAsOwnOrg(["env_a"]),
+      pop: async () =>
+        ({
+          runId: "run_malformed",
+          envId: "env_a",
+          orgId: "org_1",
+          payload: "not valid json {", // unparseable on purpose
+          status: "DRAINING",
+          attempts: 0,
+          createdAt: new Date(),
+        }) as any,
+      fail: async (runId: string, error: { code: string; message: string }) => {
+        failedEntries.push({ runId, error });
+        return true;
+      },
+    });
+
+    const drainer = new MollifierDrainer({
+      buffer,
+      handler: async () => {
+        handlerCalled = true;
+      },
+      concurrency: 1,
+      maxAttempts: 3,
+      isRetryable: () => false,
+      logger: new Logger("test-drainer", "log"),
+    });
+
+    const result = await drainer.runOnce();
+
+    expect(handlerCalled).toBe(false);
+    expect(result.failed).toBe(1); // terminal failures are counted as "failed"
+    expect(result.drained).toBe(0);
+    expect(failedEntries).toHaveLength(1);
+    expect(failedEntries[0]?.runId).toBe("run_malformed");
+  });
+
+  it("an ack failure after a successful handler is currently treated as a handler error (documented behaviour)", async () => {
+    // CAVEAT: this pins a known behaviour gap, not the ideal behaviour.
+    // ack() lives inside the same try as the handler call, so if the
+    // handler succeeds but ack throws (e.g. transient Redis blip), the
+    // entry is routed through the retry/terminal path even though the
+    // handler-side work completed. Phase 2's engine-replay handler will
+    // need idempotency to absorb the re-execution this implies on retry,
+    // OR ack should be lifted out of the try block.
+    let handlerCalls = 0;
+    const failedEntries: string[] = [];
+    const buffer = makeStubBuffer({
+      ...eachEnvAsOwnOrg(["env_a"]),
+      pop: async () =>
+        ({
+          runId: "run_x",
+          envId: "env_a",
+          orgId: "org_1",
+          payload: "{}",
+          status: "DRAINING",
+          attempts: 0,
+          createdAt: new Date(),
+        }) as any,
+      ack: async () => {
+        throw new Error("simulated ack failure");
+      },
+      fail: async (runId: string) => {
+        failedEntries.push(runId);
+        return true;
+      },
+    });
+
+    const drainer = new MollifierDrainer({
+      buffer,
+      handler: async () => {
+        handlerCalls += 1;
+      },
+      concurrency: 1,
+      maxAttempts: 3,
+      isRetryable: () => false, // so the ack error goes straight to fail(), not requeue()
+      logger: new Logger("test-drainer", "log"),
+    });
+
+    await drainer.runOnce();
+
+    expect(handlerCalls).toBe(1); // handler did run
+    expect(failedEntries).toEqual(["run_x"]); // but entry was marked failed anyway
+  });
+
+  it("start() called twice does not spawn a second loop", async () => {
+    let listOrgsCalls = 0; // one increment per runOnce tick, since every tick starts with listOrgs
+    const buffer = makeStubBuffer({
+      listOrgs: async () => {
+        listOrgsCalls += 1;
+        return [];
+      },
+    });
+
+    const drainer = new MollifierDrainer({
+      buffer,
+      handler: async () => {},
+      concurrency: 1,
+      maxAttempts: 3,
+      isRetryable: () => false,
+      pollIntervalMs: 50,
+      logger: new Logger("test-drainer", "log"),
+    });
+
+    drainer.start();
+    drainer.start(); // no-op
+    await new Promise((r) => setTimeout(r, 150));
+    await drainer.stop({ timeoutMs: 500 });
+
+    // One loop polls at t≈0, 50, 100 (and maybe 150) → at most 4 calls. A
+    // doubled loop would make 6–8 in the same window, so the bound sits below 6.
+    expect(listOrgsCalls).toBeLessThanOrEqual(5);
+  });
+
+  it("stop() is idempotent and safe to call when never started", async () => {
+    const buffer = makeStubBuffer({});
+    const drainer = new MollifierDrainer({
+      buffer,
+      handler: async () => {},
+      concurrency: 1,
+      maxAttempts: 3,
+      isRetryable: () => false,
+      logger: new Logger("test-drainer", "log"),
+    });
+
+    // Never started: stop() returns early because the drainer is not running.
+    await expect(drainer.stop()).resolves.toBeUndefined();
+
+    // Started then stopped twice: the second stop() finds the loop already exited.
+    drainer.start();
+    await expect(drainer.stop()).resolves.toBeUndefined(); // no timeout → waits for the loop to exit
+    await expect(drainer.stop()).resolves.toBeUndefined();
+  });
+
+  it("rotation cursors reset on start() so a stop+start cycle begins fresh", async () => {
+    const allEnvs = ["env_a", "env_b", "env_c", "env_d", "env_e", "env_f"];
+    const popLog: string[] = [];
+    const buffer = makeStubBuffer({
+      ...eachEnvAsOwnOrg(allEnvs),
+      pop: async (envId: string) => {
+        popLog.push(envId);
+        return null; // always "empty": only the order of pop attempts matters here
+      },
+    });
+
+    const drainer = new MollifierDrainer({
+      buffer,
+      handler: async () => {},
+      concurrency: 3,
+      maxAttempts: 3,
+      isRetryable: () => false,
+      maxOrgsPerTick: 3,
+      // Long sleep so the loop ticks exactly once between start() and stop().
+      pollIntervalMs: 10_000,
+      logger: new Logger("test-drainer", "log"),
+    });
+
+    // Advance the cursor via runOnce so it's nonzero before start().
+    await drainer.runOnce();
+    await drainer.runOnce();
+    popLog.length = 0;
+
+    drainer.start();
+    // Wait long enough for the loop's first tick to complete.
+    await new Promise((r) => setTimeout(r, 100));
+    await drainer.stop({ timeoutMs: 1_000 }); // returns via the deadline: the loop is parked in its 10s poll delay
+
+    // The first slice after start() should begin at envs[0] (cursor reset)
+    // — the slice is [env_a, env_b, env_c]. Without the reset, it would
+    // start at env_c (cursor was 2).
+    expect(popLog.slice(0, 3)).toEqual(["env_a", "env_b", "env_c"]);
+  });
+
+  it("loop backoff grows with consecutive runOnce failures and resets on success", async () => {
+    // The loop catches runOnce-level errors (e.g. a listOrgs blip), increments
+    // `consecutiveErrors`, and delays for backoffMs(consecutiveErrors) —
+    // capped at 5s. This test pins the growth curve by failing 4 times in a
+    // row and observing increasing inter-tick gaps, then succeeding to
+    // verify the counter resets.
+    const tickTimestamps: number[] = [];
+    let listEnvsCalls = 0; // counts listOrgs invocations, i.e. runOnce ticks
+    const buffer = makeStubBuffer({
+      listOrgs: async () => {
+        listEnvsCalls += 1;
+        tickTimestamps.push(Date.now());
+        if (listEnvsCalls <= 4) {
+          throw new Error("simulated sustained outage");
+        }
+        return []; // success — resets consecutiveErrors
+      },
+    });
+
+    const drainer = new MollifierDrainer({
+      buffer,
+      handler: async () => {},
+      concurrency: 1,
+      maxAttempts: 3,
+      isRetryable: () => false,
+      pollIntervalMs: 100,
+      logger: new Logger("test-drainer", "log"),
+    });
+
+    drainer.start();
+    // Allow time for 4 failures + first success + a few subsequent successes.
+    // Backoff doubles on each of errors 1..4 and is capped at 5s per delay;
+    // the four delays total at most ~3s, so 4s leaves headroom for jitter.
+    await new Promise((r) => setTimeout(r, 4_000));
+    await drainer.stop({ timeoutMs: 1_000 });
+
+    expect(listEnvsCalls).toBeGreaterThanOrEqual(5);
+    // Inter-tick gaps during the failure run should grow (exponential).
+    const gap1 = tickTimestamps[1]! - tickTimestamps[0]!;
+    const gap2 = tickTimestamps[2]! - tickTimestamps[1]!;
+    const gap3 = tickTimestamps[3]! - tickTimestamps[2]!;
+    expect(gap2).toBeGreaterThan(gap1);
+    expect(gap3).toBeGreaterThan(gap2);
+
+    // After the first success (tick 5), counter resets, so the gap between
+    // tick 5 and tick 6 should drop back to pollIntervalMs-ish — much
+    // smaller than gap3 (the longest backoff measured above).
+    if (tickTimestamps.length >= 6) {
+      const postRecoveryGap = tickTimestamps[5]! - tickTimestamps[4]!;
+      expect(postRecoveryGap).toBeLessThan(gap3);
+    }
+  });
+});
+
+describe("MollifierDrainer.start/stop", () => {
+  redisTest("start polls and processes, stop halts the loop", { timeout: 20_000 }, async ({ redisContainer }) => {
+    const buffer = new MollifierBuffer({
+      redisOptions: {
+        host: redisContainer.getHost(),
+        port: redisContainer.getPort(),
+        password: redisContainer.getPassword(),
+      },
+      ...noopOptions,
+    });
+
+    const handled: string[] = []; // runIds in the order the handler saw them
+    const handler = async (input: { runId: string }) => {
+      handled.push(input.runId);
+    };
+
+    const drainer = new MollifierDrainer({
+      buffer,
+      handler,
+      concurrency: 5,
+      maxAttempts: 3,
+      isRetryable: () => false,
+      pollIntervalMs: 20,
+      logger: new Logger("test-drainer", "log"),
+    });
+
+    try {
+      await buffer.accept({ runId: "live_1", envId: "env_a", orgId: "org_1", payload: "{}" });
+      await buffer.accept({ runId: "live_2", envId: "env_a", orgId: "org_1", payload: "{}" });
+
+      drainer.start();
+
+      const deadline = Date.now() + 5_000;
+      while (handled.length < 2 && Date.now() < deadline) { // poll until both runs are handled or 5s pass
+        await new Promise((r) => setTimeout(r, 50));
+      }
+
+      await drainer.stop();
+
+      expect(new Set(handled)).toEqual(new Set(["live_1", "live_2"])); // order is not asserted
+    } finally {
+      await buffer.close();
+    }
+  });
+
+  redisTest("stop returns after timeoutMs even if a handler is hung", { timeout: 20_000 }, async ({ redisContainer }) => {
+    const buffer = new MollifierBuffer({
+      redisOptions: {
+        host: redisContainer.getHost(),
+        port: redisContainer.getPort(),
+        password: redisContainer.getPassword(),
+      },
+      ...noopOptions,
+    });
+
+    let handlerStarted = false;
+    const handler = async () => {
+      handlerStarted = true;
+      await new Promise<void>(() => {}); // never settles: simulates a hung handler
+    };
+
+    const drainer = new MollifierDrainer({
+      buffer,
+      handler,
+      concurrency: 1,
+      maxAttempts: 3,
+      isRetryable: () => false,
+      pollIntervalMs: 20,
+      logger: new Logger("test-drainer", "log"),
+    });
+
+    try {
+      await buffer.accept({ runId: "hung", envId: "env_a", orgId: "org_1", payload: "{}" });
+
+      drainer.start();
+
+      const deadline = Date.now() + 2_000;
+      while (!handlerStarted && Date.now() < deadline) { // wait until the handler is actually in flight
+        await new Promise((r) => setTimeout(r, 25));
+      }
+      expect(handlerStarted).toBe(true);
+
+      const stopStart = Date.now();
+      await drainer.stop({ timeoutMs: 500 });
+      const stopElapsed = Date.now() - stopStart;
+
+      // Allow a small jitter window below `timeoutMs` — Node's setTimeout can
+      // fire a millisecond or two early under CI load. The behaviour we're
+      // pinning is "stop honors the deadline instead of waiting for the hung
+      // handler indefinitely", not millisecond-precise timing.
+      expect(stopElapsed).toBeGreaterThanOrEqual(450);
+      expect(stopElapsed).toBeLessThan(2_000);
+    } finally {
+      await buffer.close();
+    }
+  });
+});
+
+describe("MollifierDrainer concurrency cap", () => {
+  redisTest(
+    "runOnce never exceeds configured concurrency in flight",
+    { timeout: 30_000 },
+    async ({ redisContainer }) => {
+      const buffer = new MollifierBuffer({
+        redisOptions: {
+          host: redisContainer.getHost(),
+          port: redisContainer.getPort(),
+          password: redisContainer.getPassword(),
+        },
+        ...noopOptions,
+      });
+
+      const concurrency = 3;
+      const envCount = 12;
+      let inflight = 0; // handlers currently between entry and exit
+      let peak = 0; // highest value `inflight` ever reached
+      let handlerCalls = 0;
+      const handler = async () => {
+        handlerCalls++;
+        inflight++;
+        if (inflight > peak) peak = inflight;
+        // Sleep long enough that handlers definitely overlap if scheduling
+        // allowed it — the assertion is meaningful only if multiple handlers
+        // would be running simultaneously without the cap.
+        await new Promise((r) => setTimeout(r, 75));
+        inflight--;
+      };
+
+      const drainer = new MollifierDrainer({
+        buffer,
+        handler,
+        concurrency,
+        maxAttempts: 1,
+        isRetryable: () => false,
+        logger: new Logger("test-drainer", "log"),
+      });
+
+      try {
+        // One entry per (env, org) so runOnce sees `envCount` distinct
+        // orgs as scheduling candidates and pLimits them through
+        // pLimit(concurrency). Spread across orgs (not envs in one org)
+        // because the drainer picks one env per org per tick — a single
+        // org with 12 envs would only see 1 pop per tick.
+        for (let i = 0; i < envCount; i++) {
+          await buffer.accept({
+            runId: `run_${i}`,
+            envId: `env_${i}`,
+            orgId: `org_${i}`,
+            payload: "{}",
+          });
+        }
+
+        const result = await drainer.runOnce();
+        expect(result.drained).toBe(envCount); // default maxOrgsPerTick (500) covers all 12 orgs in one tick
+        expect(handlerCalls).toBe(envCount);
+        expect(peak).toBeGreaterThan(1); // concurrency is real, not serialised
+        expect(peak).toBeLessThanOrEqual(concurrency);
+      } finally {
+        await buffer.close();
+      }
+    },
+  );
+});
diff --git a/packages/redis-worker/src/mollifier/drainer.ts b/packages/redis-worker/src/mollifier/drainer.ts
new file mode 100644
index 00000000000..407b389e14e
--- /dev/null
+++ b/packages/redis-worker/src/mollifier/drainer.ts
@@ -0,0 +1,289 @@
+import { Logger } from "@trigger.dev/core/logger";
+import pLimit from "p-limit";
+import { MollifierBuffer } from "./buffer.js";
+import { BufferEntry, deserialiseSnapshot } from "./schemas.js";
+
+export type MollifierDrainerHandler<TPayload> = (input: {
+  runId: string;
+  envId: string;
+  orgId: string;
+  payload: TPayload; // result of deserialiseSnapshot on the entry's stored payload string
+  attempts: number; // the popped entry's attempts value, not yet incremented for this try
+  createdAt: Date;
+}) => Promise<void>; // a rejection sends the entry down the requeue / fail path
+
+export type MollifierDrainerOptions<TPayload> = {
+  buffer: MollifierBuffer;
+  handler: MollifierDrainerHandler<TPayload>;
+  concurrency: number; // passed to pLimit: max per-env pop+process jobs in flight
+  maxAttempts: number; // a retryable error is requeued only while attempts + 1 < maxAttempts
+  isRetryable: (err: unknown) => boolean; // false → the entry is failed terminally
+  pollIntervalMs?: number; // sleep after a tick that drained and failed nothing; defaults to 100
+  // Cap on how many ORGS `runOnce` processes per tick. The drainer rotates
+  // through orgs at the top level and picks one env per org per tick, so
+  // the actual per-tick pop count is at most `maxOrgsPerTick`. Tune for
+  // "typical orgs with pending entries" rather than total system org
+  // count. Defaults to 500.
+  //
+  // The buffer maintains `mollifier:orgs` and `mollifier:org-envs:${orgId}`
+  // atomically with per-env queues, so the drainer can walk orgs → envs
+  // directly. An org with N envs gets the same per-tick scheduling slot
+  // as an org with 1 env — tenant-level drainage throughput is determined
+  // by org count, not env count.
+  maxOrgsPerTick?: number;
+  logger?: Logger; // defaults to new Logger("MollifierDrainer", "debug")
+};
+
+export type DrainResult = {
+  drained: number; // entries whose handler and ack both succeeded this tick
+  failed: number; // pop errors plus entries that were requeued or terminally failed
+};
+
+export class MollifierDrainer<TPayload = unknown> {
+  private readonly buffer: MollifierBuffer;
+  private readonly handler: MollifierDrainerHandler<TPayload>;
+  private readonly maxAttempts: number;
+  private readonly isRetryable: (err: unknown) => boolean;
+  private readonly pollIntervalMs: number;
+  private readonly maxOrgsPerTick: number;
+  private readonly logger: Logger;
+  private readonly limit: ReturnType<typeof pLimit>;
+  // Rotation state. `orgCursor` advances through the active-orgs list.
+  // Each org has its own internal cursor in `perOrgEnvCursors` for
+  // cycling through that org's envs. Both reset on `start()`.
+  private orgCursor = 0;
+  private perOrgEnvCursors = new Map<string, number>();
+  private isRunning = false; // true from start() until loop() exits
+  private stopping = false; // set by stop(); loop() exits at its next check
+  private loopPromise: Promise<void> | null = null; // the in-flight loop(), awaited by stop()
+
+  constructor(options: MollifierDrainerOptions<TPayload>) {
+    this.buffer = options.buffer;
+    this.handler = options.handler;
+    this.maxAttempts = options.maxAttempts;
+    this.isRetryable = options.isRetryable;
+    this.pollIntervalMs = options.pollIntervalMs ?? 100;
+    this.maxOrgsPerTick = options.maxOrgsPerTick ?? 500;
+    this.logger = options.logger ?? new Logger("MollifierDrainer", "debug");
+    this.limit = pLimit(options.concurrency);
+  }
+
+  async runOnce(): Promise<DrainResult> {
+    const orgs = await this.buffer.listOrgs();
+    if (orgs.length === 0) return { drained: 0, failed: 0 };
+
+    const orgSlice = this.takeOrgSlice(orgs);
+
+    // Fan the per-org SMEMBERS out in a single pipelined round-trip. Serial
+    // awaits would otherwise add `orgSlice.length × RTT` of dead time before
+    // pops start — at the default `maxOrgsPerTick=500` and a ~1ms ElastiCache
+    // RTT that's a ~500ms per-tick floor. ioredis auto-pipelines concurrent
+    // commands into one batch, so the burst is cheap; SMEMBERS on a small set
+    // is O(N) per org and trivial at this scale. `Promise.all` preserves
+    // order, so the org→envs pairing below stays deterministic.
+    const envsByOrg = await Promise.all(
+      orgSlice.map((orgId) => this.buffer.listEnvsForOrg(orgId)),
+    );
+    const targets: string[] = [];
+    for (let i = 0; i < orgSlice.length; i++) {
+      const orgId = orgSlice[i]!;
+      const envsForOrg = envsByOrg[i]!;
+      if (envsForOrg.length === 0) continue;
+      const envId = this.pickEnvForOrg(orgId, envsForOrg);
+      targets.push(envId);
+    }
+
+    const inflight: Promise<"drained" | "failed" | "empty">[] = [];
+    for (const envId of targets) {
+      inflight.push(this.limit(() => this.processOneFromEnv(envId)));
+    }
+
+    const results = await Promise.all(inflight);
+    return {
+      drained: results.filter((r) => r === "drained").length,
+      failed: results.filter((r) => r === "failed").length,
+    };
+  }
+
+  start(): void {
+    if (this.isRunning) return;
+    this.isRunning = true;
+    this.stopping = false;
+    // Reset rotation state on each (re)start. A stop+start cycle means
+    // operator intent to "begin clean" — between-restart cursor drift
+    // would otherwise carry implicit state across what should look like
+    // a fresh boot.
+    this.orgCursor = 0;
+    this.perOrgEnvCursors = new Map();
+    this.loopPromise = this.loop();
+  }
+
+  // Signal the loop to exit (`stopping = true`) and wait for it. With no
+  // timeout, wait indefinitely for the in-flight `runOnce` and its handlers
+  // to settle — same semantic as FairQueue / BatchQueue's `stop()`. With a
+  // timeout, race the loop promise against a deadline so a hung handler
+  // can't wedge the process past its termination grace period.
+  async stop(options: { timeoutMs?: number } = {}): Promise<void> {
+    if (!this.isRunning || !this.loopPromise) return;
+    this.stopping = true;
+    if (options.timeoutMs == null) {
+      await this.loopPromise;
+      return;
+    }
+    // Hold the timer handle so we can clearTimeout() it after the race.
+    // Without this, when the loop wins the race, the discarded timer is
+    // still ref'd and pins the Node event loop for up to `timeoutMs`,
+    // delaying process shutdown by exactly the slack we were trying to
+    // bound. try/finally clears the handle in every exit path (loop-won,
+    // timeout-won, or exception).
+    const timeoutSentinel = Symbol("mollifier.stop.timeout");
+    let timeoutHandle: ReturnType<typeof setTimeout> | undefined;
+    const timeoutPromise = new Promise<typeof timeoutSentinel>((resolve) => {
+      timeoutHandle = setTimeout(() => resolve(timeoutSentinel), options.timeoutMs);
+    });
+    try {
+      const winner = await Promise.race([
+        this.loopPromise.then(() => "done" as const),
+        timeoutPromise,
+      ]);
+      if (winner === timeoutSentinel) {
+        this.logger.warn(
+          "MollifierDrainer.stop: deadline exceeded; returning while loop iteration is in flight",
+          { timeoutMs: options.timeoutMs },
+        );
+      }
+    } finally {
+      if (timeoutHandle) clearTimeout(timeoutHandle);
+    }
+  }
+
+  // Transient Redis errors (e.g. a connection blip in `listOrgs` /
+  // `listEnvsForOrg`) must not kill the polling loop permanently; `pop`
+  // errors are already absorbed per env in `processOneFromEnv`. We log each
+  // `runOnce` failure, back off so we don't spin tight on a sustained
+  // outage, and resume. The loop only exits when `stop()` flips `stopping`.
+  private async loop(): Promise<void> {
+    try {
+      let consecutiveErrors = 0;
+      while (!this.stopping) {
+        try {
+          const result = await this.runOnce();
+          consecutiveErrors = 0;
+          if (result.drained === 0 && result.failed === 0) { // idle tick: sleep before polling again
+            await this.delay(this.pollIntervalMs);
+          }
+        } catch (err) {
+          consecutiveErrors += 1;
+          this.logger.error("MollifierDrainer.runOnce failed; backing off", {
+            err,
+            consecutiveErrors,
+          });
+          await this.delay(this.backoffMs(consecutiveErrors));
+        }
+      }
+    } finally {
+      this.isRunning = false;
+    }
+  }
+
+  // Exponential backoff capped at 5s: base × 2^consecutiveErrors, where base
+  // is pollIntervalMs floored at 100ms, so the first retry already waits
+  // longer than an idle poll. At base=100: 200ms, 400ms, 800ms, 1.6s, 3.2s, 5s.
+  private backoffMs(consecutiveErrors: number): number {
+    const base = Math.max(this.pollIntervalMs, 100);
+    const capped = Math.min(base * 2 ** consecutiveErrors, 5_000);
+    return capped;
+  }
+
+  private delay(ms: number): Promise<void> {
+    return new Promise((resolve) => setTimeout(resolve, ms));
+  }
+
+  // Take up to `maxOrgsPerTick` orgs starting at the current cursor, with
+  // wrap-around. Cursor advances by 1 each tick so every org reaches
+  // every slot position (0..sliceSize-1) over a full cycle — no
+  // head-of-line bias within the slice. Orgs are sorted before slicing
+  // so rotation is deterministic regardless of Redis SET iteration order.
+  private takeOrgSlice(orgs: string[]): string[] {
+    const sorted = [...orgs].sort();
+    const n = sorted.length;
+    const sliceSize = Math.min(this.maxOrgsPerTick, n);
+    const start = this.orgCursor % n;
+    this.orgCursor = (this.orgCursor + 1) % Math.max(n, 1);
+    const end = start + sliceSize;
+    if (end <= n) return sorted.slice(start, end);
+    return [...sorted.slice(start), ...sorted.slice(0, end - n)];
+  }
+
+  // Pick one env from the org's active-envs list, rotating per org via
+  // the per-org cursor. Each org's cursor advances by 1 each visit, so
+  // an org with N envs cycles through them across N visits.
+  private pickEnvForOrg(orgId: string, envsForOrg: string[]): string {
+    const sorted = [...envsForOrg].sort();
+    const cursor = this.perOrgEnvCursors.get(orgId) ?? 0;
+    const idx = cursor % sorted.length;
+    this.perOrgEnvCursors.set(orgId, (cursor + 1) % sorted.length);
+    return sorted[idx]!;
+  }
+
+  // A failure for one env (e.g. a Redis hiccup mid-batch in `pop`, or in
+  // `requeue`/`fail` during error recovery inside `processEntry`) must not
+  // poison the rest of the batch — `Promise.all` would otherwise reject and
+  // bubble all the way to `loop()`. Catch both stages here so the failed env
+  // is just counted as "failed" for this tick and we move on.
+  private async processOneFromEnv(envId: string): Promise<"drained" | "failed" | "empty"> {
+    let entry: BufferEntry | null;
+    try {
+      entry = await this.buffer.pop(envId);
+    } catch (err) {
+      this.logger.error("MollifierDrainer.pop failed", { envId, err });
+      return "failed";
+    }
+    if (!entry) return "empty";
+    try {
+      return await this.processEntry(entry);
+    } catch (err) {
+      this.logger.error("MollifierDrainer.processEntry failed", {
+        envId,
+        runId: entry.runId,
+        err,
+      });
+      return "failed";
+    }
+  }
+
+  private async processEntry(entry: BufferEntry): Promise<"drained" | "failed"> {
+    try {
+      const payload = deserialiseSnapshot<TPayload>(entry.payload);
+      await this.handler({
+        runId: entry.runId,
+        envId: entry.envId,
+        orgId: entry.orgId,
+        payload,
+        attempts: entry.attempts,
+        createdAt: entry.createdAt,
+      });
+      await this.buffer.ack(entry.runId); // inside the try: an ack error is handled like a handler error
+      return "drained";
+    } catch (err) {
+      const nextAttempts = entry.attempts + 1;
+      if (this.isRetryable(err) && nextAttempts < this.maxAttempts) {
+        await this.buffer.requeue(entry.runId);
+        this.logger.warn("MollifierDrainer: retryable error, requeued", {
+          runId: entry.runId,
+          attempts: nextAttempts,
+        });
+        return "failed";
+      }
+      const code = err instanceof Error ? err.name : "Unknown";
+      const message = err instanceof Error ? err.message : String(err);
+      await this.buffer.fail(entry.runId, { code, message });
+      this.logger.error("MollifierDrainer: terminal failure", {
+        runId: entry.runId,
+        code,
+        message,
+      });
+      return "failed";
+    }
+  }
+}
diff --git a/packages/redis-worker/src/mollifier/index.ts b/packages/redis-worker/src/mollifier/index.ts
new file mode 100644
index 00000000000..5e6fe202e3d
--- /dev/null
+++ b/packages/redis-worker/src/mollifier/index.ts
@@ -0,0 +1,15 @@
+export { MollifierBuffer, type MollifierBufferOptions } from "./buffer.js";
+export {
+  MollifierDrainer,
+  type MollifierDrainerOptions,
+  type MollifierDrainerHandler,
+  type DrainResult,
+} from "./drainer.js";
+export {
+  BufferEntrySchema,
+  BufferEntryStatus,
+  BufferEntryError,
+  serialiseSnapshot,
+  deserialiseSnapshot,
+  type BufferEntry,
+} from "./schemas.js";
diff --git a/packages/redis-worker/src/mollifier/schemas.ts b/packages/redis-worker/src/mollifier/schemas.ts
new file mode 100644
index 00000000000..f93b0f0a3c3
--- /dev/null
+++ b/packages/redis-worker/src/mollifier/schemas.ts
@@ -0,0 +1,79 @@
+import { z } from "zod";
+
+/** Status values a buffer entry can carry. */
+export const BufferEntryStatus = z.enum(["QUEUED", "DRAINING", "FAILED"]);
+export type BufferEntryStatus = z.infer<typeof BufferEntryStatus>;
+
+/** Error details attached to an entry (carried JSON-encoded in `lastError`). */
+export const BufferEntryError = z.object({
+  code: z.string(),
+  message: z.string(),
+});
+export type BufferEntryError = z.infer<typeof BufferEntryError>;
+
+/**
+ * Parses a string field into a non-negative safe integer.
+ *
+ * Only plain decimal digits are accepted. Passing the raw string straight to
+ * `Number()` would also let through values that are not integer strings:
+ * `""` and `" "` (both coerce to 0), `"0x10"`, `"1e3"`, `"1.0"`.
+ */
+const stringToInt = z.string().transform((v, ctx) => {
+  const n = /^\d+$/.test(v) ? Number(v) : Number.NaN;
+  if (!Number.isSafeInteger(n)) {
+    ctx.addIssue({ code: z.ZodIssueCode.custom, message: "expected non-negative integer string" });
+    return z.NEVER;
+  }
+  return n;
+});
+
+/** Parses a string field into a `Date`, rejecting strings `new Date()` cannot interpret. */
+const stringToDate = z.string().transform((v, ctx) => {
+  const d = new Date(v);
+  if (Number.isNaN(d.getTime())) {
+    ctx.addIssue({ code: z.ZodIssueCode.custom, message: "expected ISO date string" });
+    return z.NEVER;
+  }
+  return d;
+});
+
+/** Parses a JSON-encoded string field into a validated `BufferEntryError`. */
+const stringToError = z.string().transform((v, ctx) => {
+  try {
+    return BufferEntryError.parse(JSON.parse(v));
+  } catch {
+    // Covers both malformed JSON and JSON that does not match `BufferEntryError`.
+    ctx.addIssue({ code: z.ZodIssueCode.custom, message: "expected JSON-encoded BufferEntryError" });
+    return z.NEVER;
+  }
+});
+
+/**
+ * A buffer entry parsed from all-string input fields: `attempts`, `createdAt` and
+ * `lastError` are converted to a number, a `Date` and a `BufferEntryError` on parse.
+ */
+export const BufferEntrySchema = z.object({
+  runId: z.string().min(1),
+  envId: z.string().min(1),
+  orgId: z.string().min(1),
+  payload: z.string(),
+  status: BufferEntryStatus,
+  attempts: stringToInt,
+  createdAt: stringToDate,
+  lastError: stringToError.optional(),
+});
+
+export type BufferEntry = z.infer<typeof BufferEntrySchema>;
+
+/** Serialises a snapshot to a JSON string (the inverse of `deserialiseSnapshot`). */
+export function serialiseSnapshot(snapshot: unknown): string {
+  return JSON.stringify(snapshot);
+}
+
+/**
+ * Parses a serialised snapshot back into a value.
+ * The result is cast to `T` without validation; throws if the string is not valid JSON.
+ */
+export function deserialiseSnapshot<T = unknown>(serialised: string): T {
+  return JSON.parse(serialised) as T;
+}
diff --git a/packages/rsc/CHANGELOG.md b/packages/rsc/CHANGELOG.md
index 9304f8caaec..f076634753e 100644
--- a/packages/rsc/CHANGELOG.md
+++ b/packages/rsc/CHANGELOG.md
@@ -1,5 +1,19 @@
 # @trigger.dev/rsc
 
+## 4.5.0-rc.1
+
+### Patch Changes
+
+- Updated dependencies:
+  - `@trigger.dev/core@4.5.0-rc.1`
+
+## 4.5.0-rc.0
+
+### Patch Changes
+
+- Updated dependencies:
+  - `@trigger.dev/core@4.5.0-rc.0`
+
 ## 4.4.6
 
 ### Patch Changes
diff --git a/packages/rsc/package.json b/packages/rsc/package.json
index e41126cd7ba..6dadddedc8e 100644
--- a/packages/rsc/package.json
+++ b/packages/rsc/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@trigger.dev/rsc",
-  "version": "4.4.6",
+  "version": "4.5.0-rc.1",
   "description": "trigger.dev rsc",
   "license": "MIT",
   "publishConfig": {
@@ -37,14 +37,14 @@
     "check-exports": "attw --pack ."
   },
   "dependencies": {
-    "@trigger.dev/core": "workspace:^4.4.6",
+    "@trigger.dev/core": "workspace:^4.5.0-rc.1",
     "mlly": "^1.7.1",
     "react": "19.0.0-rc.1",
     "react-dom": "19.0.0-rc.1"
   },
   "devDependencies": {
     "@arethetypeswrong/cli": "^0.15.4",
-    "@trigger.dev/build": "workspace:^4.4.6",
+    "@trigger.dev/build": "workspace:^4.5.0-rc.1",
     "@types/node": "^20.14.14",
     "@types/react": "*",
     "@types/react-dom": "*",
diff --git a/packages/schema-to-json/CHANGELOG.md b/packages/schema-to-json/CHANGELOG.md
index 707367feff5..4a8f1e33751 100644
--- a/packages/schema-to-json/CHANGELOG.md
+++ b/packages/schema-to-json/CHANGELOG.md
@@ -1,5 +1,19 @@
 # @trigger.dev/schema-to-json
 
+## 4.5.0-rc.1
+
+### Patch Changes
+
+- Updated dependencies:
+  - `@trigger.dev/core@4.5.0-rc.1`
+
+## 4.5.0-rc.0
+
+### Patch Changes
+
+- Updated dependencies:
+  - `@trigger.dev/core@4.5.0-rc.0`
+
 ## 4.4.6
 
 ### Patch Changes
diff --git a/packages/schema-to-json/package.json b/packages/schema-to-json/package.json
index 0721caaa5c8..59b336a8699 100644
--- a/packages/schema-to-json/package.json
+++ b/packages/schema-to-json/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@trigger.dev/schema-to-json",
-  "version": "4.4.6",
+  "version": "4.5.0-rc.1",
   "description": "Convert various schema validation libraries to JSON Schema",
   "license": "MIT",
   "publishConfig": {
diff --git a/packages/trigger-sdk/CHANGELOG.md b/packages/trigger-sdk/CHANGELOG.md
index 6254af0aad9..fcafefd145b 100644
--- a/packages/trigger-sdk/CHANGELOG.md
+++ b/packages/trigger-sdk/CHANGELOG.md
@@ -1,5 +1,214 @@
 # @trigger.dev/sdk
 
+## 4.5.0-rc.1
+
+### Patch Changes
+
+- Updated dependencies:
+  - `@trigger.dev/core@4.5.0-rc.1`
+
+## 4.5.0-rc.0
+
+### Minor Changes
+
+- **AI Prompts** — define prompt templates as code alongside your tasks, version them on deploy, and override the text or model from the dashboard without redeploying. Prompts integrate with the Vercel AI SDK via `toAISDKTelemetry()` (links every generation span back to the prompt) and with `chat.agent` via `chat.prompt.set()` + `chat.toStreamTextOptions()`. ([#3629](https://github.com/triggerdotdev/trigger.dev/pull/3629))
+
+  ```ts
+  import { prompts } from "@trigger.dev/sdk";
+  import { generateText } from "ai";
+  import { openai } from "@ai-sdk/openai";
+  import { z } from "zod";
+
+  export const supportPrompt = prompts.define({
+    id: "customer-support",
+    model: "gpt-4o",
+    config: { temperature: 0.7 },
+    variables: z.object({
+      customerName: z.string(),
+      plan: z.string(),
+      issue: z.string(),
+    }),
+    content: `You are a support agent for Acme.
+  
+  Customer: {{customerName}} ({{plan}} plan)
+  Issue: {{issue}}`,
+  });
+
+  const resolved = await supportPrompt.resolve({
+    customerName: "Alice",
+    plan: "Pro",
+    issue: "Can't access billing",
+  });
+
+  const result = await generateText({
+    model: openai(resolved.model ?? "gpt-4o"),
+    system: resolved.text,
+    prompt: "Can't access billing",
+    ...resolved.toAISDKTelemetry(),
+  });
+  ```
+
+  **What you get:**
+
+  - **Code-defined, deploy-versioned templates** — define with `prompts.define({ id, model, config, variables, content })`. Every deploy creates a new version visible in the dashboard. Mustache-style placeholders (`{{var}}`, `{{#cond}}...{{/cond}}`) with Zod / ArkType / Valibot-typed variables.
+  - **Dashboard overrides** — change a prompt's text or model from the dashboard without redeploying. Overrides take priority over the deployed "current" version and are environment-scoped (dev / staging / production independent).
+  - **Resolve API** — `prompt.resolve(vars, { version?, label? })` returns the compiled `text`, resolved `model`, `version`, and labels. Standalone `prompts.resolve<typeof handle>(slug, vars)` for cross-file resolution with full type inference on slug and variable shape.
+  - **AI SDK integration** — spread `resolved.toAISDKTelemetry({ ...extra })` into any `generateText` / `streamText` call and every generation span links to the prompt in the dashboard alongside its input variables, model, tokens, and cost.
+  - **`chat.agent` integration** — `chat.prompt.set(resolved)` stores the resolved prompt run-scoped; `chat.toStreamTextOptions({ registry })` pulls `system`, `model` (resolved via the AI SDK provider registry), `temperature` / `maxTokens` / etc., and telemetry into a single spread for `streamText`.
+  - **Management SDK** — `prompts.list()`, `prompts.versions(slug)`, `prompts.promote(slug, version)`, `prompts.createOverride(slug, body)`, `prompts.updateOverride(slug, body)`, `prompts.removeOverride(slug)`, `prompts.reactivateOverride(slug, version)`.
+  - **Dashboard** — prompts list with per-prompt usage sparklines; per-prompt detail with Template / Details / Versions / Generations / Metrics tabs. AI generation spans get a custom inspector showing the linked prompt's metadata, input variables, and template content alongside model, tokens, cost, and the message thread.
+
+  See [/docs/ai/prompts](https://trigger.dev/docs/ai/prompts) for the full reference — template syntax, version resolution order, override workflow, and type utilities (`PromptHandle`, `PromptIdentifier`, `PromptVariables`).
+
+- Adds `onBoot` to `chat.agent` — a lifecycle hook that fires once per worker process picking up the chat. Runs for the initial run, preloaded runs, AND reactive continuation runs (post-cancel, crash, `endRun`, `requestUpgrade`, OOM retry), before any other hook. Use it to initialize `chat.local`, open per-process resources, or re-hydrate state from your DB on continuation — anywhere the SAME run picking up after suspend/resume isn't enough. ([#3543](https://github.com/triggerdotdev/trigger.dev/pull/3543))
+
+  ```ts
+  const userContext = chat.local<{ name: string; plan: string }>({ id: "userContext" });
+
+  export const myChat = chat.agent({
+    id: "my-chat",
+    onBoot: async ({ clientData, continuation }) => {
+      const user = await db.user.findUnique({ where: { id: clientData.userId } });
+      userContext.init({ name: user.name, plan: user.plan });
+    },
+    run: async ({ messages, signal }) =>
+      streamText({ model: openai("gpt-4o"), messages, abortSignal: signal }),
+  });
+  ```
+
+  Use `onBoot` (not `onChatStart`) for state setup that must run every time a worker picks up the chat — `onChatStart` fires once per chat and won't run on continuation, leaving `chat.local` uninitialized when `run()` tries to use it.
+
+- **AI Agents** — run AI SDK chat completions as durable Trigger.dev agents instead of fragile API routes. Define an agent in one function, point `useChat` at it from React, and the conversation survives page refreshes, network blips, and process restarts. ([#3543](https://github.com/triggerdotdev/trigger.dev/pull/3543))
+
+  ```ts
+  import { chat } from "@trigger.dev/sdk/ai";
+  import { streamText } from "ai";
+  import { openai } from "@ai-sdk/openai";
+
+  export const myChat = chat.agent({
+    id: "my-chat",
+    run: async ({ messages, signal }) =>
+      streamText({ model: openai("gpt-4o"), messages, abortSignal: signal }),
+  });
+  ```
+
+  ```tsx
+  import { useChat } from "@ai-sdk/react";
+  import { useTriggerChatTransport } from "@trigger.dev/sdk/chat/react";
+
+  const transport = useTriggerChatTransport({ task: "my-chat", accessToken, startSession });
+  const { messages, sendMessage } = useChat({ transport });
+  ```
+
+  **What you get:**
+
+  - **AI SDK `useChat` integration** — a custom [`ChatTransport`](https://sdk.vercel.ai/docs/ai-sdk-ui/transport) (`useTriggerChatTransport`) plugs straight into Vercel AI SDK's `useChat` hook. Text streaming, tool calls, reasoning, and `data-*` parts all work natively over Trigger.dev's realtime streams. No custom API routes needed.
+  - **First-turn fast path (`chat.headStart`)** — opt-in handler that runs the first turn's `streamText` step in your warm server process while the agent run boots in parallel, cutting cold-start TTFC by roughly half (measured 2801ms → 1218ms on `claude-sonnet-4-6`). The agent owns step 2+ (tool execution, persistence, hooks) so heavy deps stay where they belong. Web Fetch handler works natively in Next.js, Hono, SvelteKit, Remix, Workers, etc.; bridge to Express/Fastify/Koa via `chat.toNodeListener`. New `@trigger.dev/sdk/chat-server` subpath.
+  - **Multi-turn durability via Sessions** — every chat is backed by a durable Session that outlives any individual run. Conversations resume across page refreshes, idle timeout, crashes, and deploys; `resume: true` reconnects via `lastEventId` so clients only see new chunks. `sessions.list` enumerates chats for inbox-style UIs.
+  - **Auto-accumulated history, delta-only wire** — the backend accumulates the full conversation across turns; clients only ship the new message each turn. Long chats never hit the 512 KiB body cap. Register `hydrateMessages` to be the source of truth yourself.
+  - **Lifecycle hooks** — `onPreload`, `onChatStart`, `onValidateMessages`, `hydrateMessages`, `onTurnStart`, `onBeforeTurnComplete`, `onTurnComplete`, `onChatSuspend`, `onChatResume` — for persistence, validation, and post-turn work.
+  - **Stop generation** — client-driven `transport.stopGeneration(chatId)` aborts mid-stream; the run stays alive for the next message, partial response is captured, and aborted parts (stuck `partial-call` tools, in-progress reasoning) are auto-cleaned.
+  - **Tool approvals (HITL)** — tools with `needsApproval: true` pause until the user approves or denies via `addToolApprovalResponse`. The runtime reconciles the updated assistant message by ID and continues `streamText`.
+  - **Steering and background injection** — `pendingMessages` injects user messages between tool-call steps so users can steer the agent mid-execution; `chat.inject()` + `chat.defer()` adds context from background work (self-review, RAG, safety checks) between turns.
+  - **Actions** — non-turn frontend commands (undo, rollback, regenerate, edit) sent via `transport.sendAction`. Fire `hydrateMessages` + `onAction` only — no turn hooks, no `run()`. `onAction` can return a `StreamTextResult` for a model response, or `void` for side-effect-only.
+  - **Typed state primitives** — `chat.local<T>` for per-run state accessible from hooks, `run()`, tools, and subtasks (auto-serialized through `ai.toolExecute`); `chat.store` for typed shared data between agent and client; `chat.history` for reading and mutating the message chain; `clientDataSchema` for typed `clientData` in every hook.
+  - **`chat.toStreamTextOptions()`** — one spread into `streamText` wires up versioned system [Prompts](https://trigger.dev/docs/ai/prompts), model resolution, telemetry metadata, compaction, steering, and background injection.
+  - **Multi-tab coordination** — `multiTab: true` + `useMultiTabChat` prevents duplicate sends and syncs state across browser tabs via `BroadcastChannel`. Non-active tabs go read-only with live updates.
+  - **Network resilience** — built-in indefinite retry with bounded backoff, reconnect on `online` / tab refocus / bfcache restore, `Last-Event-ID` mid-stream resume. No app code needed.
+
+  See [/docs/ai-chat](https://trigger.dev/docs/ai-chat/overview) for the full surface — quick start, three backend approaches (`chat.agent`, `chat.createSession`, raw task), persistence and code-sandbox patterns, type-level guides, and API reference.
+
+- Add read primitives to `chat.history` for human-in-the-loop (HITL) flows: `getPendingToolCalls()`, `getResolvedToolCalls()`, `extractNewToolResults(message)`, `getChain()`, and `findMessage(messageId)`. These bring into the SDK the accumulator-walking logic that customers building HITL tools had been re-implementing themselves. ([#3543](https://github.com/triggerdotdev/trigger.dev/pull/3543))
+
+  Use `getPendingToolCalls()` to gate fresh user turns while a tool call is awaiting an answer. Use `extractNewToolResults(message)` to dedup tool results when persisting to your own store — the helper returns only the parts whose `toolCallId` is not already resolved on the chain.
+
+  ```ts
+  const pending = chat.history.getPendingToolCalls();
+  if (pending.length > 0) {
+    // an addToolOutput is expected before a new user message
+  }
+
+  onTurnComplete: async ({ responseMessage }) => {
+    const newResults = chat.history.extractNewToolResults(responseMessage);
+    for (const r of newResults) {
+      await db.toolResults.upsert({ id: r.toolCallId, output: r.output, errorText: r.errorText });
+    }
+  };
+  ```
+
+- **Sessions** — a durable, run-aware stream channel keyed on a stable `externalId`. A Session is the unit of state that owns a multi-run conversation: messages flow through `.in`, responses through `.out`, both survive run boundaries. Sessions back the new `chat.agent` runtime, and you can build on them directly for any pattern that needs durable bi-directional streaming across runs. ([#3542](https://github.com/triggerdotdev/trigger.dev/pull/3542))
+
+  ```ts
+  import { sessions, tasks } from "@trigger.dev/sdk";
+
+  // Trigger a task and subscribe to its session output in one call
+  const { runId, stream } = await tasks.triggerAndSubscribe("my-task", payload, {
+    externalId: "user-456",
+  });
+
+  for await (const chunk of stream) {
+    // ...
+  }
+
+  // Enumerate existing sessions (powers inbox-style UIs without a separate index)
+  for await (const s of sessions.list({ type: "chat.agent", tag: "user:user-456" })) {
+    console.log(s.id, s.externalId, s.createdAt, s.closedAt);
+  }
+  ```
+
+  See [/docs/ai-chat/overview](https://trigger.dev/docs/ai-chat/overview) for the full surface — Sessions powers the durable, resumable chat runtime described there.
+
+### Patch Changes
+
+- Add Agent Skills for `chat.agent`. Drop a folder with a `SKILL.md` and any helper scripts/references next to your task code, register it with `skills.define({ id, path })`, and the CLI bundles it into the deploy image automatically — no `trigger.config.ts` changes. The agent gets a one-line summary in its system prompt and discovers full instructions on demand via `loadSkill`, with `bash` and `readFile` tools scoped per-skill (path-traversal guards, output caps, abort-signal propagation). ([#3543](https://github.com/triggerdotdev/trigger.dev/pull/3543))
+
+  ```ts
+  const pdfSkill = skills.define({ id: "pdf-extract", path: "./skills/pdf-extract" });
+
+  chat.skills.set([await pdfSkill.local()]);
+  ```
+
+  Built on the [AI SDK cookbook pattern](https://ai-sdk.dev/cookbook/guides/agent-skills) — portable across providers. SDK + CLI only for now; dashboard-editable `SKILL.md` text is on the roadmap.
+
+- Add `ai.toolExecute(task)` so you can wire a Trigger subtask in as the `execute` handler of an AI SDK `tool()` while defining `description` and `inputSchema` yourself — useful when you want full control over the tool surface and just need Trigger's subtask machinery for the body. ([#3546](https://github.com/triggerdotdev/trigger.dev/pull/3546))
+
+  ```ts
+  const myTool = tool({
+    description: "...",
+    inputSchema: z.object({ ... }),
+    execute: ai.toolExecute(mySubtask),
+  });
+  ```
+
+  `ai.tool(task)` (`toolFromTask`) keeps doing the all-in-one wrap and now aligns its return type with AI SDK's `ToolSet`. Minimum `ai` peer raised to `^6.0.116` to avoid cross-version `ToolSet` mismatches in monorepos.
+
+- Stamp `gen_ai.conversation.id` (the chat id) on every span and metric emitted from inside a `chat.task` or `chat.agent` run. Lets you filter dashboard spans, runs, and metrics by the chat conversation that produced them — independent of the run boundary, so multi-run chats correlate cleanly. No code changes required on the user side. ([#3543](https://github.com/triggerdotdev/trigger.dev/pull/3543))
+- Type `chat.createStartSessionAction` against your chat agent so `clientData` is typed end-to-end on the first turn: ([#3684](https://github.com/triggerdotdev/trigger.dev/pull/3684))
+
+  ```ts
+  import { chat } from "@trigger.dev/sdk/ai";
+  import type { myChat } from "@/trigger/chat";
+
+  export const startChatSession = chat.createStartSessionAction<typeof myChat>("my-chat");
+
+  // In the browser, threaded from the transport's typed startSession callback:
+  const transport = useTriggerChatTransport<typeof myChat>({
+    task: "my-chat",
+    startSession: ({ chatId, clientData }) => startChatSession({ chatId, clientData }),
+    // ...
+  });
+  ```
+
+  `ChatStartSessionParams` gains a typed `clientData` field — folded into the first run's `payload.metadata` so `onPreload` / `onChatStart` see the same shape per-turn `metadata` carries via the transport. The opaque session-level `metadata` field is unchanged.
+
+- Unit-test `chat.agent` definitions offline with `mockChatAgent` from `@trigger.dev/sdk/ai/test`. Drives a real agent's turn loop in-process — no network, no task runtime — so you can send messages, actions, and stop signals via driver methods, inspect captured output chunks, and verify hooks fire. Pairs with `MockLanguageModelV3` from `ai/test` for model mocking. `setupLocals` lets you pre-seed `locals` (DB clients, service stubs) before `run()` starts. ([#3543](https://github.com/triggerdotdev/trigger.dev/pull/3543))
+
+  The broader `runInMockTaskContext` harness it's built on lives at `@trigger.dev/core/v3/test` — useful for unit-testing any task code, not just chat.
+
+- Add `region` to the runs list / retrieve API: filter runs by region (`runs.list({ region: "..." })` / `filter[region]=<masterQueue>`) and read each run's executing region from the new `region` field on the response. ([#3612](https://github.com/triggerdotdev/trigger.dev/pull/3612))
+- Updated dependencies:
+  - `@trigger.dev/core@4.5.0-rc.0`
+
 ## 4.4.6
 
 ### Patch Changes
diff --git a/packages/trigger-sdk/package.json b/packages/trigger-sdk/package.json
index 18446acbb9f..f1780901ab0 100644
--- a/packages/trigger-sdk/package.json
+++ b/packages/trigger-sdk/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@trigger.dev/sdk",
-  "version": "4.4.6",
+  "version": "4.5.0-rc.1",
   "description": "trigger.dev Node.JS SDK",
   "license": "MIT",
   "publishConfig": {
@@ -72,7 +72,7 @@
   "dependencies": {
     "@opentelemetry/api": "1.9.0",
     "@opentelemetry/semantic-conventions": "1.36.0",
-    "@trigger.dev/core": "workspace:4.4.6",
+    "@trigger.dev/core": "workspace:4.5.0-rc.1",
     "chalk": "^5.2.0",
     "cronstrue": "^2.21.0",
     "debug": "^4.3.4",
diff --git a/packages/trigger-sdk/src/v3/ai.ts b/packages/trigger-sdk/src/v3/ai.ts
index 81994a03685..d5c176a4a56 100644
--- a/packages/trigger-sdk/src/v3/ai.ts
+++ b/packages/trigger-sdk/src/v3/ai.ts
@@ -35,6 +35,7 @@ import {
   type TaskWithSchema,
   SESSION_IN_EVENT_ID_HEADER,
   TRIGGER_CONTROL_SUBTYPE,
+  generateJWT,
   type WriterStreamOptions,
 } from "@trigger.dev/core/v3";
 import type {
@@ -77,7 +78,7 @@ import type { ResolvedSkill } from "./skill.js";
 // never touches `ai.ts`'s module graph, so the `node:*` builtins
 // pulled in transitively here never reach a client chunk.
 import { runBashInSkill, readFileInSkill } from "./agentSkillsRuntime.js";
-import { streams } from "./streams.js";
+import { streams, markChatAgentRunForStreamsWarning } from "./streams.js";
 import {
   sessions,
   type SessionHandle,
@@ -218,20 +219,6 @@ async function findLatestSessionInCursor(
  */
 export type { ChatSnapshotV1 } from "@trigger.dev/core/v3";
 
-/**
- * S3 key suffix for a session's snapshot blob. The webapp's presigned-URL
- * routes prefix this with `packets/{projectRef}/{envSlug}/` server-side, so
- * the final S3 key lands at
- * `packets/{projectRef}/{envSlug}/sessions/{sessionId}/snapshot.json`.
- *
- * Stable per session: the friendlyId persists across `chat.requestUpgrade`
- * continuations and idle-suspend restarts.
- * @internal
- */
-function snapshotFilename(sessionId: string): string {
-  return `sessions/${sessionId}/snapshot.json`;
-}
-
 /**
  * Test-only override hook — `mockChatAgent` installs a fake to return
  * synthetic snapshots without hitting S3. Mirrors the `__set*ImplForTests`
@@ -284,7 +271,7 @@ async function readChatSnapshot<TUIMessage extends UIMessage>(
   const apiClient = apiClientManager.clientOrThrow();
   let presignedUrl: string;
   try {
-    const resp = await apiClient.getPayloadUrl(snapshotFilename(sessionId));
+    const resp = await apiClient.getChatSnapshotUrl(sessionId);
     presignedUrl = resp.presignedUrl;
   } catch (error) {
     logger.warn("chat.agent: snapshot presign (read) failed; continuing without snapshot", {
@@ -359,7 +346,7 @@ async function writeChatSnapshot<TUIMessage extends UIMessage>(
   const apiClient = apiClientManager.clientOrThrow();
   let presignedUrl: string;
   try {
-    const resp = await apiClient.createUploadPayloadUrl(snapshotFilename(sessionId));
+    const resp = await apiClient.createChatSnapshotUploadUrl(sessionId);
     presignedUrl = resp.presignedUrl;
   } catch (error) {
     logger.warn("chat.agent: snapshot presign (write) failed; next run will replay further", {
@@ -499,10 +486,29 @@ export function __mergeByIdReplaceWinsForTests<TUIMessage extends UIMessage>(
  * part of the public API.
  * @internal
  */
+type ReplaySessionOutTailResult<TUIMessage extends UIMessage> = {
+  /** Messages whose `finish` chunk landed before the run died. Safe to seed the chain. */
+  settled: TUIMessage[];
+  /**
+   * The trailing assistant message whose `finish` chunk never arrived —
+   * an orphan from a cancel / crash / OOM. `cleanupAbortedParts` has
+   * already stripped streaming-in-progress fragments. `undefined` if
+   * the tail ended cleanly (every segment closed).
+   */
+  partial: TUIMessage | undefined;
+  /**
+   * The trailing assistant message BEFORE `cleanupAbortedParts` ran. Same
+   * `undefined` semantics as `partial`. Use this when you need to inspect
+   * tool parts the cleanup would strip (e.g. `input-available` /
+   * `input-streaming` orphans surfaced via `pendingToolCalls`).
+   */
+  partialRaw: TUIMessage | undefined;
+};
+
 type ReplaySessionOutTailImpl = <TUIMessage extends UIMessage>(
   sessionId: string,
   options?: { lastEventId?: string }
-) => Promise<TUIMessage[]>;
+) => Promise<ReplaySessionOutTailResult<TUIMessage>>;
 let replaySessionOutTailImpl: ReplaySessionOutTailImpl | undefined;
 
 export function __setReplaySessionOutTailImplForTests(
@@ -543,7 +549,7 @@ export function __setReplaySessionOutTailImplForTests(
 async function replaySessionOutTail<TUIMessage extends UIMessage>(
   sessionId: string,
   options?: { lastEventId?: string }
-): Promise<TUIMessage[]> {
+): Promise<ReplaySessionOutTailResult<TUIMessage>> {
   if (replaySessionOutTailImpl) {
     return await replaySessionOutTailImpl<TUIMessage>(sessionId, options);
   }
@@ -553,16 +559,12 @@ async function replaySessionOutTail<TUIMessage extends UIMessage>(
   });
   const collected: UIMessageChunk[] = [];
   for (const record of response.records) {
-    // Each record's `data` is the JSON-encoded chunk body the agent
-    // wrote at append time. The records endpoint returns it as an
-    // opaque string so the parsing cost is paid here, not on the
-    // server's hot path.
-    let chunk: unknown;
-    try {
-      chunk = JSON.parse(record.data);
-    } catch {
-      continue;
-    }
+    // `data` is the chunk object as written by the SDK's session-out
+    // writer (an AI SDK `UIMessageChunk` or a Trigger control object).
+    // The route forwards it as-is — no JSON envelope to unwrap here.
+    // Defensive shape checks below tolerate malformed records by
+    // skipping them instead of throwing.
+    const chunk: unknown = record.data;
     if (!chunk || typeof chunk !== "object") continue;
     const type = (chunk as { type?: unknown }).type;
     if (typeof type !== "string") continue;
@@ -572,7 +574,7 @@ async function replaySessionOutTail<TUIMessage extends UIMessage>(
     if (type.startsWith("trigger:")) continue;
     collected.push(chunk as UIMessageChunk);
   }
-  if (collected.length === 0) return [];
+  if (collected.length === 0) return { settled: [], partial: undefined, partialRaw: undefined };
 
   // Split chunks into per-message segments. A `start` chunk demarcates the
   // beginning of an assistant message; chunks before any `start` (rare —
@@ -601,7 +603,9 @@ async function replaySessionOutTail<TUIMessage extends UIMessage>(
     }
   }
 
-  const messages: TUIMessage[] = [];
+  const settled: TUIMessage[] = [];
+  let partial: TUIMessage | undefined;
+  let partialRaw: TUIMessage | undefined;
   for (let i = 0; i < segments.length; i++) {
     const seg = segments[i]!;
     const isTrailing = i === segments.length - 1 && !seg.closed;
@@ -630,12 +634,17 @@ async function replaySessionOutTail<TUIMessage extends UIMessage>(
     if (isTrailing) {
       const cleaned = cleanupAbortedParts(last as TUIMessage);
       if (cleaned.parts.length === 0) continue;
-      messages.push(cleaned);
+      partial = cleaned;
+      // Keep the raw pre-cleanup message too — recovery boot extracts
+      // `pendingToolCalls` from it, since `cleanupAbortedParts` strips
+      // exactly the input-streaming / input-available tool parts that
+      // we want to surface.
+      partialRaw = last as TUIMessage;
     } else {
-      messages.push(last as TUIMessage);
+      settled.push(last as TUIMessage);
     }
   }
-  return messages;
+  return { settled, partial, partialRaw };
 }
 
 /**
@@ -662,12 +671,125 @@ export async function __replaySessionOutTailProductionPathForTests<
   const saved = replaySessionOutTailImpl;
   replaySessionOutTailImpl = undefined;
   try {
-    return await replaySessionOutTail<TUIMessage>(sessionId, options);
+    const { settled, partial } = await replaySessionOutTail<TUIMessage>(sessionId, options);
+    return partial !== undefined ? [...settled, partial] : settled;
   } finally {
     replaySessionOutTailImpl = saved;
   }
 }
 
+/**
+ * Test-only override for `replaySessionInTail`, mirroring
+ * `__setReplaySessionOutTailImplForTests`: while an implementation is installed the
+ * real function delegates to it (no HTTP round-trip); install `undefined` to remove it.
+ * @internal
+ */
+type ReplaySessionInTailImpl = <TUIMessage extends UIMessage>(
+  sessionId: string,
+  options?: { lastEventId?: string }
+) => Promise<{ message: TUIMessage; metadata: unknown; seqNum: number }[]>;
+let replaySessionInTailImpl: ReplaySessionInTailImpl | undefined;
+
+export function __setReplaySessionInTailImplForTests(
+  impl: ReplaySessionInTailImpl | undefined
+): void {
+  replaySessionInTailImpl = impl;
+}
+
+/**
+ * Drain `session.in` from `lastEventId` (or the start) and surface the user
+ * messages that landed past the cursor. Mirror of `replaySessionOutTail` —
+ * both reads run at continuation boot so the SDK can reconstruct
+ * conversational order across a dead run that never wrote `onTurnComplete`.
+ *
+ * `session.in` carries the {@link ChatInputChunk} tagged union:
+ *   - `kind: "message"` — a `ChatTaskWirePayload` envelope for a new user
+ *     message (`trigger: "submit-message"`) or a regeneration. Only the
+ *     submit-message records carry a `payload.message`; regenerations,
+ *     preload / close / action / handover-prepare have no message.
+ *   - `kind: "stop"` — mid-turn cancellation signal. Not a message.
+ *   - `kind: "handover"` / `kind: "handover-skip"` — head-start signals.
+ *     Not user messages.
+ *
+ * This function filters to the first variant and returns the embedded
+ * `UIMessage`s in seq_num order, paired with their seq_num so the caller
+ * can advance the session.in cursor past them.
+ *
+ * Errors are propagated to the caller (the boot loop wraps in try/catch
+ * and `logger.warn`s); we don't swallow here so test code can observe
+ * failures directly.
+ * @internal
+ */
+async function replaySessionInTail<TUIMessage extends UIMessage>(
+  sessionId: string,
+  options?: { lastEventId?: string }
+): Promise<{ message: TUIMessage; metadata: unknown; seqNum: number }[]> {
+  type WirePayload = { trigger?: unknown; message?: unknown; metadata?: unknown };
+
+  // A test override, when installed, replaces the whole read + filter pipeline.
+  if (replaySessionInTailImpl) {
+    return await replaySessionInTailImpl<TUIMessage>(sessionId, options);
+  }
+
+  // session.in writers POST `JSON.stringify(chunk)`; the webapp wraps it as
+  // `{ data: <string>, id }` on S2 and the records endpoint returns `data` as that
+  // string (session.out records carry a chunk OBJECT instead). Strings are decoded and
+  // anything else passes through, so a writer change on either side can't lose records.
+  const decode = (data: unknown): unknown => {
+    if (typeof data !== "string") return data;
+    try {
+      return JSON.parse(data);
+    } catch {
+      // Unparseable record: `undefined` is dropped by the object check in the loop.
+      return undefined;
+    }
+  };
+
+  // Read the records after `lastEventId` (no cursor is sent when it is absent).
+  const { records } = await apiClientManager
+    .clientOrThrow()
+    .readSessionStreamRecords(sessionId, "in", { afterEventId: options?.lastEventId });
+
+  const submitted: { message: TUIMessage; metadata: unknown; seqNum: number }[] = [];
+  for (const { data, seqNum } of records) {
+    const chunk = decode(data);
+    if (typeof chunk !== "object" || chunk === null) continue;
+
+    // Keep only `kind: "message"` records whose payload is a `submit-message`.
+    const { kind, payload } = chunk as { kind?: unknown; payload?: WirePayload };
+    if (kind !== "message" || !payload || payload.trigger !== "submit-message") continue;
+
+    // The payload must actually embed a message object.
+    const { message, metadata } = payload;
+    if (!message || typeof message !== "object") continue;
+
+    submitted.push({ message: message as TUIMessage, metadata, seqNum });
+  }
+  return submitted;
+}
+
+/**
+ * Test-only entry point that always runs the real `replaySessionInTail` pipeline
+ * (`apiClient.readSessionStreamRecords` + filtering), even while an override from
+ * `__setReplaySessionInTailImplForTests` is installed. The override is cleared for
+ * the duration of the call and restored afterwards, whether the call resolves or throws.
+ * @internal
+ */
+export async function __replaySessionInTailProductionPathForTests<
+  TUIMessage extends UIMessage,
+>(
+  sessionId: string,
+  options?: { lastEventId?: string }
+): Promise<{ message: TUIMessage; metadata: unknown; seqNum: number }[]> {
+  const installedOverride = replaySessionInTailImpl;
+  replaySessionInTailImpl = undefined;
+  try {
+    return await replaySessionInTail<TUIMessage>(sessionId, options);
+  } finally {
+    replaySessionInTailImpl = installedOverride;
+  }
+}
+
 /**
  * Resolve the Session handle for the current chat.agent run.
  *
@@ -1899,6 +2021,40 @@ function* iterateToolParts(
   }
 }
 
+/**
+ * Collects the unresolved tool calls found on a partial assistant
+ * message — tool parts whose state is still pending. Feeds
+ * `RecoveryBootEvent.pendingToolCalls` at recovery boot.
+ *
+ * The partial is the orphan left behind by a dead run: its
+ * `turn-complete` never fired, so a pending tool part here is a real
+ * orphan rather than a stable HITL pause (those sit on settled
+ * messages). Returns `[]` when `partial` is undefined.
+ *
+ * @internal
+ */
+function extractPendingToolCallsFromPartial(
+  partial: UIMessage | undefined
+): RecoveryPendingToolCall[] {
+  // An absent partial contributes no parts, so the loop is a no-op.
+  const pending: RecoveryPendingToolCall[] = [];
+  const candidateParts = (partial?.parts ?? []) as any[];
+  for (const [partIndex, candidate] of candidateParts.entries()) {
+    // Only tool parts in a pending state (per `isPendingToolState`) qualify.
+    if (!isToolUIPart(candidate) || !isPendingToolState(candidate.state)) continue;
+    const toolCallId = candidate.toolCallId;
+    // Skip entries without a usable (non-empty string) id.
+    if (typeof toolCallId !== "string" || toolCallId === "") continue;
+    pending.push({
+      toolCallId,
+      toolName: getToolName(candidate),
+      input: candidate.input,
+      partIndex,
+    });
+  }
+  return pending;
+}
+
 /**
  * Tool parts on the *leaf* assistant message that are still waiting on
  * an answer (`input-available` state). Used to gate fresh user turns
@@ -3530,6 +3686,126 @@ export type BootEvent<TClientData = unknown> = {
   preloaded: boolean;
 };
 
+/**
+ * A tool call extracted from the partial assistant message of a dead run.
+ * Surfaced on `RecoveryBootEvent.pendingToolCalls` so the customer can
+ * decide how to repair the chain (synthesize a result, drop the partial,
+ * etc.).
+ */
+export type RecoveryPendingToolCall = {
+  /** The AI SDK tool call id (always a non-empty string). */
+  toolCallId: string;
+  /** The tool name (the `tool-${name}` suffix). */
+  toolName: string;
+  /** The input the model produced for the tool call. */
+  input: unknown;
+  /** Part index in the raw (pre-cleanup) partial — may differ from `partialAssistant.parts`. */
+  partIndex: number;
+};
+
+/**
+ * Event passed to the `onRecoveryBoot` callback.
+ *
+ * Fires once at boot when a continuation run inherits in-flight state from
+ * a dead predecessor (cancel / crash / OOM / deploy eviction / graceful
+ * `chat.requestUpgrade`). The runtime reads both `session.in` and
+ * `session.out` past the last `turn-complete` cursor and surfaces the
+ * recovered pieces here so the customer can shape the conversational
+ * chain before the first turn fires.
+ *
+ * Fires only when a partial assistant was recovered. Does NOT fire on a
+ * clean continuation after `chat.endRun()`, a fresh chat, an OOM retry
+ * with no in-flight tail, or when only in-flight users were recovered.
+ *
+ * Does NOT fire when `hydrateMessages` is registered (the customer owns
+ * persistence; recovery decisions live in their own DB query).
+ */
+export type RecoveryBootEvent<TUIM extends UIMessage = UIMessage> = {
+  /** Task run context — same as `task({ run })` second-argument `ctx`. */
+  ctx: TaskRunContext;
+  /** The unique identifier for the chat session. */
+  chatId: string;
+  /** The Trigger.dev run ID for this run boot. */
+  runId: string;
+  /** Public id of the prior run that died; `""` when the runtime has none. */
+  previousRunId: string;
+  /**
+   * Best-effort cause of the predecessor's death. Currently always
+   * `"unknown"` — the run engine doesn't yet plumb the real reason
+   * into the continuation payload. Future SDK versions will narrow
+   * this. Don't branch behavior on it yet.
+   */
+  cause: "cancelled" | "crashed" | "unknown";
+  /**
+   * The conversation chain that was successfully persisted by the
+   * predecessor's last `onTurnComplete`. Empty if the predecessor died
+   * before turn 1 ever completed.
+   */
+  settledMessages: TUIM[];
+  /**
+   * User messages that arrived on `session.in` past the cursor — i.e.
+   * the message(s) the predecessor was processing or had queued when
+   * it died. By default the first one is spliced into the chain ahead
+   * of the partial and the rest are re-dispatched as fresh turns.
+   * Return `recoveredTurns` to skip / reorder / collapse them.
+   */
+  inFlightUsers: TUIM[];
+  /**
+   * The trailing assistant message the predecessor was streaming when
+   * it died — the orphan whose `turn-complete` never fired. This is the
+   * cleaned-up partial. The runtime currently invokes the hook only when
+   * it is defined, so the `undefined` arm is not observed today.
+   */
+  partialAssistant: TUIM | undefined;
+  /**
+   * Tool calls the model had started but the tool runtime never
+   * resolved. Extracted from the raw (pre-cleanup) partial, so entries
+   * (and their `partIndex`) may not appear in `partialAssistant.parts`.
+   * Empty when the partial carries no pending tool parts.
+   */
+  pendingToolCalls: RecoveryPendingToolCall[];
+  /**
+   * Lazy session.out writer — identical to the `writer` passed to
+   * `onTurnStart` / `onTurnComplete` / `onChatStart`. Use this to emit
+   * a recovery signal (e.g. a `data-chat-recovery` UIMessage chunk)
+   * BEFORE the first recovered turn fires so the bridge can render a
+   * "recovering..." banner. Lazy: no overhead if unused.
+   */
+  writer: ChatWriter;
+};
+
+/**
+ * Return shape for the `onRecoveryBoot` callback. Every field is optional —
+ * omit one to accept the default.
+ */
+export type RecoveryBootResult<TUIM extends UIMessage = UIMessage> = {
+  /**
+   * The chain the new run boots with. Replaces the default, which is
+   * `settledMessages` followed by the first in-flight user and the
+   * partial assistant (when both exist). Use this to drop the partial,
+   * inject synthesized tool results, collapse history, etc.
+   *
+   * Ignored when `hydrateMessages` is registered (the hydrate hook
+   * runs per-turn and overwrites the chain).
+   */
+  chain?: TUIM[];
+  /**
+   * Fresh turns to re-dispatch after the chain is restored. Default:
+   * `inFlightUsers` minus the first entry (absorbed by the default
+   * chain) — applied even when you return a custom `chain`. Return `[]`
+   * to suppress all; a filtered / reordered subset to skip some.
+   */
+  recoveredTurns?: TUIM[];
+  /**
+   * Awaitable run AFTER the writer flushes and BEFORE the first
+   * recovered turn fires. Use for blocking persistence (e.g. write the
+   * partial assistant to your DB so a follow-up turn can reference
+   * it). Errors bubble — wrap your own try/catch if you want to soft-
+   * fail.
+   */
+  beforeBoot?: () => Promise<void>;
+};
+
 /**
  * Event passed to the `onChatStart` callback.
  *
@@ -4015,6 +4291,43 @@ export type ChatAgentOptions<
    */
   onBoot?: (event: BootEvent<inferSchemaOut<TClientDataSchema>>) => Promise<void> | void;
 
+  /**
+   * Recovery boot hook — fires once on a continuation run that inherited
+   * in-flight state from a dead predecessor (cancel / crash / OOM /
+   * deploy eviction / `chat.requestUpgrade()`). The runtime reads both
+   * stream tails past the last `turn-complete` cursor and hands the
+   * customer the recovered pieces (settled chain, in-flight users,
+   * partial assistant, pending tool calls) so the chain can be shaped
+   * before the first recovered turn fires.
+   *
+   * Fires only when a partial assistant was recovered. Does NOT fire on
+   * a clean continuation after `chat.endRun()`, a fresh chat, an OOM retry
+   * on a complete snapshot, or when only in-flight users were recovered.
+   *
+   * Does NOT fire when `hydrateMessages` is registered — that hook owns
+   * the per-turn chain and overlapping recovery decisions belong in the
+   * customer's DB.
+   *
+   * Defaults (used when the hook is omitted or leaves a field unset):
+   *   - `chain` = `settledMessages` + first in-flight user + partial (when both exist)
+   *   - `recoveredTurns` = the remaining `inFlightUsers` (all of them if nothing was spliced)
+   *
+   * @example
+   * ```ts
+   * onRecoveryBoot: async ({ partialAssistant, inFlightUsers, writer, cause }) => {
+   *   writer.write({
+   *     type: "data-chat-recovery",
+   *     id: generateId(),
+   *     data: { cause, partial: partialAssistant?.id },
+   *   });
+   *   return {}; // accept the runtime defaults for chain + recoveredTurns
+   * }
+   * ```
+   */
+  onRecoveryBoot?: (
+    event: RecoveryBootEvent<TUIMessage>
+  ) => Promise<RecoveryBootResult<TUIMessage> | void> | RecoveryBootResult<TUIMessage> | void;
+
   /**
    * Called when a preloaded run starts, before the first message arrives.
    *
@@ -4495,6 +4808,7 @@ function chatCustomAgent<
       // No client-side upsert needed.
       locals.set(chatSessionHandleKey, sessions.open(payload.chatId));
       locals.set(chatAgentRunContextKey, runOptions.ctx);
+      markChatAgentRunForStreamsWarning();
       taskContext.setConversationId(payload.chatId);
       stampConversationIdOnActiveSpan(payload.chatId);
       return userRun(payload, runOptions);
@@ -4524,6 +4838,7 @@ function chatAgent<
     run: userRun,
     clientDataSchema,
     onBoot,
+    onRecoveryBoot,
     onPreload,
     onChatStart,
     onValidateMessages,
@@ -4591,6 +4906,7 @@ function chatAgent<
       // Mutable holder; advances in `writeTurnCompleteChunk` after each turn
       // and is the trim target for the NEXT turn's trim record.
       locals.set(lastTurnCompleteSeqNumKey, { value: undefined });
+      markChatAgentRunForStreamsWarning();
       taskContext.setConversationId(payload.chatId);
 
       // Stamp `gen_ai.conversation.id` on the run-level span. Every
@@ -4653,51 +4969,136 @@ function chatAgent<
       // swallow errors internally; the agent stays available either way.
       const sessionIdForSnapshot = payload.sessionId ?? payload.chatId;
       let bootSnapshot: ChatSnapshotV1<TUIMessage> | undefined;
-      let replayed: TUIMessage[] = [];
+      let replayedSettled: TUIMessage[] = [];
+      let replayedPartial: TUIMessage | undefined;
+      let replayedPartialRaw: TUIMessage | undefined;
+      let replayedInTail: { message: TUIMessage; metadata: unknown; seqNum: number }[] = [];
+      // Wire payloads to dispatch as turns before the regular session.in
+      // pump kicks in. Populated by `onRecoveryBoot.recoveredTurns` (or its
+      // default, `inFlightUsers`). The turn-loop checks this queue ahead of
+      // `messagesInput.waitWithIdleTimeout` so recovered turns fire first.
+      const bootInjectedQueue: ChatTaskWirePayload<
+        TUIMessage,
+        inferSchemaIn<TClientDataSchema>
+      >[] = [];
       const couldHavePriorState =
         payload.continuation === true || ctx.attempt.number > 1;
 
       if (!hydrateMessages && couldHavePriorState) {
-        try {
-          bootSnapshot = await tracer.startActiveSpan(
-            "chat.boot.snapshot.read",
-            async () => readChatSnapshot<TUIMessage>(sessionIdForSnapshot)
-          );
-        } catch (error) {
-          // `readChatSnapshot` already swallows + warns internally; this catch
-          // is just belt-and-suspenders against tracer/span errors.
-          logger.warn("chat.agent: snapshot read failed; continuing without snapshot", {
-            error: error instanceof Error ? error.message : String(error),
-            sessionId: sessionIdForSnapshot,
-          });
-        }
+        // Single parent span for the whole boot read phase — snapshot
+        // read, session.out replay, session.in replay. Per-phase timing
+        // + result counts are attributes on the span.
+        await tracer.startActiveSpan(
+          "chat.boot",
+          async (bootSpan) => {
+            // snapshot read
+            const snapStart = Date.now();
+            try {
+              bootSnapshot = await readChatSnapshot<TUIMessage>(sessionIdForSnapshot);
+            } catch (error) {
+              // `readChatSnapshot` already swallows + warns internally; this catch
+              // is just belt-and-suspenders against an unexpected throw.
+              logger.warn(
+                "chat.agent: snapshot read failed; continuing without snapshot",
+                {
+                  error: error instanceof Error ? error.message : String(error),
+                  sessionId: sessionIdForSnapshot,
+                }
+              );
+            }
+            bootSpan.setAttribute("chat.boot.snapshot.durationMs", Date.now() - snapStart);
+            bootSpan.setAttribute("chat.boot.snapshot.present", !!bootSnapshot);
+            bootSpan.setAttribute(
+              "chat.boot.snapshot.messageCount",
+              bootSnapshot?.messages?.length ?? 0
+            );
 
-        // Seed the trim chain from the snapshot's `lastOutEventId` (the SSE
-        // id of the previous turn's `turn-complete` control record). The
-        // first turn-complete this worker writes will then trim back to it.
-        // Without seeding, the new worker would emit no trim on its first
-        // turn (chain self-bootstraps from turn 2), so this is purely an
-        // optimization to keep continuation runs bounded from the first turn.
-        if (bootSnapshot?.lastOutEventId !== undefined) {
-          const seeded = Number.parseInt(bootSnapshot.lastOutEventId, 10);
-          if (Number.isFinite(seeded)) {
-            const slot = locals.get(lastTurnCompleteSeqNumKey);
-            if (slot) slot.value = seeded;
-          }
-        }
+            // Seed the trim chain from the snapshot's `lastOutEventId` (the SSE
+            // id of the previous turn's `turn-complete` control record). The
+            // first turn-complete this worker writes will then trim back to it.
+            // Without seeding, the new worker would emit no trim on its first
+            // turn (chain self-bootstraps from turn 2), so this is purely an
+            // optimization to keep continuation runs bounded from the first turn.
+            if (bootSnapshot?.lastOutEventId !== undefined) {
+              const seeded = Number.parseInt(bootSnapshot.lastOutEventId, 10);
+              if (Number.isFinite(seeded)) {
+                const slot = locals.get(lastTurnCompleteSeqNumKey);
+                if (slot) slot.value = seeded;
+              }
+            }
 
-        try {
-          replayed = await tracer.startActiveSpan("chat.boot.replay", async () =>
-            replaySessionOutTail<TUIMessage>(sessionIdForSnapshot, {
-              lastEventId: bootSnapshot?.lastOutEventId,
-            })
-          );
-        } catch (error) {
-          logger.warn("chat.agent: session.out replay failed; using snapshot only", {
-            error: error instanceof Error ? error.message : String(error),
-            sessionId: sessionIdForSnapshot,
-          });
-        }
+            // session.out replay
+            const replayOutStart = Date.now();
+            try {
+              const replayResult = await replaySessionOutTail<TUIMessage>(
+                sessionIdForSnapshot,
+                { lastEventId: bootSnapshot?.lastOutEventId }
+              );
+              replayedSettled = replayResult.settled;
+              replayedPartial = replayResult.partial;
+              replayedPartialRaw = replayResult.partialRaw;
+            } catch (error) {
+              logger.warn(
+                "chat.agent: session.out replay failed; using snapshot only",
+                {
+                  error: error instanceof Error ? error.message : String(error),
+                  sessionId: sessionIdForSnapshot,
+                }
+              );
+            }
+            bootSpan.setAttribute(
+              "chat.boot.replay.out.durationMs",
+              Date.now() - replayOutStart
+            );
+            bootSpan.setAttribute("chat.boot.replay.out.settledCount", replayedSettled.length);
+            bootSpan.setAttribute(
+              "chat.boot.replay.out.partialPresent",
+              replayedPartial !== undefined
+            );
+
+            // session.in tail read
+            //
+            // session.in carries the user-side of the conversation
+            // (ChatInputChunk records). On a continuation boot we read past
+            // the last turn-complete's `session-in-event-id` header so any
+            // user message the dead predecessor hadn't acknowledged surfaces
+            // here. Without this read, in-flight user messages would only be
+            // visible via the live SSE subscription — by which point they
+            // would arrive AFTER the partial-assistant orphan and look like
+            // brand-new turns to the model, producing inverted chains.
+            const replayInStart = Date.now();
+            const lastInEventId = await findLatestSessionInCursor(payload.chatId)
+              .then((cursor) => (cursor !== undefined ? String(cursor) : undefined))
+              .catch(() => undefined);
+            try {
+              replayedInTail = await replaySessionInTail<TUIMessage>(payload.chatId, {
+                lastEventId: lastInEventId,
+              });
+            } catch (error) {
+              logger.warn(
+                "chat.agent: session.in replay failed; in-flight users may not be recovered",
+                { error: error instanceof Error ? error.message : String(error) }
+              );
+            }
+            bootSpan.setAttribute(
+              "chat.boot.replay.in.durationMs",
+              Date.now() - replayInStart
+            );
+            bootSpan.setAttribute(
+              "chat.boot.replay.in.userCount",
+              replayedInTail.length
+            );
+          },
+          {
+            attributes: {
+              [SemanticInternalAttributes.STYLE_ICON]: "tabler-rotate-clockwise",
+              [SemanticInternalAttributes.COLLAPSED]: true,
+              "chat.id": payload.chatId,
+              "chat.continuation": payload.continuation ?? false,
+              "chat.attempt": ctx.attempt.number,
+            },
+          }
+        );
       }
 
       // ── session.in resume cursor ───────────────────────────────────
@@ -4745,12 +5146,158 @@ function chatAgent<
         }
       }
 
-      // ── Merge + head-start bootstrap ────────────────────────────────
+      // ── Recovery boot + chain reconstruction ────────────────────────
       if (!hydrateMessages) {
-        accumulatedUIMessages = mergeByIdReplaceWins<TUIMessage>(
+        const settledMessages = mergeByIdReplaceWins<TUIMessage>(
           (bootSnapshot?.messages as TUIMessage[]) ?? [],
-          replayed
+          replayedSettled
         );
+        const inFlightUsers = replayedInTail.map((r) => r.message);
+        const partialAssistant = replayedPartial;
+        // Fire the hook only when there's a partial assistant — the
+        // mid-stream-died signal. In-flight users alone (no partial)
+        // cover graceful exits like `chat.requestUpgrade()` where the
+        // predecessor chose to end before processing the message;
+        // those route through the normal continuation-wait path.
+        const hasRecoveredState = partialAssistant !== undefined;
+
+        let hookChain: TUIMessage[] | undefined;
+        let hookRecoveredTurns: TUIMessage[] | undefined;
+        let hookBeforeBoot: (() => Promise<void>) | undefined;
+        if (couldHavePriorState && hasRecoveredState && onRecoveryBoot) {
+          // Extract from the RAW partial (pre-cleanup). `cleanupAbortedParts`
+          // strips exactly the input-streaming / input-available tool parts
+          // we want to surface here, so the cleaned `partialAssistant` would
+          // always report zero pending tool calls.
+          const pendingToolCalls = extractPendingToolCallsFromPartial(replayedPartialRaw);
+          const previousRunIdForHook = previousRunId ?? "";
+          let hookResult: RecoveryBootResult<TUIMessage> | void = undefined;
+          const { writer: hookWriter, flush: hookFlush } = createLazyChatWriter();
+          try {
+            hookResult = await tracer.startActiveSpan(
+              "onRecoveryBoot()",
+              async () =>
+                onRecoveryBoot({
+                  ctx,
+                  chatId: payload.chatId,
+                  runId: ctx.run.id,
+                  previousRunId: previousRunIdForHook,
+                  cause: "unknown",
+                  settledMessages,
+                  inFlightUsers,
+                  partialAssistant,
+                  pendingToolCalls,
+                  writer: hookWriter,
+                }),
+              {
+                attributes: {
+                  [SemanticInternalAttributes.STYLE_ICON]: "task-hook-onStart",
+                  [SemanticInternalAttributes.COLLAPSED]: true,
+                  "chat.id": payload.chatId,
+                },
+              }
+            );
+          } catch (error) {
+            logger.warn("chat.agent: onRecoveryBoot threw; using defaults", {
+              error: error instanceof Error ? error.message : String(error),
+              chatId: payload.chatId,
+            });
+          }
+          // Flush any chunks the hook wrote so they land on session.out
+          // BEFORE the first recovered turn fires.
+          try {
+            await hookFlush();
+          } catch (error) {
+            logger.warn("chat.agent: onRecoveryBoot writer flush failed", {
+              error: error instanceof Error ? error.message : String(error),
+            });
+          }
+          if (hookResult && typeof hookResult === "object") {
+            if (Array.isArray(hookResult.chain)) hookChain = hookResult.chain;
+            if (Array.isArray(hookResult.recoveredTurns))
+              hookRecoveredTurns = hookResult.recoveredTurns;
+            if (typeof hookResult.beforeBoot === "function")
+              hookBeforeBoot = hookResult.beforeBoot;
+          }
+        }
+
+        // Default: splice partial + the user it was answering into
+        // the chain so follow-ups like "keep going" still have context.
+        let seedChain: TUIMessage[];
+        let recoveredTurns: TUIMessage[];
+        if (hookChain !== undefined) {
+          seedChain = hookChain;
+        } else if (partialAssistant !== undefined && inFlightUsers.length > 0) {
+          seedChain = [...settledMessages, inFlightUsers[0]!, partialAssistant];
+        } else {
+          seedChain = settledMessages;
+        }
+        if (hookRecoveredTurns !== undefined) {
+          recoveredTurns = hookRecoveredTurns;
+        } else if (partialAssistant !== undefined && inFlightUsers.length > 0) {
+          recoveredTurns = inFlightUsers.slice(1);
+        } else {
+          recoveredTurns = inFlightUsers;
+        }
+        // `beforeBoot` errors bubble — the customer opted into blocking
+        // persistence and a failure there should fail the run rather than
+        // dispatch recovered turns against half-persisted state.
+        if (hookBeforeBoot) {
+          await hookBeforeBoot();
+        }
+
+        // Advance the session.in cursor past every recovered user so
+        // the live subscription doesn't re-deliver them.
+        if (replayedInTail.length > 0) {
+          const lastRecoveredSeq = replayedInTail[replayedInTail.length - 1]!.seqNum;
+          const currentCursor = sessionStreams.lastSeqNum(payload.chatId, "in");
+          if (currentCursor === undefined || lastRecoveredSeq > currentCursor) {
+            sessionStreams.setLastSeqNum(payload.chatId, "in", lastRecoveredSeq);
+            sessionStreams.setLastDispatchedSeqNum(payload.chatId, "in", lastRecoveredSeq);
+          }
+        }
+
+        // Synthesize wire payloads for each recoveredTurn. The turn-loop
+        // pops these ahead of `messagesInput.waitWithIdleTimeout` so they
+        // dispatch as normal turns with the existing hook stack.
+        //
+        // Per-record metadata preservation: each session.in record
+        // carries its own `payload.metadata` (the transport sets it at
+        // send time). Look up the original by message id so a recovered
+        // turn dispatches with the metadata its writer actually sent.
+        // Fall back to the boot payload's metadata for hook-synthesized
+        // messages (customer returned a recoveredTurn with no matching
+        // session.in record).
+        //
+        // OOM-retry dedup: if `payload.message` is the same user message
+        // the queue is about to redispatch (the wire payload survives
+        // across attempts, but session.in records it once), the wire
+        // payload already runs turn 0 — drop the duplicate from the queue
+        // so we don't fire the same turn twice.
+        const wireMessageId =
+          (payload.message as { id?: string } | undefined)?.id;
+        const metadataById = new Map<string, unknown>();
+        for (const entry of replayedInTail) {
+          metadataById.set(entry.message.id, entry.metadata);
+        }
+        for (const msg of recoveredTurns) {
+          if (wireMessageId && msg.id === wireMessageId) continue;
+          const recoveredMetadata = metadataById.has(msg.id)
+            ? metadataById.get(msg.id)
+            : payload.metadata;
+          bootInjectedQueue.push({
+            chatId: payload.chatId,
+            sessionId: payload.sessionId,
+            metadata: recoveredMetadata,
+            trigger: "submit-message",
+            message: msg,
+            messageId: msg.id,
+            continuation: payload.continuation,
+            previousRunId: payload.previousRunId,
+          } as ChatTaskWirePayload<TUIMessage, inferSchemaIn<TClientDataSchema>>);
+        }
+
+        accumulatedUIMessages = seedChain;
 
         // ── Head-start bootstrap ─────────────────────────────────────
         //
@@ -5122,6 +5669,15 @@ function chatAgent<
             parseClientData ? await parseClientData(payload.metadata) : payload.metadata
           ) as inferSchemaOut<TClientDataSchema>;
 
+          // Recovery-boot injection: if `onRecoveryBoot` (or its default
+          // `inFlightUsers`) populated `bootInjectedQueue`, dispatch the
+          // first synthesized payload as the very first turn instead of
+          // waiting on the live session.in. Subsequent recovered turns
+          // get drained by the end-of-turn picker below.
+          if (bootInjectedQueue.length > 0) {
+            currentWirePayload = bootInjectedQueue.shift()!;
+          } else {
+
           const effectiveIdleTimeout =
             idleTimeoutInSeconds ?? payload.idleTimeoutInSeconds;
           const effectiveTurnTimeout =
@@ -5194,6 +5750,7 @@ function chatAgent<
           if (currentWirePayload.trigger === "close") {
             return;
           }
+          } // end else (no boot-injected first turn)
         }
 
         for (let turn = 0; turn < maxTurns; turn++) {
@@ -6475,6 +7032,15 @@ function chatAgent<
                   // before the next message, their injected context is picked up in prepareStep.
                   // The pre-onBeforeTurnComplete drain handles promises from onTurnStart/run().
 
+                  // Recovery-boot injection: drain remaining recovered turns
+                  // before any other source. `onRecoveryBoot` (or its default)
+                  // produced these from in-flight user messages on session.in
+                  // that the dead predecessor never acknowledged.
+                  if (bootInjectedQueue.length > 0) {
+                    currentWirePayload = bootInjectedQueue.shift()!;
+                    return "continue";
+                  }
+
                   // If messages arrived during streaming (without pendingMessages config),
                   // use the first one immediately as the next turn.
                   if (pendingMessages.length > 0) {
@@ -6624,6 +7190,14 @@ function chatAgent<
                 return;
               }
 
+              // Drain remaining recovered turns before idling — a thrown
+              // recovered turn shouldn't strand the rest of the boot queue
+              // until an unrelated live message arrives.
+              if (bootInjectedQueue.length > 0) {
+                currentWirePayload = bootInjectedQueue.shift()!;
+                continue;
+              }
+
               // Wait for the next message — same as after a successful turn
               const effectiveIdleTimeout =
                 (metadata.get(IDLE_TIMEOUT_METADATA_KEY) as number | undefined) ??
@@ -6943,6 +7517,7 @@ function createChatBuilder<
         ...(config.clientDataSchema ? { clientDataSchema: config.clientDataSchema } : {}),
         uiMessageStreamOptions: mergedUiStream,
         onBoot: composeHooks(config.hooks.onBoot, options.onBoot),
+        onRecoveryBoot: options.onRecoveryBoot,
         onPreload: composeHooks(config.hooks.onPreload, options.onPreload),
         onChatStart: composeHooks(config.hooks.onChatStart, options.onChatStart),
         onTurnStart: composeHooks(config.hooks.onTurnStart, options.onTurnStart),
@@ -8411,6 +8986,32 @@ export type { InferChatClientData, InferChatUIMessage } from "./ai-shared.js";
 /**
  * Options for {@link createChatStartSessionAction}.
  */
+/**
+ * Discriminator for per-endpoint `baseURL` / `fetch` callbacks on
+ * `createChatStartSessionAction`.
+ *
+ * - `"sessions"` — `POST /api/v1/sessions` (session create + first run trigger).
+ * - `"auth"` — `POST /api/v1/auth/jwt/claims` (only fired when
+ *   `tokenTTL` is set; otherwise the publicAccessToken from session create
+ *   is reused as-is).
+ */
+export type ChatStartSessionEndpoint = "sessions" | "auth";
+
+/** Endpoint + chat id handed to the per-endpoint `baseURL` / `fetch` callbacks. */
+export type ChatStartSessionEndpointContext = {
+  endpoint: ChatStartSessionEndpoint;
+  chatId: string;
+};
+
+export type ChatStartSessionBaseURLResolver = (ctx: ChatStartSessionEndpointContext) => string;
+
+export type ChatStartSessionFetchOverride = (
+  url: string,
+  init: RequestInit,
+  ctx: ChatStartSessionEndpointContext
+) => Promise<Response>;
+
+/** Options for {@link createChatStartSessionAction}. */
 export type CreateChatStartSessionActionOptions = {
   /** TTL for the session-scoped public access token. @default "1h" */
   tokenTTL?: string | number | Date;
@@ -8419,14 +9020,37 @@ export type CreateChatStartSessionActionOptions = {
    * Per-call `params.triggerConfig` shallow-merges on top.
    */
   triggerConfig?: Partial<SessionTriggerConfig>;
+  /**
+   * Override the Trigger.dev API base URL. String applies to both
+   * `/api/v1/sessions` and `/api/v1/auth/jwt/claims`; function picks per
+   * endpoint. When unset, falls back to `apiClientManager.baseURL`
+   * (typically the `TRIGGER_API_URL` env var). Set this to route session
+   * create through a trusted edge proxy that injects server-side signal
+   * into `basePayload.metadata` before forwarding upstream.
+   */
+  baseURL?: string | ChatStartSessionBaseURLResolver;
+  /**
+   * Per-request fetch override. Receives the resolved URL, RequestInit,
+   * and endpoint context. Use for header injection, proxy routing, or
+   * custom retry. Applies to both session-create and JWT-claims POSTs.
+   */
+  fetch?: ChatStartSessionFetchOverride;
 };
 
 /**
  * Params for the function returned by {@link createChatStartSessionAction}.
  */
-export type ChatStartSessionParams = {
+export type ChatStartSessionParams<TChat extends AnyTask = AnyTask> = {
   /** Conversation id (mapped to the Session's `externalId`). */
   chatId: string;
+  /**
+   * Typed client data — folded into the first run's `payload.metadata` so
+   * `onPreload`, `onChatStart`, etc. see the same `clientData` shape on the
+   * first turn as subsequent turns get via the transport's `clientData`
+   * option. Typed via the agent's `clientDataSchema` when the action is
+   * parameterised with `createStartSessionAction<typeof myChat>(...)`.
+   */
+  clientData?: InferChatClientData<TChat>;
   /**
    * Per-call trigger config. Shallow-merged over the action's default
    * `triggerConfig`. `basePayload` is the customer's wire payload (for
@@ -8434,7 +9058,11 @@ export type ChatStartSessionParams = {
    * which the runtime injects automatically).
    */
   triggerConfig?: Partial<SessionTriggerConfig>;
-  /** Pass-through metadata folded into the session row. */
+  /**
+   * Opaque session-level metadata stored on the Session row. Separate from
+   * the per-turn `clientData` above. Use this when you want to attach
+   * server-side metadata that doesn't go through the agent's `clientDataSchema`.
+   */
   metadata?: Record<string, unknown>;
 };
 
@@ -8462,33 +9090,37 @@ export type ChatStartSessionResult = {
  * Wrap in a Next.js server action (or any server-side handler) so the
  * customer's secret key never crosses to the browser.
  *
+ * Parameterise the action with `<typeof yourChatAgent>` to type the
+ * `clientData` field against your agent's `clientDataSchema`.
+ *
  * @example
  * ```ts
  * // actions.ts
  * "use server";
  * import { chat } from "@trigger.dev/sdk/ai";
+ * import type { myChat } from "@/trigger/chat";
  *
- * export const startChatSession = chat.createStartSessionAction("my-chat", {
- *   triggerConfig: { machine: "small-1x" },
- * });
+ * export const startChatSession = chat.createStartSessionAction<typeof myChat>(
+ *   "my-chat",
+ *   { triggerConfig: { machine: "small-1x" } }
+ * );
  * ```
  *
- * Then in the browser:
+ * Then in the browser, threading the typed `clientData` from the transport:
  * ```tsx
- * const transport = useTriggerChatTransport({
+ * const transport = useTriggerChatTransport<typeof myChat>({
  *   task: "my-chat",
- *   accessToken: async ({ chatId }) => {
- *     const { publicAccessToken } = await startChatSession({ chatId });
- *     return publicAccessToken;
- *   },
+ *   accessToken: ({ chatId }) => mintChatAccessToken(chatId),
+ *   startSession: ({ chatId, clientData }) =>
+ *     startChatSession({ chatId, clientData }),
  * });
  * ```
  */
-function createChatStartSessionAction(
+function createChatStartSessionAction<TChat extends AnyTask = AnyTask>(
   taskId: string,
   options?: CreateChatStartSessionActionOptions
-): (params: ChatStartSessionParams) => Promise<ChatStartSessionResult> {
-  return async (params: ChatStartSessionParams): Promise<ChatStartSessionResult> => {
+): (params: ChatStartSessionParams<TChat>) => Promise<ChatStartSessionResult> {
+  return async (params: ChatStartSessionParams<TChat>): Promise<ChatStartSessionResult> => {
     if (!params.chatId) {
       throw new Error(
         "chat.createStartSessionAction: params.chatId is required — used as the session externalId."
@@ -8501,21 +9133,25 @@ function createChatStartSessionAction(
     // `onPreload` fires, the runtime opens its `.in` subscription, the
     // first user message arrives moments later via `.in/append`.
     //
-    // `metadata` is the customer's transport-level `clientData`,
-    // threaded through so the agent's `clientDataSchema` validates on
-    // the very first turn (the typical schema requires `userId` etc.).
+    // `clientData` is folded into `basePayload.metadata` so the agent's
+    // `clientDataSchema` validates on the very first turn against the same
+    // shape per-turn `metadata` carries via the transport.
     // Auto-tag every chat.agent run with `chat:{chatId}` so the dashboard /
     // run-list filter by chat works without the customer having to wire it
     // up. Mirrors the browser-mediated `TriggerChatTransport.doStart` path.
     const userTags = params.triggerConfig?.tags ?? options?.triggerConfig?.tags ?? [];
     const tags = [`chat:${params.chatId}`, ...userTags].slice(0, 5);
 
+    const clientDataMetadata =
+      params.clientData !== undefined ? { metadata: params.clientData } : {};
+
     const triggerConfig: SessionTriggerConfig = {
       basePayload: {
         messages: [],
         trigger: "preload",
         ...(options?.triggerConfig?.basePayload ?? {}),
         ...(params.triggerConfig?.basePayload ?? {}),
+        ...clientDataMetadata,
         chatId: params.chatId,
       },
       ...(options?.triggerConfig?.machine || params.triggerConfig?.machine
@@ -8542,13 +9178,26 @@ function createChatStartSessionAction(
         : {}),
     };
 
-    const created = await sessions.start({
-      type: "chat.agent",
+    const startBody = {
+      type: "chat.agent" as const,
       externalId: params.chatId,
       taskIdentifier: taskId,
       triggerConfig,
       metadata: params.metadata,
-    });
+    };
+
+    const baseURLOption = options?.baseURL;
+    const fetchOverride = options?.fetch;
+    const hasOverride = baseURLOption !== undefined || fetchOverride !== undefined;
+
+    const created: { id: string; runId: string; publicAccessToken: string } = hasOverride
+      ? await callSessionsCreateWithOverride({
+          chatId: params.chatId,
+          body: startBody,
+          baseURLOption,
+          fetchOverride,
+        })
+      : await sessions.start(startBody);
 
     // Session create returns a session PAT directly when called with a
     // start token, but when the SDK call goes via the secret key we still
@@ -8556,13 +9205,20 @@ function createChatStartSessionAction(
     // re-minting here lets the customer override `tokenTTL`).
     const publicAccessToken =
       options?.tokenTTL !== undefined
-        ? await auth.createPublicToken({
-            scopes: {
-              read: { sessions: params.chatId },
-              write: { sessions: params.chatId },
-            },
-            expirationTime: options.tokenTTL,
-          })
+        ? hasOverride
+          ? await mintPublicTokenWithOverride({
+              chatId: params.chatId,
+              expirationTime: options.tokenTTL,
+              baseURLOption,
+              fetchOverride,
+            })
+          : await auth.createPublicToken({
+              scopes: {
+                read: { sessions: params.chatId },
+                write: { sessions: params.chatId },
+              },
+              expirationTime: options.tokenTTL,
+            })
         : created.publicAccessToken;
 
     return {
@@ -8573,6 +9229,101 @@ function createChatStartSessionAction(
   };
 }
 
+function resolveChatStartBaseURL(
+  endpoint: ChatStartSessionEndpoint,
+  chatId: string,
+  option: string | ChatStartSessionBaseURLResolver | undefined
+): string {
+  const configured = apiClientManager.baseURL ?? "https://api.trigger.dev";
+  // One trailing slash is trimmed so callers can append `/api/...` paths.
+  if (typeof option === "function") {
+    return option({ endpoint, chatId }).replace(/\/$/, "");
+  }
+  return (option ?? configured).replace(/\/$/, "");
+}
+
+function overrideRequestHeaders(accessToken: string): Record<string, string> {
+  // Override-mode requests bypass ApiClient, so rebuild the headers it would
+  // have sent (mirrors ApiClient.#getHeaders): JSON content type, bearer
+  // auth, and the SDK source marker.
+  const branch = apiClientManager.branchName;
+  return {
+    "Content-Type": "application/json",
+    Authorization: `Bearer ${accessToken}`,
+    "x-trigger-source": "sdk",
+    // Preview-branch hint (TRIGGER_PREVIEW_BRANCH / VERCEL_GIT_COMMIT_REF via
+    // apiClientManager.branchName) keeps the request on the same env the
+    // standard path would have routed to; omitted entirely when unset.
+    ...(branch ? { "x-trigger-branch": branch } : {}),
+  };
+}
+
+async function callSessionsCreateWithOverride(args: {
+  chatId: string;
+  body: { type: "chat.agent"; externalId: string; taskIdentifier: string; triggerConfig: SessionTriggerConfig; metadata?: Record<string, unknown> };
+  baseURLOption: string | ChatStartSessionBaseURLResolver | undefined;
+  fetchOverride: ChatStartSessionFetchOverride | undefined;
+}): Promise<{ id: string; runId: string; publicAccessToken: string }> {
+  const secret = apiClientManager.accessToken;
+  if (!secret) {
+    throw new Error(
+      "chat.createStartSessionAction: no API access token configured. Set TRIGGER_SECRET_KEY or call apiClientManager.setGlobalAPIClientConfiguration before invoking the action."
+    );
+  }
+  const target = `${resolveChatStartBaseURL("sessions", args.chatId, args.baseURLOption)}/api/v1/sessions`;
+  const request: RequestInit = {
+    method: "POST",
+    headers: overrideRequestHeaders(secret),
+    body: JSON.stringify(args.body),
+  };
+  const endpointCtx: ChatStartSessionEndpointContext = { endpoint: "sessions", chatId: args.chatId };
+  // A caller-supplied fetch additionally receives the endpoint context.
+  const reply = await (args.fetchOverride
+    ? args.fetchOverride(target, request, endpointCtx)
+    : fetch(target, request));
+  if (reply.ok) {
+    return (await reply.json()) as { id: string; runId: string; publicAccessToken: string };
+  }
+  const detail = await reply.text().catch(() => "");
+  throw new Error(`sessions.start failed: ${reply.status} ${detail}`);
+}
+
+async function mintPublicTokenWithOverride(args: {
+  chatId: string;
+  expirationTime: string | number | Date;
+  baseURLOption: string | ChatStartSessionBaseURLResolver | undefined;
+  fetchOverride: ChatStartSessionFetchOverride | undefined;
+}): Promise<string> {
+  const secret = apiClientManager.accessToken;
+  if (!secret) {
+    throw new Error(
+      "chat.createStartSessionAction: no API access token configured for JWT mint."
+    );
+  }
+  const target = `${resolveChatStartBaseURL("auth", args.chatId, args.baseURLOption)}/api/v1/auth/jwt/claims`;
+  const request: RequestInit = {
+    method: "POST",
+    headers: overrideRequestHeaders(secret),
+  };
+  const endpointCtx: ChatStartSessionEndpointContext = { endpoint: "auth", chatId: args.chatId };
+  const reply = await (args.fetchOverride
+    ? args.fetchOverride(target, request, endpointCtx)
+    : fetch(target, request));
+  if (!reply.ok) {
+    const detail = await reply.text().catch(() => "");
+    throw new Error(`auth.createPublicToken failed: ${reply.status} ${detail}`);
+  }
+  const serverClaims = (await reply.json()) as Record<string, unknown>;
+  // The access token doubles as the signing key. `scopes` is listed after
+  // the spread, so it replaces any `scopes` key in the fetched claims.
+  const sessionScopes = [`read:sessions:${args.chatId}`, `write:sessions:${args.chatId}`];
+  return generateJWT({
+    secretKey: secret,
+    payload: { ...serverClaims, scopes: sessionScopes },
+    expirationTime: args.expirationTime,
+  });
+}
+
 export const chat = {
   /** Create a chat agent. See {@link chatAgent}. */
   agent: chatAgent,
diff --git a/packages/trigger-sdk/src/v3/auth.ts b/packages/trigger-sdk/src/v3/auth.ts
index 16de798b0a3..614019941db 100644
--- a/packages/trigger-sdk/src/v3/auth.ts
+++ b/packages/trigger-sdk/src/v3/auth.ts
@@ -74,8 +74,7 @@ type PublicTokenPermissionProperties = {
    *
    * `read:sessions:{id}` lets the bearer read both the `.out` and `.in`
    * channels and list runs on the session. `write:sessions:{id}` lets the
-   * bearer append to the session's channels. `trigger:sessions:{id}` permits
-   * triggering new runs on the session.
+   * bearer append to the session's channels and create new runs against it.
    */
   sessions?: string | string[];
 };
diff --git a/packages/trigger-sdk/src/v3/chat-client.ts b/packages/trigger-sdk/src/v3/chat-client.ts
index 40132a624e1..98380f1e8be 100644
--- a/packages/trigger-sdk/src/v3/chat-client.ts
+++ b/packages/trigger-sdk/src/v3/chat-client.ts
@@ -20,7 +20,6 @@ import type { SessionTriggerConfig, Task } from "@trigger.dev/core/v3";
 import type { ModelMessage, UIMessage, UIMessageChunk } from "ai";
 import { readUIMessageStream } from "ai";
 import {
-  ApiClient,
   apiClientManager,
   controlSubtype,
   SSEStreamSubscription,
@@ -53,6 +52,26 @@ export type ChatSession = {
   lastEventId?: string;
 };
 
+/**
+ * Discriminator passed to per-endpoint `baseURL` and `fetch` callbacks on
+ * `AgentChat`. Mirrors the equivalent type used by `TriggerChatTransport`,
+ * so customers can share a single resolver between the two clients.
+ */
+export type AgentChatEndpoint = "in" | "out";
+
+export type AgentChatEndpointContext = {
+  endpoint: AgentChatEndpoint;
+  chatId: string;
+};
+
+export type AgentChatBaseURLResolver = (ctx: AgentChatEndpointContext) => string;
+
+export type AgentChatFetchOverride = (
+  url: string,
+  init: RequestInit,
+  ctx: AgentChatEndpointContext
+) => Promise<Response>;
+
 export type AgentChatOptions<TAgent = unknown> = {
   /** The agent task ID to trigger. */
   agent: string;
@@ -89,6 +108,26 @@ export type AgentChatOptions<TAgent = unknown> = {
    * chat. Folded into `sessions.start({...triggerConfig})` body.
    */
   triggerConfig?: SessionTriggerConfig;
+  /**
+   * Override the Trigger.dev API base URL for the chat's `.in/append` and
+   * `.out` SSE endpoints. String form applies to both; pass a function to
+   * pick per endpoint. Defaults to `apiClientManager.baseURL` (whatever
+   * `@trigger.dev/sdk` was configured with — typically `TRIGGER_API_URL`
+   * env var).
+   *
+   * Session creation (`POST /api/v1/sessions`) and token mint
+   * (`POST /api/v1/auth/jwt/claims`) still flow through
+   * `apiClientManager` — pass equivalent options to
+   * `chat.createStartSessionAction` if you need those routed too.
+   */
+  baseURL?: string | AgentChatBaseURLResolver;
+  /**
+   * Optional per-request fetch override. Receives the resolved URL, the
+   * RequestInit, and endpoint context. Use this for header injection
+   * (tracing), proxy routing, or custom retries. Applies to both the
+   * `.in/append` POSTs and the `.out` SSE GET.
+   */
+  fetch?: AgentChatFetchOverride;
 };
 
 // ─── ChatStream ────────────────────────────────────────────────────
@@ -272,6 +311,8 @@ export class AgentChat<TAgent = unknown> {
   private readonly triggerConfigDefault: SessionTriggerConfig | undefined;
   private readonly onTriggered: AgentChatOptions["onTriggered"];
   private readonly onTurnComplete: AgentChatOptions["onTurnComplete"];
+  private readonly baseURLResolver: AgentChatBaseURLResolver;
+  private readonly fetchOverride: AgentChatFetchOverride | undefined;
 
   private state: SessionState;
 
@@ -283,6 +324,11 @@ export class AgentChat<TAgent = unknown> {
     this.triggerConfigDefault = options.triggerConfig;
     this.onTriggered = options.onTriggered;
     this.onTurnComplete = options.onTurnComplete;
+    const baseURLOption = options.baseURL;
+    this.baseURLResolver = typeof baseURLOption === "function"
+      ? baseURLOption
+      : () => baseURLOption ?? apiClientManager.baseURL ?? "https://api.trigger.dev";
+    this.fetchOverride = options.fetch;
 
     // Hydration: a non-empty `session` means the caller knows the
     // session already exists (started in a previous request). Mark
@@ -378,12 +424,7 @@ export class AgentChat<TAgent = unknown> {
       metadata: this.clientData,
     } as ChatTaskWirePayload;
 
-    const api = this.createApiClient();
-    await api.appendToSessionStream(
-      this.chatId,
-      "in",
-      serializeInputChunk({ kind: "message", payload })
-    );
+    await this.appendInputChunk(serializeInputChunk({ kind: "message", payload }));
 
     return this.subscribeToSessionStream(options?.abortSignal);
   }
@@ -404,15 +445,7 @@ export class AgentChat<TAgent = unknown> {
     };
 
     try {
-      const api = this.createApiClient();
-      await api.appendToSessionStream(
-        this.chatId,
-        "in",
-        serializeInputChunk({
-          kind: "message",
-          payload,
-        })
-      );
+      await this.appendInputChunk(serializeInputChunk({ kind: "message", payload }));
       return true;
     } catch {
       return false;
@@ -424,14 +457,7 @@ export class AgentChat<TAgent = unknown> {
     if (!this.state.started) return;
 
     this.state.skipToTurnComplete = true;
-    const api = this.createApiClient();
-    await api
-      .appendToSessionStream(
-        this.chatId,
-        "in",
-        serializeInputChunk({ kind: "stop" })
-      )
-      .catch(() => {});
+    await this.appendInputChunk(serializeInputChunk({ kind: "stop" })).catch(() => {});
   }
 
   /**
@@ -459,10 +485,7 @@ export class AgentChat<TAgent = unknown> {
      */
     isFinal: boolean;
   }): Promise<void> {
-    const api = this.createApiClient();
-    await api.appendToSessionStream(
-      this.chatId,
-      "in",
+    await this.appendInputChunk(
       serializeInputChunk({
         kind: "handover",
         partialAssistantMessage: args.partialAssistantMessage,
@@ -481,12 +504,7 @@ export class AgentChat<TAgent = unknown> {
    * surface.
    */
   async sendHandoverSkip(): Promise<void> {
-    const api = this.createApiClient();
-    await api.appendToSessionStream(
-      this.chatId,
-      "in",
-      serializeInputChunk({ kind: "handover-skip" })
-    );
+    await this.appendInputChunk(serializeInputChunk({ kind: "handover-skip" }));
   }
 
   /**
@@ -531,15 +549,7 @@ export class AgentChat<TAgent = unknown> {
     };
 
     try {
-      const api = this.createApiClient();
-      await api.appendToSessionStream(
-        this.chatId,
-        "in",
-        serializeInputChunk({
-          kind: "message",
-          payload,
-        })
-      );
+      await this.appendInputChunk(serializeInputChunk({ kind: "message", payload }));
     } catch {
       throw new Error("Failed to send action. The session may have ended.");
     }
@@ -553,10 +563,7 @@ export class AgentChat<TAgent = unknown> {
     if (!this.state.started) return false;
 
     try {
-      const api = this.createApiClient();
-      await api.appendToSessionStream(
-        this.chatId,
-        "in",
+      await this.appendInputChunk(
         serializeInputChunk({
           kind: "message",
           payload: {
@@ -582,10 +589,41 @@ export class AgentChat<TAgent = unknown> {
 
   // ─── Private ───────────────────────────────────────────────────
 
-  private createApiClient(): ApiClient {
-    const baseURL = apiClientManager.baseURL ?? "https://api.trigger.dev";
+  private resolveBaseURL(endpoint: AgentChatEndpoint): string {
+    return this.baseURLResolver({ endpoint, chatId: this.chatId }).replace(/\/$/, "");
+  }
+
+  private async doFetch(
+    ctx: AgentChatEndpointContext,
+    url: string,
+    init: RequestInit
+  ): Promise<Response> {
+    return this.fetchOverride ? this.fetchOverride(url, init, ctx) : fetch(url, init);
+  }
+
+  private async appendInputChunk(body: string): Promise<void> {
     const accessToken = apiClientManager.accessToken ?? "";
-    return new ApiClient(baseURL, accessToken);
+    const ctx: AgentChatEndpointContext = { endpoint: "in", chatId: this.chatId };
+    const url = `${this.resolveBaseURL("in")}/realtime/v1/sessions/${encodeURIComponent(this.chatId)}/in/append`;
+    const headers: Record<string, string> = {
+      "Content-Type": "application/json",
+      Authorization: `Bearer ${accessToken}`,
+      "x-trigger-source": "sdk",
+    };
+    const response = await this.doFetch(ctx, url, { method: "POST", headers, body });
+    if (!response.ok) {
+      const text = await response.text().catch(() => "");
+      // Match the error shape that ApiClient/zodfetch produced before the
+      // inline-POST refactor so callers inspecting `error.name ===
+      // "TriggerApiError"` or `error.status` keep working.
+      const err = new Error(`appendToSessionStream failed: ${response.status} ${text}`) as Error & {
+        name: string;
+        status: number;
+      };
+      err.name = "TriggerApiError";
+      err.status = response.status;
+      throw err;
+    }
   }
 
   /**
@@ -650,10 +688,33 @@ export class AgentChat<TAgent = unknown> {
     options?: { sendStopOnAbort?: boolean }
   ): ReadableStream<UIMessageChunk> {
     const state = this.state;
-    const baseURL = apiClientManager.baseURL ?? "https://api.trigger.dev";
     const accessToken = apiClientManager.accessToken ?? "";
     const onTurnComplete = this.onTurnComplete;
     const chatId = this.chatId;
+    const sseCtx: AgentChatEndpointContext = { endpoint: "out", chatId };
+    const fetchOverride = this.fetchOverride;
+    const sseFetchClient: typeof fetch | undefined = fetchOverride
+      ? ((input, init) => {
+          if (typeof input === "string") {
+            return fetchOverride(input, init ?? {}, sseCtx);
+          }
+          if (input instanceof URL) {
+            return fetchOverride(input.toString(), init ?? {}, sseCtx);
+          }
+          // Request — preserve its url + intrinsic init, let any provided
+          // init override on top (matches fetch(Request, init) semantics).
+          return fetchOverride(
+            input.url,
+            {
+              method: input.method,
+              headers: input.headers,
+              signal: input.signal,
+              ...(init ?? {}),
+            },
+            sseCtx
+          );
+        }) as typeof fetch
+      : undefined;
 
     const internalAbort = new AbortController();
     const combinedSignal = abortSignal
@@ -666,14 +727,7 @@ export class AgentChat<TAgent = unknown> {
         () => {
           if (options?.sendStopOnAbort !== false) {
             state.skipToTurnComplete = true;
-            const api = new ApiClient(baseURL, accessToken);
-            api
-              .appendToSessionStream(
-                chatId,
-                "in",
-                serializeInputChunk({ kind: "stop" })
-              )
-              .catch(() => {});
+            this.appendInputChunk(serializeInputChunk({ kind: "stop" })).catch(() => {});
           }
           internalAbort.abort();
         },
@@ -681,7 +735,7 @@ export class AgentChat<TAgent = unknown> {
       );
     }
 
-    const streamUrl = `${baseURL}/realtime/v1/sessions/${encodeURIComponent(chatId)}/out`;
+    const streamUrl = `${this.resolveBaseURL("out")}/realtime/v1/sessions/${encodeURIComponent(chatId)}/out`;
 
     return new ReadableStream<UIMessageChunk>({
       start: async (controller) => {
@@ -693,6 +747,7 @@ export class AgentChat<TAgent = unknown> {
             signal: combinedSignal,
             timeoutInSeconds: this.streamTimeoutSeconds,
             lastEventId: state.lastEventId,
+            fetchClient: sseFetchClient,
           });
           const sseStream = await subscription.subscribe();
           const reader = sseStream.getReader();
diff --git a/packages/trigger-sdk/src/v3/chat.test.ts b/packages/trigger-sdk/src/v3/chat.test.ts
index 5f50854ec41..6469f1ac86c 100644
--- a/packages/trigger-sdk/src/v3/chat.test.ts
+++ b/packages/trigger-sdk/src/v3/chat.test.ts
@@ -609,6 +609,94 @@ describe("TriggerChatTransport", () => {
       expect(subscribe!).toContain("/realtime/v1/sessions/chat-by-chatid/out");
     });
 
+    it("functional baseURL dispatches per endpoint (in vs out)", async () => {
+      // Record every URL that reaches the global fetch, so the host chosen
+      // by the resolver can be asserted per endpoint.
+      const requestedUrls: string[] = [];
+      global.fetch = vi.fn().mockImplementation(async (url: string | URL) => {
+        const urlStr = typeof url === "string" ? url : url.toString();
+        requestedUrls.push(urlStr);
+        if (isSessionStreamAppendUrl(urlStr)) return defaultAppendResponse();
+        if (isSessionOutSubscribeUrl(urlStr)) return defaultSseResponse();
+        throw new Error(`Unexpected URL: ${urlStr}`);
+      });
+
+      const baseURLFn = vi.fn(({ endpoint }: { endpoint: "in" | "out"; chatId: string }) =>
+        endpoint === "out" ? "https://stream.example.com" : "https://api.example.com"
+      );
+
+      const transport = new TriggerChatTransport({
+        task: "my-chat-task",
+        accessToken: () => "pat",
+        baseURL: baseURLFn,
+        sessions: { "chat-fn": { publicAccessToken: "p" } },
+      });
+
+      const stream = await transport.sendMessages({
+        trigger: "submit-message",
+        chatId: "chat-fn",
+        messageId: undefined,
+        messages: [createUserMessage("Hi")],
+        abortSignal: undefined,
+      });
+      await drainChunks(stream);
+
+      const appendCalls = baseURLFn.mock.calls.filter((c) => c[0].endpoint === "in");
+      const outCalls = baseURLFn.mock.calls.filter((c) => c[0].endpoint === "out");
+      expect(appendCalls.length).toBeGreaterThanOrEqual(1);
+      expect(outCalls.length).toBeGreaterThanOrEqual(1);
+      expect(appendCalls[0]![0].chatId).toBe("chat-fn");
+      expect(outCalls[0]![0].chatId).toBe("chat-fn");
+
+      const append = requestedUrls.find((u) => isSessionStreamAppendUrl(u));
+      const subscribe = requestedUrls.find((u) => isSessionOutSubscribeUrl(u));
+      expect(append!.startsWith("https://api.example.com/")).toBe(true);
+      expect(subscribe!.startsWith("https://stream.example.com/")).toBe(true);
+    });
+
+    it("fetch override is invoked for both .in/append and .out SSE with endpoint ctx", async () => {
+      const observed: Array<{ url: string; endpoint: string; chatId: string }> = [];
+      // Stand-in for the caller's fetch: logs the endpoint context it receives.
+      const customFetch = vi.fn(
+        async (
+          url: string,
+          init: RequestInit,
+          ctx: { endpoint: "in" | "out"; chatId: string }
+        ) => {
+          observed.push({ url, endpoint: ctx.endpoint, chatId: ctx.chatId });
+          if (isSessionStreamAppendUrl(url)) return defaultAppendResponse();
+          if (isSessionOutSubscribeUrl(url)) return defaultSseResponse();
+          throw new Error(`Unexpected URL: ${url}`);
+        }
+      );
+      // Any request that bypasses the override rejects.
+      global.fetch = vi.fn().mockRejectedValue(new Error("global fetch should not be called"));
+
+      const transport = new TriggerChatTransport({
+        task: "my-chat-task",
+        accessToken: () => "pat",
+        baseURL: "https://api.test.trigger.dev",
+        fetch: customFetch,
+        sessions: { "chat-fetch": { publicAccessToken: "p" } },
+      });
+
+      const stream = await transport.sendMessages({
+        trigger: "submit-message",
+        chatId: "chat-fetch",
+        messageId: undefined,
+        messages: [createUserMessage("Hi")],
+        abortSignal: undefined,
+      });
+      await drainChunks(stream);
+
+      // Both endpoints must have gone through the override with the chat id.
+      for (const endpoint of ["in", "out"]) {
+        const hits = observed.filter((call) => call.endpoint === endpoint);
+        expect(hits.length).toBeGreaterThanOrEqual(1);
+        expect(hits[0]!.chatId).toBe("chat-fetch");
+      }
+    });
+
     it("routes .out SSE through streamBaseURL while appends stay on baseURL", async () => {
       const requests: string[] = [];
       global.fetch = vi.fn().mockImplementation(async (url: string | URL) => {
diff --git a/packages/trigger-sdk/src/v3/chat.ts b/packages/trigger-sdk/src/v3/chat.ts
index a979b8f2b11..aaa3871e34a 100644
--- a/packages/trigger-sdk/src/v3/chat.ts
+++ b/packages/trigger-sdk/src/v3/chat.ts
@@ -25,7 +25,6 @@
 
 import type { ChatTransport, UIMessage, UIMessageChunk, ChatRequestOptions } from "ai";
 import {
-  ApiClient,
   controlSubtype,
   headerValue,
   PUBLIC_ACCESS_TOKEN_HEADER,
@@ -38,6 +37,43 @@ import type { ChatInputChunk, ChatTaskWirePayload } from "./ai-shared.js";
 const DEFAULT_BASE_URL = "https://api.trigger.dev";
 const DEFAULT_STREAM_TIMEOUT_SECONDS = 120;
 
+/**
+ * Discriminator passed to per-endpoint `baseURL` and `fetch` callbacks.
+ *
+ * - `"in"` — `POST /realtime/v1/sessions/{chatId}/in/append` (user messages,
+ *   stops, actions).
+ * - `"out"` — `GET /realtime/v1/sessions/{chatId}/out` (SSE response stream).
+ *
+ * Other endpoints (`/api/v1/sessions`, `/api/v1/auth/jwt/claims`) are reached
+ * from the server-side `chat.createStartSessionAction` and `accessToken`
+ * callback, not the transport — they accept the same callback shape on their
+ * own option objects.
+ */
+export type ChatTransportEndpoint = "in" | "out";
+
+/** Context passed to `baseURL` and `fetch` callbacks. */
+export type ChatTransportEndpointContext = {
+  endpoint: ChatTransportEndpoint;
+  chatId: string;
+};
+
+/** Resolver form of `baseURL` — return the base for the given endpoint. */
+export type ChatBaseURLResolver = (ctx: ChatTransportEndpointContext) => string;
+
+/**
+ * Per-request fetch override. Receives the fully-resolved URL and the
+ * RequestInit the transport would have used, plus endpoint context for
+ * routing decisions. Customers can rewrite the URL, inject headers, or
+ * delegate to a custom transport (e.g. a Cloudflare worker fronting
+ * `api.trigger.dev`). Must return a `Response` semantically equivalent to
+ * what `globalThis.fetch(url, init)` would have returned.
+ */
+export type ChatFetchOverride = (
+  url: string,
+  init: RequestInit,
+  ctx: ChatTransportEndpointContext
+) => Promise<Response>;
+
 /**
  * Detect 401/403 from realtime/input-stream calls without relying on `instanceof`
  * (Vitest can load duplicate `@trigger.dev/core` copies, which breaks subclass checks).
@@ -229,18 +265,45 @@ export type TriggerChatTransportOptions<TClientData = unknown> = {
     >
   ) => Promise<StartSessionResult>;
 
-  /** Base URL for the Trigger.dev API. @default "https://api.trigger.dev" */
-  baseURL?: string;
+  /**
+   * Base URL for the Trigger.dev API. Either a single string applied to every
+   * endpoint, or a function called per request that picks a base URL from the
+   * endpoint discriminator and chat ID. @default "https://api.trigger.dev"
+   *
+   * @example Route appends through a proxy, SSE direct:
+   * ```ts
+   * baseURL: ({ endpoint }) =>
+   *   endpoint === "out" ? "https://api.trigger.dev" : "https://proxy.example.com",
+   * ```
+   */
+  baseURL?: string | ChatBaseURLResolver;
 
   /**
    * Base URL for the SSE stream subscription only (`GET .../sessions/{chatId}/out`).
-   * Falls back to `baseURL` when unset. Set this to route the long-lived
-   * stream through a custom proxy (e.g. a Cloudflare worker capturing JA4
-   * fingerprints for bot detection) while keeping append POSTs direct to
-   * `baseURL` to avoid an extra hop on every user message.
+   * @deprecated Pass a function for `baseURL` instead and branch on
+   * `endpoint === "out"`. `streamBaseURL` continues to work for backwards
+   * compatibility and wins over `baseURL` for the SSE endpoint when both
+   * are set.
    */
   streamBaseURL?: string;
 
+  /**
+   * Optional per-request fetch override. Called with the resolved URL and the
+   * RequestInit the transport built, plus endpoint context. Use this to
+   * inject custom headers (e.g. distributed tracing), redirect via a proxy,
+   * or wrap fetch with retries/logging.
+   *
+   * @example Add a tracing header to every chat request:
+   * ```ts
+   * fetch: (url, init, ctx) => {
+   *   init.headers = new Headers(init.headers);
+   *   init.headers.set("traceparent", currentTraceparent());
+   *   return globalThis.fetch(url, init);
+   * },
+   * ```
+   */
+  fetch?: ChatFetchOverride;
+
   /** Additional headers included in every API request. */
   headers?: Record<string, string>;
 
@@ -361,8 +424,8 @@ export class TriggerChatTransport implements ChatTransport<UIMessage> {
   private readonly resolveStartSession:
     | ((params: StartSessionParams<Record<string, unknown>>) => Promise<StartSessionResult>)
     | undefined;
-  private readonly baseURL: string;
-  private readonly streamBaseURL: string;
+  private readonly resolveBaseURLFn: ChatBaseURLResolver;
+  private readonly fetchOverride: ChatFetchOverride | undefined;
   private readonly extraHeaders: Record<string, string>;
   private readonly streamTimeoutSeconds: number;
   private defaultMetadata: Record<string, unknown> | undefined;
@@ -383,8 +446,12 @@ export class TriggerChatTransport implements ChatTransport<UIMessage> {
     this.resolveStartSession = options.startSession as
       | ((params: StartSessionParams<Record<string, unknown>>) => Promise<StartSessionResult>)
       | undefined;
-    this.baseURL = options.baseURL ?? DEFAULT_BASE_URL;
-    this.streamBaseURL = options.streamBaseURL ?? this.baseURL;
+    const baseURLOption = options.baseURL ?? DEFAULT_BASE_URL;
+    const streamOverride = options.streamBaseURL;
+    this.resolveBaseURLFn = typeof baseURLOption === "function"
+      ? (ctx) => (ctx.endpoint === "out" && streamOverride ? streamOverride : baseURLOption(ctx))
+      : (ctx) => (ctx.endpoint === "out" && streamOverride ? streamOverride : baseURLOption);
+    this.fetchOverride = options.fetch;
     this.extraHeaders = options.headers ?? {};
     this.streamTimeoutSeconds = options.streamTimeoutSeconds ?? DEFAULT_STREAM_TIMEOUT_SECONDS;
     this.defaultMetadata = options.clientData;
@@ -528,10 +595,9 @@ export class TriggerChatTransport implements ChatTransport<UIMessage> {
     const state = await this.ensureSessionState(chatId);
 
     const sendChatMessage = async (token: string) => {
-      const apiClient = new ApiClient(this.baseURL, token);
-      await apiClient.appendToSessionStream(
+      await this.appendInputChunk(
         chatId,
-        "in",
+        token,
         this.serializeInputChunk({ kind: "message", payload: wirePayload })
       );
     };
@@ -605,17 +671,23 @@ export class TriggerChatTransport implements ChatTransport<UIMessage> {
     }
 
     // Hydrate session state from response headers so subsequent turns
-    // skip the endpoint and write directly to session.in.
+    // skip the endpoint and write directly to session.in. Failing fast
+    // when the header is missing avoids a quiet degraded state where
+    // every later turn re-runs the handover route instead of taking
+    // the slim-wire path.
     const accessToken = response.headers.get("X-Trigger-Chat-Access-Token");
     const chatId = args.chatId;
-    if (accessToken) {
-      const state: ChatSessionState = {
-        publicAccessToken: accessToken,
-        isStreaming: true,
-      };
-      this.sessions.set(chatId, state);
-      this.notifySessionChange(chatId, state);
+    if (!accessToken) {
+      throw new Error(
+        "chat.handover response is missing the X-Trigger-Chat-Access-Token header. chat.agent's handover endpoint must echo the session PAT so the transport can hydrate."
+      );
     }
+    const state: ChatSessionState = {
+      publicAccessToken: accessToken,
+      isStreaming: true,
+    };
+    this.sessions.set(chatId, state);
+    this.notifySessionChange(chatId, state);
 
     // Filter the parsed UIMessage stream:
     //   - Drop control chunks (`trigger:turn-complete`,
@@ -708,10 +780,9 @@ export class TriggerChatTransport implements ChatTransport<UIMessage> {
     };
 
     const send = async (token: string) => {
-      const apiClient = new ApiClient(this.baseURL, token);
-      await apiClient.appendToSessionStream(
+      await this.appendInputChunk(
         chatId,
-        "in",
+        token,
         this.serializeInputChunk({ kind: "message", payload: wirePayload })
       );
     };
@@ -768,12 +839,7 @@ export class TriggerChatTransport implements ChatTransport<UIMessage> {
     if (!state) return false;
 
     const send = async (token: string) => {
-      const api = new ApiClient(this.baseURL, token);
-      await api.appendToSessionStream(
-        chatId,
-        "in",
-        this.serializeInputChunk({ kind: "stop" })
-      );
+      await this.appendInputChunk(chatId, token, this.serializeInputChunk({ kind: "stop" }));
     };
 
     try {
@@ -822,8 +888,7 @@ export class TriggerChatTransport implements ChatTransport<UIMessage> {
 
     const body = this.serializeInputChunk({ kind: "message", payload: wirePayload });
     const send = async (token: string) => {
-      const apiClient = new ApiClient(this.baseURL, token);
-      await apiClient.appendToSessionStream(chatId, "in", body);
+      await this.appendInputChunk(chatId, token, body);
     };
 
     await this.callWithAuthRetry(chatId, state, send);
@@ -894,6 +959,14 @@ export class TriggerChatTransport implements ChatTransport<UIMessage> {
     this.coordinator?.removeMessagesListener(fn);
   }
   dispose(): void {
+    // Tear down any open session.out subscriptions before the coordinator
+    // goes away. Otherwise controllers in `activeStreams` keep reading
+    // until they time out, leaking network and memory on every
+    // unmount/navigation.
+    for (const controller of this.activeStreams.values()) {
+      controller.abort();
+    }
+    this.activeStreams.clear();
     this.coordinator?.dispose();
     this.coordinator = null;
   }
@@ -978,6 +1051,41 @@ export class TriggerChatTransport implements ChatTransport<UIMessage> {
    * Run `op` with the session's stored PAT. On 401/403, refresh the PAT
    * via `accessToken` and retry once. Surfaces non-auth errors as-is.
    */
+  /** Base URL for `ctx`, minus trailing slashes so the path joins below never produce "//". */
+  private resolveBaseURL(ctx: ChatTransportEndpointContext): string {
+    return this.resolveBaseURLFn(ctx).replace(/\/+$/, "");
+  }
+
+  /** Send via the configured `fetch` override, falling back to the global `fetch`. */
+  private async doFetch(
+    ctx: ChatTransportEndpointContext,
+    url: string,
+    init: RequestInit
+  ): Promise<Response> {
+    return this.fetchOverride ? this.fetchOverride(url, init, ctx) : fetch(url, init);
+  }
+
+  /** POST `body` to the session's `in` stream; a non-2xx reply throws with the HTTP `status` set. */
+  private async appendInputChunk(chatId: string, token: string, body: string): Promise<void> {
+    const ctx: ChatTransportEndpointContext = { endpoint: "in", chatId };
+    const url = `${this.resolveBaseURL(ctx)}/realtime/v1/sessions/${encodeURIComponent(chatId)}/in/append`;
+    const headers: Record<string, string> = {
+      "Content-Type": "application/json",
+      Authorization: `Bearer ${token}`,
+      "x-trigger-source": "sdk",
+      // Spread last so caller-supplied `headers` can override the defaults above.
+      ...this.extraHeaders,
+    };
+    const response = await this.doFetch(ctx, url, { method: "POST", headers, body });
+    if (!response.ok) {
+      const text = await response.text().catch(() => "");
+      throw Object.assign(new Error(`appendToSessionStream failed: ${response.status} ${text}`), {
+        name: "TriggerApiError",
+        status: response.status,
+      });
+    }
+  }
+
   private async callWithAuthRetry(
     chatId: string,
     state: ChatSessionState,
@@ -1026,14 +1134,11 @@ export class TriggerChatTransport implements ChatTransport<UIMessage> {
         () => {
           if (options?.sendStopOnAbort !== false) {
             state.skipToTurnComplete = true;
-            const api = new ApiClient(this.baseURL, state.publicAccessToken);
-            api
-              .appendToSessionStream(
-                chatId,
-                "in",
-                this.serializeInputChunk({ kind: "stop" })
-              )
-              .catch(() => {});
+            this.appendInputChunk(
+              chatId,
+              state.publicAccessToken,
+              this.serializeInputChunk({ kind: "stop" })
+            ).catch(() => {});
           }
           internalAbort.abort();
         },
@@ -1041,7 +1146,7 @@ export class TriggerChatTransport implements ChatTransport<UIMessage> {
       );
     }
 
-    const streamUrl = `${this.streamBaseURL}/realtime/v1/sessions/${encodeURIComponent(chatId)}/out`;
+    const streamUrl = `${this.resolveBaseURL({ endpoint: "out", chatId })}/realtime/v1/sessions/${encodeURIComponent(chatId)}/out`;
 
     return new ReadableStream<UIMessageChunk>({
       start: async (controller) => {
@@ -1099,6 +1204,31 @@ export class TriggerChatTransport implements ChatTransport<UIMessage> {
               })()
             : () => {};
 
+        const sseCtx: ChatTransportEndpointContext = { endpoint: "out", chatId };
+        const fetchOverride = this.fetchOverride;
+        const sseFetchClient: typeof fetch | undefined = fetchOverride
+          ? ((input, init) => {
+              if (typeof input === "string") {
+                return fetchOverride(input, init ?? {}, sseCtx);
+              }
+              if (input instanceof URL) {
+                return fetchOverride(input.toString(), init ?? {}, sseCtx);
+              }
+              // Request — preserve its url + intrinsic init, let any
+              // provided init override on top (matches fetch(Request, init)
+              // semantics).
+              return fetchOverride(
+                input.url,
+                {
+                  method: input.method,
+                  headers: input.headers,
+                  signal: input.signal,
+                  ...(init ?? {}),
+                },
+                sseCtx
+              );
+            }) as typeof fetch
+          : undefined;
         const connectSseOnce = async (token: string) => {
           const subscription = new SSEStreamSubscription(streamUrl, {
             headers: {
@@ -1113,6 +1243,7 @@ export class TriggerChatTransport implements ChatTransport<UIMessage> {
             // keepalive) arrives in 60s, force reconnect. Sized
             // generously over typical agent thinking pauses.
             stallTimeoutMs: 60_000,
+            fetchClient: sseFetchClient,
           });
           currentSubscription = subscription;
           const sseStream = await subscription.subscribe();
diff --git a/packages/trigger-sdk/src/v3/createStartSessionAction.test.ts b/packages/trigger-sdk/src/v3/createStartSessionAction.test.ts
new file mode 100644
index 00000000000..2b3214b77d1
--- /dev/null
+++ b/packages/trigger-sdk/src/v3/createStartSessionAction.test.ts
@@ -0,0 +1,136 @@
+import { afterEach, describe, expect, expectTypeOf, it } from "vitest";
+import { z } from "zod";
+import type { CreateSessionRequestBody, CreatedSessionResponseBody } from "@trigger.dev/core/v3";
+
+import { chat } from "./ai.js";
+import {
+  __setSessionStartImplForTests,
+  __setSessionOpenImplForTests,
+  SessionHandle,
+} from "./sessions.js";
+import { apiClientManager } from "@trigger.dev/core/v3";
+
+// `auth.createPublicToken` is called by the action when no start token is
+// supplied. Provide a minimal API client config so the mint path doesn't
+// throw before we get to assert the captured request body.
+apiClientManager.setGlobalAPIClientConfiguration({
+  baseURL: "https://example.invalid",
+  accessToken: "tr_test_secret",
+});
+
+// Capture the request body the action would send to `sessions.start()`.
+let lastStartBody: CreateSessionRequestBody | undefined;
+
+function installStartFixture() {
+  __setSessionStartImplForTests(async (body): Promise<CreatedSessionResponseBody> => {
+    lastStartBody = body;
+    return {
+      id: "session_fixture",
+      externalId: body.externalId ?? null,
+      type: body.type,
+      taskIdentifier: body.taskIdentifier,
+      triggerConfig: body.triggerConfig,
+      currentRunId: "run_fixture",
+      tags: body.triggerConfig.tags ?? [],
+      metadata: body.metadata ?? null,
+      closedAt: null,
+      closedReason: null,
+      expiresAt: null,
+      createdAt: new Date(),
+      updatedAt: new Date(),
+      runId: "run_fixture",
+      publicAccessToken: "tr_pat_fixture",
+      isCached: false,
+    };
+  });
+  __setSessionOpenImplForTests(() => new SessionHandle("session_fixture"));
+}
+
+afterEach(() => {
+  __setSessionStartImplForTests(undefined);
+  __setSessionOpenImplForTests(undefined);
+  lastStartBody = undefined;
+});
+
+// Build a fake chat agent task shape that the generic can narrow against.
+// We only need the static type — the runtime never invokes this task because
+// `__setSessionStartImplForTests` intercepts the network call.
+const fakeChat = chat
+  .withClientData({
+    schema: z.object({
+      userId: z.string(),
+      plan: z.enum(["free", "pro"]),
+    }),
+  })
+  .agent({
+    id: "fake-chat",
+    run: async () => undefined as any,
+  });
+
+describe("chat.createStartSessionAction — runtime", () => {
+  it("folds typed clientData into basePayload.metadata so onChatStart sees it on the first turn", async () => {
+    installStartFixture();
+
+    const start = chat.createStartSessionAction<typeof fakeChat>("fake-chat");
+
+    const result = await start({
+      chatId: "chat-1",
+      clientData: { userId: "u-1", plan: "pro" },
+    });
+
+    expect(result.publicAccessToken).toBe("tr_pat_fixture");
+    expect(lastStartBody?.triggerConfig.basePayload).toMatchObject({
+      messages: [],
+      trigger: "preload",
+      metadata: { userId: "u-1", plan: "pro" },
+      chatId: "chat-1",
+    });
+  });
+
+  it("leaves basePayload.metadata unset when clientData is not provided", async () => {
+    installStartFixture();
+
+    const start = chat.createStartSessionAction("fake-chat");
+    await start({ chatId: "chat-2" });
+
+    expect(lastStartBody?.triggerConfig.basePayload).not.toHaveProperty("metadata");
+  });
+
+  it("keeps session-level metadata distinct from per-turn clientData", async () => {
+    installStartFixture();
+
+    const start = chat.createStartSessionAction<typeof fakeChat>("fake-chat");
+    await start({
+      chatId: "chat-3",
+      clientData: { userId: "u-3", plan: "free" },
+      metadata: { source: "marketing-site" },
+    });
+
+    // Per-turn shape (visible to onPreload / onChatStart):
+    expect(lastStartBody?.triggerConfig.basePayload).toMatchObject({
+      metadata: { userId: "u-3", plan: "free" },
+    });
+    // Session-row metadata (opaque, never typed via clientDataSchema):
+    expect(lastStartBody?.metadata).toEqual({ source: "marketing-site" });
+  });
+});
+
+describe("chat.createStartSessionAction — types", () => {
+  it("narrows clientData against the chat agent's clientDataSchema", () => {
+    const start = chat.createStartSessionAction<typeof fakeChat>("fake-chat");
+
+    // The clientData field is typed off the agent's schema.
+    expectTypeOf<Parameters<typeof start>[0]["clientData"]>().toEqualTypeOf<
+      { userId: string; plan: "free" | "pro" } | undefined
+    >();
+    // The agent's typed clientData is strictly narrower than `unknown`.
+    expectTypeOf<Parameters<typeof start>[0]["clientData"]>().not.toEqualTypeOf<unknown>();
+  });
+
+  it("defaults clientData to unknown when called without a generic", () => {
+    const start = chat.createStartSessionAction("fake-chat");
+    expectTypeOf(start).parameter(0).toHaveProperty("clientData");
+    // Untyped variant — clientData is `unknown`.
+    expectTypeOf<Parameters<typeof start>[0]["clientData"]>().toEqualTypeOf<unknown>();
+  });
+});
diff --git a/packages/trigger-sdk/src/v3/sessions.test.ts b/packages/trigger-sdk/src/v3/sessions.test.ts
new file mode 100644
index 00000000000..abeccb0c12d
--- /dev/null
+++ b/packages/trigger-sdk/src/v3/sessions.test.ts
@@ -0,0 +1,186 @@
+import { describe, expect, it, vi } from "vitest";
+
+// Per-test override for the stubbed SessionStreamInstance's wait() so a
+// test can simulate downstream writer failures (e.g. S2 auth error after
+// initializeSessionStream returned a stale token). Reset at the top of
+// each test that touches it.
+let stubWaitImpl: (() => Promise<{ lastEventId?: string }>) | undefined;
+
+// Stub `SessionStreamInstance` so constructing a channel writer doesn't try
+// to reach S2. The stub still invokes the `initializeSession` callback the
+// channel passes in, which is the whole point: that's how the cache gets
+// exercised. wait() resolves immediately by default; tests can override it
+// via `stubWaitImpl` to verify reactive invalidation on writer failure.
+vi.mock("@trigger.dev/core/v3", async (importActual) => {
+  const actual = (await importActual()) as Record<string, unknown>;
+  class StubSessionStreamInstance<T> {
+    private waitPromise: Promise<{ lastEventId?: string }>;
+    constructor(opts: {
+      source: ReadableStream<T>;
+      initializeSession?: () => Promise<{ headers?: Record<string, string> }>;
+    }) {
+      // Drain the source so the upstream tee doesn't backpressure-stall.
+      void (async () => {
+        const reader = opts.source.getReader();
+        try {
+          while (true) {
+            const { done } = await reader.read();
+            if (done) break;
+          }
+        } finally {
+          reader.releaseLock();
+        }
+      })();
+      // Trigger the initializeSession callback so the cache path runs.
+      opts.initializeSession?.().catch(() => {
+        // Failures are observed via the spy; swallow here so unhandled
+        // rejection warnings don't leak through the stub.
+      });
+      // Capture the wait outcome once at construction (mirrors real
+      // SessionStreamInstance which kicks off initializeWriter from the
+      // ctor). All subsequent wait() calls return the same promise so
+      // a single failure is observable by every consumer in the channel
+      // (`.finally`, reactive `.catch`, and customer `waitUntilComplete`).
+      this.waitPromise = stubWaitImpl
+        ? stubWaitImpl()
+        : Promise.resolve({ lastEventId: undefined });
+      // Claim any rejection so test runs don't surface as unhandled.
+      // Real awaiters still observe the rejection when they `await` it.
+      this.waitPromise.catch(() => {});
+    }
+    async wait() {
+      return this.waitPromise;
+    }
+    get stream() {
+      return new ReadableStream<T>({ start: (c) => c.close() });
+    }
+  }
+  return { ...actual, SessionStreamInstance: StubSessionStreamInstance };
+});
+
+import { SessionOutputChannel } from "./sessions.js";
+import { apiClientManager } from "@trigger.dev/core/v3";
+
+type ApiClientStub = {
+  initializeSessionStream: ReturnType<typeof vi.fn>;
+};
+
+function installStubApiClient(impl: ApiClientStub["initializeSessionStream"]): ApiClientStub {
+  // `#pipeInternal` obtains its client via `apiClientManager.clientOrThrow()`,
+  // so handing back the stub there makes `initializeSessionStream` land on `impl`.
+  const fakeClient: ApiClientStub = { initializeSessionStream: impl };
+  type RealClient = ReturnType<typeof apiClientManager.clientOrThrow>;
+  vi.spyOn(apiClientManager, "clientOrThrow").mockReturnValue(fakeClient as unknown as RealClient);
+  return fakeClient;
+}
+
+function emptyStream(): ReadableStream<unknown> {
+  return new ReadableStream<unknown>({ start: (controller) => controller.close() });
+}
+
+describe("SessionOutputChannel initializeSessionStream cache", () => {
+  it("dedupes repeated pipe()/writer() calls for the same channel", async () => {
+    stubWaitImpl = undefined;
+    const spy = vi.fn(async () => ({ version: "v2", headers: {} }));
+    installStubApiClient(spy);
+
+    const channel = new SessionOutputChannel("session-1");
+    const p1 = channel.pipe(emptyStream());
+    const p2 = channel.pipe(emptyStream());
+    const p3 = channel.writer({
+      execute: ({ write }) => {
+        write({ chunk: 1 });
+      },
+    });
+
+    await Promise.all([p1.waitUntilComplete(), p2.waitUntilComplete(), p3.waitUntilComplete()]);
+
+    expect(spy).toHaveBeenCalledTimes(1);
+    expect(spy).toHaveBeenCalledWith("session-1", "out", undefined);
+  });
+
+  it("evicts on initialize failure so the next call retries instead of returning a poisoned entry", async () => {
+    stubWaitImpl = undefined;
+    const spy = vi
+      .fn()
+      .mockRejectedValueOnce(new Error("boom"))
+      .mockResolvedValueOnce({ version: "v2", headers: {} });
+    installStubApiClient(spy);
+
+    const channel = new SessionOutputChannel("session-1");
+    const firstAttempt = channel.pipe(emptyStream());
+    // First call fails — the stub swallows the rejection on the
+    // initializeSession callback, but the cache eviction handler still runs.
+    await firstAttempt.waitUntilComplete();
+    // Settle pending microtasks so the .catch() eviction fires.
+    await new Promise<void>((resolve) => setTimeout(resolve, 0));
+
+    const retried = channel.pipe(emptyStream());
+    await retried.waitUntilComplete();
+
+    expect(spy).toHaveBeenCalledTimes(2);
+  });
+
+  it("reset() clears cached entries so the next call re-PUTs", async () => {
+    stubWaitImpl = undefined;
+    const spy = vi.fn(async () => ({ version: "v2", headers: {} }));
+    installStubApiClient(spy);
+
+    const channel = new SessionOutputChannel("session-1");
+    await channel.pipe(emptyStream()).waitUntilComplete();
+    expect(spy).toHaveBeenCalledTimes(1);
+
+    channel.reset();
+
+    await channel.pipe(emptyStream()).waitUntilComplete();
+    expect(spy).toHaveBeenCalledTimes(2);
+  });
+
+  it("scopes the cache per channel instance", async () => {
+    stubWaitImpl = undefined;
+    const spy = vi.fn(async () => ({ version: "v2", headers: {} }));
+    installStubApiClient(spy);
+
+    const channelA = new SessionOutputChannel("session-a");
+    const channelB = new SessionOutputChannel("session-b");
+
+    await Promise.all([
+      channelA.pipe(emptyStream()).waitUntilComplete(),
+      channelB.pipe(emptyStream()).waitUntilComplete(),
+    ]);
+
+    expect(spy).toHaveBeenCalledTimes(2);
+    expect(spy).toHaveBeenCalledWith("session-a", "out", undefined);
+    expect(spy).toHaveBeenCalledWith("session-b", "out", undefined);
+  });
+
+  it("evicts the cache when a writer's wait() rejects (simulated stale-token failure)", async () => {
+    const spy = vi.fn(async () => ({ version: "v2", headers: {} }));
+    installStubApiClient(spy);
+
+    // First writer's wait() rejects (e.g. S2 returned 401 after the cached
+    // token expired mid-process); subsequent writers' wait() resolve cleanly.
+    let waitCallCount = 0;
+    stubWaitImpl = async () => {
+      waitCallCount++;
+      if (waitCallCount === 1) throw new Error("S2 auth failed: token expired");
+      return { lastEventId: undefined };
+    };
+
+    const channel = new SessionOutputChannel("session-1");
+
+    const failed = channel.pipe(emptyStream());
+    await expect(failed.waitUntilComplete()).rejects.toThrow(/token expired/);
+
+    // Settle microtasks so the reactive .catch eviction handler fires.
+    await new Promise<void>((resolve) => setTimeout(resolve, 0));
+
+    const recovered = channel.pipe(emptyStream());
+    await recovered.waitUntilComplete();
+
+    // Cache evicted ⇒ second pipe() re-PUT ⇒ two distinct initialize calls.
+    expect(spy).toHaveBeenCalledTimes(2);
+
+    stubWaitImpl = undefined;
+  });
+});
diff --git a/packages/trigger-sdk/src/v3/sessions.ts b/packages/trigger-sdk/src/v3/sessions.ts
index 663dbbebc30..ea3ebd8d937 100644
--- a/packages/trigger-sdk/src/v3/sessions.ts
+++ b/packages/trigger-sdk/src/v3/sessions.ts
@@ -34,7 +34,11 @@ import {
   trimSessionStream,
   writeSessionControlRecord,
 } from "@trigger.dev/core/v3";
-import type { ControlEvent, StreamWriteResult } from "@trigger.dev/core/v3";
+import type {
+  ControlEvent,
+  InitializeSessionStreamResponseLike,
+  StreamWriteResult,
+} from "@trigger.dev/core/v3";
 import { conditionallyImportAndParsePacket } from "@trigger.dev/core/v3/utils/ioSerialization";
 import { SpanStatusCode } from "@opentelemetry/api";
 import { tracer } from "./tracer.js";
@@ -266,8 +270,30 @@ export type SessionPipeStreamOptions = Omit<PipeStreamOptions, "target">;
  * internally by `pipe`/`writer` — there's no public `initialize()`.
  */
 export class SessionOutputChannel {
+  // Cache of the in-flight / resolved `initializeSessionStream` PUT for
+  // this channel. Every `pipe()` / `writer()` call needs the same S2
+  // credentials, so we share a single promise instead of re-PUTing on
+  // every chunk. Hot-loop writers (per-chunk `chat.response.write` /
+  // direct `session.out.writer` calls) drop from N PUTs to 1 PUT for
+  // the lifetime of the channel. The S2 access token has a 1-day TTL
+  // server-side so reusing it across calls within a single run is safe.
+  // Evicts on failure (so the next call retries) and on `reset()`.
+  #initPromise?: Promise<InitializeSessionStreamResponseLike>;
+
   constructor(public readonly sessionId: string) {}
 
+  /**
+   * Drop the cached `initializeSessionStream` response. Exposed for
+   * tests and lifecycle hooks that need the next write to re-mint S2
+   * credentials. The cache also self-evicts when the initialize call
+   * rejects, so callers don't need to invoke this on failures.
+   *
+   * @internal
+   */
+  reset(): void {
+    this.#initPromise = undefined;
+  }
+
   /**
    * Append a single record. Routes through {@link writer} internally so
    * subscribers receive the same parsed-object shape as multi-record
@@ -425,9 +451,52 @@ export class SessionOutputChannel {
     const readableStreamSource = ensureReadableStream(value);
 
     const abortController = new AbortController();
-    const combinedSignal = options?.signal
-      ? AbortSignal.any?.([options.signal, abortController.signal]) ?? abortController.signal
-      : abortController.signal;
+    // `AbortSignal.any` lands in Node 20.3; the SDK still supports Node
+    // 18.20+. On older runtimes fall back to wiring `options.signal` into
+    // `abortController` manually so caller-driven cancellation propagates.
+    let combinedSignal: AbortSignal = abortController.signal;
+    // Set in the Node 18 fallback path so the caller's `signal.addEventListener`
+    // registration can be cleared once the stream finishes. Without this, a
+    // long-lived caller signal (e.g. one reused across many `writer()` calls)
+    // accumulates listeners on every completed turn.
+    let removeCallerAbortListener: (() => void) | undefined;
+    if (options?.signal) {
+      if (typeof AbortSignal.any === "function") {
+        combinedSignal = AbortSignal.any([options.signal, abortController.signal]);
+      } else {
+        const callerSignal = options.signal;
+        if (callerSignal.aborted) {
+          abortController.abort(callerSignal.reason);
+        } else {
+          const onCallerAbort = () => abortController.abort(callerSignal.reason);
+          callerSignal.addEventListener("abort", onCallerAbort, { once: true });
+          removeCallerAbortListener = () =>
+            callerSignal.removeEventListener("abort", onCallerAbort);
+        }
+      }
+    }
+
+    // Resolve the init promise eagerly so we can capture which one this
+    // writer uses for reactive invalidation below.
+    const writerInitPromise = ((): Promise<InitializeSessionStreamResponseLike> => {
+      if (this.#initPromise) {
+        return this.#initPromise;
+      }
+      const fresh = apiClient.initializeSessionStream(
+        this.sessionId,
+        "out",
+        options?.requestOptions
+      );
+      this.#initPromise = fresh;
+      // Evict on failure so the next call retries instead of returning a
+      // poisoned cache entry forever.
+      fresh.catch((err) => {
+        if (this.#initPromise === fresh) {
+          this.#initPromise = undefined;
+        }
+      });
+      return fresh;
+    })();
 
     try {
       const instance = new SessionStreamInstance<T>({
@@ -438,11 +507,30 @@ export class SessionOutputChannel {
         source: readableStreamSource,
         signal: combinedSignal,
         requestOptions: options?.requestOptions,
+        initializeSession: () => writerInitPromise,
       });
 
-      instance.wait().finally(() => {
-        span.end();
-      });
+      // Single internal chain that handles span lifecycle AND reactive
+      // invalidation. On rejection we evict the cached init promise so
+      // the next pipe()/writer() re-PUTs and recovers (e.g. when a
+      // cached S2 access token expired mid-process). Compare by identity
+      // so a concurrent caller's fresh promise isn't accidentally cleared.
+      // Customer awaiters still observe the rejection via the returned
+      // `waitUntilComplete()`; this chain just keeps the cleanup path
+      // from surfacing as unhandled.
+      instance.wait().then(
+        () => {
+          removeCallerAbortListener?.();
+          span.end();
+        },
+        () => {
+          removeCallerAbortListener?.();
+          if (this.#initPromise === writerInitPromise) {
+            this.#initPromise = undefined;
+          }
+          span.end();
+        }
+      );
 
       return {
         stream: instance.stream,
@@ -451,6 +539,7 @@ export class SessionOutputChannel {
         },
       };
     } catch (error) {
+      removeCallerAbortListener?.();
       if (error instanceof Error && error.name === "AbortError") {
         span.end();
         throw error;
diff --git a/packages/trigger-sdk/src/v3/shared.ts b/packages/trigger-sdk/src/v3/shared.ts
index 0e6389a053c..b8e1874b5be 100644
--- a/packages/trigger-sdk/src/v3/shared.ts
+++ b/packages/trigger-sdk/src/v3/shared.ts
@@ -2585,7 +2585,8 @@ async function triggerAndSubscribe_internal<TIdentifier extends string, TPayload
             debounce: options?.debounce,
           },
         },
-        {}
+        {},
+        requestOptions
       );
 
       // Set attributes after trigger so the dashboard can link to the child run
diff --git a/packages/trigger-sdk/src/v3/streams.ts b/packages/trigger-sdk/src/v3/streams.ts
index 6ccaea8891a..f987872d80a 100644
--- a/packages/trigger-sdk/src/v3/streams.ts
+++ b/packages/trigger-sdk/src/v3/streams.ts
@@ -19,6 +19,7 @@ import {
   ManualWaitpointPromise,
   WaitpointTimeoutError,
   runtime,
+  logger,
   type RealtimeDefinedInputStream,
   type InputStreamSubscription,
   type InputStreamOnceOptions,
@@ -32,10 +33,43 @@ import {
 } from "@trigger.dev/core/v3";
 import { conditionallyImportAndParsePacket } from "@trigger.dev/core/v3/utils/ioSerialization";
 import { tracer } from "./tracer.js";
+import { locals } from "./locals.js";
 import { SpanStatusCode } from "@opentelemetry/api";
 
 const DEFAULT_STREAM_KEY = "default";
 
+// `chat.agent` sets this once at the top of every run via
+// `markChatAgentRunForStreamsWarning`. The flag lives on the run's
+// AsyncLocalStorage frame, so it naturally resets between runs and stays
+// invisible to subtasks (where `streams.*` is a normal API).
+const inChatAgentRunKey = locals.create<boolean>("streams.inChatAgentRun");
+// Once-per-run dedup. `streams.*` callers inside a chat.agent run get the
+// nudge on the first call and silence afterwards; a single tight loop
+// won't spam the logs.
+const chatAgentStreamsWarnedKey = locals.create<boolean>("streams.chatAgentWarned");
+
+/**
+ * Marks the current run as a `chat.agent` run so later `streams.pipe` /
+ * `streams.writer` / `streams.append` / `streams.read` calls can warn that
+ * they target a run-scoped stream rather than the chat's `session.out`.
+ *
+ * Called from inside the `chat.agent` task wrapper at the top of every run.
+ *
+ * @internal
+ */
+export function markChatAgentRunForStreamsWarning(): void {
+  locals.set(inChatAgentRunKey, true);
+}
+
+// Warns at most once per chat.agent run; the dedup flag is shared by every `streams.*` method.
+function warnIfChatAgentStreamsMisuse(method: "pipe" | "append" | "read" | "writer"): void {
+  if (!locals.get(inChatAgentRunKey) || locals.get(chatAgentStreamsWarnedKey)) return;
+  locals.set(chatAgentStreamsWarnedKey, true);
+  logger.warn(
+    `streams.${method}() was called inside a chat.agent run. This writes to a run-scoped realtime stream and is NOT visible on the chat session, so the chat UI will not see these chunks. For chat output use chat.response.write() or chat.stream.* instead. See https://trigger.dev/docs/ai-chat/patterns/large-payloads. (Logged once per run; subsequent streams.* calls in this run are silent.)`
+  );
+}
+
 /**
  * Pipes data to a realtime stream using the default stream key (`"default"`).
  *
@@ -154,6 +188,7 @@ function pipeInternal<T>(
   opts: PipeStreamOptions | undefined,
   spanName: string
 ): PipeStreamResult<T> {
+  warnIfChatAgentStreamsMisuse(spanName === "streams.writer()" ? "writer" : "pipe");
   const runId = getRunIdForOptions(opts);
 
   if (!runId) {
@@ -325,6 +360,7 @@ async function readStreamImpl<T>(
   key: string,
   options?: ReadStreamOptions
 ): Promise<AsyncIterableStream<T>> {
+  warnIfChatAgentStreamsMisuse("read");
   const apiClient = apiClientManager.clientOrThrow();
 
   const span = tracer.startSpan("streams.read()", {
@@ -403,6 +439,7 @@ async function appendInternal<TPart extends BodyInit>(
   part: TPart,
   options?: AppendStreamOptions
 ): Promise<void> {
+  warnIfChatAgentStreamsMisuse("append");
   const runId = getRunIdForOptions(options);
 
   if (!runId) {
diff --git a/packages/trigger-sdk/src/v3/test/mock-chat-agent.ts b/packages/trigger-sdk/src/v3/test/mock-chat-agent.ts
index 32fdba57cd8..fbcc166d14a 100644
--- a/packages/trigger-sdk/src/v3/test/mock-chat-agent.ts
+++ b/packages/trigger-sdk/src/v3/test/mock-chat-agent.ts
@@ -11,6 +11,7 @@ import {
 } from "../sessions.js";
 import {
   __setReadChatSnapshotImplForTests,
+  __setReplaySessionInTailImplForTests,
   __setReplaySessionOutTailImplForTests,
   __setWriteChatSnapshotImplForTests,
   type ChatSnapshotV1,
@@ -238,6 +239,27 @@ export type MockChatAgentHarness = {
    */
   seedSessionOutTail(chunks?: UIMessageChunk[]): void;
 
+  /**
+   * Pre-seed a trailing partial assistant message for the next boot's
+   * replay. The runtime's `replaySessionOutTail` returns this as the
+   * `partial` field (alongside whatever `seedSessionOutTail` reduces
+   * to). Use to simulate cancel-mid-stream: an assistant message whose
+   * `finish` chunk never arrived. Pass `undefined` to clear.
+   *
+   * Effective on the next run boot only.
+   */
+  seedSessionOutPartial(partial: UIMessage | undefined): void;
+
+  /**
+   * Pre-seed user messages on the `session.in` tail for the next boot's
+   * replay. Each message is paired with a synthetic seq_num (`i + 1`).
+   * Used to simulate in-flight users the dead predecessor was supposed
+   * to process. Pass `[]` to clear.
+   *
+   * Effective on the next run boot only.
+   */
+  seedSessionInTail(messages: UIMessage[]): void;
+
   /**
    * The most recently written snapshot, or `undefined` if no snapshot
    * has been written yet. Updated each time `writeChatSnapshot` is
@@ -373,6 +395,8 @@ export function mockChatAgent(
   let seededSnapshot: ChatSnapshotV1 | undefined = options.snapshot;
   let lastWrittenSnapshot: ChatSnapshotV1 | undefined;
   let seededReplayChunks: UIMessageChunk[] = [];
+  let seededReplayPartial: UIMessage | undefined;
+  let seededSessionInMessages: UIMessage[] = [];
 
   __setReadChatSnapshotImplForTests(<T extends UIMessage>(_id: string) => {
     return seededSnapshot as ChatSnapshotV1<T> | undefined;
@@ -382,11 +406,37 @@ export function mockChatAgent(
   });
 
   // Replay override: install a default that returns whatever
-  // `seededReplayChunks` reduces to. Cleared in the same `finally` block
-  // as the other test overrides.
+  // `seededReplayChunks` reduces to as the `settled` array. Seeded
+  // chunks never produce a partial on their own: `partial` comes only
+  // from `seedSessionOutPartial` and stays `undefined` until that is
+  // called. Tests can still install their own override when needed.
+  // Cleared in the same `finally` block as the other test overrides.
   __setReplaySessionOutTailImplForTests(async () => {
-    if (seededReplayChunks.length === 0) return [];
-    return (await reduceChunksToMessages(seededReplayChunks)) as never;
+    const settled =
+      seededReplayChunks.length === 0
+        ? []
+        : ((await reduceChunksToMessages(seededReplayChunks)) as unknown[]);
+    // For the mock harness, `partialRaw` is the same as `partial` — we
+    // don't model cleanupAbortedParts separately. Recovery tests that
+    // need a partialRaw distinct from partial install their own stub.
+    return {
+      settled,
+      partial: seededReplayPartial,
+      partialRaw: seededReplayPartial,
+    } as never;
+  });
+
+  // session.in tail override: each seeded UIMessage becomes a
+  // { message, metadata: undefined, seqNum: i+1 } entry. Mirrors the
+  // seq-num pattern from the out-tail stub so cursor-advance logic is
+  // exercised correctly. `metadata` is `undefined` for seeded users —
+  // the boot path falls back to `payload.metadata` for those.
+  __setReplaySessionInTailImplForTests(async () => {
+    return seededSessionInMessages.map((message, i) => ({
+      message,
+      metadata: undefined,
+      seqNum: i + 1,
+    })) as never;
   });
 
   // Install the session open override so `sessions.open(id)` returns a
@@ -521,6 +571,7 @@ export function mockChatAgent(
       __setReadChatSnapshotImplForTests(undefined);
       __setWriteChatSnapshotImplForTests(undefined);
       __setReplaySessionOutTailImplForTests(undefined);
+      __setReplaySessionInTailImplForTests(undefined);
     });
 
   const sendPayloadAndWait = async (
@@ -616,6 +667,14 @@ export function mockChatAgent(
       seededReplayChunks = chunks ?? [];
     },
 
+    seedSessionOutPartial(partial) {
+      seededReplayPartial = partial;
+    },
+
+    seedSessionInTail(messages) {
+      seededSessionInMessages = messages;
+    },
+
     getSnapshot() {
       return lastWrittenSnapshot;
     },
diff --git a/packages/trigger-sdk/test/chat-snapshot.test.ts b/packages/trigger-sdk/test/chat-snapshot.test.ts
index cac3364639b..85e1fe8adef 100644
--- a/packages/trigger-sdk/test/chat-snapshot.test.ts
+++ b/packages/trigger-sdk/test/chat-snapshot.test.ts
@@ -34,28 +34,27 @@ function buildSnapshot(count = 1): ChatSnapshotV1 {
 
 /**
  * Stub `apiClientManager.clientOrThrow()` so the helpers see a fake API
- * client whose `getPayloadUrl` / `createUploadPayloadUrl` resolve with the
- * presigned URLs the test wants. Returns spies for assertion.
+ * client whose `getChatSnapshotUrl` / `createChatSnapshotUploadUrl` resolve
+ * with the presigned URLs the test wants. Returns spies for assertion.
  */
 function stubApiClient(opts: {
-  getPayloadUrl?: (filename: string) => Promise<{ presignedUrl: string }>;
-  createUploadPayloadUrl?: (filename: string) => Promise<{ presignedUrl: string }>;
+  getChatSnapshotUrl?: (sessionId: string) => Promise<{ presignedUrl: string }>;
+  createChatSnapshotUploadUrl?: (sessionId: string) => Promise<{ presignedUrl: string }>;
 }) {
-  const getPayloadUrl = vi.fn(
-    opts.getPayloadUrl ?? (async (_filename: string) => ({ presignedUrl: "https://example.invalid/get" }))
+  const getChatSnapshotUrl = vi.fn(
+    opts.getChatSnapshotUrl ??
+      (async (_sessionId: string) => ({ presignedUrl: "https://example.invalid/get" }))
   );
-  const createUploadPayloadUrl = vi.fn(
-    opts.createUploadPayloadUrl ??
-      (async (_filename: string) => ({ presignedUrl: "https://example.invalid/put" }))
+  const createChatSnapshotUploadUrl = vi.fn(
+    opts.createChatSnapshotUploadUrl ??
+      (async (_sessionId: string) => ({ presignedUrl: "https://example.invalid/put" }))
   );
   const fakeClient = {
-    getPayloadUrl,
-    createUploadPayloadUrl,
+    getChatSnapshotUrl,
+    createChatSnapshotUploadUrl,
   };
-  vi.spyOn(apiClientManager, "clientOrThrow").mockReturnValue(
-    fakeClient as never
-  );
-  return { getPayloadUrl, createUploadPayloadUrl };
+  vi.spyOn(apiClientManager, "clientOrThrow").mockReturnValue(fakeClient as never);
+  return { getChatSnapshotUrl, createChatSnapshotUploadUrl };
 }
 
 /**
@@ -87,7 +86,7 @@ describe("chat snapshot helpers", () => {
 
   describe("readChatSnapshot", () => {
     it("returns the snapshot on a successful GET", async () => {
-      const { getPayloadUrl } = stubApiClient({});
+      const { getChatSnapshotUrl } = stubApiClient({});
       const snapshot = buildSnapshot(2);
       stubFetch(async () =>
         new Response(JSON.stringify(snapshot), {
@@ -97,7 +96,7 @@ describe("chat snapshot helpers", () => {
       );
 
       const result = await readChatSnapshot("session-1");
-      expect(getPayloadUrl).toHaveBeenCalledWith("sessions/session-1/snapshot.json");
+      expect(getChatSnapshotUrl).toHaveBeenCalledWith("session-1");
       expect(result).toMatchObject({
         version: 1,
         messages: snapshot.messages,
@@ -177,7 +176,7 @@ describe("chat snapshot helpers", () => {
 
     it("returns undefined when presign call fails", async () => {
       stubApiClient({
-        getPayloadUrl: async () => {
+        getChatSnapshotUrl: async () => {
           throw new Error("presign denied");
         },
       });
@@ -202,13 +201,13 @@ describe("chat snapshot helpers", () => {
 
   describe("writeChatSnapshot", () => {
     it("PUTs the snapshot JSON to the presigned URL", async () => {
-      const { createUploadPayloadUrl } = stubApiClient({});
+      const { createChatSnapshotUploadUrl } = stubApiClient({});
       const fetchSpy = stubFetch(async () => new Response(null, { status: 200 }));
 
       const snapshot = buildSnapshot(3);
       await writeChatSnapshot("session-2", snapshot);
 
-      expect(createUploadPayloadUrl).toHaveBeenCalledWith("sessions/session-2/snapshot.json");
+      expect(createChatSnapshotUploadUrl).toHaveBeenCalledWith("session-2");
       expect(fetchSpy).toHaveBeenCalledOnce();
       const [url, init] = fetchSpy.mock.calls[0]!;
       expect(url).toBe("https://example.invalid/put");
@@ -239,7 +238,7 @@ describe("chat snapshot helpers", () => {
 
     it("returns without throwing when presign fails (warns)", async () => {
       stubApiClient({
-        createUploadPayloadUrl: async () => {
+        createChatSnapshotUploadUrl: async () => {
           throw new Error("presign denied");
         },
       });
@@ -250,29 +249,28 @@ describe("chat snapshot helpers", () => {
       expect(fetchSpy).not.toHaveBeenCalled();
     });
 
-    it("uses the same `snapshotFilename(sessionId)` convention as the read path", async () => {
-      // Round-trip check: read and write target the same key for a given
-      // sessionId. The runtime relies on this to make read-after-write
-      // coherent on subsequent boots.
-      const { getPayloadUrl } = stubApiClient({
-        getPayloadUrl: async () => ({ presignedUrl: "https://example.invalid/get" }),
+    it("addresses reads and writes by the same sessionId", async () => {
+      // Round-trip check: both presign methods receive the same sessionId.
+      // The canonical key (`sessions/{id}/snapshot.json`) lives server-side
+      // now, so the SDK has no key string to compare — sessionId equality
+      // is the SDK-visible invariant.
+      const { getChatSnapshotUrl } = stubApiClient({
+        getChatSnapshotUrl: async () => ({ presignedUrl: "https://example.invalid/get" }),
       });
       stubFetch(async () => new Response(null, { status: 404 }));
 
-      // Trigger a read.
       await readChatSnapshot("round-trip-session");
-      const [readKey] = getPayloadUrl.mock.calls[0]!;
+      const [readArg] = getChatSnapshotUrl.mock.calls[0]!;
 
-      // Trigger a write to the same session.
-      const { createUploadPayloadUrl } = stubApiClient({
-        createUploadPayloadUrl: async () => ({ presignedUrl: "https://example.invalid/put" }),
+      const { createChatSnapshotUploadUrl } = stubApiClient({
+        createChatSnapshotUploadUrl: async () => ({ presignedUrl: "https://example.invalid/put" }),
       });
       stubFetch(async () => new Response(null, { status: 200 }));
       await writeChatSnapshot("round-trip-session", buildSnapshot());
-      const [writeKey] = createUploadPayloadUrl.mock.calls[0]!;
+      const [writeArg] = createChatSnapshotUploadUrl.mock.calls[0]!;
 
-      expect(readKey).toBe(writeKey);
-      expect(readKey).toBe("sessions/round-trip-session/snapshot.json");
+      expect(readArg).toBe(writeArg);
+      expect(readArg).toBe("round-trip-session");
     });
   });
 });
diff --git a/packages/trigger-sdk/test/recovery-boot.test.ts b/packages/trigger-sdk/test/recovery-boot.test.ts
new file mode 100644
index 00000000000..5d7b4cd2213
--- /dev/null
+++ b/packages/trigger-sdk/test/recovery-boot.test.ts
@@ -0,0 +1,483 @@
+// Import the test harness FIRST — installs the resource catalog so
+// `chat.agent()` calls register their task functions correctly.
+import { mockChatAgent } from "../src/v3/test/index.js";
+
+import { describe, expect, it, vi } from "vitest";
+import { chat } from "../src/v3/ai.js";
+import type { RecoveryBootEvent, RecoveryBootResult } from "../src/v3/ai.js";
+import { __setReplaySessionOutTailImplForTests } from "../src/v3/ai.js";
+import { simulateReadableStream, streamText } from "ai";
+import { MockLanguageModelV3 } from "ai/test";
+import type { LanguageModelV3StreamPart } from "@ai-sdk/provider";
+
+// ── Helpers ────────────────────────────────────────────────────────────
+
+function userMessage(text: string, id = "u-" + Math.random().toString(36).slice(2)) {
+  return {
+    id,
+    role: "user" as const,
+    parts: [{ type: "text" as const, text }],
+  };
+}
+
+function assistantMessage(text: string, id = "a-" + Math.random().toString(36).slice(2)) {
+  return {
+    id,
+    role: "assistant" as const,
+    parts: [{ type: "text" as const, text }],
+  };
+}
+
+function partialAssistantWithToolCall(id: string, toolCallId: string, toolName: string) {
+  return {
+    id,
+    role: "assistant" as const,
+    parts: [
+      {
+        type: `tool-${toolName}` as const,
+        toolCallId,
+        state: "input-available" as const,
+        input: { q: "search" },
+      },
+    ],
+  } as unknown as ReturnType<typeof assistantMessage>;
+}
+
+function textStream(text: string) {
+  const chunks: LanguageModelV3StreamPart[] = [
+    { type: "text-start", id: "t1" },
+    { type: "text-delta", id: "t1", delta: text },
+    { type: "text-end", id: "t1" },
+    {
+      type: "finish",
+      finishReason: { unified: "stop", raw: "stop" },
+      usage: {
+        inputTokens: { total: 10, noCache: 10, cacheRead: undefined, cacheWrite: undefined },
+        outputTokens: { total: 10, text: 10, reasoning: undefined },
+      },
+    },
+  ];
+  return simulateReadableStream({ chunks });
+}
+
+// ── Tests ──────────────────────────────────────────────────────────────
+
+describe("onRecoveryBoot — chat.agent recovery hook", () => {
+  it("does NOT fire on a clean continuation with no recovered state", async () => {
+    const onRecoveryBoot = vi.fn();
+    const model = new MockLanguageModelV3({
+      doStream: async () => ({ stream: textStream("ok") }),
+    });
+    const agent = chat.agent({
+      id: "recovery-boot.no-state",
+      onRecoveryBoot,
+      run: async ({ messages, signal }) =>
+        streamText({ model, messages, abortSignal: signal }),
+    });
+    const harness = mockChatAgent(agent, {
+      chatId: "no-state",
+      continuation: true,
+      previousRunId: "run_prior",
+    });
+    try {
+      // Snapshot is empty, no in-flight users, no partial — nothing was
+      // recovered from the previous run, so the hook must not be invoked.
+      await harness.sendMessage(userMessage("fresh message"));
+      await new Promise((r) => setTimeout(r, 20));
+      expect(onRecoveryBoot).not.toHaveBeenCalled();
+    } finally {
+      await harness.close();
+    }
+  });
+
+  it("fires when there's a partial assistant and surfaces it on the ctx", async () => {
+    const captured: { event?: RecoveryBootEvent<ReturnType<typeof userMessage>> } = {};
+    const model = new MockLanguageModelV3({
+      doStream: async () => ({ stream: textStream("recovered") }),
+    });
+    const partial = partialAssistantWithToolCall("a-orphan", "tc-1", "search");
+    const agent = chat.agent({
+      id: "recovery-boot.partial-fires-hook",
+      onRecoveryBoot: async (event) => {
+        captured.event = event as never;
+        return {};
+      },
+      run: async ({ messages, signal }) =>
+        streamText({ model, messages, abortSignal: signal }),
+    });
+    const harness = mockChatAgent(agent, {
+      chatId: "partial-fires-hook",
+      continuation: true,
+      previousRunId: "run_prior",
+    });
+    harness.seedSessionOutPartial(partial as never);
+    try {
+      await harness.sendMessage(userMessage("next user message"));
+      await new Promise((r) => setTimeout(r, 20));
+      expect(captured.event).toBeDefined();
+      expect(captured.event!.partialAssistant?.id).toBe("a-orphan");
+      expect(captured.event!.pendingToolCalls).toHaveLength(1);
+      expect(captured.event!.pendingToolCalls[0]!.toolCallId).toBe("tc-1");
+      expect(captured.event!.pendingToolCalls[0]!.toolName).toBe("search");
+      expect(captured.event!.previousRunId).toBe("run_prior");
+      expect(captured.event!.cause).toBe("unknown");
+    } finally {
+      await harness.close();
+    }
+  });
+
+  it("pendingToolCalls is extracted from the RAW partial (pre-cleanupAbortedParts)", async () => {
+    // Real-world scenario: cancel-mid-tool-call. Session.out has tool-call
+    // chunks but the tool never returned. cleanupAbortedParts strips the
+    // input-available tool part from the partial used for the chain (you
+    // don't want orphan tool calls poisoning the model context), but
+    // `pendingToolCalls` should still surface what was happening.
+    const cleanedPartial = {
+      id: "a-orphan",
+      role: "assistant" as const,
+      parts: [{ type: "text" as const, text: "Let me look that up" }],
+    };
+    const rawPartial = {
+      id: "a-orphan",
+      role: "assistant" as const,
+      parts: [
+        { type: "text" as const, text: "Let me look that up" },
+        {
+          type: "tool-search" as const,
+          toolCallId: "tc-pending",
+          state: "input-available" as const,
+          input: { q: "vietnamese pho" },
+        },
+      ],
+    } as unknown as typeof cleanedPartial;
+
+    const captured: { event?: RecoveryBootEvent } = {};
+    const model = new MockLanguageModelV3({
+      doStream: async () => ({ stream: textStream("ok") }),
+    });
+    const u1 = userMessage("buffered", "u-1");
+    const agent = chat.agent({
+      id: "recovery-boot.pending-tool-from-raw",
+      onRecoveryBoot: async (event) => {
+        captured.event = event;
+        return {};
+      },
+      run: async ({ messages, signal }) =>
+        streamText({ model, messages, abortSignal: signal }),
+    });
+    const harness = mockChatAgent(agent, {
+      chatId: "pending-tool-from-raw",
+      continuation: true,
+      previousRunId: "run_prior",
+    });
+    harness.seedSessionInTail([u1 as never]);
+    // Install AFTER mockChatAgent — its constructor sets its own default
+    // override that we want to replace for this test.
+    __setReplaySessionOutTailImplForTests(async () =>
+      ({
+        settled: [],
+        partial: cleanedPartial,
+        partialRaw: rawPartial,
+      }) as never
+    );
+    try {
+      await new Promise((r) => setTimeout(r, 50));
+      expect(captured.event).toBeDefined();
+      // Cleaned partial → chain (no input-available tool part)
+      expect(captured.event!.partialAssistant?.parts).toHaveLength(1);
+      // pendingToolCalls → from raw (input-available tool part visible)
+      expect(captured.event!.pendingToolCalls).toHaveLength(1);
+      expect(captured.event!.pendingToolCalls[0]!.toolCallId).toBe("tc-pending");
+      expect(captured.event!.pendingToolCalls[0]!.toolName).toBe("search");
+    } finally {
+      await harness.close();
+    }
+  });
+
+  it("does NOT fire when there are in-flight users but no partial (graceful exit path)", async () => {
+    // chat.requestUpgrade(), chat.endRun() before processing, and similar
+    // graceful exits leave an unacknowledged user on session.in but no
+    // partial assistant on session.out. That's not recovery — the next
+    // run just dispatches the message normally.
+    const onRecoveryBoot = vi.fn();
+    const model = new MockLanguageModelV3({
+      doStream: async () => ({ stream: textStream("ok") }),
+    });
+    const u1 = userMessage("buffered while dead", "u-buffered");
+    const agent = chat.agent({
+      id: "recovery-boot.inflight-users-no-partial",
+      onRecoveryBoot,
+      run: async ({ messages, signal }) =>
+        streamText({ model, messages, abortSignal: signal }),
+    });
+    const harness = mockChatAgent(agent, {
+      chatId: "inflight-users-no-partial",
+      continuation: true,
+      previousRunId: "run_prior",
+    });
+    harness.seedSessionInTail([u1 as never]);
+    try {
+      await new Promise((r) => setTimeout(r, 50));
+      expect(onRecoveryBoot).not.toHaveBeenCalled();
+    } finally {
+      await harness.close();
+    }
+  });
+
+  it("default behavior re-dispatches each in-flight user as a turn", async () => {
+    let turnCount = 0;
+    const model = new MockLanguageModelV3({
+      doStream: async () => {
+        turnCount++;
+        return { stream: textStream(`reply ${turnCount}`) };
+      },
+    });
+    const u1 = userMessage("first buffered", "u-1");
+    const u2 = userMessage("second buffered", "u-2");
+    const agent = chat.agent({
+      id: "recovery-boot.default-dispatch",
+      // NO onRecoveryBoot — exercise the default path
+      run: async ({ messages, signal }) =>
+        streamText({ model, messages, abortSignal: signal }),
+    });
+    const harness = mockChatAgent(agent, {
+      chatId: "default-dispatch",
+      continuation: true,
+      previousRunId: "run_prior",
+    });
+    harness.seedSessionInTail([u1 as never, u2 as never]);
+    try {
+      await new Promise((r) => setTimeout(r, 100));
+      expect(turnCount).toBe(2);
+    } finally {
+      await harness.close();
+    }
+  });
+
+  it("smart default: partial + first user spliced into chain, rest dispatched", async () => {
+    let observedChain: Array<{ role: string; idHead: string }> = [];
+    let turnCount = 0;
+    const model = new MockLanguageModelV3({
+      doStream: async () => {
+        turnCount++;
+        return { stream: textStream("ok") };
+      },
+    });
+    const partial = assistantMessage("partial answer in progress", "a-partial");
+    const u1 = userMessage("original question", "u-1");
+    const u2 = userMessage("follow-up", "u-2");
+    const agent = chat.agent({
+      id: "recovery-boot.smart-default",
+      // NO onRecoveryBoot — exercise the smart default
+      onTurnStart: async ({ uiMessages }) => {
+        if (turnCount === 0) {
+          observedChain = uiMessages.map((m) => ({
+            role: m.role,
+            idHead: m.id.slice(0, 10),
+          }));
+        }
+      },
+      run: async ({ messages, signal }) =>
+        streamText({ model, messages, abortSignal: signal }),
+    });
+    const harness = mockChatAgent(agent, {
+      chatId: "smart-default",
+      continuation: true,
+      previousRunId: "run_prior",
+    });
+    harness.seedSessionOutPartial(partial as never);
+    harness.seedSessionInTail([u1 as never, u2 as never]);
+    try {
+      await new Promise((r) => setTimeout(r, 100));
+      // Turn 1 fires with the follow-up user (u2). Its chain should
+      // include [u1 (original), a-partial, u2 (follow-up)].
+      expect(turnCount).toBe(1);
+      expect(observedChain.map((m) => m.role)).toEqual([
+        "user",
+        "assistant",
+        "user",
+      ]);
+      expect(observedChain[0]!.idHead).toBe("u-1");
+      expect(observedChain[1]!.idHead).toBe("a-partial");
+      expect(observedChain[2]!.idHead).toBe("u-2");
+    } finally {
+      await harness.close();
+    }
+  });
+
+  it("hook's recoveredTurns: [] suppresses re-dispatch of in-flight users", async () => {
+    let turnCount = 0;
+    const model = new MockLanguageModelV3({
+      doStream: async () => {
+        turnCount++;
+        return { stream: textStream(`reply ${turnCount}`) };
+      },
+    });
+    const partial = assistantMessage("partial answer", "a-partial");
+    const u1 = userMessage("buffered", "u-1");
+    const agent = chat.agent({
+      id: "recovery-boot.suppress-dispatch",
+      onRecoveryBoot: async (): Promise<RecoveryBootResult> => ({ recoveredTurns: [] }),
+      run: async ({ messages, signal }) =>
+        streamText({ model, messages, abortSignal: signal }),
+    });
+    const harness = mockChatAgent(agent, {
+      chatId: "suppress-dispatch",
+      continuation: true,
+      previousRunId: "run_prior",
+    });
+    harness.seedSessionOutPartial(partial as never);
+    harness.seedSessionInTail([u1 as never]);
+    try {
+      // No turn should fire from the boot-injected queue.
+      // Send a fresh user message to confirm the agent is alive.
+      await harness.sendMessage(userMessage("real next message"));
+      await new Promise((r) => setTimeout(r, 20));
+      expect(turnCount).toBe(1); // only the explicit sendMessage turn
+    } finally {
+      await harness.close();
+    }
+  });
+
+  it("hook's chain override seeds the accumulator", async () => {
+    const model = new MockLanguageModelV3({
+      doStream: async () => ({ stream: textStream("acked") }),
+    });
+    const custom = assistantMessage("custom-recovered-history", "a-custom");
+    const partial = assistantMessage("partial", "a-partial");
+    const u1 = userMessage("buffered", "u-1");
+    let observedMessageCount = 0;
+    const agent = chat.agent({
+      id: "recovery-boot.chain-override",
+      onRecoveryBoot: async (): Promise<RecoveryBootResult> => ({
+        chain: [custom as never],
+        recoveredTurns: [u1 as never],
+      }),
+      onTurnStart: async ({ uiMessages }) => {
+        observedMessageCount = uiMessages.length;
+      },
+      run: async ({ messages, signal }) =>
+        streamText({ model, messages, abortSignal: signal }),
+    });
+    const harness = mockChatAgent(agent, {
+      chatId: "chain-override",
+      continuation: true,
+      previousRunId: "run_prior",
+    });
+    harness.seedSessionOutPartial(partial as never);
+    harness.seedSessionInTail([u1 as never]);
+    try {
+      await new Promise((r) => setTimeout(r, 50));
+      // Chain seeded with [custom] before the recovered user message
+      // arrives — onTurnStart sees [custom, u1] when the first
+      // recovered turn fires.
+      expect(observedMessageCount).toBe(2);
+    } finally {
+      await harness.close();
+    }
+  });
+
+  it("does NOT fire when hydrateMessages is registered", async () => {
+    const onRecoveryBoot = vi.fn();
+    const model = new MockLanguageModelV3({
+      doStream: async () => ({ stream: textStream("ok") }),
+    });
+    const u1 = userMessage("buffered", "u-1");
+    const agent = chat.agent({
+      id: "recovery-boot.hydrate-skips",
+      hydrateMessages: async ({ incomingMessages }) => incomingMessages,
+      onRecoveryBoot,
+      run: async ({ messages, signal }) =>
+        streamText({ model, messages, abortSignal: signal }),
+    });
+    const harness = mockChatAgent(agent, {
+      chatId: "hydrate-skips",
+      continuation: true,
+      previousRunId: "run_prior",
+    });
+    harness.seedSessionInTail([u1 as never]);
+    try {
+      await harness.sendMessage(userMessage("fresh"));
+      await new Promise((r) => setTimeout(r, 20));
+      expect(onRecoveryBoot).not.toHaveBeenCalled();
+    } finally {
+      await harness.close();
+    }
+  });
+
+  it("beforeBoot runs before the first recovered turn fires", async () => {
+    const order: string[] = [];
+    const model = new MockLanguageModelV3({
+      doStream: async () => {
+        order.push("turn");
+        return { stream: textStream("ok") };
+      },
+    });
+    const partial = assistantMessage("partial", "a-partial");
+    const u1 = userMessage("buffered original", "u-1");
+    const u2 = userMessage("followup", "u-2");
+    const agent = chat.agent({
+      id: "recovery-boot.before-boot",
+      onRecoveryBoot: async (): Promise<RecoveryBootResult> => ({
+        beforeBoot: async () => {
+          order.push("beforeBoot");
+        },
+      }),
+      run: async ({ messages, signal }) =>
+        streamText({ model, messages, abortSignal: signal }),
+    });
+    const harness = mockChatAgent(agent, {
+      chatId: "before-boot",
+      continuation: true,
+      previousRunId: "run_prior",
+    });
+    harness.seedSessionOutPartial(partial as never);
+    // Two users — smart default consumes u1 into the chain, leaves u2 for dispatch
+    harness.seedSessionInTail([u1 as never, u2 as never]);
+    try {
+      await new Promise((r) => setTimeout(r, 50));
+      expect(order).toEqual(["beforeBoot", "turn"]);
+    } finally {
+      await harness.close();
+    }
+  });
+
+  it("hook throwing falls back to defaults without sinking the run", async () => {
+    let turnCount = 0;
+    const model = new MockLanguageModelV3({
+      doStream: async () => {
+        turnCount++;
+        return { stream: textStream("ok") };
+      },
+    });
+    const partial = assistantMessage("partial", "a-partial");
+    const u1 = userMessage("buffered original", "u-1");
+    const u2 = userMessage("followup", "u-2");
+    const warnSpy = vi.spyOn(console, "warn").mockImplementation(() => {});
+    const agent = chat.agent({
+      id: "recovery-boot.hook-throws",
+      onRecoveryBoot: async () => {
+        throw new Error("kaboom");
+      },
+      run: async ({ messages, signal }) =>
+        streamText({ model, messages, abortSignal: signal }),
+    });
+    const harness = mockChatAgent(agent, {
+      chatId: "hook-throws",
+      continuation: true,
+      previousRunId: "run_prior",
+    });
+    harness.seedSessionOutPartial(partial as never);
+    // Two users so smart default leaves u2 to dispatch (u1 spliced into chain)
+    harness.seedSessionInTail([u1 as never, u2 as never]);
+    try {
+      await new Promise((r) => setTimeout(r, 100));
+      // Smart default still applies even though the hook threw: u1 is
+      // spliced into the chain and u2 is re-dispatched as the only turn.
+      expect(turnCount).toBe(1);
+    } finally {
+      await harness.close();
+      warnSpy.mockRestore();
+    }
+  });
+});
diff --git a/packages/trigger-sdk/test/replay-session-in.test.ts b/packages/trigger-sdk/test/replay-session-in.test.ts
new file mode 100644
index 00000000000..92a1cb6f97c
--- /dev/null
+++ b/packages/trigger-sdk/test/replay-session-in.test.ts
@@ -0,0 +1,137 @@
+// Import the test entry point first so the resource catalog is installed.
+import "../src/v3/test/index.js";
+
+import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
+import { apiClientManager } from "@trigger.dev/core/v3";
+import { __replaySessionInTailProductionPathForTests as replaySessionInTail } from "../src/v3/ai.js";
+
+// ── Helpers ────────────────────────────────────────────────────────────
+
+function userMessage(id: string, text: string) {
+  return {
+    id,
+    role: "user" as const,
+    parts: [{ type: "text" as const, text }],
+  };
+}
+
+function stubReadRecords(chunks: unknown[]) {
+  const records = chunks.map((chunk, i) => ({
+    data: chunk,
+    id: `evt-${i + 1}`,
+    seqNum: i + 1,
+  }));
+  const spy = vi.fn(async () => ({ records }));
+  vi.spyOn(apiClientManager, "clientOrThrow").mockReturnValue({
+    readSessionStreamRecords: spy,
+  } as never);
+  return spy;
+}
+
+beforeEach(() => {
+  vi.restoreAllMocks();
+});
+
+afterEach(() => {
+  vi.restoreAllMocks();
+});
+
+// ── Tests ──────────────────────────────────────────────────────────────
+
+describe("replaySessionInTail", () => {
+  it("extracts user messages from kind: 'message' records with submit-message trigger", async () => {
+    const u1 = userMessage("u-1", "hello");
+    const u2 = userMessage("u-2", "again");
+    stubReadRecords([
+      {
+        kind: "message",
+        payload: { chatId: "c1", trigger: "submit-message", message: u1, metadata: { userId: "a" } },
+      },
+      {
+        kind: "message",
+        payload: { chatId: "c1", trigger: "submit-message", message: u2, metadata: { userId: "b" } },
+      },
+    ]);
+
+    const result = await replaySessionInTail("sess");
+    expect(result).toHaveLength(2);
+    expect(result[0]!.message.id).toBe("u-1");
+    expect(result[0]!.seqNum).toBe(1);
+    expect(result[0]!.metadata).toEqual({ userId: "a" });
+    expect(result[1]!.message.id).toBe("u-2");
+    expect(result[1]!.seqNum).toBe(2);
+    expect(result[1]!.metadata).toEqual({ userId: "b" });
+  });
+
+  it("ignores non-message variants (stop, handover, handover-skip)", async () => {
+    const u1 = userMessage("u-1", "real user");
+    stubReadRecords([
+      { kind: "stop", message: "user stopped" },
+      { kind: "handover-skip" },
+      { kind: "handover", partialAssistantMessage: [], isFinal: false },
+      { kind: "message", payload: { chatId: "c1", trigger: "submit-message", message: u1 } },
+    ]);
+
+    const result = await replaySessionInTail("sess");
+    expect(result).toHaveLength(1);
+    expect(result[0]!.message.id).toBe("u-1");
+  });
+
+  it("ignores message records that aren't submit-message", async () => {
+    // regenerate-message / preload / close / action / handover-prepare don't
+    // carry a user message — the chain reconstruction must skip them.
+    stubReadRecords([
+      { kind: "message", payload: { chatId: "c1", trigger: "regenerate-message" } },
+      { kind: "message", payload: { chatId: "c1", trigger: "preload" } },
+      { kind: "message", payload: { chatId: "c1", trigger: "close" } },
+      { kind: "message", payload: { chatId: "c1", trigger: "action", action: { foo: 1 } } },
+    ]);
+
+    const result = await replaySessionInTail("sess");
+    expect(result).toHaveLength(0);
+  });
+
+  it("ignores records whose payload is missing or empty", async () => {
+    stubReadRecords([
+      { kind: "message" }, // no payload
+      { kind: "message", payload: { chatId: "c1", trigger: "submit-message" } }, // no message
+      { kind: "message", payload: { chatId: "c1", trigger: "submit-message", message: null } },
+      {
+        kind: "message",
+        payload: { chatId: "c1", trigger: "submit-message", message: "not-an-object" },
+      },
+    ]);
+
+    const result = await replaySessionInTail("sess");
+    expect(result).toHaveLength(0);
+  });
+
+  it("skips non-object record data defensively", async () => {
+    const u1 = userMessage("u-1", "valid");
+    stubReadRecords([
+      42,
+      null,
+      "string-data",
+      { kind: "message", payload: { chatId: "c1", trigger: "submit-message", message: u1 } },
+    ]);
+
+    const result = await replaySessionInTail("sess");
+    expect(result).toHaveLength(1);
+    expect(result[0]!.message.id).toBe("u-1");
+  });
+
+  it("passes the afterEventId cursor through to readSessionStreamRecords", async () => {
+    const spy = stubReadRecords([]);
+
+    await replaySessionInTail("sess", { lastEventId: "evt-42" });
+
+    expect(spy).toHaveBeenCalledWith("sess", "in", { afterEventId: "evt-42" });
+  });
+
+  it("returns an empty list when the records endpoint returns no records", async () => {
+    stubReadRecords([]);
+
+    const result = await replaySessionInTail("sess");
+    expect(result).toEqual([]);
+  });
+});
diff --git a/packages/trigger-sdk/test/replay-session-out.test.ts b/packages/trigger-sdk/test/replay-session-out.test.ts
index 802f4ff0c41..ed78ecec146 100644
--- a/packages/trigger-sdk/test/replay-session-out.test.ts
+++ b/packages/trigger-sdk/test/replay-session-out.test.ts
@@ -40,18 +40,19 @@ function partialTurn(id: string, text: string): UIMessageChunk[] {
 /**
  * Stub `apiClientManager.clientOrThrow().readSessionStreamRecords` so the
  * helper sees a `{ records: StreamRecord[] }` response. Each StreamRecord
- * is `{ data: string, id, seqNum }` — `data` is the JSON-encoded chunk
- * body the runtime then `JSON.parse`s.
+ * is `{ data, id, seqNum }` — `data` is the parsed chunk OBJECT (the wire
+ * writer puts chunks directly into the record envelope; the route
+ * forwards them as-is; the schema declares `data: z.unknown()`).
  *
- * Pass either a `UIMessageChunk` (will be JSON.stringify'd) or a raw
- * string (used as `data` directly — for tests that need pre-stringified
- * or deliberately-malformed bodies).
+ * Pass either a chunk OBJECT (used as `data` directly) or a string
+ * (used as `data` directly — for tests that need deliberately-malformed
+ * bodies; the consumer filters non-objects out).
  *
  * Captures the `afterEventId` argument for resume-from-cursor assertions.
  */
 function stubReadRecordsWithChunks(chunks: unknown[]) {
   const records = chunks.map((chunk, i) => ({
-    data: typeof chunk === "string" ? chunk : JSON.stringify(chunk),
+    data: chunk,
     id: `evt-${i + 1}`,
     seqNum: i + 1,
   }));
@@ -228,30 +229,12 @@ describe("replaySessionOutTail", () => {
     expect(text).toBe("fully-finished");
   });
 
-  it("JSON-decodes each record.data (every record arrives pre-serialized)", async () => {
-    // The records endpoint hands each chunk back as a JSON string in
-    // `record.data` — the agent JSON.parses it client-side so the
-    // server's hot path doesn't pay the parse cost. Verify a normal
-    // turn round-trips through JSON encode→decode.
-    const stringChunks = textTurn("a-1", "from-string").map((c) => JSON.stringify(c));
-    stubReadRecordsWithChunks(stringChunks);
-
-    const result = await replaySessionOutTail("string-chunks");
-    expect(result).toHaveLength(1);
-    const text = (result[0]!.parts as Array<{ type: string; text?: string }>)
-      .filter((p) => p.type === "text")
-      .map((p) => p.text)
-      .join("");
-    expect(text).toBe("from-string");
-  });
-
-  it("skips records whose data is unparseable JSON", async () => {
-    // The replay helper wraps the per-record JSON.parse in try/catch so
-    // a single malformed record can't sink the rest of the replay. The
-    // server should never serve a malformed `data`, but the defensive
-    // catch lets a poisoned record skip cleanly.
+  it("skips records whose data is a string (the wire delivers objects)", async () => {
+    // The writer puts chunk objects directly into the record envelope;
+    // the route forwards them as-is. A string body is malformed — the
+    // consumer drops it defensively rather than JSON.parsing.
     stubReadRecordsWithChunks([
-      "not-json-{[",
+      "not-an-object",
       ...textTurn("a-1", "survived"),
     ]);
 
@@ -260,14 +243,14 @@ describe("replaySessionOutTail", () => {
     expect(result[0]!.id).toBe("a-1");
   });
 
-  it("skips records whose decoded data is not an object", async () => {
-    // After JSON.parse, the helper requires `chunk` to be a non-null
-    // object with a string `type` field. Records that decode to
-    // primitives (number, string, etc.) are dropped silently.
+  it("skips records whose data is not an object", async () => {
+    // The consumer requires `chunk` to be a non-null object with a
+    // string `type` field. Records that arrive as primitives
+    // (number, null, string) are dropped silently.
     stubReadRecordsWithChunks([
-      JSON.stringify(42),
-      JSON.stringify(null),
-      JSON.stringify("just-a-string"),
+      42,
+      null,
+      "just-a-string",
       ...textTurn("a-1", "survived"),
     ]);
 
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index 17f73d9a252..31e0e2e458d 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -1159,7 +1159,7 @@ importers:
         version: 18.3.1
       react-email:
         specifier: ^2.1.1
-        version: 2.1.2(@opentelemetry/api@1.9.0)(@swc/helpers@0.5.15)(bufferutil@4.0.9)(eslint@8.31.0)
+        version: 2.1.2(@opentelemetry/api@1.9.0)(@swc/helpers@0.5.15)(eslint@8.31.0)
       resend:
         specifier: ^3.2.0
         version: 3.2.0
@@ -1472,7 +1472,7 @@ importers:
         specifier: ^6.10.0
         version: 6.19.0(magicast@0.3.5)
       '@trigger.dev/core':
-        specifier: workspace:4.4.6
+        specifier: workspace:4.5.0-rc.1
         version: link:../core
       mlly:
         specifier: ^1.7.1
@@ -1548,13 +1548,13 @@ importers:
         specifier: ^0.22.5
         version: 0.22.5(supports-color@10.0.0)
       '@trigger.dev/build':
-        specifier: workspace:4.4.6
+        specifier: workspace:4.5.0-rc.1
         version: link:../build
       '@trigger.dev/core':
-        specifier: workspace:4.4.6
+        specifier: workspace:4.5.0-rc.1
         version: link:../core
       '@trigger.dev/schema-to-json':
-        specifier: workspace:4.4.6
+        specifier: workspace:4.5.0-rc.1
         version: link:../schema-to-json
       ansi-escapes:
         specifier: ^7.0.0
@@ -1933,7 +1933,7 @@ importers:
         version: 6.0.1
       tsup:
         specifier: ^8.4.0
-        version: 8.4.0(@swc/core@1.3.101(@swc/helpers@0.5.15))(jiti@2.4.2)(postcss@8.5.10)(tsx@4.20.6)(typescript@5.5.4)(yaml@2.8.3)
+        version: 8.4.0(@swc/core@1.3.101(@swc/helpers@0.5.15))(jiti@2.6.1)(postcss@8.5.10)(tsx@4.20.6)(typescript@5.5.4)(yaml@2.8.3)
       typescript:
         specifier: 5.5.4
         version: 5.5.4
@@ -1941,7 +1941,7 @@ importers:
   packages/python:
     dependencies:
       '@trigger.dev/core':
-        specifier: workspace:4.4.6
+        specifier: workspace:4.5.0-rc.1
         version: link:../core
       tinyexec:
         specifier: ^0.3.2
@@ -1951,10 +1951,10 @@ importers:
         specifier: ^0.15.4
         version: 0.15.4
       '@trigger.dev/build':
-        specifier: workspace:4.4.6
+        specifier: workspace:4.5.0-rc.1
         version: link:../build
       '@trigger.dev/sdk':
-        specifier: workspace:4.4.6
+        specifier: workspace:4.5.0-rc.1
         version: link:../trigger-sdk
       '@types/node':
         specifier: 20.14.14
@@ -1978,7 +1978,7 @@ importers:
   packages/react-hooks:
     dependencies:
       '@trigger.dev/core':
-        specifier: workspace:^4.4.6
+        specifier: workspace:^4.5.0-rc.1
         version: link:../core
       react:
         specifier: ^18.0 || ^19.0 || ^19.0.0-rc
@@ -2012,7 +2012,7 @@ importers:
   packages/redis-worker:
     dependencies:
       '@trigger.dev/core':
-        specifier: workspace:4.4.6
+        specifier: workspace:4.5.0-rc.1
         version: link:../core
       cron-parser:
         specifier: ^4.9.0
@@ -2061,7 +2061,7 @@ importers:
   packages/rsc:
     dependencies:
       '@trigger.dev/core':
-        specifier: workspace:^4.4.6
+        specifier: workspace:^4.5.0-rc.1
         version: link:../core
       mlly:
         specifier: ^1.7.1
@@ -2077,7 +2077,7 @@ importers:
         specifier: ^0.15.4
         version: 0.15.4
       '@trigger.dev/build':
-        specifier: workspace:^4.4.6
+        specifier: workspace:^4.5.0-rc.1
         version: link:../build
       '@types/node':
         specifier: 20.14.14
@@ -2153,7 +2153,7 @@ importers:
         specifier: 1.36.0
         version: 1.36.0
       '@trigger.dev/core':
-        specifier: workspace:4.4.6
+        specifier: workspace:4.5.0-rc.1
         version: link:../core
       chalk:
         specifier: ^5.2.0
@@ -2223,6 +2223,19 @@ importers:
         specifier: 3.25.76
         version: 3.25.76
 
+  references/agent-skills:
+    dependencies:
+      '@trigger.dev/build':
+        specifier: workspace:*
+        version: link:../../packages/build
+      '@trigger.dev/sdk':
+        specifier: workspace:*
+        version: link:../../packages/trigger-sdk
+    devDependencies:
+      trigger.dev:
+        specifier: workspace:*
+        version: link:../../packages/cli-v3
+
   references/ai-chat:
     dependencies:
       '@ai-sdk/anthropic':
@@ -38398,11 +38411,11 @@ snapshots:
       tsx: 4.17.0
       yaml: 2.8.3
 
-  postcss-load-config@6.0.1(jiti@2.4.2)(postcss@8.5.10)(tsx@4.20.6)(yaml@2.8.3):
+  postcss-load-config@6.0.1(jiti@2.6.1)(postcss@8.5.10)(tsx@4.20.6)(yaml@2.8.3):
     dependencies:
       lilconfig: 3.1.3
     optionalDependencies:
-      jiti: 2.4.2
+      jiti: 2.6.1
       postcss: 8.5.10
       tsx: 4.20.6
       yaml: 2.8.3
@@ -38975,7 +38988,7 @@ snapshots:
       react: 18.2.0
       react-dom: 18.2.0(react@18.2.0)
 
-  react-email@2.1.2(@opentelemetry/api@1.9.0)(@swc/helpers@0.5.15)(bufferutil@4.0.9)(eslint@8.31.0):
+  react-email@2.1.2(@opentelemetry/api@1.9.0)(@swc/helpers@0.5.15)(eslint@8.31.0):
     dependencies:
       '@babel/parser': 7.24.1
       '@radix-ui/colors': 1.0.1
@@ -39012,8 +39025,8 @@ snapshots:
       react: 18.3.1
       react-dom: 18.2.0(react@18.3.1)
       shelljs: 0.8.5
-      socket.io: 4.7.3(bufferutil@4.0.9)
-      socket.io-client: 4.7.3(bufferutil@4.0.9)
+      socket.io: 4.7.3
+      socket.io-client: 4.7.3
       sonner: 1.3.1(react-dom@18.2.0(react@18.3.1))(react@18.3.1)
       source-map-js: 1.0.2
       stacktrace-parser: 0.1.10
@@ -40269,7 +40282,7 @@ snapshots:
       - supports-color
       - utf-8-validate
 
-  socket.io-client@4.7.3(bufferutil@4.0.9):
+  socket.io-client@4.7.3:
     dependencies:
       '@socket.io/component-emitter': 3.1.0
       debug: 4.3.7(supports-color@10.0.0)
@@ -40298,7 +40311,7 @@ snapshots:
     transitivePeerDependencies:
       - supports-color
 
-  socket.io@4.7.3(bufferutil@4.0.9):
+  socket.io@4.7.3:
     dependencies:
       accepts: 1.3.8
       base64id: 2.0.0
@@ -41342,7 +41355,7 @@ snapshots:
       - tsx
       - yaml
 
-  tsup@8.4.0(@swc/core@1.3.101(@swc/helpers@0.5.15))(jiti@2.4.2)(postcss@8.5.10)(tsx@4.20.6)(typescript@5.5.4)(yaml@2.8.3):
+  tsup@8.4.0(@swc/core@1.3.101(@swc/helpers@0.5.15))(jiti@2.6.1)(postcss@8.5.10)(tsx@4.20.6)(typescript@5.5.4)(yaml@2.8.3):
     dependencies:
       bundle-require: 5.1.0(esbuild@0.25.1)
       cac: 6.7.14
@@ -41352,7 +41365,7 @@ snapshots:
       esbuild: 0.25.1
       joycon: 3.1.1
       picocolors: 1.1.1
-      postcss-load-config: 6.0.1(jiti@2.4.2)(postcss@8.5.10)(tsx@4.20.6)(yaml@2.8.3)
+      postcss-load-config: 6.0.1(jiti@2.6.1)(postcss@8.5.10)(tsx@4.20.6)(yaml@2.8.3)
       resolve-from: 5.0.0
       rollup: 4.60.1
       source-map: 0.8.0-beta.0
diff --git a/references/agent-skills/package.json b/references/agent-skills/package.json
new file mode 100644
index 00000000000..b8016c09279
--- /dev/null
+++ b/references/agent-skills/package.json
@@ -0,0 +1,16 @@
+{
+  "name": "references-agent-skills",
+  "private": true,
+  "type": "module",
+  "devDependencies": {
+    "trigger.dev": "workspace:*"
+  },
+  "dependencies": {
+    "@trigger.dev/build": "workspace:*",
+    "@trigger.dev/sdk": "workspace:*"
+  },
+  "scripts": {
+    "dev": "trigger dev",
+    "deploy": "trigger deploy"
+  }
+}
diff --git a/references/agent-skills/src/trigger/skills/greeter/SKILL.md b/references/agent-skills/src/trigger/skills/greeter/SKILL.md
new file mode 100644
index 00000000000..a824f138003
--- /dev/null
+++ b/references/agent-skills/src/trigger/skills/greeter/SKILL.md
@@ -0,0 +1,18 @@
+---
+name: greeter
+description: Say hello in different styles. Use when the user asks for a greeting or a friendly message.
+---
+
+# Greeter
+
+A tiny skill used to validate that the CLI bundles `SKILL.md` plus a `scripts/` subfolder into the deploy image and that `skill.local()` can read both at runtime.
+
+## When to use
+
+- When anyone asks for a greeting ("hello"): invoke `scripts/hello.sh [NAME]` and return its stdout.
+
+## Scripts
+
+### `scripts/hello.sh [NAME]`
+
+Prints `Hello, {NAME}!` (default `world`). Used to confirm `scripts/` is copied alongside `SKILL.md`.
diff --git a/references/agent-skills/src/trigger/skills/greeter/scripts/hello.sh b/references/agent-skills/src/trigger/skills/greeter/scripts/hello.sh
new file mode 100755
index 00000000000..b94fa92f76a
--- /dev/null
+++ b/references/agent-skills/src/trigger/skills/greeter/scripts/hello.sh
@@ -0,0 +1,4 @@
+#!/usr/bin/env bash
+set -euo pipefail
+NAME="${1:-world}"
+echo "Hello, ${NAME}!"
diff --git a/references/agent-skills/src/trigger/test-skill.ts b/references/agent-skills/src/trigger/test-skill.ts
new file mode 100644
index 00000000000..6a6de46665d
--- /dev/null
+++ b/references/agent-skills/src/trigger/test-skill.ts
@@ -0,0 +1,42 @@
+import { logger, skills, task } from "@trigger.dev/sdk";
+import { execFile } from "node:child_process";
+import { promisify } from "node:util";
+import { join } from "node:path";
+import { access, constants } from "node:fs/promises";
+
+const greeterSkill = skills.define({
+  id: "greeter",
+  path: "./skills/greeter",
+});
+
+// execFile runs the program directly instead of through a shell, so every
+// argument reaches it verbatim.
+const execFileAsync = promisify(execFile);
+
+/**
+ * Smoke test for skill bundling: resolves the `greeter` skill, checks that its
+ * `scripts/hello.sh` is executable, runs it and returns the trimmed stdout.
+ *
+ * @param payload Optional `name` forwarded to the script; defaults to "world".
+ * @returns The resolved skill id, path and frontmatter name plus the script output.
+ */
+export const testSkillTask = task({
+  id: "test-skill",
+  run: async (payload: { name?: string } = {}) => {
+    const resolved = await greeterSkill.local();
+
+    logger.info("Resolved skill", {
+      id: resolved.id,
+      version: resolved.version,
+      path: resolved.path,
+      frontmatterName: resolved.frontmatter.name,
+      frontmatterDescription: resolved.frontmatter.description,
+      bodyChars: resolved.body.length,
+    });
+
+    const scriptPath = join(resolved.path, "scripts", "hello.sh");
+    // Rejects when the script is missing or lacks the executable bit.
+    await access(scriptPath, constants.X_OK);
+
+    // Pass the path and the name as argv entries rather than interpolating
+    // them into a shell command line: a path containing spaces or a name
+    // containing shell metacharacters is delivered unchanged and cannot be
+    // word-split or interpreted as extra commands.
+    const { stdout } = await execFileAsync("bash", [scriptPath, payload.name ?? "world"]);
+    const output = stdout.trim();
+    logger.info("Script output", { output });
+
+    return {
+      skillId: resolved.id,
+      skillPath: resolved.path,
+      frontmatterName: resolved.frontmatter.name,
+      scriptOutput: output,
+    };
+  },
+});
diff --git a/references/agent-skills/trigger.config.ts b/references/agent-skills/trigger.config.ts
new file mode 100644
index 00000000000..5a5c9779107
--- /dev/null
+++ b/references/agent-skills/trigger.config.ts
@@ -0,0 +1,9 @@
+import { defineConfig } from "@trigger.dev/sdk";
+
+export default defineConfig({
+  project: "proj_zweffkxiuovfzsdtjvbe",
+  runtime: "node",
+  logLevel: "info",
+  maxDuration: 60,
+  dirs: ["./src/trigger"],
+});
diff --git a/references/agent-skills/tsconfig.json b/references/agent-skills/tsconfig.json
new file mode 100644
index 00000000000..2ca0ae6e2d7
--- /dev/null
+++ b/references/agent-skills/tsconfig.json
@@ -0,0 +1,13 @@
+{
+  "compilerOptions": {
+    "target": "ES2023",
+    "module": "Node16",
+    "moduleResolution": "Node16",
+    "esModuleInterop": true,
+    "strict": true,
+    "skipLibCheck": true,
+    "customConditions": ["@triggerdotdev/source"],
+    "noEmit": true
+  },
+  "include": ["./src/**/*.ts", "trigger.config.ts"]
+}
diff --git a/references/ai-chat/ARCHITECTURE.md b/references/ai-chat/ARCHITECTURE.md
new file mode 100644
index 00000000000..8adbc0c4a1a
--- /dev/null
+++ b/references/ai-chat/ARCHITECTURE.md
@@ -0,0 +1,311 @@
+# AI Chat Architecture
+
+## System Overview
+
+```mermaid
+graph TB
+    subgraph Frontend["Frontend (Browser)"]
+        UC[useChat Hook]
+        TCT[TriggerChatTransport]
+        UI[Chat UI Components]
+    end
+
+    subgraph Platform["Trigger.dev Platform"]
+        API[REST API]
+        RS[Realtime Streams]
+        RE[Run Engine]
+    end
+
+    subgraph Worker["Task Worker"]
+        CT[chat.task Turn Loop]
+        ST[streamText / AI SDK]
+        LLM[LLM Provider]
+        SUB[Subtasks via ai.tool]
+    end
+
+    UI -->|user types| UC
+    UC -->|sendMessages| TCT
+    TCT -->|triggerTask / sendInputStream| API
+    API -->|queue run / deliver input| RE
+    RE -->|execute| CT
+    CT -->|call| ST
+    ST -->|API call| LLM
+    LLM -->|stream chunks| ST
+    ST -->|UIMessageChunks| RS
+    RS -->|SSE| TCT
+    TCT -->|ReadableStream| UC
+    UC -->|update| UI
+    CT -->|triggerAndWait| SUB
+    SUB -->|chat.stream target:root| RS
+```
+
+## Detailed Flow: New Chat (First Message)
+
+```mermaid
+sequenceDiagram
+    participant User
+    participant useChat as useChat + Transport
+    participant API as Trigger.dev API
+    participant Task as chat.task Worker
+    participant LLM as LLM Provider
+
+    User->>useChat: sendMessage("Hello")
+    useChat->>useChat: No session for chatId → trigger new run
+
+    useChat->>API: triggerTask(payload, tags: [chat:id])
+    API-->>useChat: { runId, publicAccessToken }
+    useChat->>useChat: Store session, subscribe to SSE
+
+    API->>Task: Start run with ChatTaskWirePayload
+
+    Note over Task: Preload phase skipped (trigger ≠ "preload")
+
+    rect rgb(240, 248, 255)
+        Note over Task: Turn 0
+        Task->>Task: convertToModelMessages(uiMessages)
+        Task->>Task: Mint access token
+        Task->>Task: onChatStart({ chatId, messages, clientData })
+        Task->>Task: onTurnStart({ chatId, messages, uiMessages })
+        Task->>LLM: streamText({ model, messages, abortSignal })
+        LLM-->>Task: Stream response chunks
+        Task->>API: streams.pipe("chat", uiStream)
+        API-->>useChat: SSE: UIMessageChunks
+        useChat-->>User: Render streaming text
+        Task->>Task: onFinish → capturedResponseMessage
+        Task->>Task: Accumulate response in messages
+        Task->>API: Write __trigger_turn_complete chunk
+        API-->>useChat: SSE: { type: __trigger_turn_complete, publicAccessToken }
+        useChat->>useChat: Close stream, update session
+        Task->>Task: onTurnComplete({ messages, uiMessages, stopped })
+    end
+
+    rect rgb(255, 248, 240)
+        Note over Task: Wait for next message
+        Task->>Task: messagesInput.once() [warm, 30s]
+        Note over Task: No message → suspend
+        Task->>Task: messagesInput.wait() [suspended, 1h]
+    end
+```
+
+## Detailed Flow: Multi-Turn (Subsequent Messages)
+
+```mermaid
+sequenceDiagram
+    participant User
+    participant useChat as useChat + Transport
+    participant API as Trigger.dev API
+    participant Task as chat.task Worker
+    participant LLM as LLM Provider
+
+    Note over Task: Suspended, waiting for message
+
+    User->>useChat: sendMessage("Tell me more")
+    useChat->>useChat: Session exists → send via input stream
+    useChat->>API: sendInputStream(runId, "chat-messages", payload)
+    Note right of useChat: Only sends new message<br/>(not full history)
+
+    API->>Task: Deliver to messagesInput
+    Task->>Task: Wake from suspend
+
+    rect rgb(240, 248, 255)
+        Note over Task: Turn 1
+        Task->>Task: Append new message to accumulators
+        Task->>Task: Mint fresh access token
+        Task->>Task: onTurnStart({ turn: 1, messages })
+        Task->>LLM: streamText({ messages: [all accumulated] })
+        LLM-->>Task: Stream response
+        Task->>API: streams.pipe("chat", uiStream)
+        API-->>useChat: SSE: UIMessageChunks
+        useChat-->>User: Render streaming text
+        Task->>API: Write __trigger_turn_complete
+        Task->>Task: onTurnComplete({ turn: 1 })
+    end
+
+    Task->>Task: Wait for next message (warm → suspend)
+```
+
+## Stop Signal Flow
+
+```mermaid
+sequenceDiagram
+    participant User
+    participant useChat as useChat + Transport
+    participant API as Trigger.dev API
+    participant Task as chat.task Worker
+    participant LLM as LLM Provider
+
+    Note over Task: Streaming response...
+
+    User->>useChat: Click "Stop"
+    useChat->>API: sendInputStream(runId, "chat-stop", { stop: true })
+    useChat->>useChat: Set skipToTurnComplete = true
+
+    API->>Task: Deliver to stopInput
+    Task->>Task: stopController.abort()
+    Task->>LLM: AbortSignal fires
+    LLM-->>Task: Stream ends (AbortError)
+    Task->>Task: Catch AbortError, fall through
+    Task->>Task: await onFinishPromise (race condition fix)
+    Task->>Task: cleanupAbortedParts(responseMessage)
+    Note right of Task: Remove partial tool calls<br/>Mark streaming parts as done
+
+    Task->>API: Write __trigger_turn_complete
+    API-->>useChat: SSE: __trigger_turn_complete
+    useChat->>useChat: skipToTurnComplete = false, close stream
+
+    Task->>Task: onTurnComplete({ stopped: true, responseMessage: cleaned })
+    Task->>Task: Wait for next message
+```
+
+## Preload Flow
+
+```mermaid
+sequenceDiagram
+    participant User
+    participant useChat as useChat + Transport
+    participant API as Trigger.dev API
+    participant Task as chat.task Worker
+
+    User->>useChat: Click "New Chat"
+    useChat->>API: transport.preload(chatId)
+    Note right of useChat: payload: { messages: [], trigger: "preload" }<br/>tags: [chat:id, preload:true]
+    API-->>useChat: { runId, publicAccessToken }
+    useChat->>useChat: Store session
+
+    API->>Task: Start run (trigger = "preload")
+
+    rect rgb(240, 255, 240)
+        Note over Task: Preload Phase
+        Task->>Task: Mint access token
+        Task->>Task: onPreload({ chatId, clientData })
+        Note right of Task: DB setup, load user context,<br/>load dynamic tools
+        Task->>Task: messagesInput.once() [warm]
+        Note over Task: Waiting for first message...
+    end
+
+    Note over User: User is typing...
+
+    User->>useChat: sendMessage("Hello")
+    useChat->>useChat: Session exists → send via input stream
+    useChat->>API: sendInputStream(runId, "chat-messages", payload)
+    API->>Task: Deliver message
+
+    rect rgb(240, 248, 255)
+        Note over Task: Turn 0 (preloaded = true)
+        Task->>Task: onChatStart({ preloaded: true })
+        Task->>Task: onTurnStart({ preloaded: true })
+        Task->>Task: run() with preloaded dynamic tools ready
+    end
+```
+
+## Subtask Streaming (Tool as Task)
+
+```mermaid
+sequenceDiagram
+    participant useChat as useChat + Transport
+    participant API as Trigger.dev API
+    participant Chat as chat.task
+    participant LLM as LLM Provider
+    participant Sub as Subtask (ai.tool)
+
+    Chat->>LLM: streamText({ tools: { research: ai.tool(task) } })
+    LLM-->>Chat: Tool call: research({ query, urls })
+
+    Chat->>API: triggerAndWait(subtask, input)
+    Note right of Chat: Passes toolCallId, chatId,<br/>clientData via metadata
+
+    API->>Sub: Start subtask
+
+    Sub->>Sub: ai.chatContextOrThrow() → { chatId, clientData }
+    Sub->>API: chat.stream.writer({ target: "root" })
+    Note right of Sub: Write data-research-progress<br/>chunks to parent's stream
+    API-->>useChat: SSE: data-* chunks
+    useChat-->>useChat: Render progress UI
+
+    Sub-->>Chat: Return result
+    Chat->>LLM: Tool result
+    LLM-->>Chat: Continue response
+```
+
+## Continuation Flow (Run Timeout / Cancel)
+
+```mermaid
+sequenceDiagram
+    participant User
+    participant useChat as useChat + Transport
+    participant API as Trigger.dev API
+    participant Task as chat.task Worker
+
+    Note over Task: Previous run timed out / was cancelled
+
+    User->>useChat: sendMessage("Continue")
+    useChat->>API: sendInputStream(runId, "chat-messages", payload)
+    API-->>useChat: Error (run dead)
+
+    useChat->>useChat: Delete session, set isContinuation = true
+    useChat->>API: triggerTask(payload, continuation: true, previousRunId)
+    API-->>useChat: New { runId, publicAccessToken }
+
+    API->>Task: Start new run
+
+    rect rgb(255, 245, 238)
+        Note over Task: Turn 0 (continuation = true)
+        Task->>Task: cleanupAbortedParts(incoming messages)
+        Note right of Task: Strip incomplete tool calls<br/>from previous run's response
+        Task->>Task: onChatStart({ continuation: true, previousRunId })
+        Task->>Task: Normal turn flow...
+    end
+```
+
+## Hook Lifecycle
+
+```mermaid
+graph TD
+    START([Run Starts]) --> IS_PRELOAD{trigger = preload?}
+
+    IS_PRELOAD -->|Yes| PRELOAD[onPreload]
+    PRELOAD --> WAIT_MSG[Wait for first message<br/>warm → suspend]
+    WAIT_MSG --> TURN0
+
+    IS_PRELOAD -->|No| TURN0
+
+    TURN0[Turn 0] --> CHAT_START[onChatStart<br/>continuation, preloaded]
+    CHAT_START --> TURN_START_0[onTurnStart]
+    TURN_START_0 --> RUN_0[run → streamText]
+    RUN_0 --> TURN_COMPLETE_0[onTurnComplete<br/>stopped, responseMessage]
+
+    TURN_COMPLETE_0 --> WAIT{Wait for<br/>next message}
+    WAIT -->|Message arrives| TURN_N[Turn N]
+    WAIT -->|Timeout| END_RUN([Run Ends])
+
+    TURN_N --> TURN_START_N[onTurnStart]
+    TURN_START_N --> RUN_N[run → streamText]
+    RUN_N --> TURN_COMPLETE_N[onTurnComplete]
+    TURN_COMPLETE_N --> WAIT
+```
+
+## Stream Architecture
+
+```mermaid
+graph LR
+    subgraph Output["Output Stream (chat)"]
+        direction TB
+        O1[UIMessageChunks<br/>text, reasoning, tools]
+        O2[data-* custom chunks]
+        O3[__trigger_turn_complete<br/>control chunk]
+    end
+
+    subgraph Input["Input Streams"]
+        direction TB
+        I1[chat-messages<br/>User messages]
+        I2[chat-stop<br/>Stop signal]
+    end
+
+    Frontend -->|sendInputStream| I1
+    Frontend -->|sendInputStream| I2
+    I1 -->|messagesInput.once/wait| Worker
+    I2 -->|stopInput.on| Worker
+    Worker -->|streams.pipe / chat.stream| Output
+    Subtask -->|chat.stream target:root| Output
+    Output -->|SSE /realtime/v1/streams| Frontend
+```
diff --git a/references/ai-chat/cf-worker/.gitignore b/references/ai-chat/cf-worker/.gitignore
new file mode 100644
index 00000000000..8619bbe6b27
--- /dev/null
+++ b/references/ai-chat/cf-worker/.gitignore
@@ -0,0 +1,3 @@
+.wrangler/
+node_modules/
+*.log
diff --git a/references/ai-chat/cf-worker/README.md b/references/ai-chat/cf-worker/README.md
new file mode 100644
index 00000000000..8c9a733d73a
--- /dev/null
+++ b/references/ai-chat/cf-worker/README.md
@@ -0,0 +1,33 @@
+# cf-trust-test worker
+
+A minimal Cloudflare Worker that demonstrates the trusted-edge-signals pattern from [`docs/ai-chat/patterns/trusted-edge-signals`](../../../docs/ai-chat/patterns/trusted-edge-signals.mdx). The worker sits in front of the Trigger.dev API, intercepts the two body-write paths (`POST /api/v1/sessions` and `POST /realtime/v1/sessions/{id}/in/append`), and injects a server-trusted `__cf` namespace into the wire payload's `metadata` field. Everything else (SSE, auth, dashboard) passes through untouched.
+
+Pairs with the `cfTrustTestAgent` (task id `cf-trust-test`) defined in `src/trigger/chat.ts`, which declares the `__cf` namespace in its `clientDataSchema` and echoes the values back so the round-trip is visible in the streamed response.
+
+## Run it
+
+```bash
+# In references/ai-chat/cf-worker
+pnpm install
+pnpm run dev    # serves on http://localhost:8787, proxies to TRIGGER_API_UPSTREAM
+```
+
+Point the Next.js reference app at the worker by setting `TRIGGER_API_URL` and `NEXT_PUBLIC_TRIGGER_API_URL` to `http://localhost:8787` in `references/ai-chat/.env`. Then start trigger-dev and Next.js as usual.
+
+`wrangler dev` populates `request.cf` with the developer's real Cloudflare edge metadata even in local mode; the worker falls back to a hardcoded sample value for each field that `request.cf` does not provide (including when `request.cf` is unset entirely).
+
+## Wire-up for `.out` SSE direct (optional)
+
+By default the reference app routes every request through `NEXT_PUBLIC_TRIGGER_API_URL`, so SSE also flows through the worker. To skip the worker on the long-lived `.out` channel — which gives no body-mutation benefit and adds one extra edge hop per reconnect — switch the transport's `baseURL` to the function form:
+
+```ts
+const transport = useTriggerChatTransport({
+  // ...
+  baseURL: ({ endpoint }) =>
+    endpoint === "out"
+      ? "https://api.trigger.dev"
+      : process.env.NEXT_PUBLIC_TRIGGER_API_URL!,
+});
+```
+
+See [`docs/ai-chat/patterns/trusted-edge-signals`](../../../docs/ai-chat/patterns/trusted-edge-signals.mdx) for the full design — threat model, agent-side schema, deploy considerations.
diff --git a/references/ai-chat/cf-worker/package.json b/references/ai-chat/cf-worker/package.json
new file mode 100644
index 00000000000..3e1f8debe99
--- /dev/null
+++ b/references/ai-chat/cf-worker/package.json
@@ -0,0 +1,14 @@
+{
+  "name": "cf-trust-test-worker",
+  "version": "0.0.0",
+  "private": true,
+  "type": "module",
+  "scripts": {
+    "dev": "wrangler dev",
+    "deploy": "wrangler deploy"
+  },
+  "devDependencies": {
+    "@cloudflare/workers-types": "4.20240909.0",
+    "wrangler": "3.78.0"
+  }
+}
diff --git a/references/ai-chat/cf-worker/src/index.ts b/references/ai-chat/cf-worker/src/index.ts
new file mode 100644
index 00000000000..1e449153d1e
--- /dev/null
+++ b/references/ai-chat/cf-worker/src/index.ts
@@ -0,0 +1,124 @@
+/**
+ * cf-trust-test proxy. Validates that a trusted edge proxy can inject a
+ * namespaced metadata field (`__cf`) into trigger.dev's chat session-create
+ * and follow-up message wire payloads — and that the trigger.dev server passes
+ * it through to the agent untouched.
+ *
+ * Local dev: `wrangler dev` exposes the worker on http://localhost:8787 and
+ * forwards to TRIGGER_API_UPSTREAM. With `wrangler dev --remote` the worker
+ * runs on the CF edge and `request.cf` is populated with real signals. Any
+ * signal that is missing from `request.cf` falls back, field by field, to a
+ * hardcoded value that proves the plumbing without depending on a real CF edge.
+ */
+
+export interface Env {
+  TRIGGER_API_UPSTREAM: string;
+}
+
+type CfTrustData = {
+  botScore: number;
+  ja4: string;
+  asn: number;
+  country: string;
+};
+
+/**
+ * Read the edge trust signals from the request's `cf` object.
+ *
+ * Each field is looked up independently and replaced by a hardcoded sample
+ * value when it is `null` or `undefined`, so the result is always fully
+ * populated even if `cf` (or `cf.botManagement`) is absent.
+ */
+function readCfTrustData(request: Request): CfTrustData {
+  const { cf } = request as Request & { cf?: Record<string, unknown> };
+  const botManagement = cf?.botManagement as Record<string, unknown> | undefined;
+
+  const botScore = (botManagement?.score as number | undefined) ?? 95;
+  const ja4 =
+    (botManagement?.ja4 as string | undefined) ?? "t13d1715h2_5b57614c22b0_5c2c4ed3e2d9";
+  const asn = (cf?.asn as number | undefined) ?? 13335;
+  const country = (cf?.country as string | undefined) ?? "US";
+
+  return { botScore, ja4, asn, country };
+}
+
+/**
+ * Copy `response` and attach permissive CORS headers.
+ *
+ * The allowed origin echoes the request's `Origin` header (or `*` when there
+ * is none) and the allowed headers echo `Access-Control-Request-Headers` when
+ * the request sent it.
+ *
+ * @param response Upstream or synthetic response to decorate.
+ * @param request Incoming request the CORS values are derived from.
+ * @returns A new Response with the same body, status and status text.
+ */
+function withCors(response: Response, request: Request): Response {
+  const headers = new Headers(response.headers);
+  const origin = request.headers.get("origin") ?? "*";
+  const reqHeaders = request.headers.get("access-control-request-headers");
+  headers.set("Access-Control-Allow-Origin", origin);
+  // Append instead of set: a `Vary` value already present on the upstream
+  // response (e.g. `Accept-Encoding`) must survive alongside `Origin`.
+  headers.append("Vary", "Origin");
+  headers.set("Access-Control-Allow-Methods", "GET, POST, OPTIONS, PUT, PATCH, DELETE");
+  if (reqHeaders) headers.set("Access-Control-Allow-Headers", reqHeaders);
+  headers.set("Access-Control-Expose-Headers", "*");
+  headers.set("Access-Control-Allow-Credentials", "true");
+  return new Response(response.body, { status: response.status, statusText: response.statusText, headers });
+}
+
+/** Answer a CORS preflight with an empty 204 response carrying the CORS headers. */
+function handlePreflight(request: Request): Response {
+  const emptyNoContent = new Response(null, { status: 204 });
+  return withCors(emptyNoContent, request);
+}
+
+/**
+ * Return a copy of `metadata` whose `__cf` key holds the proxy-computed `cf`
+ * value. Any `__cf` already present in `metadata` is discarded, and the input
+ * object is not mutated.
+ */
+function setCfNamespace(
+  metadata: Record<string, unknown> | undefined,
+  cf: CfTrustData
+): Record<string, unknown> {
+  // Pull the caller-supplied `__cf` (if any) out so only the rest is copied.
+  const { __cf: _callerSupplied, ...rest } = metadata ?? {};
+  return { ...rest, __cf: cf };
+}
+
+/**
+ * Inject the trusted `__cf` namespace into a session-create request body at
+ * `triggerConfig.basePayload.metadata`, creating `triggerConfig` and
+ * `basePayload` when they are absent.
+ *
+ * A body that is malformed JSON, or that does not parse to a plain JSON
+ * object, is returned unchanged so the upstream API can reject it with its
+ * own validation error instead of this worker throwing.
+ *
+ * @param body Raw request body text.
+ * @param cf Trust data to store under `metadata.__cf`.
+ * @returns The re-serialized body, or `body` itself when it is not a JSON object.
+ */
+async function rewriteSessionCreateBody(body: string, cf: CfTrustData): Promise<string> {
+  let parsed: unknown;
+  try {
+    parsed = JSON.parse(body);
+  } catch {
+    return body;
+  }
+  // `null`, primitives and arrays have no `triggerConfig` to rewrite.
+  if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) return body;
+
+  const envelope = parsed as Record<string, unknown>;
+  const triggerConfig = (envelope.triggerConfig as Record<string, unknown> | undefined) ?? {};
+  const basePayload = (triggerConfig.basePayload as Record<string, unknown> | undefined) ?? {};
+  const metadata = basePayload.metadata as Record<string, unknown> | undefined;
+  envelope.triggerConfig = {
+    ...triggerConfig,
+    basePayload: { ...basePayload, metadata: setCfNamespace(metadata, cf) },
+  };
+  return JSON.stringify(envelope);
+}
+
+/**
+ * Inject the trusted `__cf` namespace into an append request body at
+ * `payload.metadata`, but only for records whose `kind` is `"message"`.
+ *
+ * The body is returned unchanged when it is malformed JSON, when it does not
+ * parse to an object, or when its `kind` is anything other than `"message"`.
+ *
+ * @param body Raw request body text.
+ * @param cf Trust data to store under `payload.metadata.__cf`.
+ * @returns The re-serialized body, or `body` itself when nothing was rewritten.
+ */
+async function rewriteAppendBody(body: string, cf: CfTrustData): Promise<string> {
+  let parsed: unknown;
+  try {
+    parsed = JSON.parse(body);
+  } catch {
+    return body;
+  }
+  // A body of JSON `null` parses successfully; reading `.kind` on it would throw.
+  if (typeof parsed !== "object" || parsed === null) return body;
+
+  const record = parsed as Record<string, unknown>;
+  if (record.kind !== "message") return body;
+  const payload = (record.payload as Record<string, unknown> | undefined) ?? {};
+  const metadata = payload.metadata as Record<string, unknown> | undefined;
+  record.payload = { ...payload, metadata: setCfNamespace(metadata, cf) };
+  return JSON.stringify(record);
+}
+
+export default {
+  /**
+   * Proxy the request to `env.TRIGGER_API_UPSTREAM`, keeping path and query.
+   *
+   * `OPTIONS` requests are answered locally as CORS preflights. For
+   * `POST /api/v1/sessions` and `POST /realtime/v1/sessions/{id}/in/append`
+   * a non-empty body is rewritten to carry the `__cf` trust namespace; every
+   * other body is forwarded as read. The upstream response is returned with
+   * CORS headers added.
+   */
+  async fetch(request: Request, env: Env): Promise<Response> {
+    const { method } = request;
+    if (method === "OPTIONS") return handlePreflight(request);
+
+    const upstream = new URL(env.TRIGGER_API_UPSTREAM);
+    const incoming = new URL(request.url);
+    const target = new URL(incoming.pathname + incoming.search, upstream);
+
+    const cf = readCfTrustData(request);
+    const isPost = method === "POST";
+    const isAppend =
+      isPost && /^\/realtime\/v1\/sessions\/[^/]+\/in\/append$/.test(incoming.pathname);
+    const isSessionsCreate = isPost && incoming.pathname === "/api/v1/sessions";
+
+    // GET and HEAD carry no body; everything else is read fully as text.
+    let body: BodyInit | null = null;
+    if (method !== "GET" && method !== "HEAD") {
+      const raw = await request.text();
+      if (!raw) {
+        body = raw;
+      } else if (isSessionsCreate) {
+        body = await rewriteSessionCreateBody(raw, cf);
+      } else if (isAppend) {
+        body = await rewriteAppendBody(raw, cf);
+      } else {
+        body = raw;
+      }
+    }
+
+    // The target host differs from the incoming one and a rewritten body may
+    // have a different length, so neither header is forwarded.
+    const headers = new Headers(request.headers);
+    headers.delete("host");
+    headers.delete("content-length");
+
+    const upstreamResponse = await fetch(target.toString(), {
+      method,
+      headers,
+      body,
+      redirect: "manual",
+    });
+
+    return withCors(upstreamResponse, request);
+  },
+};
diff --git a/references/ai-chat/cf-worker/tsconfig.json b/references/ai-chat/cf-worker/tsconfig.json
new file mode 100644
index 00000000000..7d45444baef
--- /dev/null
+++ b/references/ai-chat/cf-worker/tsconfig.json
@@ -0,0 +1,14 @@
+{
+  "compilerOptions": {
+    "target": "es2022",
+    "module": "es2022",
+    "moduleResolution": "bundler",
+    "lib": ["es2022"],
+    "types": ["@cloudflare/workers-types"],
+    "strict": true,
+    "noEmit": true,
+    "esModuleInterop": true,
+    "skipLibCheck": true
+  },
+  "include": ["src/**/*.ts"]
+}
diff --git a/references/ai-chat/cf-worker/wrangler.toml b/references/ai-chat/cf-worker/wrangler.toml
new file mode 100644
index 00000000000..e62a10cc1bf
--- /dev/null
+++ b/references/ai-chat/cf-worker/wrangler.toml
@@ -0,0 +1,10 @@
+name = "cf-trust-test-worker"
+main = "src/index.ts"
+compatibility_date = "2024-09-23"
+compatibility_flags = ["nodejs_compat"]
+
+[vars]
+TRIGGER_API_UPSTREAM = "https://api.trigger.dev"
+
+[dev]
+port = 8787
diff --git a/references/ai-chat/src/app/actions.ts b/references/ai-chat/src/app/actions.ts
index 0ef650cfc8c..4586aa3eeb3 100644
--- a/references/ai-chat/src/app/actions.ts
+++ b/references/ai-chat/src/app/actions.ts
@@ -8,6 +8,7 @@ import type {
   aiChatRaw,
   aiChatSession,
   upgradeTestAgent,
+  cfTrustTestAgent,
 } from "@/trigger/chat";
 import type { ChatUiMessage } from "@/lib/chat-tools-schemas";
 import { prisma } from "@/lib/prisma";
@@ -20,7 +21,8 @@ export type ChatReferenceTaskId =
   | "ai-chat-hydrated"
   | "ai-chat-raw"
   | "ai-chat-session"
-  | "upgrade-test";
+  | "upgrade-test"
+  | "cf-trust-test";
 
 function isChatReferenceTaskId(id: string): id is ChatReferenceTaskId {
   return (
@@ -28,7 +30,8 @@ function isChatReferenceTaskId(id: string): id is ChatReferenceTaskId {
     id === "ai-chat-hydrated" ||
     id === "ai-chat-raw" ||
     id === "ai-chat-session" ||
-    id === "upgrade-test"
+    id === "upgrade-test" ||
+    id === "cf-trust-test"
   );
 }
 
@@ -38,7 +41,8 @@ type TaskIdentifierForChat =
   | (typeof aiChatHydrated)["id"]
   | (typeof aiChatRaw)["id"]
   | (typeof aiChatSession)["id"]
-  | (typeof upgradeTestAgent)["id"];
+  | (typeof upgradeTestAgent)["id"]
+  | (typeof cfTrustTestAgent)["id"];
 
 /**
  * Server-mediated start: creates the Session row + triggers the first
@@ -70,6 +74,7 @@ const startActionByTaskId: Record<
   "ai-chat-raw": startChatSessionFor("ai-chat-raw"),
   "ai-chat-session": startChatSessionFor("ai-chat-session"),
   "upgrade-test": startChatSessionFor("upgrade-test"),
+  "cf-trust-test": startChatSessionFor("cf-trust-test"),
 };
 
 export async function startChatSession(input: {
diff --git a/references/ai-chat/src/components/chat-sidebar.tsx b/references/ai-chat/src/components/chat-sidebar.tsx
index e036eebc71a..9707b61ac36 100644
--- a/references/ai-chat/src/components/chat-sidebar.tsx
+++ b/references/ai-chat/src/components/chat-sidebar.tsx
@@ -118,6 +118,7 @@ export function ChatSidebar({
             <option value="ai-chat-session">ai-chat-session (session)</option>
             <option value="upgrade-test">upgrade-test (requestUpgrade after 3 turns)</option>
             <option value="stress-emit">stress-emit (UI stress test)</option>
+            <option value="cf-trust-test">cf-trust-test (Cloudflare proxy trust)</option>
           </select>
         </div>
         <label
diff --git a/references/ai-chat/src/trigger/chat-client-test.ts b/references/ai-chat/src/trigger/chat-client-test.ts
index 742246eca2a..a909034cfcf 100644
--- a/references/ai-chat/src/trigger/chat-client-test.ts
+++ b/references/ai-chat/src/trigger/chat-client-test.ts
@@ -272,7 +272,7 @@ export const orchestratorAgent = chat
         stop.reset();
 
         const messages = await conversation.addIncoming(
-          currentPayload.messages,
+          currentPayload.message ? [currentPayload.message] : [],
           currentPayload.trigger,
           turn
         );
diff --git a/references/ai-chat/src/trigger/chat.ts b/references/ai-chat/src/trigger/chat.ts
index b58b5c99e20..6033e9e3689 100644
--- a/references/ai-chat/src/trigger/chat.ts
+++ b/references/ai-chat/src/trigger/chat.ts
@@ -296,6 +296,34 @@ export const aiChat = chat
     },
     // #endregion
 
+    // #region onRecoveryBoot — emit a data-chat-recovery banner chunk
+    onRecoveryBoot: async (boot) => {
+      const { chatId, previousRunId, cause, writer } = boot;
+      const partialPresent = boot.partialAssistant !== undefined;
+      // Log only counts/flags for the recovered state, not the messages themselves.
+      logger.info("onRecoveryBoot fired", {
+        chatId,
+        previousRunId,
+        cause,
+        settledCount: boot.settledMessages.length,
+        inFlightUserCount: boot.inFlightUsers.length,
+        partialAssistantPresent: partialPresent,
+        pendingToolCallCount: boot.pendingToolCalls.length,
+      });
+
+      // Write the `data-chat-recovery` chunk, marked transient.
+      writer.write({
+        type: "data-chat-recovery",
+        data: {
+          cause,
+          previousRunId,
+          partialPresent,
+        },
+        transient: true,
+      });
+    },
+    // #endregion
+
     // #region onPreload — eagerly create chat/session DB rows before the first message
     onPreload: async ({ chatId, chatAccessToken, clientData }) => {
       if (!clientData) return;
@@ -631,7 +659,7 @@ export const aiChatRaw = chat.customAgent({
       stop.reset();
 
       const messages = await conversation.addIncoming(
-        currentPayload.messages,
+        currentPayload.message ? [currentPayload.message] : [],
         currentPayload.trigger,
         turn
       );
@@ -650,8 +678,7 @@ export const aiChatRaw = chat.customAgent({
       const combinedSignal = AbortSignal.any([runSignal, stop.signal]);
 
       const steeringSub = chat.messages.on(async (msg) => {
-        const lastMsg = msg.messages?.[msg.messages.length - 1];
-        if (lastMsg) await conversation.steerAsync(lastMsg);
+        if (msg.message) await conversation.steerAsync(msg.message);
       });
 
       const result = streamText({
@@ -998,3 +1025,41 @@ export const upgradeTestAgent = chat.agent({
     });
   },
 });
+
+// ============================================================================
+// cf-trust-test — validates that a trusted edge proxy (Cloudflare Worker) can
+// inject a namespaced metadata field that flows through `/api/v1/sessions` +
+// `/in/append` and lands typed in `clientData.__cf` on every turn.
+// ============================================================================
+
+export const cfTrustTestAgent = chat
+  .withClientData({
+    // `__cf` sits beside the app's own `userId`; all four of its fields are required.
+    schema: z.object({
+      userId: z.string(),
+      __cf: z.object({
+        botScore: z.number(),
+        ja4: z.string(),
+        asn: z.number(),
+        country: z.string(),
+      }),
+    }),
+  })
+  .agent({
+    id: "cf-trust-test",
+    idleTimeoutInSeconds: 60,
+    // Log the values this turn received.
+    onTurnStart: async ({ turn, clientData }) => {
+      const { __cf: cf, userId } = clientData!;
+      logger.info("cf-trust-test turn", { turn, cf, userId });
+    },
+    // Bake this turn's values into the system prompt so the reply shows what the agent saw.
+    run: async ({ messages, clientData, signal }) => {
+      const { botScore, ja4, asn, country } = clientData!.__cf;
+      const system =
+        "You are a test agent verifying trusted Cloudflare signal propagation. " +
+        "Echo the trust signal you were given on this turn exactly in this format, then stop:\n" +
+        `CF botScore=${botScore} ja4=${ja4} asn=${asn} country=${country}`;
+      return streamText({ model: openai("gpt-4o-mini"), system, messages, abortSignal: signal });
+    },
+  });
diff --git a/references/ai-chat/src/trigger/stress-emit.ts b/references/ai-chat/src/trigger/stress-emit.ts
index b9300c6ae25..7443eb21315 100644
--- a/references/ai-chat/src/trigger/stress-emit.ts
+++ b/references/ai-chat/src/trigger/stress-emit.ts
@@ -10,7 +10,7 @@
 // Defaults: 1000 chunks × 10 chars, single message.
 
 import { chat } from "@trigger.dev/sdk/ai";
-import { type UIMessage, simulateReadableStream, streamText } from "ai";
+import { type ModelMessage, simulateReadableStream, streamText } from "ai";
 import { MockLanguageModelV3 } from "ai/test";
 import type { LanguageModelV3StreamPart } from "@ai-sdk/provider";
 
@@ -20,10 +20,16 @@ type StressConfig = {
   manyMessages: boolean;
 };
 
-function parseConfig(messages: UIMessage[]): StressConfig {
+function parseConfig(messages: ModelMessage[]): StressConfig {
   const lastUser = [...messages].reverse().find((m) => m.role === "user");
-  const text =
-    lastUser?.parts?.[0]?.type === "text" ? lastUser.parts[0].text.trim() : "";
+  const content = lastUser?.content;
+  let text = "";
+  if (typeof content === "string") {
+    text = content.trim();
+  } else if (Array.isArray(content)) {
+    const textPart = content.find((p) => p.type === "text");
+    text = textPart && "text" in textPart ? textPart.text.trim() : "";
+  }
   const parts = text.split(/\s+/);
   const chunkCount = Number(parts[0]);
   const chunkSize = Number(parts[1]);
diff --git a/references/hello-world/src/trigger/dynamicImportReproChild.ts b/references/hello-world/src/trigger/dynamicImportReproChild.ts
new file mode 100644
index 00000000000..de65c71574b
--- /dev/null
+++ b/references/hello-world/src/trigger/dynamicImportReproChild.ts
@@ -0,0 +1,12 @@
+import { task } from "@trigger.dev/sdk";
+
+// Defined in a module that's loaded via `await import(...)` from the parent
+// task's run() function. Pre-fix: the task() call below fires while
+// `_currentFileContext` is unset, so the registration is silently dropped.
+// Post-fix: registered with sentinel file metadata + console.warn fires once.
+/** Minimal task: returns the payload's `value` under `received`. */
+export const lazyChildTask = task({
+  id: "lazy-child-task",
+  // Destructure in the signature — the body needs nothing but `value`.
+  run: async ({ value }: { value: string }) => ({ received: value }),
+});
diff --git a/references/hello-world/src/trigger/dynamicImportReproParent.ts b/references/hello-world/src/trigger/dynamicImportReproParent.ts
new file mode 100644
index 00000000000..ecafff8c7f0
--- /dev/null
+++ b/references/hello-world/src/trigger/dynamicImportReproParent.ts
@@ -0,0 +1,17 @@
+import { logger, task } from "@trigger.dev/sdk";
+
+// Triggers the dynamic-import silent-drop path. The child task's `task()`
+// definition lives in a module loaded via `await import(...)` inside this
+// parent's run() — so its registration would land outside the worker's
+// cold-load context window.
+export const dynamicImportReproParent = task({
+  id: "dynamic-import-repro-parent",
+  run: async () => {
+    logger.info("parent: about to dynamically import child task module");
+    const mod = await import("./dynamicImportReproChild.js"); // resolved lazily, at run time
+    logger.info("parent: import complete, triggering child");
+    const { id: childRunId } = await mod.lazyChildTask.trigger({ value: "hello from parent" });
+    logger.info("parent: child triggered", { childRunId });
+    return { childRunId };
+  },
+});