diff --git a/packages/opencode/src/session/message-v2.ts b/packages/opencode/src/session/message-v2.ts index 7260a8af2ebf..37d43c62ee68 100644 --- a/packages/opencode/src/session/message-v2.ts +++ b/packages/opencode/src/session/message-v2.ts @@ -684,7 +684,6 @@ export namespace MessageV2 { } if (msg.info.role === "assistant") { - const differentModel = `${model.providerID}/${model.id}` !== `${msg.info.providerID}/${msg.info.modelID}` const media: Array<{ mime: string; url: string }> = [] if ( @@ -706,7 +705,7 @@ export namespace MessageV2 { assistantMessage.parts.push({ type: "text", text: part.text, - ...(differentModel ? {} : { providerMetadata: part.metadata }), + providerMetadata: part.metadata, }) if (part.type === "step-start") assistantMessage.parts.push({ @@ -741,7 +740,7 @@ export namespace MessageV2 { toolCallId: part.callID, input: part.state.input, output, - ...(differentModel ? {} : { callProviderMetadata: part.metadata }), + callProviderMetadata: part.metadata, }) } if (part.state.status === "error") @@ -751,7 +750,7 @@ export namespace MessageV2 { toolCallId: part.callID, input: part.state.input, errorText: part.state.error, - ...(differentModel ? {} : { callProviderMetadata: part.metadata }), + callProviderMetadata: part.metadata, }) // Handle pending/running tool calls to prevent dangling tool_use blocks // Anthropic/Claude APIs require every tool_use to have a corresponding tool_result @@ -762,14 +761,14 @@ export namespace MessageV2 { toolCallId: part.callID, input: part.state.input, errorText: "[Tool execution was interrupted]", - ...(differentModel ? {} : { callProviderMetadata: part.metadata }), + callProviderMetadata: part.metadata, }) } if (part.type === "reasoning") { assistantMessage.parts.push({ type: "reasoning", text: part.text, - ...(differentModel ? 
{} : { providerMetadata: part.metadata }), + providerMetadata: part.metadata, }) } } diff --git a/packages/opencode/src/session/overflow.ts b/packages/opencode/src/session/overflow.ts index f0e52565d81f..4a42c39892e1 100644 --- a/packages/opencode/src/session/overflow.ts +++ b/packages/opencode/src/session/overflow.ts @@ -3,8 +3,6 @@ import type { Provider } from "@/provider/provider" import { ProviderTransform } from "@/provider/transform" import type { MessageV2 } from "./message-v2" -const COMPACTION_BUFFER = 20_000 - export function isOverflow(input: { cfg: Config.Info; tokens: MessageV2.Assistant["tokens"]; model: Provider.Model }) { if (input.cfg.compaction?.auto === false) return false const context = input.model.limit.context @@ -13,10 +11,11 @@ export function isOverflow(input: { cfg: Config.Info; tokens: MessageV2.Assistan const count = input.tokens.total || input.tokens.input + input.tokens.output + input.tokens.cache.read + input.tokens.cache.write - const reserved = - input.cfg.compaction?.reserved ?? Math.min(COMPACTION_BUFFER, ProviderTransform.maxOutputTokens(input.model)) - const usable = input.model.limit.input - ? input.model.limit.input - reserved - : context - ProviderTransform.maxOutputTokens(input.model) + // Reserve headroom so compaction triggers before the next turn overflows. + // maxOutputTokens() is capped at 32K (OUTPUT_TOKEN_MAX) regardless of the + // model's raw output limit, so this is never excessively aggressive. + // Users can override via config.compaction.reserved if needed (#12924). + const reserved = input.cfg.compaction?.reserved ?? ProviderTransform.maxOutputTokens(input.model) + const usable = input.model.limit.input ? 
input.model.limit.input - reserved : context - reserved return count >= usable } diff --git a/packages/opencode/test/session/compaction.test.ts b/packages/opencode/test/session/compaction.test.ts index f1d61babfaf5..29b7c0a04682 100644 --- a/packages/opencode/test/session/compaction.test.ts +++ b/packages/opencode/test/session/compaction.test.ts @@ -317,19 +317,19 @@ describe("session.compaction.isOverflow", () => { }) }) - // ─── Bug reproduction tests ─────────────────────────────────────────── - // These tests demonstrate that when limit.input is set, isOverflow() - // does not subtract any headroom for the next model response. This means - // compaction only triggers AFTER we've already consumed the full input - // budget, leaving zero room for the next API call's output tokens. + // ─── Headroom reservation tests ────────────────────────────────────── + // These tests verify that when limit.input is set, isOverflow() + // correctly reserves headroom (maxOutputTokens, capped at 32K) so + // compaction triggers before the next API call overflows. // - // Compare: without limit.input, usable = context - output (reserves space). - // With limit.input, usable = limit.input (reserves nothing). + // Previously (bug), the limit.input path only subtracted a 20K buffer + // while the non-input path subtracted the full maxOutputTokens — an + // asymmetry that let sessions grow ~12K tokens too large before compacting. 
// // Related issues: #10634, #8089, #11086, #12621 // Open PRs: #6875, #12924 - test("BUG: no headroom when limit.input is set — compaction should trigger near boundary but does not", async () => { + test("headroom reserved when limit.input is set — compaction triggers near boundary", async () => { await using tmp = await tmpdir() await Instance.provide({ directory: tmp.path, @@ -355,7 +355,7 @@ describe("session.compaction.isOverflow", () => { }) }) - test("BUG: without limit.input, same token count correctly triggers compaction", async () => { + test("without limit.input, same token count correctly triggers compaction", async () => { await using tmp = await tmpdir() await Instance.provide({ directory: tmp.path, @@ -375,7 +375,7 @@ describe("session.compaction.isOverflow", () => { }) }) - test("BUG: asymmetry — limit.input model allows 30K more usage before compaction than equivalent model without it", async () => { + test("no asymmetry — limit.input model does not allow more usage than equivalent model without it", async () => { await using tmp = await tmpdir() await Instance.provide({ directory: tmp.path, @@ -384,7 +384,7 @@ describe("session.compaction.isOverflow", () => { const withInputLimit = createModel({ context: 200_000, input: 200_000, output: 32_000 }) const withoutInputLimit = createModel({ context: 200_000, output: 32_000 }) - // 170K total tokens — well above context-output (168K) but below input limit (200K) + // 181K total tokens — above usable (context - maxOutput = 168K) const tokens = { input: 166_000, output: 10_000, reasoning: 0, cache: { read: 5_000, write: 0 } } const withLimit = await SessionCompaction.isOverflow({ tokens, model: withInputLimit }) diff --git a/packages/opencode/test/session/message-v2.test.ts b/packages/opencode/test/session/message-v2.test.ts index 3634d6fb7ec8..2efb3e96f676 100644 --- a/packages/opencode/test/session/message-v2.test.ts +++ b/packages/opencode/test/session/message-v2.test.ts @@ -57,6 +57,17 @@ const model:
Provider.Model = { release_date: "2026-01-01", } +const model2: Provider.Model = { + ...model, + id: "other-model", + providerID: "other", + api: { + ...model.api, + id: "other-model", + }, + name: "Other Model", +} + function userInfo(id: string): MessageV2.User { return { id, @@ -359,7 +370,90 @@ describe("session.message-v2.toModelMessage", () => { ]) }) - test("omits provider metadata when assistant model differs", async () => { + test("preserves reasoning providerMetadata when model matches", async () => { + const assistantID = "m-assistant" + + const input: MessageV2.WithParts[] = [ + { + info: assistantInfo(assistantID, "m-parent"), + parts: [ + { + ...basePart(assistantID, "a1"), + type: "reasoning", + text: "thinking", + metadata: { openai: { signature: "sig-match" } }, + time: { start: 0 }, + }, + ] as MessageV2.Part[], + }, + ] + + expect(await MessageV2.toModelMessages(input, model)).toStrictEqual([ + { + role: "assistant", + content: [{ type: "reasoning", text: "thinking", providerOptions: { openai: { signature: "sig-match" } } }], + }, + ]) + }) + + test("preserves reasoning providerMetadata when model differs", async () => { + const assistantID = "m-assistant" + + const input: MessageV2.WithParts[] = [ + { + info: assistantInfo(assistantID, "m-parent", undefined, { + providerID: model2.providerID, + modelID: model2.api.id, + }), + parts: [ + { + ...basePart(assistantID, "a1"), + type: "reasoning", + text: "thinking", + metadata: { openai: { signature: "sig-different" } }, + time: { start: 0 }, + }, + ] as MessageV2.Part[], + }, + ] + + expect(await MessageV2.toModelMessages(input, model)).toStrictEqual([ + { + role: "assistant", + content: [{ type: "reasoning", text: "thinking", providerOptions: { openai: { signature: "sig-different" } } }], + }, + ]) + }) + + test("preserves text providerMetadata when model differs", async () => { + const assistantID = "m-assistant" + + const input: MessageV2.WithParts[] = [ + { + info: assistantInfo(assistantID, 
"m-parent", undefined, { + providerID: model2.providerID, + modelID: model2.api.id, + }), + parts: [ + { + ...basePart(assistantID, "a1"), + type: "text", + text: "done", + metadata: { openai: { assistant: "meta" } }, + }, + ] as MessageV2.Part[], + }, + ] + + expect(await MessageV2.toModelMessages(input, model)).toStrictEqual([ + { + role: "assistant", + content: [{ type: "text", text: "done", providerOptions: { openai: { assistant: "meta" } } }], + }, + ]) + }) + + test("preserves tool callProviderMetadata when model differs", async () => { const userID = "m-user" const assistantID = "m-assistant" @@ -375,16 +469,97 @@ describe("session.message-v2.toModelMessage", () => { ] as MessageV2.Part[], }, { - info: assistantInfo(assistantID, userID, undefined, { providerID: "other", modelID: "other" }), + info: assistantInfo(assistantID, userID, undefined, { + providerID: model2.providerID, + modelID: model2.api.id, + }), + parts: [ + { + ...basePart(assistantID, "a1"), + type: "tool", + callID: "call-1", + tool: "bash", + state: { + status: "completed", + input: { cmd: "ls" }, + output: "ok", + title: "Bash", + metadata: {}, + time: { start: 0, end: 1 }, + }, + metadata: { openai: { tool: "meta" } }, + }, + ] as MessageV2.Part[], + }, + ] + + expect(await MessageV2.toModelMessages(input, model)).toStrictEqual([ + { + role: "user", + content: [{ type: "text", text: "run tool" }], + }, + { + role: "assistant", + content: [ + { + type: "tool-call", + toolCallId: "call-1", + toolName: "bash", + input: { cmd: "ls" }, + providerExecuted: undefined, + providerOptions: { openai: { tool: "meta" } }, + }, + ], + }, + { + role: "tool", + content: [ + { + type: "tool-result", + toolCallId: "call-1", + toolName: "bash", + output: { type: "text", value: "ok" }, + providerOptions: { openai: { tool: "meta" } }, + }, + ], + }, + ]) + }) + + test("handles undefined metadata gracefully", async () => { + const userID = "m-user" + const assistantID = "m-assistant" + + const input: 
MessageV2.WithParts[] = [ + { + info: userInfo(userID), + parts: [ + { + ...basePart(userID, "u1"), + type: "text", + text: "run tool", + }, + ] as MessageV2.Part[], + }, + { + info: assistantInfo(assistantID, userID, undefined, { + providerID: model2.providerID, + modelID: model2.api.id, + }), parts: [ { ...basePart(assistantID, "a1"), type: "text", text: "done", - metadata: { openai: { assistant: "meta" } }, }, { ...basePart(assistantID, "a2"), + type: "reasoning", + text: "thinking", + time: { start: 0 }, + }, + { + ...basePart(assistantID, "a3"), type: "tool", callID: "call-1", tool: "bash", @@ -396,7 +571,6 @@ describe("session.message-v2.toModelMessage", () => { metadata: {}, time: { start: 0, end: 1 }, }, - metadata: { openai: { tool: "meta" } }, }, ] as MessageV2.Part[], }, @@ -411,6 +585,7 @@ describe("session.message-v2.toModelMessage", () => { role: "assistant", content: [ { type: "text", text: "done" }, + { type: "reasoning", text: "thinking", providerOptions: undefined }, { type: "tool-call", toolCallId: "call-1",