From 3e0b40039c72c0e033c7cd0efd5f8827c4060d4f Mon Sep 17 00:00:00 2001 From: Aiden Cline Date: Mon, 9 Feb 2026 18:40:08 -0600 Subject: [PATCH 01/19] wip --- packages/opencode/src/session/compaction.ts | 5 +++++ packages/opencode/src/session/index.ts | 17 ++++++++++++----- packages/opencode/src/session/message-v2.ts | 1 + packages/sdk/js/src/v2/gen/types.gen.ts | 1 + 4 files changed, 19 insertions(+), 5 deletions(-) diff --git a/packages/opencode/src/session/compaction.ts b/packages/opencode/src/session/compaction.ts index 73a70af9d46a..e85f0e592aa2 100644 --- a/packages/opencode/src/session/compaction.ts +++ b/packages/opencode/src/session/compaction.ts @@ -32,6 +32,11 @@ export namespace SessionCompaction { if (config.compaction?.auto === false) return false const context = input.model.limit.context if (context === 0) return false + + const usageTokens = + input.tokens.total || + input.tokens.input + input.tokens.output + input.tokens.cache.read + input.tokens.cache.write + const count = input.tokens.input + input.tokens.cache.read + input.tokens.output const output = Math.min(input.model.limit.output, SessionPrompt.OUTPUT_TOKEN_MAX) || SessionPrompt.OUTPUT_TOKEN_MAX const usable = input.model.limit.input || context - output diff --git a/packages/opencode/src/session/index.ts b/packages/opencode/src/session/index.ts index 556fad01f59a..84efc2fd6824 100644 --- a/packages/opencode/src/session/index.ts +++ b/packages/opencode/src/session/index.ts @@ -22,6 +22,7 @@ import { Snapshot } from "@/snapshot" import type { Provider } from "@/provider/provider" import { PermissionNext } from "@/permission/next" import { Global } from "@/global" +import type { LanguageModelV2Usage } from "@ai-sdk/provider" export namespace Session { const log = Log.create({ service: "session" }) @@ -439,10 +440,15 @@ export namespace Session { export const getUsage = fn( z.object({ model: z.custom(), - usage: z.custom(), + usage: z.custom(), metadata: z.custom().optional(), }), (input) => { + const inputTokens = input.usage.inputTokens ?? 0 + const outputTokens = input.usage.outputTokens ?? 0 + const reasoningTokens = input.usage.reasoningTokens ?? 0 + // input.usage. + const cacheReadInputTokens = input.usage.cachedInputTokens ?? 0 const cacheWriteInputTokens = (input.metadata?.["anthropic"]?.["cacheCreationInputTokens"] ?? // @ts-expect-error @@ -453,17 +459,18 @@ export namespace Session { const excludesCachedTokens = !!(input.metadata?.["anthropic"] || input.metadata?.["bedrock"]) const adjustedInputTokens = excludesCachedTokens - ? (input.usage.inputTokens ?? 0) - : (input.usage.inputTokens ?? 0) - cacheReadInputTokens - cacheWriteInputTokens + ? inputTokens + : inputTokens - cacheReadInputTokens - cacheWriteInputTokens const safe = (value: number) => { if (!Number.isFinite(value)) return 0 return value } const tokens = { + total: input.usage.totalTokens, input: safe(adjustedInputTokens), - output: safe(input.usage.outputTokens ?? 0), - reasoning: safe(input.usage?.reasoningTokens ?? 
0), + output: safe(outputTokens), + reasoning: safe(reasoningTokens), cache: { write: safe(cacheWriteInputTokens), read: safe(cacheReadInputTokens), diff --git a/packages/opencode/src/session/message-v2.ts b/packages/opencode/src/session/message-v2.ts index e45bfc772869..c2d089fc3791 100644 --- a/packages/opencode/src/session/message-v2.ts +++ b/packages/opencode/src/session/message-v2.ts @@ -383,6 +383,7 @@ export namespace MessageV2 { summary: z.boolean().optional(), cost: z.number(), tokens: z.object({ + total: z.number().optional(), input: z.number(), output: z.number(), reasoning: z.number(), diff --git a/packages/sdk/js/src/v2/gen/types.gen.ts b/packages/sdk/js/src/v2/gen/types.gen.ts index 9543e5b5796d..eedddbe511a1 100644 --- a/packages/sdk/js/src/v2/gen/types.gen.ts +++ b/packages/sdk/js/src/v2/gen/types.gen.ts @@ -203,6 +203,7 @@ export type AssistantMessage = { summary?: boolean cost: number tokens: { + total?: number input: number output: number reasoning: number From bdd108be2e3dadb8f32088dc7d951d9148962aeb Mon Sep 17 00:00:00 2001 From: Aiden Cline Date: Mon, 9 Feb 2026 23:09:08 -0600 Subject: [PATCH 02/19] wip --- packages/opencode/src/session/index.ts | 7 + packages/opencode/src/session/message-v2.ts | 1 + packages/opencode/src/session/tokens.txt | 199 ++++++++++++++++++++ 3 files changed, 207 insertions(+) create mode 100644 packages/opencode/src/session/tokens.txt diff --git a/packages/opencode/src/session/index.ts b/packages/opencode/src/session/index.ts index 84efc2fd6824..e6cec7cb9358 100644 --- a/packages/opencode/src/session/index.ts +++ b/packages/opencode/src/session/index.ts @@ -457,6 +457,10 @@ export namespace Session { input.metadata?.["venice"]?.["usage"]?.["cacheCreationInputTokens"] ?? 0) as number + // OpenRouter provides inputTokens as the total count of input tokens (including cached). + // AFAIK other providers (OpenRouter/OpenAI/Gemini etc.) do it the same way e.g. vercel/ai#8794 (comment) + // Anthropic does it differently though - inputTokens doesn't include cached tokens. + // It looks like OpenCode's cost calculation assumes all providers return inputTokens the same way Anthropic does (I'm guessing getUsage logic was originally implemented with anthropic), so it's causing incorrect cost calculation for OpenRouter and others. const excludesCachedTokens = !!(input.metadata?.["anthropic"] || input.metadata?.["bedrock"]) const adjustedInputTokens = excludesCachedTokens ? 
inputTokens @@ -465,6 +469,9 @@ export namespace Session { if (!Number.isFinite(value)) return 0 return value } + // Anthropic doesn't provide total_tokens, compute from components + // output.usage.totalTokens = + // output.usage.input + output.usage.output + output.usage.cacheRead + output.usage.cacheWrite; const tokens = { total: input.usage.totalTokens, diff --git a/packages/opencode/src/session/message-v2.ts b/packages/opencode/src/session/message-v2.ts index c2d089fc3791..63159ecc50c7 100644 --- a/packages/opencode/src/session/message-v2.ts +++ b/packages/opencode/src/session/message-v2.ts @@ -210,6 +210,7 @@ export namespace MessageV2 { snapshot: z.string().optional(), cost: z.number(), tokens: z.object({ + total: z.number().optional(), input: z.number(), output: z.number(), reasoning: z.number(), diff --git a/packages/opencode/src/session/tokens.txt b/packages/opencode/src/session/tokens.txt new file mode 100644 index 000000000000..bf6c205823ec --- /dev/null +++ b/packages/opencode/src/session/tokens.txt @@ -0,0 +1,199 @@ +## Openai + +--- Real --- +{ + "usage": { + "input_tokens": 14195, + "input_tokens_details": { + "cached_tokens": 12032 + }, + "output_tokens": 377, + "output_tokens_details": { + "reasoning_tokens": 41 + }, + "total_tokens": 14572 + } +} + +--- Calculated --- +{ + "tokens": { + "total": 14572, + "input": 2163, + "output": 377, + "reasoning": 41, + "cache": { + "read": 12032, + "write": 0 + } + } +} + +## Anthropic + +--- Real --- +{ + "usage": { + "input_tokens": 4, + "cache_creation_input_tokens": 2466, + "cache_read_input_tokens": 18873, + "output_tokens": 346 + } +} + +--- Calculated --- +{ + "tokens": { + "total": 350, + "input": 4, + "output": 346, + "reasoning": 0, + "cache": { + "read": 18873, + "write": 2466 + } + } +} + +## Bedrock + +--- Real --- +{ + "usage": { + "cacheReadInputTokenCount": 16138, + "cacheReadInputTokens": 16138, + "cacheWriteInputTokenCount": 2571, + "cacheWriteInputTokens": 2571, + "inputTokens": 4, + "outputTokens": 358, + "serverToolUsage": {}, + "totalTokens": 19071 + } +} + +--- Calculated --- +{ + "tokens": { + "total": 362, + "input": 4, + "output": 358, + "reasoning": 0, + "cache": { + "read": 16138, + "write": 2571 + } + } +} + +## Google + +--- Real --- +{ + "usageMetadata": { + "promptTokenCount": 19435, + "candidatesTokenCount": 291, + "totalTokenCount": 19726, + "cachedContentTokenCount": 11447, + "trafficType": "ON_DEMAND", + "promptTokensDetails": [ + { + "modality": "TEXT", + "tokenCount": 19435 + } + ], + "cacheTokensDetails": [ + { + "modality": "TEXT", + "tokenCount": 11447 + } + ], + "candidatesTokensDetails": [ + { + "modality": "TEXT", + "tokenCount": 291 + } + ] + } +} + +--- Calculated --- +{ + "tokens": { + "total": 19726, + "input": 7988, + "output": 291, + "reasoning": 0, + "cache": { + "read": 11447, + "write": 0 + } + } +} + + +## Github Copilot + +--- Real --- +{ + "usage": { + "completion_tokens": 448, + "prompt_tokens": 21172, + "prompt_tokens_details": { + "cached_tokens": 18702 + }, + "total_tokens": 21620 + } +} + +--- Calculated --- +{ + "tokens": { + "total": 21620, + "input": 2470, + "output": 448, + "reasoning": 0, + "cache": { + "read": 18702, + "write": 0 + } + } +} + +## OpenRouter + +--- Real --- +{ + "usage": { + "prompt_tokens": 14145, + "completion_tokens": 447, + "total_tokens": 14592, + "cost": 0.02215125, + "is_byok": false, + "prompt_tokens_details": { + "cached_tokens": 0 + }, + "cost_details": { + "upstream_inference_cost": 0.02215125, + "upstream_inference_prompt_cost": 0.01768125, 
+ "upstream_inference_completions_cost": 0.00447 + }, + "completion_tokens_details": { + "reasoning_tokens": 64, + "image_tokens": 0 + } + } +} + +--- Calculated --- +{ + "tokens": { + "total": 14592, + "input": 14145, + "output": 447, + "reasoning": 64, + "cache": { + "read": 0, + "write": 0 + } + } +} From a794489b10f73c189a3e0901d2cf69069f08f696 Mon Sep 17 00:00:00 2001 From: Aiden Cline Date: Mon, 9 Feb 2026 23:21:28 -0600 Subject: [PATCH 03/19] fix total token calculations --- packages/opencode/src/session/compaction.ts | 3 +- packages/opencode/src/session/index.ts | 65 ++++++++++++--------- 2 files changed, 38 insertions(+), 30 deletions(-) diff --git a/packages/opencode/src/session/compaction.ts b/packages/opencode/src/session/compaction.ts index e85f0e592aa2..a24c94374de4 100644 --- a/packages/opencode/src/session/compaction.ts +++ b/packages/opencode/src/session/compaction.ts @@ -33,11 +33,10 @@ export namespace SessionCompaction { const context = input.model.limit.context if (context === 0) return false - const usageTokens = + const count = input.tokens.total || input.tokens.input + input.tokens.output + input.tokens.cache.read + input.tokens.cache.write - const count = input.tokens.input + input.tokens.cache.read + input.tokens.output const output = Math.min(input.model.limit.output, SessionPrompt.OUTPUT_TOKEN_MAX) || SessionPrompt.OUTPUT_TOKEN_MAX const usable = input.model.limit.input || context - output return count > usable diff --git a/packages/opencode/src/session/index.ts b/packages/opencode/src/session/index.ts index e6cec7cb9358..4c5cdaf5738a 100644 --- a/packages/opencode/src/session/index.ts +++ b/packages/opencode/src/session/index.ts @@ -23,6 +23,7 @@ import type { Provider } from "@/provider/provider" import { PermissionNext } from "@/permission/next" import { Global } from "@/global" import type { LanguageModelV2Usage } from "@ai-sdk/provider" +import { iife } from "@/util/iife" export namespace Session { const log = Log.create({ service: "session" }) @@ -437,6 +438,11 @@ export namespace Session { return part }) + const safe = (value: number) => { + if (!Number.isFinite(value)) return 0 + return value + } + export const getUsage = fn( z.object({ model: z.custom(), @@ -444,43 +450,46 @@ export namespace Session { metadata: z.custom().optional(), }), (input) => { - const inputTokens = input.usage.inputTokens ?? 0 - const outputTokens = input.usage.outputTokens ?? 0 - const reasoningTokens = input.usage.reasoningTokens ?? 0 - // input.usage. - - const cacheReadInputTokens = input.usage.cachedInputTokens ?? 0 - const cacheWriteInputTokens = (input.metadata?.["anthropic"]?.["cacheCreationInputTokens"] ?? - // @ts-expect-error - input.metadata?.["bedrock"]?.["usage"]?.["cacheWriteInputTokens"] ?? - // @ts-expect-error - input.metadata?.["venice"]?.["usage"]?.["cacheCreationInputTokens"] ?? - 0) as number + const inputTokens = safe(input.usage.inputTokens ?? 0) + const outputTokens = safe(input.usage.outputTokens ?? 0) + const reasoningTokens = safe(input.usage.reasoningTokens ?? 0) + + const cacheReadInputTokens = safe(input.usage.cachedInputTokens ?? 0) + const cacheWriteInputTokens = safe( + (input.metadata?.["anthropic"]?.["cacheCreationInputTokens"] ?? + // @ts-expect-error + input.metadata?.["bedrock"]?.["usage"]?.["cacheWriteInputTokens"] ?? + // @ts-expect-error + input.metadata?.["venice"]?.["usage"]?.["cacheCreationInputTokens"] ?? + 0) as number, + ) // OpenRouter provides inputTokens as the total count of input tokens (including cached).
// AFAIK other providers (OpenRouter/OpenAI/Gemini etc.) do it the same way e.g. vercel/ai#8794 (comment) // Anthropic does it differently though - inputTokens doesn't include cached tokens. // It looks like OpenCode's cost calculation assumes all providers return inputTokens the same way Anthropic does (I'm guessing getUsage logic was originally implemented with anthropic), so it's causing incorrect cost calculation for OpenRouter and others. const excludesCachedTokens = !!(input.metadata?.["anthropic"] || input.metadata?.["bedrock"]) - const adjustedInputTokens = excludesCachedTokens - ? inputTokens - : inputTokens - cacheReadInputTokens - cacheWriteInputTokens - const safe = (value: number) => { - if (!Number.isFinite(value)) return 0 - return value - } - // Anthropic doesn't provide total_tokens, compute from components - // output.usage.totalTokens = - // output.usage.input + output.usage.output + output.usage.cacheRead + output.usage.cacheWrite; + const adjustedInputTokens = safe( + excludesCachedTokens ? inputTokens : inputTokens - cacheReadInputTokens - cacheWriteInputTokens, + ) + + const total = iife(() => { + // Anthropic doesn't provide total_tokens, also ai sdk will vastly undercount if we + // don't compute from components + if (input.model.api.npm === "@ai-sdk/anthropic" || input.model.api.npm === "@ai-sdk/bedrock") { + return adjustedInputTokens + outputTokens + cacheReadInputTokens + cacheWriteInputTokens + } + return input.usage.totalTokens + }) const tokens = { - total: input.usage.totalTokens, - input: safe(adjustedInputTokens), - output: safe(outputTokens), - reasoning: safe(reasoningTokens), + total, + input: adjustedInputTokens, + output: outputTokens, + reasoning: reasoningTokens, cache: { - write: safe(cacheWriteInputTokens), - read: safe(cacheReadInputTokens), + write: cacheWriteInputTokens, + read: cacheReadInputTokens, }, } From 3cf3325240d9b7ba52d71372fa95962c2e500e58 Mon Sep 17 00:00:00 2001 From: Aiden Cline Date: Tue, 10 Feb 2026 00:12:30 -0600 Subject: [PATCH 04/19] wip --- packages/opencode/src/provider/transform.ts | 28 +----- packages/opencode/src/session/compaction.ts | 4 +- packages/opencode/src/session/index.ts | 2 +- packages/opencode/src/session/llm.ts | 12 +-- packages/opencode/src/session/prompt.ts | 1 - .../opencode/test/provider/transform.test.ts | 94 ------------------- packages/opencode/test/session/llm.test.ts | 32 +------ 7 files changed, 14 insertions(+), 159 deletions(-) diff --git a/packages/opencode/src/provider/transform.ts b/packages/opencode/src/provider/transform.ts index 01291491d323..7ed89cb2599c 100644 --- a/packages/opencode/src/provider/transform.ts +++ b/packages/opencode/src/provider/transform.ts @@ -5,6 +5,7 @@ import type { JSONSchema } from "zod/v4/core" import type { Provider } from "./provider" import type { ModelsDev } from "./models" import { iife } from "@/util/iife" +import { Flag } from "@/flag/flag" type Modality = NonNullable["input"][number] @@ -17,6 +18,8 @@ function mimeToModality(mime: string): Modality | undefined { } export namespace ProviderTransform { + export const OUTPUT_TOKEN_MAX = Flag.OPENCODE_EXPERIMENTAL_OUTPUT_TOKEN_MAX || 32_000 + // Maps npm package to the key the AI SDK expects for providerOptions function sdkKey(npm: string): string | undefined { switch (npm) { @@ -723,29 +726,8 @@ export namespace ProviderTransform { return { [key]: options } } - export function maxOutputTokens( - npm: string, - options: Record, - modelLimit: number, - globalLimit: number, - ): number { - const modelCap = 
modelLimit || globalLimit - const standardLimit = Math.min(modelCap, globalLimit) - - if (npm === "@ai-sdk/anthropic" || npm === "@ai-sdk/google-vertex/anthropic") { - const thinking = options?.["thinking"] - const budgetTokens = typeof thinking?.["budgetTokens"] === "number" ? thinking["budgetTokens"] : 0 - const enabled = thinking?.["type"] === "enabled" - if (enabled && budgetTokens > 0) { - // Return text tokens so that text + thinking <= model cap, preferring 32k text when possible. - if (budgetTokens + standardLimit <= modelCap) { - return standardLimit - } - return modelCap - budgetTokens - } - } - - return standardLimit + export function maxOutputTokens(model: Provider.Model): number { + return Math.min(model.limit.output, OUTPUT_TOKEN_MAX) || OUTPUT_TOKEN_MAX } export function schema(model: Provider.Model, schema: JSONSchema.BaseSchema | JSONSchema7): JSONSchema7 { diff --git a/packages/opencode/src/session/compaction.ts b/packages/opencode/src/session/compaction.ts index a24c94374de4..e109a86543fd 100644 --- a/packages/opencode/src/session/compaction.ts +++ b/packages/opencode/src/session/compaction.ts @@ -6,7 +6,6 @@ import { Instance } from "../project/instance" import { Provider } from "../provider/provider" import { MessageV2 } from "./message-v2" import z from "zod" -import { SessionPrompt } from "./prompt" import { Token } from "../util/token" import { Log } from "../util/log" import { SessionProcessor } from "./processor" @@ -14,6 +13,7 @@ import { fn } from "@/util/fn" import { Agent } from "@/agent/agent" import { Plugin } from "@/plugin" import { Config } from "@/config/config" +import { ProviderTransform } from "@/provider/transform" export namespace SessionCompaction { const log = Log.create({ service: "session.compaction" }) @@ -37,7 +37,7 @@ export namespace SessionCompaction { input.tokens.total || input.tokens.input + input.tokens.output + input.tokens.cache.read + input.tokens.cache.write - const output = Math.min(input.model.limit.output, SessionPrompt.OUTPUT_TOKEN_MAX) || SessionPrompt.OUTPUT_TOKEN_MAX + const output = ProviderTransform.maxOutputTokens(input.model) const usable = input.model.limit.input || context - output return count > usable } diff --git a/packages/opencode/src/session/index.ts b/packages/opencode/src/session/index.ts index 4c5cdaf5738a..57456929f61b 100644 --- a/packages/opencode/src/session/index.ts +++ b/packages/opencode/src/session/index.ts @@ -4,7 +4,7 @@ import { BusEvent } from "@/bus/bus-event" import { Bus } from "@/bus" import { Decimal } from "decimal.js" import z from "zod" -import { type LanguageModelUsage, type ProviderMetadata } from "ai" +import { type ProviderMetadata } from "ai" import { Config } from "../config/config" import { Flag } from "../flag/flag" import { Identifier } from "../id/id" diff --git a/packages/opencode/src/session/llm.ts b/packages/opencode/src/session/llm.ts index 4be6e2538f7e..b8705ec4e3fa 100644 --- a/packages/opencode/src/session/llm.ts +++ b/packages/opencode/src/session/llm.ts @@ -25,8 +25,7 @@ import { Auth } from "@/auth" export namespace LLM { const log = Log.create({ service: "llm" }) - - export const OUTPUT_TOKEN_MAX = Flag.OPENCODE_EXPERIMENTAL_OUTPUT_TOKEN_MAX || 32_000 + export const OUTPUT_TOKEN_MAX = ProviderTransform.OUTPUT_TOKEN_MAX export type StreamInput = { user: MessageV2.User @@ -149,14 +148,7 @@ export namespace LLM { ) const maxOutputTokens = - isCodex || provider.id.includes("github-copilot") - ? 
undefined - : ProviderTransform.maxOutputTokens( - input.model.api.npm, - params.options, - input.model.limit.output, - OUTPUT_TOKEN_MAX, - ) + isCodex || provider.id.includes("github-copilot") ? undefined : ProviderTransform.maxOutputTokens(input.model) const tools = await resolveTools(input) diff --git a/packages/opencode/src/session/prompt.ts b/packages/opencode/src/session/prompt.ts index d7f73b4f6097..f49084d9c04d 100644 --- a/packages/opencode/src/session/prompt.ts +++ b/packages/opencode/src/session/prompt.ts @@ -52,7 +52,6 @@ globalThis.AI_SDK_LOG_WARNINGS = false export namespace SessionPrompt { const log = Log.create({ service: "session.prompt" }) - export const OUTPUT_TOKEN_MAX = Flag.OPENCODE_EXPERIMENTAL_OUTPUT_TOKEN_MAX || 32_000 const state = Instance.state( () => { diff --git a/packages/opencode/test/provider/transform.test.ts b/packages/opencode/test/provider/transform.test.ts index 0e0bb440aa81..02bb5278fc7d 100644 --- a/packages/opencode/test/provider/transform.test.ts +++ b/packages/opencode/test/provider/transform.test.ts @@ -175,100 +175,6 @@ describe("ProviderTransform.options - gpt-5 textVerbosity", () => { }) }) -describe("ProviderTransform.maxOutputTokens", () => { - test("returns 32k when modelLimit > 32k", () => { - const modelLimit = 100000 - const result = ProviderTransform.maxOutputTokens("@ai-sdk/openai", {}, modelLimit, OUTPUT_TOKEN_MAX) - expect(result).toBe(OUTPUT_TOKEN_MAX) - }) - - test("returns modelLimit when modelLimit < 32k", () => { - const modelLimit = 16000 - const result = ProviderTransform.maxOutputTokens("@ai-sdk/openai", {}, modelLimit, OUTPUT_TOKEN_MAX) - expect(result).toBe(16000) - }) - - describe("azure", () => { - test("returns 32k when modelLimit > 32k", () => { - const modelLimit = 100000 - const result = ProviderTransform.maxOutputTokens("@ai-sdk/azure", {}, modelLimit, OUTPUT_TOKEN_MAX) - expect(result).toBe(OUTPUT_TOKEN_MAX) - }) - - test("returns modelLimit when modelLimit < 32k", () => { - const modelLimit = 16000 - const result = ProviderTransform.maxOutputTokens("@ai-sdk/azure", {}, modelLimit, OUTPUT_TOKEN_MAX) - expect(result).toBe(16000) - }) - }) - - describe("bedrock", () => { - test("returns 32k when modelLimit > 32k", () => { - const modelLimit = 100000 - const result = ProviderTransform.maxOutputTokens("@ai-sdk/amazon-bedrock", {}, modelLimit, OUTPUT_TOKEN_MAX) - expect(result).toBe(OUTPUT_TOKEN_MAX) - }) - - test("returns modelLimit when modelLimit < 32k", () => { - const modelLimit = 16000 - const result = ProviderTransform.maxOutputTokens("@ai-sdk/amazon-bedrock", {}, modelLimit, OUTPUT_TOKEN_MAX) - expect(result).toBe(16000) - }) - }) - - describe("anthropic without thinking options", () => { - test("returns 32k when modelLimit > 32k", () => { - const modelLimit = 100000 - const result = ProviderTransform.maxOutputTokens("@ai-sdk/anthropic", {}, modelLimit, OUTPUT_TOKEN_MAX) - expect(result).toBe(OUTPUT_TOKEN_MAX) - }) - - test("returns modelLimit when modelLimit < 32k", () => { - const modelLimit = 16000 - const result = ProviderTransform.maxOutputTokens("@ai-sdk/anthropic", {}, modelLimit, OUTPUT_TOKEN_MAX) - expect(result).toBe(16000) - }) - }) - - describe("anthropic with thinking options", () => { - test("returns 32k when budgetTokens + 32k <= modelLimit", () => { - const modelLimit = 100000 - const options = { - thinking: { - type: "enabled", - budgetTokens: 10000, - }, - } - const result = ProviderTransform.maxOutputTokens("@ai-sdk/anthropic", options, modelLimit, OUTPUT_TOKEN_MAX) - 
expect(result).toBe(OUTPUT_TOKEN_MAX) - }) - - test("returns modelLimit - budgetTokens when budgetTokens + 32k > modelLimit", () => { - const modelLimit = 50000 - const options = { - thinking: { - type: "enabled", - budgetTokens: 30000, - }, - } - const result = ProviderTransform.maxOutputTokens("@ai-sdk/anthropic", options, modelLimit, OUTPUT_TOKEN_MAX) - expect(result).toBe(20000) - }) - - test("returns 32k when thinking type is not enabled", () => { - const modelLimit = 100000 - const options = { - thinking: { - type: "disabled", - budgetTokens: 10000, - }, - } - const result = ProviderTransform.maxOutputTokens("@ai-sdk/anthropic", options, modelLimit, OUTPUT_TOKEN_MAX) - expect(result).toBe(OUTPUT_TOKEN_MAX) - }) - }) -}) - describe("ProviderTransform.schema - gemini array items", () => { test("adds missing items for array properties", () => { const geminiModel = { diff --git a/packages/opencode/test/session/llm.test.ts b/packages/opencode/test/session/llm.test.ts index 1f7e17e1bd20..be0b8e520e3f 100644 --- a/packages/opencode/test/session/llm.test.ts +++ b/packages/opencode/test/session/llm.test.ts @@ -314,12 +314,7 @@ describe("session.llm.stream", () => { expect(body.stream).toBe(true) const maxTokens = (body.max_tokens as number | undefined) ?? (body.max_output_tokens as number | undefined) - const expectedMaxTokens = ProviderTransform.maxOutputTokens( - resolved.api.npm, - ProviderTransform.options({ model: resolved, sessionID }), - resolved.limit.output, - LLM.OUTPUT_TOKEN_MAX, - ) + const expectedMaxTokens = ProviderTransform.maxOutputTokens(resolved) expect(maxTokens).toBe(expectedMaxTokens) const reasoning = (body.reasoningEffort as string | undefined) ?? (body.reasoning_effort as string | undefined) @@ -442,12 +437,7 @@ describe("session.llm.stream", () => { expect((body.reasoning as { effort?: string } | undefined)?.effort).toBe("high") const maxTokens = body.max_output_tokens as number | undefined - const expectedMaxTokens = ProviderTransform.maxOutputTokens( - resolved.api.npm, - ProviderTransform.options({ model: resolved, sessionID }), - resolved.limit.output, - LLM.OUTPUT_TOKEN_MAX, - ) + const expectedMaxTokens = ProviderTransform.maxOutputTokens(resolved) expect(maxTokens).toBe(expectedMaxTokens) }, }) @@ -565,14 +555,7 @@ describe("session.llm.stream", () => { expect(capture.url.pathname.endsWith("/messages")).toBe(true) expect(body.model).toBe(resolved.api.id) - expect(body.max_tokens).toBe( - ProviderTransform.maxOutputTokens( - resolved.api.npm, - ProviderTransform.options({ model: resolved, sessionID }), - resolved.limit.output, - LLM.OUTPUT_TOKEN_MAX, - ), - ) + expect(body.max_tokens).toBe(ProviderTransform.maxOutputTokens(resolved)) expect(body.temperature).toBe(0.4) expect(body.top_p).toBe(0.9) }, @@ -677,14 +660,7 @@ describe("session.llm.stream", () => { expect(capture.url.pathname).toBe(pathSuffix) expect(config?.temperature).toBe(0.3) expect(config?.topP).toBe(0.8) - expect(config?.maxOutputTokens).toBe( - ProviderTransform.maxOutputTokens( - resolved.api.npm, - ProviderTransform.options({ model: resolved, sessionID }), - resolved.limit.output, - LLM.OUTPUT_TOKEN_MAX, - ), - ) + expect(config?.maxOutputTokens).toBe(ProviderTransform.maxOutputTokens(resolved)) }, }) }) From 18e5da652a98ec726552cd6e2abdd624203e1c15 Mon Sep 17 00:00:00 2001 From: Aiden Cline Date: Tue, 10 Feb 2026 10:16:20 -0600 Subject: [PATCH 05/19] wip --- packages/opencode/src/session/compaction.ts | 2 +- packages/opencode/src/session/processor.ts | 3 +++ 
packages/opencode/src/session/retry.ts | 3 --- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/packages/opencode/src/session/compaction.ts b/packages/opencode/src/session/compaction.ts index e109a86543fd..678b7689c137 100644 --- a/packages/opencode/src/session/compaction.ts +++ b/packages/opencode/src/session/compaction.ts @@ -39,7 +39,7 @@ export namespace SessionCompaction { const output = ProviderTransform.maxOutputTokens(input.model) const usable = input.model.limit.input || context - output - return count > usable + return count >= usable } export const PRUNE_MINIMUM = 20_000 diff --git a/packages/opencode/src/session/processor.ts b/packages/opencode/src/session/processor.ts index b5289e903a16..271576dae8dc 100644 --- a/packages/opencode/src/session/processor.ts +++ b/packages/opencode/src/session/processor.ts @@ -342,6 +342,9 @@ export namespace SessionProcessor { stack: JSON.stringify(e.stack), }) const error = MessageV2.fromError(e, { providerID: input.model.providerID }) + // DO NOT retry context overflow errors + if (MessageV2.ContextOverflowError.isInstance(error)) { + } const retry = SessionRetry.retryable(error) if (retry !== undefined) { attempt++ diff --git a/packages/opencode/src/session/retry.ts b/packages/opencode/src/session/retry.ts index 0d9a865b1f3f..a71a6a38241f 100644 --- a/packages/opencode/src/session/retry.ts +++ b/packages/opencode/src/session/retry.ts @@ -59,9 +59,6 @@ export namespace SessionRetry { } export function retryable(error: ReturnType) { - // DO NOT retry context overflow errors - if (MessageV2.ContextOverflowError.isInstance(error)) return undefined - if (MessageV2.APIError.isInstance(error)) { if (!error.data.isRetryable) return undefined return error.data.message.includes("Overloaded") ? "Provider is overloaded" : error.data.message From 6f208dacaaf580b9aa5a3b604411bb0861662295 Mon Sep 17 00:00:00 2001 From: Aiden Cline Date: Tue, 10 Feb 2026 13:58:05 -0600 Subject: [PATCH 06/19] wip: reserved tokens --- packages/opencode/src/config/config.ts | 12 ++++++++++++ packages/opencode/src/session/compaction.ts | 3 ++- packages/opencode/src/session/processor.ts | 13 +++++++++++++ packages/opencode/src/session/retry.ts | 2 ++ packages/opencode/test/session/retry.test.ts | 9 +++++++++ packages/sdk/js/src/v2/gen/types.gen.ts | 9 +++++++++ 6 files changed, 47 insertions(+), 1 deletion(-) diff --git a/packages/opencode/src/config/config.ts b/packages/opencode/src/config/config.ts index a231a5300724..d25e1b2a8a76 100644 --- a/packages/opencode/src/config/config.ts +++ b/packages/opencode/src/config/config.ts @@ -1161,6 +1161,18 @@ export namespace Config { .object({ auto: z.boolean().optional().describe("Enable automatic compaction when context is full (default: true)"), prune: z.boolean().optional().describe("Enable pruning of old tool outputs (default: true)"), + preserved: z + .number() + .int() + .min(0) + .optional() + .describe("Token budget to preserve as recent context during compaction"), + reserved: z + .number() + .int() + .min(0) + .optional() + .describe("Token headroom reserved to reduce overflow risk before compaction"), }) .optional(), experimental: z diff --git a/packages/opencode/src/session/compaction.ts b/packages/opencode/src/session/compaction.ts index 678b7689c137..f1be96e995f9 100644 --- a/packages/opencode/src/session/compaction.ts +++ b/packages/opencode/src/session/compaction.ts @@ -32,6 +32,7 @@ export namespace SessionCompaction { if (config.compaction?.auto === false) return false const context = 
input.model.limit.context if (context === 0) return false + const reserved = config.compaction?.reserved ?? 16_000 const count = input.tokens.total || @@ -39,7 +40,7 @@ export namespace SessionCompaction { const output = ProviderTransform.maxOutputTokens(input.model) const usable = input.model.limit.input || context - output - return count >= usable + return count >= usable - reserved } export const PRUNE_MINIMUM = 20_000 diff --git a/packages/opencode/src/session/processor.ts b/packages/opencode/src/session/processor.ts index 271576dae8dc..2c11e71ebacf 100644 --- a/packages/opencode/src/session/processor.ts +++ b/packages/opencode/src/session/processor.ts @@ -344,6 +344,19 @@ export namespace SessionProcessor { const error = MessageV2.fromError(e, { providerID: input.model.providerID }) // DO NOT retry context overflow errors if (MessageV2.ContextOverflowError.isInstance(error)) { + // const compaction = await Agent.get("compaction") + // const compactionModel = compaction.model + // ? `${compaction.model.providerID}/${compaction.model.modelID}` + // : undefined + // if ( + // streamInput.agent.name !== "compaction" && + // compactionModel && + // compactionModel !== `${streamInput.model.providerID}/${streamInput.model.id}` + // ) { + // needsCompaction = true + // } else { + // // Need to do explicit / better compaction + // } } const retry = SessionRetry.retryable(error) if (retry !== undefined) { diff --git a/packages/opencode/src/session/retry.ts b/packages/opencode/src/session/retry.ts index a71a6a38241f..c057cddd7302 100644 --- a/packages/opencode/src/session/retry.ts +++ b/packages/opencode/src/session/retry.ts @@ -59,6 +59,8 @@ export namespace SessionRetry { } export function retryable(error: ReturnType) { + // context overflow errors should not be retried + if (MessageV2.ContextOverflowError.isInstance(error)) return undefined if (MessageV2.APIError.isInstance(error)) { if (!error.data.isRetryable) return undefined return error.data.message.includes("Overloaded") ? 
"Provider is overloaded" : error.data.message diff --git a/packages/opencode/test/session/retry.test.ts b/packages/opencode/test/session/retry.test.ts index a483a0152714..6768e72d95a7 100644 --- a/packages/opencode/test/session/retry.test.ts +++ b/packages/opencode/test/session/retry.test.ts @@ -112,6 +112,15 @@ describe("session.retry.retryable", () => { const error = wrap("not-json") expect(SessionRetry.retryable(error)).toBeUndefined() }) + + test("does not retry context overflow errors", () => { + const error = new MessageV2.ContextOverflowError({ + message: "Input exceeds context window of this model", + responseBody: '{"error":{"code":"context_length_exceeded"}}', + }).toObject() as ReturnType + + expect(SessionRetry.retryable(error)).toBeUndefined() + }) }) describe("session.message-v2.fromError", () => { diff --git a/packages/sdk/js/src/v2/gen/types.gen.ts b/packages/sdk/js/src/v2/gen/types.gen.ts index eedddbe511a1..900dd486f5e9 100644 --- a/packages/sdk/js/src/v2/gen/types.gen.ts +++ b/packages/sdk/js/src/v2/gen/types.gen.ts @@ -419,6 +419,7 @@ export type StepFinishPart = { snapshot?: string cost: number tokens: { + total?: number input: number output: number reasoning: number @@ -1823,6 +1824,14 @@ export type Config = { * Enable pruning of old tool outputs (default: true) */ prune?: boolean + /** + * Token budget to preserve as recent context during compaction + */ + preserved?: number + /** + * Token headroom reserved to reduce overflow risk before compaction + */ + reserved?: number } experimental?: { disable_paste_summary?: boolean From 55daf0383557e56fd00e1545e83fad1cbfe3a796 Mon Sep 17 00:00:00 2001 From: Aiden Cline Date: Tue, 10 Feb 2026 16:07:01 -0600 Subject: [PATCH 07/19] wip --- packages/opencode/src/session/processor.ts | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/packages/opencode/src/session/processor.ts b/packages/opencode/src/session/processor.ts index 2c11e71ebacf..8e7472e2f18c 100644 --- a/packages/opencode/src/session/processor.ts +++ b/packages/opencode/src/session/processor.ts @@ -342,21 +342,8 @@ export namespace SessionProcessor { stack: JSON.stringify(e.stack), }) const error = MessageV2.fromError(e, { providerID: input.model.providerID }) - // DO NOT retry context overflow errors if (MessageV2.ContextOverflowError.isInstance(error)) { - // const compaction = await Agent.get("compaction") - // const compactionModel = compaction.model - // ? 
`${compaction.model.providerID}/${compaction.model.modelID}` - // : undefined - // if ( - // streamInput.agent.name !== "compaction" && - // compactionModel && - // compactionModel !== `${streamInput.model.providerID}/${streamInput.model.id}` - // ) { - // needsCompaction = true - // } else { - // // Need to do explicit / better compaction - // } + // TODO: Handle context overflow error } const retry = SessionRetry.retryable(error) if (retry !== undefined) { From 7f4afb43efc8896943fad82cf201a42eefe1be72 Mon Sep 17 00:00:00 2001 From: Aiden Cline Date: Tue, 10 Feb 2026 16:41:18 -0600 Subject: [PATCH 08/19] wip --- .../opencode/src/agent/prompt/compaction.txt | 4 ++- packages/opencode/src/config/config.ts | 2 +- packages/opencode/src/session/compaction.ts | 28 +++++++++++++++++-- packages/sdk/js/src/v2/gen/types.gen.ts | 4 --- packages/web/src/content/docs/config.mdx | 4 ++- 5 files changed, 32 insertions(+), 10 deletions(-) diff --git a/packages/opencode/src/agent/prompt/compaction.txt b/packages/opencode/src/agent/prompt/compaction.txt index b919671a0aca..3308627e153c 100644 --- a/packages/opencode/src/agent/prompt/compaction.txt +++ b/packages/opencode/src/agent/prompt/compaction.txt @@ -1,6 +1,6 @@ You are a helpful AI assistant tasked with summarizing conversations. -When asked to summarize, provide a detailed but concise summary of the conversation. +When asked to summarize, provide a detailed but concise summary of the conversation. Focus on information that would be helpful for continuing the conversation, including: - What was done - What is currently being worked on @@ -10,3 +10,5 @@ Focus on information that would be helpful for continuing the conversation, incl - Important technical decisions and why they were made Your summary should be comprehensive enough to provide context but concise enough to be quickly understood. + +Do not respond to any questions in the conversation, only output the summary. diff --git a/packages/opencode/src/config/config.ts b/packages/opencode/src/config/config.ts index d25e1b2a8a76..1845146ee14c 100644 --- a/packages/opencode/src/config/config.ts +++ b/packages/opencode/src/config/config.ts @@ -1172,7 +1172,7 @@ export namespace Config { .int() .min(0) .optional() - .describe("Token headroom reserved to reduce overflow risk before compaction"), + .describe("Token buffer for compaction. Leaves enough window to avoid overflow during compaction."), }) .optional(), experimental: z diff --git a/packages/opencode/src/session/compaction.ts b/packages/opencode/src/session/compaction.ts index f1be96e995f9..9bdef18de944 100644 --- a/packages/opencode/src/session/compaction.ts +++ b/packages/opencode/src/session/compaction.ts @@ -32,7 +32,7 @@ export namespace SessionCompaction { if (config.compaction?.auto === false) return false const context = input.model.limit.context if (context === 0) return false - const reserved = config.compaction?.reserved ?? 16_000 + const reserved = config.compaction?.reserved ?? 10_000 const count = input.tokens.total || @@ -144,8 +144,30 @@ export namespace SessionCompaction { { sessionID: input.sessionID }, { context: [], prompt: undefined }, ) - const defaultPrompt = - "Provide a detailed prompt for continuing our conversation above. Focus on information that would be helpful for continuing the conversation, including what we did, what we're doing, which files we're working on, and what we're going to do next considering new session will not have access to our conversation." 
+ const defaultPrompt = `Provide a detailed prompt for continuing our conversation above. +Focus on information that would be helpful for continuing the conversation, including what we did, what we're doing, which files we're working on, and what we're going to do next. +The summary that you construct will be used so that another agent can read it and continue the work. + +When constructing the summary, try to stick to this template: +--- +## Goal + +[What goal(s) is the user trying to accomplish?] + +## Instructions + +- [What important instructions did the user give you that are relevant] +- [If there is a plan or spec, include information about it so next agent can continue using it] + +## Discoveries + +[What notable things were learned during this conversation that would be useful for the next agent to know when continuing the work] + +## Accomplished + +[What work has been completed, what work is still in progress, and what work is left?] +---` + const promptText = compacting.prompt ?? [defaultPrompt, ...compacting.context].join("\n\n") const result = await processor.process({ user: userMessage, diff --git a/packages/sdk/js/src/v2/gen/types.gen.ts b/packages/sdk/js/src/v2/gen/types.gen.ts index 900dd486f5e9..42617c79ec89 100644 --- a/packages/sdk/js/src/v2/gen/types.gen.ts +++ b/packages/sdk/js/src/v2/gen/types.gen.ts @@ -1824,10 +1824,6 @@ export type Config = { * Enable pruning of old tool outputs (default: true) */ prune?: boolean - /** - * Token budget to preserve as recent context during compaction - */ - preserved?: number /** * Token headroom reserved to reduce overflow risk before compaction */ diff --git a/packages/web/src/content/docs/config.mdx b/packages/web/src/content/docs/config.mdx index 5cc9d8666a96..a8b0ffc2c68b 100644 --- a/packages/web/src/content/docs/config.mdx +++ b/packages/web/src/content/docs/config.mdx @@ -490,13 +490,15 @@ You can control context compaction behavior through the `compaction` option. "$schema": "https://opencode.ai/config.json", "compaction": { "auto": true, - "prune": true + "prune": true, + "reserved": 10000 } } ``` - `auto` - Automatically compact the session when context is full (default: `true`). - `prune` - Remove old tool outputs to save tokens (default: `true`). +- `reserved` - Token buffer for compaction. Leaves enough window to avoid overflow during compaction (default: `10000`). --- From 46bb0af6e26d7901b7be2c6559f643cc7b302ccd Mon Sep 17 00:00:00 2001 From: Aiden Cline Date: Tue, 10 Feb 2026 16:44:07 -0600 Subject: [PATCH 09/19] tweak --- packages/opencode/src/session/compaction.ts | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/packages/opencode/src/session/compaction.ts b/packages/opencode/src/session/compaction.ts index 9bdef18de944..d8f7f178941c 100644 --- a/packages/opencode/src/session/compaction.ts +++ b/packages/opencode/src/session/compaction.ts @@ -166,6 +166,10 @@ When constructing the summary, try to stick to this template: ## Accomplished [What work has been completed, what work is still in progress, and what work is left?] + +## Relevant files / directories + +[Construct a structured list of relevant files that have been read, edited, or created that pertain to the task at hand. If all the files in a directory are relevant, include the path to the directory.] ---` const promptText = compacting.prompt ?? 
[defaultPrompt, ...compacting.context].join("\n\n") From b8ef47df8bbf8714f48307ffa26b170d822bf22d Mon Sep 17 00:00:00 2001 From: James Lal Date: Tue, 10 Feb 2026 16:03:36 -0700 Subject: [PATCH 10/19] test(compaction): failing tests for missing headroom when limit.input is set (#13034) --- .../opencode/test/session/compaction.test.ts | 80 +++++++++++++++++++ 1 file changed, 80 insertions(+) diff --git a/packages/opencode/test/session/compaction.test.ts b/packages/opencode/test/session/compaction.test.ts index 2e9c091870ed..76852c3f5359 100644 --- a/packages/opencode/test/session/compaction.test.ts +++ b/packages/opencode/test/session/compaction.test.ts @@ -112,6 +112,86 @@ describe("session.compaction.isOverflow", () => { }) }) + // ─── Bug reproduction tests ─────────────────────────────────────────── + // These tests demonstrate that when limit.input is set, isOverflow() + // does not subtract any headroom for the next model response. This means + // compaction only triggers AFTER we've already consumed the full input + // budget, leaving zero room for the next API call's output tokens. + // + // Compare: without limit.input, usable = context - output (reserves space). + // With limit.input, usable = limit.input (reserves nothing). + // + // Related issues: #10634, #8089, #11086, #12621 + // Open PRs: #6875, #12924 + + test("BUG: no headroom when limit.input is set — compaction should trigger near boundary but does not", async () => { + await using tmp = await tmpdir() + await Instance.provide({ + directory: tmp.path, + fn: async () => { + // Simulate Claude with prompt caching: input limit = 200K, output limit = 32K + const model = createModel({ context: 200_000, input: 200_000, output: 32_000 }) + + // We've used 198K tokens total. Only 2K under the input limit. + // On the next turn, the full conversation (198K) becomes input, + // plus the model needs room to generate output — this WILL overflow. + const tokens = { input: 180_000, output: 15_000, reasoning: 0, cache: { read: 3_000, write: 0 } } + // count = 180K + 3K + 15K = 198K + // usable = limit.input = 200K (no output subtracted!) + // 198K > 200K = false → no compaction triggered + + // WITHOUT limit.input: usable = 200K - 32K = 168K, and 198K > 168K = true ✓ + // WITH limit.input: usable = 200K, and 198K > 200K = false ✗ + + // With 198K used and only 2K headroom, the next turn will overflow. + // Compaction MUST trigger here. 
+ expect(await SessionCompaction.isOverflow({ tokens, model })).toBe(true) + }, + }) + }) + + test("BUG: without limit.input, same token count correctly triggers compaction", async () => { + await using tmp = await tmpdir() + await Instance.provide({ + directory: tmp.path, + fn: async () => { + // Same model but without limit.input — uses context - output instead + const model = createModel({ context: 200_000, output: 32_000 }) + + // Same token usage as above + const tokens = { input: 180_000, output: 15_000, reasoning: 0, cache: { read: 3_000, write: 0 } } + // count = 198K + // usable = context - output = 200K - 32K = 168K + // 198K > 168K = true → compaction correctly triggered + + const result = await SessionCompaction.isOverflow({ tokens, model }) + expect(result).toBe(true) // ← Correct: headroom is reserved + }, + }) + }) + + test("BUG: asymmetry — limit.input model allows 30K more usage before compaction than equivalent model without it", async () => { + await using tmp = await tmpdir() + await Instance.provide({ + directory: tmp.path, + fn: async () => { + // Two models with identical context/output limits, differing only in limit.input + const withInputLimit = createModel({ context: 200_000, input: 200_000, output: 32_000 }) + const withoutInputLimit = createModel({ context: 200_000, output: 32_000 }) + + // 170K total tokens — well above context-output (168K) but below input limit (200K) + const tokens = { input: 155_000, output: 10_000, reasoning: 0, cache: { read: 5_000, write: 0 } } + + const withLimit = await SessionCompaction.isOverflow({ tokens, model: withInputLimit }) + const withoutLimit = await SessionCompaction.isOverflow({ tokens, model: withoutInputLimit }) + + // Both models have identical real capacity — they should agree: + expect(withLimit).toBe(true) // should compact (170K leaves no room for 32K output) + expect(withoutLimit).toBe(true) // correctly compacts (170K > 168K) + }, + }) + }) + test("returns false when model context limit is 0", async () => { await using tmp = await tmpdir() await Instance.provide({ From 34d3b87a4f28c269db25317662fc8fc93dbd0744 Mon Sep 17 00:00:00 2001 From: Aiden Cline Date: Tue, 10 Feb 2026 17:49:34 -0600 Subject: [PATCH 11/19] tweak: prompt --- packages/opencode/src/session/compaction.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/opencode/src/session/compaction.ts b/packages/opencode/src/session/compaction.ts index d8f7f178941c..75b702e1a983 100644 --- a/packages/opencode/src/session/compaction.ts +++ b/packages/opencode/src/session/compaction.ts @@ -212,7 +212,7 @@ When constructing the summary, try to stick to this template: sessionID: input.sessionID, type: "text", synthetic: true, - text: "Continue if you have next steps", + text: "Proceed only when there are clear, concrete next steps based on the user's instructions. If next steps are unclear, ask clarifying questions before continuing. 
Do not make edits unless the user has explicitly asked for implementation or it is clearly implied by their request.", time: { start: Date.now(), end: Date.now(), From 7dfac88524f49ba8f9117808a6b6b087d9cec378 Mon Sep 17 00:00:00 2001 From: Aiden Cline Date: Tue, 10 Feb 2026 19:09:03 -0600 Subject: [PATCH 12/19] fix: math --- packages/opencode/src/session/compaction.ts | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/packages/opencode/src/session/compaction.ts b/packages/opencode/src/session/compaction.ts index 75b702e1a983..aaba0e685580 100644 --- a/packages/opencode/src/session/compaction.ts +++ b/packages/opencode/src/session/compaction.ts @@ -32,15 +32,14 @@ export namespace SessionCompaction { if (config.compaction?.auto === false) return false const context = input.model.limit.context if (context === 0) return false - const reserved = config.compaction?.reserved ?? 10_000 const count = input.tokens.total || input.tokens.input + input.tokens.output + input.tokens.cache.read + input.tokens.cache.write - const output = ProviderTransform.maxOutputTokens(input.model) - const usable = input.model.limit.input || context - output - return count >= usable - reserved + const reserved = config.compaction?.reserved ?? ProviderTransform.maxOutputTokens(input.model) + const usable = input.model.limit.input ? input.model.limit.input - reserved : context - reserved + return count >= usable } export const PRUNE_MINIMUM = 20_000 @@ -212,7 +211,7 @@ When constructing the summary, try to stick to this template: sessionID: input.sessionID, type: "text", synthetic: true, - text: "Proceed only when there are clear, concrete next steps based on the user's instructions. If next steps are unclear, ask clarifying questions before continuing. Do not make edits unless the user has explicitly asked for implementation or it is clearly implied by their request.", + text: "Continue if you have next steps, or stop and ask for clarification if you are unsure how to proceed.", time: { start: Date.now(), end: Date.now(), From 0b47e9dd412da67617ef3a76231b0332c3783f42 Mon Sep 17 00:00:00 2001 From: Aiden Cline Date: Tue, 10 Feb 2026 19:11:33 -0600 Subject: [PATCH 13/19] cleanup --- packages/opencode/src/config/config.ts | 6 ------ packages/sdk/js/src/v2/gen/types.gen.ts | 2 +- 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/packages/opencode/src/config/config.ts b/packages/opencode/src/config/config.ts index 1845146ee14c..8f0f583ea3d6 100644 --- a/packages/opencode/src/config/config.ts +++ b/packages/opencode/src/config/config.ts @@ -1161,12 +1161,6 @@ export namespace Config { .object({ auto: z.boolean().optional().describe("Enable automatic compaction when context is full (default: true)"), prune: z.boolean().optional().describe("Enable pruning of old tool outputs (default: true)"), - preserved: z - .number() - .int() - .min(0) - .optional() - .describe("Token budget to preserve as recent context during compaction"), reserved: z .number() .int() diff --git a/packages/sdk/js/src/v2/gen/types.gen.ts b/packages/sdk/js/src/v2/gen/types.gen.ts index 42617c79ec89..07ce5c2b05cd 100644 --- a/packages/sdk/js/src/v2/gen/types.gen.ts +++ b/packages/sdk/js/src/v2/gen/types.gen.ts @@ -1825,7 +1825,7 @@ export type Config = { */ prune?: boolean /** - * Token headroom reserved to reduce overflow risk before compaction + * Token buffer for compaction. Leaves enough window to avoid overflow during compaction. 
*/ reserved?: number } From 3eea9b9ae37cc7e7d203fbd9e909ec3065ec85c2 Mon Sep 17 00:00:00 2001 From: Aiden Cline Date: Tue, 10 Feb 2026 19:14:08 -0600 Subject: [PATCH 14/19] fix vertex calculation --- packages/opencode/src/session/index.ts | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/packages/opencode/src/session/index.ts b/packages/opencode/src/session/index.ts index 57456929f61b..f33b2e9e02d8 100644 --- a/packages/opencode/src/session/index.ts +++ b/packages/opencode/src/session/index.ts @@ -476,7 +476,11 @@ export namespace Session { const total = iife(() => { // Anthropic doesn't provide total_tokens, also ai sdk will vastly undercount if we // don't compute from components - if (input.model.api.npm === "@ai-sdk/anthropic" || input.model.api.npm === "@ai-sdk/bedrock") { + if ( + input.model.api.npm === "@ai-sdk/anthropic" || + input.model.api.npm === "@ai-sdk/bedrock" || + input.model.api.npm === "@ai-sdk/google-vertex/anthropic" + ) { return adjustedInputTokens + outputTokens + cacheReadInputTokens + cacheWriteInputTokens } return input.usage.totalTokens From c07b6f6f3e134b17c6a207c29e40713b66df5e40 Mon Sep 17 00:00:00 2001 From: Aiden Cline Date: Tue, 10 Feb 2026 19:14:40 -0600 Subject: [PATCH 15/19] cleanup --- packages/opencode/src/session/tokens.txt | 199 ----------------------- 1 file changed, 199 deletions(-) delete mode 100644 packages/opencode/src/session/tokens.txt diff --git a/packages/opencode/src/session/tokens.txt b/packages/opencode/src/session/tokens.txt deleted file mode 100644 index bf6c205823ec..000000000000 --- a/packages/opencode/src/session/tokens.txt +++ /dev/null @@ -1,199 +0,0 @@ -## Openai - ---- Real --- -{ - "usage": { - "input_tokens": 14195, - "input_tokens_details": { - "cached_tokens": 12032 - }, - "output_tokens": 377, - "output_tokens_details": { - "reasoning_tokens": 41 - }, - "total_tokens": 14572 - } -} - ---- Calculated --- -{ - "tokens": { - "total": 14572, - "input": 2163, - "output": 377, - "reasoning": 41, - "cache": { - "read": 12032, - "write": 0 - } - } -} - -## Anthropic - ---- Real --- -{ - "usage": { - "input_tokens": 4, - "cache_creation_input_tokens": 2466, - "cache_read_input_tokens": 18873, - "output_tokens": 346 - } -} - ---- Calculated --- -{ - "tokens": { - "total": 350, - "input": 4, - "output": 346, - "reasoning": 0, - "cache": { - "read": 18873, - "write": 2466 - } - } -} - -## Bedrock - ---- Real --- -{ - "usage": { - "cacheReadInputTokenCount": 16138, - "cacheReadInputTokens": 16138, - "cacheWriteInputTokenCount": 2571, - "cacheWriteInputTokens": 2571, - "inputTokens": 4, - "outputTokens": 358, - "serverToolUsage": {}, - "totalTokens": 19071 - } -} - ---- Calculated --- -{ - "tokens": { - "total": 362, - "input": 4, - "output": 358, - "reasoning": 0, - "cache": { - "read": 16138, - "write": 2571 - } - } -} - -## Google - ---- Real --- -{ - "usageMetadata": { - "promptTokenCount": 19435, - "candidatesTokenCount": 291, - "totalTokenCount": 19726, - "cachedContentTokenCount": 11447, - "trafficType": "ON_DEMAND", - "promptTokensDetails": [ - { - "modality": "TEXT", - "tokenCount": 19435 - } - ], - "cacheTokensDetails": [ - { - "modality": "TEXT", - "tokenCount": 11447 - } - ], - "candidatesTokensDetails": [ - { - "modality": "TEXT", - "tokenCount": 291 - } - ] - } -} - ---- Calculated --- -{ - "tokens": { - "total": 19726, - "input": 7988, - "output": 291, - "reasoning": 0, - "cache": { - "read": 11447, - "write": 0 - } - } -} - - -## Github Copilot - ---- Real --- -{ - "usage": { - "completion_tokens": 
448, - "prompt_tokens": 21172, - "prompt_tokens_details": { - "cached_tokens": 18702 - }, - "total_tokens": 21620 - } -} - ---- Calculated --- -{ - "tokens": { - "total": 21620, - "input": 2470, - "output": 448, - "reasoning": 0, - "cache": { - "read": 18702, - "write": 0 - } - } -} - -## OpenRouter - ---- Real --- -{ - "usage": { - "prompt_tokens": 14145, - "completion_tokens": 447, - "total_tokens": 14592, - "cost": 0.02215125, - "is_byok": false, - "prompt_tokens_details": { - "cached_tokens": 0 - }, - "cost_details": { - "upstream_inference_cost": 0.02215125, - "upstream_inference_prompt_cost": 0.01768125, - "upstream_inference_completions_cost": 0.00447 - }, - "completion_tokens_details": { - "reasoning_tokens": 64, - "image_tokens": 0 - } - } -} - ---- Calculated --- -{ - "tokens": { - "total": 14592, - "input": 14145, - "output": 447, - "reasoning": 64, - "cache": { - "read": 0, - "write": 0 - } - } -} From aceb6356a429136419b7e8004afb5116910c67fc Mon Sep 17 00:00:00 2001 From: Aiden Cline Date: Tue, 10 Feb 2026 19:21:11 -0600 Subject: [PATCH 16/19] tweak compaction to have smaller buffer by default --- packages/opencode/src/session/compaction.ts | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/packages/opencode/src/session/compaction.ts b/packages/opencode/src/session/compaction.ts index aaba0e685580..5660ca97319f 100644 --- a/packages/opencode/src/session/compaction.ts +++ b/packages/opencode/src/session/compaction.ts @@ -27,6 +27,8 @@ export namespace SessionCompaction { ), } + const COMPACTION_BUFFER = 20_000 + export async function isOverflow(input: { tokens: MessageV2.Assistant["tokens"]; model: Provider.Model }) { const config = await Config.get() if (config.compaction?.auto === false) return false @@ -37,7 +39,8 @@ export namespace SessionCompaction { input.tokens.total || input.tokens.input + input.tokens.output + input.tokens.cache.read + input.tokens.cache.write - const reserved = config.compaction?.reserved ?? ProviderTransform.maxOutputTokens(input.model) + const reserved = + config.compaction?.reserved ?? Math.min(COMPACTION_BUFFER, ProviderTransform.maxOutputTokens(input.model)) const usable = input.model.limit.input ? input.model.limit.input - reserved : context - reserved return count >= usable } From e89e40eb49959cd34df6602839a693e9af2ad52e Mon Sep 17 00:00:00 2001 From: Aiden Cline Date: Tue, 10 Feb 2026 19:21:50 -0600 Subject: [PATCH 17/19] cleanup --- packages/opencode/src/session/index.ts | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/packages/opencode/src/session/index.ts b/packages/opencode/src/session/index.ts index f33b2e9e02d8..4bf53bee00e2 100644 --- a/packages/opencode/src/session/index.ts +++ b/packages/opencode/src/session/index.ts @@ -438,11 +438,6 @@ export namespace Session { return part }) - const safe = (value: number) => { - if (!Number.isFinite(value)) return 0 - return value - } - export const getUsage = fn( z.object({ model: z.custom(), @@ -450,6 +445,10 @@ export namespace Session { metadata: z.custom().optional(), }), (input) => { + const safe = (value: number) => { + if (!Number.isFinite(value)) return 0 + return value + } const inputTokens = safe(input.usage.inputTokens ?? 0) const outputTokens = safe(input.usage.outputTokens ?? 0) const reasoningTokens = safe(input.usage.reasoningTokens ?? 
0) From 6cc2aa5db8e889bd2c7f1397998b696732c024d0 Mon Sep 17 00:00:00 2001 From: Aiden Cline Date: Tue, 10 Feb 2026 19:32:33 -0600 Subject: [PATCH 18/19] fix: test --- packages/opencode/test/session/compaction.test.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/opencode/test/session/compaction.test.ts b/packages/opencode/test/session/compaction.test.ts index 76852c3f5359..a50a7d454cd1 100644 --- a/packages/opencode/test/session/compaction.test.ts +++ b/packages/opencode/test/session/compaction.test.ts @@ -70,7 +70,7 @@ describe("session.compaction.isOverflow", () => { directory: tmp.path, fn: async () => { const model = createModel({ context: 100_000, output: 32_000 }) - const tokens = { input: 50_000, output: 10_000, reasoning: 0, cache: { read: 10_000, write: 0 } } + const tokens = { input: 60_000, output: 10_000, reasoning: 0, cache: { read: 10_000, write: 0 } } expect(await SessionCompaction.isOverflow({ tokens, model })).toBe(true) }, }) @@ -180,13 +180,13 @@ describe("session.compaction.isOverflow", () => { const withoutInputLimit = createModel({ context: 200_000, output: 32_000 }) // 170K total tokens — well above context-output (168K) but below input limit (200K) - const tokens = { input: 155_000, output: 10_000, reasoning: 0, cache: { read: 5_000, write: 0 } } + const tokens = { input: 166_000, output: 10_000, reasoning: 0, cache: { read: 5_000, write: 0 } } const withLimit = await SessionCompaction.isOverflow({ tokens, model: withInputLimit }) const withoutLimit = await SessionCompaction.isOverflow({ tokens, model: withoutInputLimit }) // Both models have identical real capacity — they should agree: - expect(withLimit).toBe(true) // should compact (170K leaves no room for 32K output) + expect(withLimit).toBe(true) // should compact (170K leaves no room for 32K output) expect(withoutLimit).toBe(true) // correctly compacts (170K > 168K) }, }) From b3582ce70f4b701d606f5dd5075c29845967b0b3 Mon Sep 17 00:00:00 2001 From: Aiden Cline Date: Tue, 10 Feb 2026 19:49:10 -0600 Subject: [PATCH 19/19] fix: pkg name and update test --- packages/opencode/src/session/index.ts | 2 +- .../opencode/test/session/compaction.test.ts | 52 ++++++++++++++++++- packages/web/src/content/docs/config.mdx | 2 +- 3 files changed, 53 insertions(+), 3 deletions(-) diff --git a/packages/opencode/src/session/index.ts b/packages/opencode/src/session/index.ts index 4bf53bee00e2..b0ffaaf70d77 100644 --- a/packages/opencode/src/session/index.ts +++ b/packages/opencode/src/session/index.ts @@ -477,7 +477,7 @@ export namespace Session { // don't compute from components if ( input.model.api.npm === "@ai-sdk/anthropic" || - input.model.api.npm === "@ai-sdk/bedrock" || + input.model.api.npm === "@ai-sdk/amazon-bedrock" || input.model.api.npm === "@ai-sdk/google-vertex/anthropic" ) { return adjustedInputTokens + outputTokens + cacheReadInputTokens + cacheWriteInputTokens diff --git a/packages/opencode/test/session/compaction.test.ts b/packages/opencode/test/session/compaction.test.ts index a50a7d454cd1..452926d12e1b 100644 --- a/packages/opencode/test/session/compaction.test.ts +++ b/packages/opencode/test/session/compaction.test.ts @@ -15,6 +15,7 @@ function createModel(opts: { output: number input?: number cost?: Provider.Model["cost"] + npm?: string }): Provider.Model { return { id: "test-model", @@ -34,7 +35,7 @@ function createModel(opts: { input: { text: true, image: false, audio: false, video: false }, output: { text: true, image: false, audio: false, video: false }, }, - api: { 
npm: "@ai-sdk/anthropic" }, + api: { npm: opts.npm ?? "@ai-sdk/anthropic" }, options: {}, } as Provider.Model } @@ -370,4 +371,53 @@ describe("session.getUsage", () => { expect(result.cost).toBe(3 + 1.5) }) + + test.each(["@ai-sdk/anthropic", "@ai-sdk/amazon-bedrock", "@ai-sdk/google-vertex/anthropic"])( + "computes total from components for %s models", + (npm) => { + const model = createModel({ context: 100_000, output: 32_000, npm }) + const usage = { + inputTokens: 1000, + outputTokens: 500, + // These providers typically report total as input + output only, + // excluding cache read/write. + totalTokens: 1500, + cachedInputTokens: 200, + } + if (npm === "@ai-sdk/amazon-bedrock") { + const result = Session.getUsage({ + model, + usage, + metadata: { + bedrock: { + usage: { + cacheWriteInputTokens: 300, + }, + }, + }, + }) + + expect(result.tokens.input).toBe(1000) + expect(result.tokens.cache.read).toBe(200) + expect(result.tokens.cache.write).toBe(300) + expect(result.tokens.total).toBe(2000) + return + } + + const result = Session.getUsage({ + model, + usage, + metadata: { + anthropic: { + cacheCreationInputTokens: 300, + }, + }, + }) + + expect(result.tokens.input).toBe(1000) + expect(result.tokens.cache.read).toBe(200) + expect(result.tokens.cache.write).toBe(300) + expect(result.tokens.total).toBe(2000) + }, + ) }) diff --git a/packages/web/src/content/docs/config.mdx b/packages/web/src/content/docs/config.mdx index a8b0ffc2c68b..eeccde2f7913 100644 --- a/packages/web/src/content/docs/config.mdx +++ b/packages/web/src/content/docs/config.mdx @@ -498,7 +498,7 @@ You can control context compaction behavior through the `compaction` option. - `auto` - Automatically compact the session when context is full (default: `true`). - `prune` - Remove old tool outputs to save tokens (default: `true`). -- `reserved` - Token buffer for compaction. Leaves enough window to avoid overflow during compaction (default: `10000`). +- `reserved` - Token buffer for compaction. Leaves enough window to avoid overflow during compaction ---