diff --git a/packages/opencode/test/session/compaction.test.ts b/packages/opencode/test/session/compaction.test.ts
index 2e9c091870ed..76852c3f5359 100644
--- a/packages/opencode/test/session/compaction.test.ts
+++ b/packages/opencode/test/session/compaction.test.ts
@@ -112,6 +112,86 @@ describe("session.compaction.isOverflow", () => {
     })
   })
 
+  // ─── Bug reproduction tests ───────────────────────────────────────────
+  // These tests demonstrate that when limit.input is set, isOverflow()
+  // does not subtract any headroom for the next model response. This means
+  // compaction only triggers AFTER we've already consumed the full input
+  // budget, leaving zero room for the next API call's output tokens.
+  //
+  // Compare: without limit.input, usable = context - output (reserves space).
+  // With limit.input, usable = limit.input (reserves nothing).
+  //
+  // Related issues: #10634, #8089, #11086, #12621
+  // Open PRs: #6875, #12924
+
+  test("BUG: no headroom when limit.input is set — compaction should trigger near boundary but does not", async () => {
+    await using tmp = await tmpdir()
+    await Instance.provide({
+      directory: tmp.path,
+      fn: async () => {
+        // Simulate Claude with prompt caching: input limit = 200K, output limit = 32K
+        const model = createModel({ context: 200_000, input: 200_000, output: 32_000 })
+
+        // We've used 198K tokens total. Only 2K under the input limit.
+        // On the next turn, the full conversation (198K) becomes input,
+        // plus the model needs room to generate output — this WILL overflow.
+        const tokens = { input: 180_000, output: 15_000, reasoning: 0, cache: { read: 3_000, write: 0 } }
+        // count = 180K + 3K + 15K = 198K
+        // usable = limit.input = 200K (no output subtracted!)
+        // 198K > 200K = false → no compaction triggered
+
+        // WITHOUT limit.input: usable = 200K - 32K = 168K, and 198K > 168K = true ✓
+        // WITH limit.input: usable = 200K, and 198K > 200K = false ✗
+
+        // With 198K used and only 2K headroom, the next turn will overflow.
+        // Compaction MUST trigger here.
+        expect(await SessionCompaction.isOverflow({ tokens, model })).toBe(true)
+      },
+    })
+  })
+
+  test("BUG: without limit.input, same token count correctly triggers compaction", async () => {
+    await using tmp = await tmpdir()
+    await Instance.provide({
+      directory: tmp.path,
+      fn: async () => {
+        // Same model but without limit.input — uses context - output instead
+        const model = createModel({ context: 200_000, output: 32_000 })
+
+        // Same token usage as above
+        const tokens = { input: 180_000, output: 15_000, reasoning: 0, cache: { read: 3_000, write: 0 } }
+        // count = 198K
+        // usable = context - output = 200K - 32K = 168K
+        // 198K > 168K = true → compaction correctly triggered
+
+        const result = await SessionCompaction.isOverflow({ tokens, model })
+        expect(result).toBe(true) // ← Correct: headroom is reserved
+      },
+    })
+  })
+
+  test("BUG: asymmetry — limit.input model allows 32K more usage before compaction than equivalent model without it", async () => {
+    await using tmp = await tmpdir()
+    await Instance.provide({
+      directory: tmp.path,
+      fn: async () => {
+        // Two models with identical context/output limits, differing only in limit.input
+        const withInputLimit = createModel({ context: 200_000, input: 200_000, output: 32_000 })
+        const withoutInputLimit = createModel({ context: 200_000, output: 32_000 })
+
+        // 170K total tokens — above context - output (168K) but below the input limit (200K)
+        const tokens = { input: 155_000, output: 10_000, reasoning: 0, cache: { read: 5_000, write: 0 } }
+
+        const withLimit = await SessionCompaction.isOverflow({ tokens, model: withInputLimit })
+        const withoutLimit = await SessionCompaction.isOverflow({ tokens, model: withoutInputLimit })
+
+        // Both models have identical real capacity — they should agree:
+        expect(withLimit).toBe(true) // should compact (170K leaves no room for 32K output)
+        expect(withoutLimit).toBe(true) // correctly compacts (170K > 168K)
+      },
+    })
+  })
+
   test("returns false when model context limit is 0", async () => {
     await using tmp = await tmpdir()
     await Instance.provide({
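For context while reviewing, here is a minimal sketch of the headroom rule the tests above assert. It is not the actual `SessionCompaction.isOverflow` implementation: the `TokenUsage` and `ModelLimits` shapes and the `expectedOverflow` helper are assumptions written only to match the arithmetic in the test comments (count = input + cache.read + output + reasoning, with the output budget reserved whether or not `limit.input` is set).

```ts
// Hypothetical sketch, not the opencode implementation.
interface TokenUsage {
  input: number
  output: number
  reasoning: number
  cache: { read: number; write: number }
}

interface ModelLimits {
  context: number
  input?: number // limit.input, when the provider reports one
  output: number
}

function expectedOverflow(tokens: TokenUsage, model: ModelLimits): boolean {
  if (model.context === 0) return false // mirrors the existing "context limit is 0" test
  // Tokens already spent in the conversation, counted the same way as the test comments.
  const count = tokens.input + tokens.cache.read + tokens.output + tokens.reasoning
  // Reserve room for the next response in both branches: a reported limit.input can only
  // tighten the budget (Math.min), it never removes the output reservation.
  const budget = model.input !== undefined ? Math.min(model.input, model.context) : model.context
  return count > budget - model.output
}
```

Under this rule all three new tests pass: 198K > 168K and 170K > 168K, regardless of whether `limit.input` is present.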