From ef0ff95e531a58c2f95cdd6f93a65c400e59a5f2 Mon Sep 17 00:00:00 2001 From: ryanwyler Date: Mon, 29 Dec 2025 11:47:04 -0700 Subject: [PATCH 1/4] feat: add collapse compaction mode Adds a new 'collapse' compaction mode that preserves recent context while summarizing older messages. Unlike standard compaction which summarizes the entire conversation, collapse extracts the oldest 65% of tokens, summarizes them at a breakpoint, and leaves the newest 35% untouched. Changes: - Add processCollapse() for breakpoint-based summarization - Add Identifier.createLike() for inserting messages at past timestamps - Add detectFormat() for backward compatibility with existing sessions - Add configurable ratios: trigger (0.85), extractRatio (0.65), recentRatio (0.15) - Add TUI toggle for switching between standard/collapse modes - Fix isOverflow() to include cache.write tokens - Fix link.tsx underline prop error from #6317 Default remains 'standard' - users can opt-in to collapse via config or TUI toggle. --- .../src/cli/cmd/tui/routes/session/index.tsx | 16 + .../cli/cmd/tui/routes/session/sidebar.tsx | 6 + packages/opencode/src/config/config.ts | 24 + packages/opencode/src/id/id.ts | 137 ++++- packages/opencode/src/server/server.ts | 2 + packages/opencode/src/session/compaction.ts | 482 +++++++++++++++++- packages/opencode/src/session/message-v2.ts | 26 +- packages/opencode/src/session/prompt.ts | 47 +- packages/sdk/js/src/v2/gen/types.gen.ts | 16 + packages/sdk/openapi.json | 23 + 10 files changed, 759 insertions(+), 20 deletions(-) diff --git a/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx b/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx index 374645abb356..f530b7f7f425 100644 --- a/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx +++ b/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx @@ -128,6 +128,9 @@ export function Session() { const [userMessageMarkdown, setUserMessageMarkdown] = createSignal(kv.get("user_message_markdown", true)) const [diffWrapMode, setDiffWrapMode] = createSignal<"word" | "none">("word") const [animationsEnabled, setAnimationsEnabled] = createSignal(kv.get("animations_enabled", true)) + const [compactionMethod, setCompactionMethod] = createSignal<"standard" | "collapse">( + kv.get("compaction_method", sync.data.config.compaction?.method ?? "standard"), + ) const wide = createMemo(() => dimensions().width > 120) const sidebarVisible = createMemo(() => { @@ -395,6 +398,19 @@ export function Session() { dialog.clear() }, }, + { + title: compactionMethod() === "collapse" ? "Use standard compaction" : "Use collapse compaction", + value: "session.toggle.compaction_method", + category: "Session", + onSelect: (dialog) => { + setCompactionMethod((prev) => { + const next = prev === "standard" ? "collapse" : "standard" + kv.set("compaction_method", next) + return next + }) + dialog.clear() + }, + }, { title: "Unshare session", value: "session.unshare", diff --git a/packages/opencode/src/cli/cmd/tui/routes/session/sidebar.tsx b/packages/opencode/src/cli/cmd/tui/routes/session/sidebar.tsx index a9ed042d1bb9..3efae65f6021 100644 --- a/packages/opencode/src/cli/cmd/tui/routes/session/sidebar.tsx +++ b/packages/opencode/src/cli/cmd/tui/routes/session/sidebar.tsx @@ -92,6 +92,12 @@ export function Sidebar(props: { sessionID: string }) { Context + + compact{" "} + {sync.data.config.compaction?.auto === false + ? "disabled" + : kv.get("compaction_method", sync.data.config.compaction?.method ?? "standard")} + {context()?.tokens ?? 
0} tokens {context()?.percentage ?? 0}% used {cost()} spent diff --git a/packages/opencode/src/config/config.ts b/packages/opencode/src/config/config.ts index 012e3e12f53e..f8d9fed43b47 100644 --- a/packages/opencode/src/config/config.ts +++ b/packages/opencode/src/config/config.ts @@ -807,6 +807,30 @@ export namespace Config { .object({ auto: z.boolean().optional().describe("Enable automatic compaction when context is full (default: true)"), prune: z.boolean().optional().describe("Enable pruning of old tool outputs (default: true)"), + method: z + .enum(["standard", "collapse"]) + .optional() + .describe( + "Compaction method: 'standard' summarizes entire conversation, 'collapse' extracts oldest messages and creates summary at breakpoint (default: collapse)", + ), + trigger: z + .number() + .min(0) + .max(1) + .optional() + .describe("Trigger compaction at this fraction of total context (default: 0.85 = 85%)"), + extractRatio: z + .number() + .min(0) + .max(1) + .optional() + .describe("For collapse mode: fraction of oldest tokens to extract and summarize (default: 0.65)"), + recentRatio: z + .number() + .min(0) + .max(1) + .optional() + .describe("For collapse mode: fraction of newest tokens to use as reference context (default: 0.15)"), }) .optional(), experimental: z diff --git a/packages/opencode/src/id/id.ts b/packages/opencode/src/id/id.ts index ad6e22e1beee..6bc6356ced85 100644 --- a/packages/opencode/src/id/id.ts +++ b/packages/opencode/src/id/id.ts @@ -15,7 +15,11 @@ export namespace Identifier { return z.string().startsWith(prefixes[prefix]) } + // Total ID length after prefix: 6 bytes hex (12 chars) + 14 random chars = 26 chars + // Note: 6-byte format truncates high byte but maintains backwards compatibility + // Use createLike() with a 7-byte reference ID when inserting at past timestamps const LENGTH = 26 + const TIME_BYTES = 6 // State for monotonic ID generation let lastTimestamp = 0 @@ -59,15 +63,140 @@ export namespace Identifier { } counter++ + // Encode timestamp * 0x1000 + counter into 6 bytes (48 bits) + // Note: This truncates the high byte for modern timestamps, but all IDs + // created at "now" will have the same truncation, so they sort correctly. + // The truncation only matters when inserting at past timestamps (use createLike for that). let now = BigInt(currentTimestamp) * BigInt(0x1000) + BigInt(counter) now = descending ? ~now : now - const timeBytes = Buffer.alloc(6) - for (let i = 0; i < 6; i++) { - timeBytes[i] = Number((now >> BigInt(40 - 8 * i)) & BigInt(0xff)) + const timeBytes = Buffer.alloc(TIME_BYTES) + for (let i = 0; i < TIME_BYTES; i++) { + timeBytes[i] = Number((now >> BigInt((TIME_BYTES - 1 - i) * 8)) & BigInt(0xff)) } - return prefixes[prefix] + "_" + timeBytes.toString("hex") + randomBase62(LENGTH - 12) + return prefixes[prefix] + "_" + timeBytes.toString("hex") + randomBase62(LENGTH - TIME_BYTES * 2) + } + + /** + * Detect the byte format (6 or 7) of an existing ID. 
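+   * IDs carry no explicit version marker, so the format is inferred from the leading hex digits: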
+ * 6-byte IDs: 12 hex chars + 14 random = 26 total after prefix + * 7-byte IDs: 14 hex chars + 12 random = 26 total after prefix + */ + export function detectFormat(id: string): 6 | 7 { + const underscoreIndex = id.indexOf("_") + if (underscoreIndex === -1) return TIME_BYTES as 6 | 7 + + const afterPrefix = id.slice(underscoreIndex + 1) + + // Check if first 14 chars are all valid hex (would indicate 7-byte format) + const first14 = afterPrefix.slice(0, 14) + const isValidHex14 = /^[0-9a-f]{14}$/i.test(first14) + + if (isValidHex14) { + // Could be 7-byte format, verify by checking if it decodes to a valid timestamp + try { + const bigValue = BigInt("0x" + first14) + const ts = Number(bigValue / BigInt(0x1000)) + + // Check if this looks like a valid modern timestamp (after 2020, before 2100) + const year2020 = 1577836800000 + const year2100 = 4102444800000 + if (ts >= year2020 && ts < year2100) { + return 7 + } + } catch { + // Not valid hex, fall through to 6-byte + } + } + + // Otherwise assume 6-byte (old format) + return 6 + } + + /** + * Create an ID that sorts immediately after a reference ID. + * + * This works by extracting the raw encoded value from the reference ID and + * incrementing it, ensuring the new ID sorts correctly regardless of the + * byte format (6 or 7 bytes). + * + * @param referenceId - The ID to sort after + * @param prefix - The prefix for the new ID (e.g., "message", "part") + * @param descending - Whether to use descending order (usually false) + * @param offsetMs - Milliseconds to add to the reference timestamp (default 1) + */ + export function createLike( + referenceId: string, + prefix: keyof typeof prefixes, + descending: boolean, + offsetMs: number = 1, + ): string { + const format = detectFormat(referenceId) + const underscoreIndex = referenceId.indexOf("_") + if (underscoreIndex === -1) { + throw new Error(`Invalid reference ID: ${referenceId}`) + } + + // Extract the hex timestamp portion from the reference ID + const hexPart = referenceId.slice(underscoreIndex + 1, underscoreIndex + 1 + format * 2) + const referenceValue = BigInt("0x" + hexPart) + + // Add offset (in the encoded space: offsetMs * 0x1000) + // This ensures the new ID sorts after the reference regardless of truncation + let newValue = referenceValue + BigInt(offsetMs) * BigInt(0x1000) + BigInt(1) // +1 for counter + + newValue = descending ? ~newValue : newValue + + const timeBytes = Buffer.alloc(format) + for (let i = 0; i < format; i++) { + timeBytes[i] = Number((newValue >> BigInt((format - 1 - i) * 8)) & BigInt(0xff)) + } + + const randomLength = LENGTH - format * 2 + return prefixes[prefix] + "_" + timeBytes.toString("hex") + randomBase62(randomLength) + } + + /** + * Decode the timestamp from an ID. + * Handles both old 6-byte IDs and new 7-byte IDs. 
+ */ + export function decodeTimestamp(id: string): { timestamp: number; counter: number } | null { + const underscoreIndex = id.indexOf("_") + if (underscoreIndex === -1) return null + + const hexPart = id.slice(underscoreIndex + 1) + + // Determine if this is an old 6-byte ID or new 7-byte ID + // Old IDs: 12 hex chars for time + 14 random = 26 total after prefix + // New IDs: 14 hex chars for time + 12 random = 26 total after prefix + // We can detect by checking if the first 14 chars decode to a reasonable timestamp + + // Try 7-byte (new format) first + if (hexPart.length >= 14) { + const hex7 = hexPart.slice(0, 14) + const bigValue7 = BigInt("0x" + hex7) + const ts7 = Number(bigValue7 / BigInt(0x1000)) + const counter7 = Number(bigValue7 % BigInt(0x1000)) + + // Check if this looks like a valid modern timestamp (after 2020, before 2100) + const year2020 = 1577836800000 + const year2100 = 4102444800000 + if (ts7 >= year2020 && ts7 < year2100) { + return { timestamp: ts7, counter: counter7 } + } + } + + // Try 6-byte (old format) + if (hexPart.length >= 12) { + const hex6 = hexPart.slice(0, 12) + const bigValue6 = BigInt("0x" + hex6) + const ts6 = Number(bigValue6 / BigInt(0x1000)) + const counter6 = Number(bigValue6 % BigInt(0x1000)) + return { timestamp: ts6, counter: counter6 } + } + + return null } } diff --git a/packages/opencode/src/server/server.ts b/packages/opencode/src/server/server.ts index f31b8ec44f50..6f32e854447f 100644 --- a/packages/opencode/src/server/server.ts +++ b/packages/opencode/src/server/server.ts @@ -1121,6 +1121,8 @@ export namespace Server { break } } + // Create compaction trigger, then loop processes it + // process() will route to the appropriate method (collapse or standard) await SessionCompaction.create({ sessionID, agent: currentAgent, diff --git a/packages/opencode/src/session/compaction.ts b/packages/opencode/src/session/compaction.ts index 42bab2eb9751..12dea48f738c 100644 --- a/packages/opencode/src/session/compaction.ts +++ b/packages/opencode/src/session/compaction.ts @@ -14,6 +14,8 @@ import { fn } from "@/util/fn" import { Agent } from "@/agent/agent" import { Plugin } from "@/plugin" import { Config } from "@/config/config" +import { Global } from "@/global" +import path from "path" export namespace SessionCompaction { const log = Log.create({ service: "session.compaction" }) @@ -27,15 +29,60 @@ export namespace SessionCompaction { ), } + // Default configuration values + export const DEFAULTS = { + method: "standard" as const, + trigger: 0.85, // Trigger at 85% of usable context to leave headroom + extractRatio: 0.65, + recentRatio: 0.15, + } + + /** + * Get the compaction method. + * Priority: TUI toggle (kv.json) > config file > default + */ + export async function getMethod(): Promise<"standard" | "collapse"> { + const config = await Config.get() + const configMethod = config.compaction?.method + + // Check TUI toggle override + try { + const file = Bun.file(path.join(Global.Path.state, "kv.json")) + if (await file.exists()) { + const kv = await file.json() + const toggle = kv["compaction_method"] + if (toggle === "standard" || toggle === "collapse") { + return toggle + } + } + } catch { + // Ignore KV read errors + } + + return configMethod ?? 
DEFAULTS.method + } + export async function isOverflow(input: { tokens: MessageV2.Assistant["tokens"]; model: Provider.Model }) { const config = await Config.get() if (config.compaction?.auto === false) return false const context = input.model.limit.context if (context === 0) return false - const count = input.tokens.input + input.tokens.cache.read + input.tokens.output - const output = Math.min(input.model.limit.output, SessionPrompt.OUTPUT_TOKEN_MAX) || SessionPrompt.OUTPUT_TOKEN_MAX - const usable = context - output - return count > usable + + const count = input.tokens.input + input.tokens.cache.read + input.tokens.cache.write + input.tokens.output + const trigger = config.compaction?.trigger ?? DEFAULTS.trigger + const threshold = context * trigger + const isOver = count > threshold + + log.debug("overflow check", { + tokens: input.tokens, + count, + context, + trigger, + threshold, + isOver, + }) + + return isOver } export const PRUNE_MINIMUM = 20_000 @@ -89,13 +136,37 @@ export namespace SessionCompaction { } } + /** + * Process compaction - routes to appropriate method based on config. + * This is called via the create() -> loop() -> process() flow. + */ export async function process(input: { parentID: string messages: MessageV2.WithParts[] sessionID: string abort: AbortSignal auto: boolean - }) { + }): Promise<"continue" | "stop"> { + const method = await getMethod() + log.info("compacting", { method }) + + if (method === "collapse") { + return processCollapse(input) + } + return processStandard(input) + } + + /** + * Standard compaction: Summarizes entire conversation at end. + */ + async function processStandard(input: { + parentID: string + messages: MessageV2.WithParts[] + sessionID: string + abort: AbortSignal + auto: boolean + }): Promise<"continue" | "stop"> { + log.debug("standard", { parentID: input.parentID }) const userMessage = input.messages.findLast((m) => m.info.id === input.parentID)!.info as MessageV2.User const agent = await Agent.get("compaction") const model = agent.model @@ -192,6 +263,350 @@ export namespace SessionCompaction { return "continue" } + /** + * Collapse compaction: Extract oldest messages, distill with AI, insert summary at breakpoint. + * Messages before the breakpoint are filtered out by filterCompacted(). + */ + async function processCollapse(input: { + parentID: string + messages: MessageV2.WithParts[] + sessionID: string + abort: AbortSignal + auto: boolean + }): Promise<"continue" | "stop"> { + const config = await Config.get() + const extractRatio = config.compaction?.extractRatio ?? DEFAULTS.extractRatio + const recentRatio = config.compaction?.recentRatio ?? 
DEFAULTS.recentRatio + + log.debug("collapse", { + messages: input.messages.length, + extractRatio, + recentRatio, + }) + + // Calculate token counts for messages + const messageTokens: number[] = [] + let totalTokens = 0 + for (const msg of input.messages) { + const estimate = estimateMessageTokens(msg) + messageTokens.push(estimate) + totalTokens += estimate + } + + // Calculate extraction targets + const extractTarget = Math.floor(totalTokens * extractRatio) + const recentTarget = Math.floor(totalTokens * recentRatio) + + // Find split points + let extractedTokens = 0 + let extractSplitIndex = 0 + for (let i = 0; i < input.messages.length; i++) { + if (extractedTokens >= extractTarget) break + extractedTokens += messageTokens[i] + extractSplitIndex = i + 1 + } + + let recentTokens = 0 + let recentSplitIndex = input.messages.length + for (let i = input.messages.length - 1; i >= 0; i--) { + if (recentTokens >= recentTarget) break + recentTokens += messageTokens[i] + recentSplitIndex = i + } + + // Ensure recent split doesn't overlap with extract + if (recentSplitIndex <= extractSplitIndex) { + recentSplitIndex = extractSplitIndex + } + + const extractedMessages = input.messages.slice(0, extractSplitIndex) + const recentReferenceMessages = input.messages.slice(recentSplitIndex) + + log.debug("collapse split", { + totalTokens, + extractTarget, + extractedTokens, + extractedMessages: extractedMessages.length, + recentTarget, + recentTokens, + recentMessages: recentReferenceMessages.length, + }) + + if (extractedMessages.length === 0) { + log.info("collapse skipped", { reason: "no messages to extract" }) + return "continue" + } + + // Convert extracted messages to markdown for distillation + const markdownContent = messagesToMarkdown(extractedMessages) + const recentContext = messagesToMarkdown(recentReferenceMessages) + + // Get the original compaction user message (placeholder created by create()) + const originalUserMessage = input.messages.findLast((m) => m.info.id === input.parentID)!.info as MessageV2.User + + // Get the last extracted message to determine breakpoint position + const lastExtractedMessage = extractedMessages[extractedMessages.length - 1] + const lastExtractedId = lastExtractedMessage.info.id + + // Extract timestamp from the last extracted message ID + // Use createLike to handle both 6-byte and 7-byte ID formats + const breakpointTimestamp = lastExtractedMessage.info.time.created + 1 + + log.debug("collapse positioning", { + lastExtractedId, + breakpointTimestamp, + }) + + // Create the compaction user message at the breakpoint position + const compactionUserId = Identifier.createLike(lastExtractedId, "message", false, 1) + const compactionUserMsg = await Session.updateMessage({ + id: compactionUserId, + role: "user", + model: originalUserMessage.model, + sessionID: input.sessionID, + agent: originalUserMessage.agent, + time: { + created: breakpointTimestamp, + }, + }) + await Session.updatePart({ + id: Identifier.createLike(lastExtractedId, "part", false, 1), + messageID: compactionUserMsg.id, + sessionID: input.sessionID, + type: "compaction", + auto: input.auto, + }) + + const agent = await Agent.get("compaction") + const model = agent.model + ? 
await Provider.getModel(agent.model.providerID, agent.model.modelID) + : await Provider.getModel(originalUserMessage.model.providerID, originalUserMessage.model.modelID) + + // Create assistant summary message positioned right after the compaction user message + // Use compactionUserId as reference (not lastExtractedId) to ensure assistant sorts immediately after user + // This prevents other messages from being created with IDs that sort between user and assistant + const compactionAssistantId = Identifier.createLike(compactionUserId, "message", false, 1) + const msg = (await Session.updateMessage({ + id: compactionAssistantId, + role: "assistant", + parentID: compactionUserMsg.id, + sessionID: input.sessionID, + mode: "compaction", + agent: "compaction", + summary: true, + path: { + cwd: Instance.directory, + root: Instance.worktree, + }, + cost: 0, + tokens: { + output: 0, + input: 0, + reasoning: 0, + cache: { read: 0, write: 0 }, + }, + modelID: model.id, + providerID: model.providerID, + time: { + created: breakpointTimestamp + 1, + }, + })) as MessageV2.Assistant + + const processor = SessionProcessor.create({ + assistantMessage: msg, + sessionID: input.sessionID, + model, + abort: input.abort, + }) + + // Allow plugins to inject context + const compacting = await Plugin.trigger( + "experimental.session.compacting", + { sessionID: input.sessionID }, + { context: [], prompt: undefined }, + ) + + const collapsePrompt = `You are creating a comprehensive context restoration document from a collapsed conversation segment. This document will serve as the foundation for continued work - it must preserve critical knowledge that would otherwise be lost. + +## Output Structure + +Create a detailed summary (target: 600-800 lines, approximately 8,000-12,000 tokens) with the following sections: + +### 1. Current Task State +- What is actively being worked on +- Immediate next steps +- Any blockers or open questions + +### 2. Resolved Code & Lessons Learned +For each complex or highly iterative area of focus, include: +- The actual working code in markdown fences (this is critical - preserve it verbatim) +- What approaches failed and why +- What finally worked and why +- Insights that would help if revisiting this area +- Any edge cases or gotchas discovered + +Format example: +\`\`\`typescript +// Solution for X problem +// Failed approaches: tried A (failed because...), tried B (failed because...) +// Working solution: C works because... +// Gotcha: watch out for Y when Z + +\`\`\` + +### 3. User Directives +Bullet points of explicit or implicit user preferences: +- Things they want you to always do +- Things they want you to never do +- Coding style preferences +- Communication preferences +- Project-specific rules they've established + +### 4. Custom Utilities & Commands +- Any custom scripts, commands, or workflows established +- Special tool configurations or aliases +- Debugging commands that proved useful +- Project-specific shortcuts or patterns + +### 5. Design Decisions & Derived Requirements +Requirements and decisions that emerged from the conversation but aren't documented elsewhere: +- Architecture decisions made and their rationale +- API contracts or interfaces agreed upon +- Naming conventions established +- File organization patterns +- Integration patterns discovered + +### 6. 
Technical Facts +- Key file paths and their purposes +- Important function/class names and what they do +- Configuration values that matter +- Environment specifics +- Dependencies or version constraints + +## Critical Rules + +- PRESERVE working code verbatim in fenced blocks - this is essential context that prevents re-solving solved problems +- INCLUDE failed approaches with explanations - this prevents repeating the same mistakes +- Be specific: exact paths, line numbers, function names, config values +- Capture the "why" behind decisions, not just the "what" +- If something was hard-won through iteration, document the full journey +- User directives are sacred - never omit explicit user preferences +- This document should allow work to continue seamlessly as if the conversation never broke + +## Extracted Context (to distill) + +${markdownContent} + +## Recent Context (for reference - shows current state) + +${recentContext} + +${compacting.context.length > 0 ? "\n## Additional Context\n\n" + compacting.context.join("\n\n") : ""} + +Generate the context restoration document now:` + + const result = await processor.process({ + user: originalUserMessage, + agent, + abort: input.abort, + sessionID: input.sessionID, + tools: {}, + system: [], + messages: [ + { + role: "user", + content: [{ type: "text", text: collapsePrompt }], + }, + ], + model, + }) + + // NOTE: We intentionally do NOT add a "Continue if you have next steps" message + // for collapse mode. The collapse summary is just context restoration - the loop + // should exit after the summary is generated so the user can continue naturally. + + if (processor.message.error) return "stop" + + // Update token count on the chronologically last assistant message + // so isOverflow() sees the correct post-collapse state. + const allMessages = await Session.messages({ sessionID: input.sessionID }) + const lastAssistant = allMessages + .filter( + (m): m is MessageV2.WithParts & { info: MessageV2.Assistant } => + m.info.role === "assistant" && m.info.id !== msg.id, + ) + .sort((a, b) => b.info.time.created - a.info.time.created)[0] + + if (lastAssistant) { + const originalTokens = { ...lastAssistant.info.tokens } + const collapseSummaryTokens = processor.message.tokens.output + + const currentTotal = + lastAssistant.info.tokens.input + + lastAssistant.info.tokens.cache.read + + lastAssistant.info.tokens.cache.write + + lastAssistant.info.tokens.output + + const newTotal = Math.max(0, currentTotal - extractedTokens + collapseSummaryTokens) + + lastAssistant.info.tokens = { + input: 0, + output: lastAssistant.info.tokens.output, + reasoning: lastAssistant.info.tokens.reasoning, + cache: { + read: Math.max(0, newTotal - lastAssistant.info.tokens.output), + write: 0, + }, + } + await Session.updateMessage(lastAssistant.info) + + log.debug("tokens adjusted", { + extracted: extractedTokens, + summary: collapseSummaryTokens, + total: newTotal, + }) + } + + log.info("collapsed", { + messages: extractedMessages.length, + tokens: extractedTokens, + }) + + // Delete the original trigger message (created by create()) to prevent + // the loop from picking it up again as a pending compaction task. + // The trigger is the message at input.parentID - we've created a new + // compaction user message at the breakpoint position. 
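+    // If the trigger already is the breakpoint message, there is nothing separate to delete.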
+ if (input.parentID !== compactionUserMsg.id) { + log.debug("cleanup trigger", { id: input.parentID }) + // Delete parts first + const triggerMsg = input.messages.find((m) => m.info.id === input.parentID) + if (triggerMsg) { + for (const part of triggerMsg.parts) { + await Session.removePart({ + sessionID: input.sessionID, + messageID: input.parentID, + partID: part.id, + }) + } + } + await Session.removeMessage({ + sessionID: input.sessionID, + messageID: input.parentID, + }) + } + + Bus.publish(Event.Compacted, { sessionID: input.sessionID }) + + // For auto-compaction: return "continue" so the loop processes the user's + // original message that triggered the overflow. The trigger message is deleted, + // so the loop will find the real user message and respond to it. + // For manual compaction: return "stop" - user explicitly requested compaction only. + if (input.auto) { + return "continue" + } + return "stop" + } + export const create = fn( z.object({ sessionID: Identifier.schema("session"), @@ -222,4 +637,61 @@ export namespace SessionCompaction { }) }, ) + + /** + * Estimate tokens for a message (respects compaction state) + */ + function estimateMessageTokens(msg: MessageV2.WithParts): number { + let tokens = 0 + for (const part of msg.parts) { + if (part.type === "text") { + tokens += Token.estimate(part.text) + } else if (part.type === "tool" && part.state.status === "completed") { + // Skip compacted tool outputs + if (part.state.time.compacted) continue + tokens += Token.estimate(JSON.stringify(part.state.input)) + tokens += Token.estimate(part.state.output) + } + } + return tokens + } + + /** + * Convert messages to markdown format for distillation + */ + function messagesToMarkdown(messages: MessageV2.WithParts[]): string { + const lines: string[] = [] + + for (const msg of messages) { + const role = msg.info.role === "user" ? "User" : "Assistant" + lines.push(`### ${role}`) + lines.push("") + + for (const part of msg.parts) { + if (part.type === "text" && part.text) { + // Skip synthetic parts like "Continue if you have next steps" + if (part.synthetic) continue + lines.push(part.text) + lines.push("") + } else if (part.type === "tool" && part.state.status === "completed") { + // Skip compacted tool outputs + if (part.state.time.compacted) continue + lines.push(`**Tool: ${part.tool}**`) + lines.push("```json") + lines.push(JSON.stringify(part.state.input, null, 2)) + lines.push("```") + if (part.state.output) { + lines.push("Output:") + lines.push("```") + lines.push(part.state.output.slice(0, 1000)) + if (part.state.output.length > 1000) lines.push("... 
(truncated)") + lines.push("```") + } + lines.push("") + } + } + } + + return lines.join("\n") + } } diff --git a/packages/opencode/src/session/message-v2.ts b/packages/opencode/src/session/message-v2.ts index bb78ae64ce6e..47eeb3a649f3 100644 --- a/packages/opencode/src/session/message-v2.ts +++ b/packages/opencode/src/session/message-v2.ts @@ -11,8 +11,11 @@ import { ProviderTransform } from "@/provider/transform" import { STATUS_CODES } from "http" import { iife } from "@/util/iife" import { type SystemError } from "bun" +import { Log } from "../util/log" export namespace MessageV2 { + const log = Log.create({ service: "message-v2" }) + export const OutputLengthError = NamedError.create("MessageOutputLengthError", z.object({})) export const AbortedError = NamedError.create("MessageAbortedError", z.object({ message: z.string() })) export const AuthError = NamedError.create( @@ -577,17 +580,28 @@ export namespace MessageV2 { export async function filterCompacted(stream: AsyncIterable) { const result = [] as MessageV2.WithParts[] const completed = new Set() + for await (const msg of stream) { + const hasCompactionPart = msg.parts.some((part) => part.type === "compaction") + const isAssistantSummary = + msg.info.role === "assistant" && (msg.info as Assistant).summary && (msg.info as Assistant).finish + result.push(msg) - if ( - msg.info.role === "user" && - completed.has(msg.info.id) && - msg.parts.some((part) => part.type === "compaction") - ) + + // Check if this is a compaction breakpoint + if (msg.info.role === "user" && completed.has(msg.info.id) && hasCompactionPart) { + log.debug("breakpoint", { id: msg.info.id }) break - if (msg.info.role === "assistant" && msg.info.summary && msg.info.finish) completed.add(msg.info.parentID) + } + + // If assistant with summary=true and finish, add parentID to completed set + if (isAssistantSummary) { + completed.add((msg.info as Assistant).parentID) + } } + result.reverse() + log.debug("filtered", { count: result.length }) return result } diff --git a/packages/opencode/src/session/prompt.ts b/packages/opencode/src/session/prompt.ts index 6bf71ef36534..f5a6c1fac7cf 100644 --- a/packages/opencode/src/session/prompt.ts +++ b/packages/opencode/src/session/prompt.ts @@ -244,6 +244,7 @@ export namespace SessionPrompt { SessionStatus.set(sessionID, { type: "busy" }) log.info("loop", { step, sessionID }) if (abort.aborted) break + let msgs = await MessageV2.filterCompacted(MessageV2.stream(sessionID)) let lastUser: MessageV2.User | undefined @@ -263,6 +264,12 @@ export namespace SessionPrompt { } } + log.debug("state", { + lastUser: lastUser?.id, + lastFinished: lastFinished?.id, + tasks: tasks.length, + }) + if (!lastUser) throw new Error("No user message found in stream. This should never happen.") if ( lastAssistant?.finish && @@ -445,6 +452,7 @@ export namespace SessionPrompt { // pending compaction if (task?.type === "compaction") { + log.debug("compaction task", { auto: task.auto }) const result = await SessionCompaction.process({ messages: msgs, parentID: lastUser.id, @@ -462,6 +470,7 @@ export namespace SessionPrompt { lastFinished.summary !== true && (await SessionCompaction.isOverflow({ tokens: lastFinished.tokens, model })) ) { + log.info("overflow", { tokens: lastFinished.tokens }) await SessionCompaction.create({ sessionID, agent: lastUser.agent, @@ -471,7 +480,6 @@ export namespace SessionPrompt { continue } - // normal processing const agent = await Agent.get(lastUser.agent) const maxSteps = agent.maxSteps ?? 
Infinity const isLastStep = step >= maxSteps @@ -528,6 +536,16 @@ export namespace SessionPrompt { await Plugin.trigger("experimental.chat.messages.transform", {}, { messages: sessionMessages }) + // Debug: log messages being sent to LLM + log.debug("llm messages", { + count: sessionMessages.length, + messageIds: sessionMessages.map((m) => m.info.id), + firstMessageId: sessionMessages[0]?.info.id, + hasCompactionSummary: sessionMessages.some( + (m) => m.info.role === "assistant" && (m.info as any).summary === true, + ), + }) + const result = await processor.process({ user: lastUser, agent, @@ -552,12 +570,31 @@ export namespace SessionPrompt { continue } SessionCompaction.prune({ sessionID }) + + // Check if there are queued requests - their user messages are already created + // and need processing. We need to grab them before defer() runs cancel(). + const queued = state()[sessionID]?.callbacks ?? [] + if (queued.length > 0) { + // Clear callbacks so cancel() doesn't reject them + state()[sessionID].callbacks = [] + // Schedule re-entry after this function exits (and defer runs cancel) + // Use setImmediate to let defer() clear state first, then re-enter loop + setImmediate(async () => { + const result = await loop(sessionID) + for (const q of queued) { + q.resolve(result) + } + }) + // Return last assistant for now - queued requests will get their real response + for await (const item of MessageV2.stream(sessionID)) { + if (item.info.role === "user") continue + return item + } + } + + // No queued requests - return last assistant as before for await (const item of MessageV2.stream(sessionID)) { if (item.info.role === "user") continue - const queued = state()[sessionID]?.callbacks ?? [] - for (const q of queued) { - q.resolve(item) - } return item } throw new Error("Impossible") diff --git a/packages/sdk/js/src/v2/gen/types.gen.ts b/packages/sdk/js/src/v2/gen/types.gen.ts index 8b3bece004f6..e2af8443f6ad 100644 --- a/packages/sdk/js/src/v2/gen/types.gen.ts +++ b/packages/sdk/js/src/v2/gen/types.gen.ts @@ -1616,6 +1616,22 @@ export type Config = { * Enable pruning of old tool outputs (default: true) */ prune?: boolean + /** + * Compaction method: 'standard' summarizes entire conversation, 'collapse' extracts oldest messages and creates summary at breakpoint (default: collapse) + */ + method?: "standard" | "collapse" + /** + * Trigger compaction at this fraction of total context (default: 0.85 = 85%) + */ + trigger?: number + /** + * For collapse mode: fraction of oldest tokens to extract and summarize (default: 0.65) + */ + extractRatio?: number + /** + * For collapse mode: fraction of newest tokens to use as reference context (default: 0.15) + */ + recentRatio?: number } experimental?: { hook?: { diff --git a/packages/sdk/openapi.json b/packages/sdk/openapi.json index 4924a5bfac07..c693a643202a 100644 --- a/packages/sdk/openapi.json +++ b/packages/sdk/openapi.json @@ -8684,6 +8684,29 @@ "prune": { "description": "Enable pruning of old tool outputs (default: true)", "type": "boolean" + }, + "method": { + "description": "Compaction method: 'standard' summarizes entire conversation, 'collapse' extracts oldest messages and creates summary at breakpoint (default: collapse)", + "type": "string", + "enum": ["standard", "collapse"] + }, + "trigger": { + "description": "Trigger compaction at this fraction of total context (default: 0.85 = 85%)", + "type": "number", + "minimum": 0, + "maximum": 1 + }, + "extractRatio": { + "description": "For collapse mode: fraction of oldest tokens to extract and 
summarize (default: 0.65)", + "type": "number", + "minimum": 0, + "maximum": 1 + }, + "recentRatio": { + "description": "For collapse mode: fraction of newest tokens to use as reference context (default: 0.15)", + "type": "number", + "minimum": 0, + "maximum": 1 } } }, From 07993c0c9ec1a0c0343c65d93b35268e6ac7fc18 Mon Sep 17 00:00:00 2001 From: ryanwyler Date: Tue, 30 Dec 2025 08:56:49 -0700 Subject: [PATCH 2/4] feat: include previous summaries in collapse compaction Adds support for merging historical context from previous compaction summaries into new collapse summaries. This prevents loss of important information across multiple compaction cycles. New config options: - summaryMaxTokens: target token count for summary output (default: 10000) - previousSummaries: number of previous summaries to include (default: 3) Changes: - Add getPreviousSummaries() to fetch historical summaries from unfiltered messages - Add extractSummaryText() to extract text content from summary parts - Update collapse prompt to include previous summaries section - Add merge instructions to preserve historical context --- packages/opencode/src/config/config.ts | 12 +++++ packages/opencode/src/session/compaction.ts | 59 ++++++++++++++++++++- 2 files changed, 69 insertions(+), 2 deletions(-) diff --git a/packages/opencode/src/config/config.ts b/packages/opencode/src/config/config.ts index f8d9fed43b47..60ebb3c624b7 100644 --- a/packages/opencode/src/config/config.ts +++ b/packages/opencode/src/config/config.ts @@ -831,6 +831,18 @@ export namespace Config { .max(1) .optional() .describe("For collapse mode: fraction of newest tokens to use as reference context (default: 0.15)"), + summaryMaxTokens: z + .number() + .min(1000) + .max(50000) + .optional() + .describe("For collapse mode: target token count for the summary output (default: 10000)"), + previousSummaries: z + .number() + .min(0) + .max(10) + .optional() + .describe("For collapse mode: number of previous summaries to include for context merging (default: 3)"), }) .optional(), experimental: z diff --git a/packages/opencode/src/session/compaction.ts b/packages/opencode/src/session/compaction.ts index 12dea48f738c..683b899a74e1 100644 --- a/packages/opencode/src/session/compaction.ts +++ b/packages/opencode/src/session/compaction.ts @@ -35,6 +35,8 @@ export namespace SessionCompaction { trigger: 0.85, // Trigger at 85% of usable context to leave headroom extractRatio: 0.65, recentRatio: 0.15, + summaryMaxTokens: 10000, // Target token count for collapse summary + previousSummaries: 3, // Number of previous summaries to include in collapse } /** @@ -277,11 +279,18 @@ export namespace SessionCompaction { const config = await Config.get() const extractRatio = config.compaction?.extractRatio ?? DEFAULTS.extractRatio const recentRatio = config.compaction?.recentRatio ?? DEFAULTS.recentRatio + const summaryMaxTokens = config.compaction?.summaryMaxTokens ?? DEFAULTS.summaryMaxTokens + const previousSummariesLimit = config.compaction?.previousSummaries ?? 
DEFAULTS.previousSummaries + + // Fetch previous summaries from this session (unfiltered, to get all historical summaries) + const previousSummaries = await getPreviousSummaries(input.sessionID, previousSummariesLimit) log.debug("collapse", { messages: input.messages.length, extractRatio, recentRatio, + summaryMaxTokens, + previousSummaries: previousSummaries.length, }) // Calculate token counts for messages @@ -426,11 +435,27 @@ export namespace SessionCompaction { { context: [], prompt: undefined }, ) + // Build previous summaries section if available + const previousSummariesSection = + previousSummaries.length > 0 + ? `## Previous Session Summaries + +The following summaries contain critical context from earlier compactions in this session. +You MUST merge and consolidate all relevant information from these summaries into your new summary. +Do not lose any important details - treat previous summaries as authoritative historical record. + +${previousSummaries.map((summary, i) => `### Previous Summary ${i + 1}\n\n${summary}`).join("\n\n---\n\n")} + +--- + +` + : "" + const collapsePrompt = `You are creating a comprehensive context restoration document from a collapsed conversation segment. This document will serve as the foundation for continued work - it must preserve critical knowledge that would otherwise be lost. ## Output Structure -Create a detailed summary (target: 600-800 lines, approximately 8,000-12,000 tokens) with the following sections: +Create a detailed summary (target: approximately ${summaryMaxTokens} tokens) with the following sections: ### 1. Current Task State - What is actively being worked on @@ -492,8 +517,9 @@ Requirements and decisions that emerged from the conversation but aren't documen - If something was hard-won through iteration, document the full journey - User directives are sacred - never omit explicit user preferences - This document should allow work to continue seamlessly as if the conversation never broke +${previousSummaries.length > 0 ? 
"- MERGE all information from previous summaries - do not lose historical context\n- Consolidate duplicate information but preserve all unique details" : ""} -## Extracted Context (to distill) +${previousSummariesSection}## Extracted Context (to distill) ${markdownContent} @@ -694,4 +720,33 @@ Generate the context restoration document now:` return lines.join("\n") } + + /** + * Extract summary text from a compaction summary message's parts + */ + function extractSummaryText(msg: MessageV2.WithParts): string { + return msg.parts + .filter((p): p is MessageV2.TextPart => p.type === "text" && !p.synthetic) + .map((p) => p.text) + .join("\n") + } + + /** + * Fetch previous compaction summaries from the session (unfiltered) + */ + async function getPreviousSummaries(sessionID: string, limit: number): Promise { + const allMessages = await Session.messages({ sessionID }) + + return allMessages + .filter( + (m): m is MessageV2.WithParts & { info: MessageV2.Assistant } => + m.info.role === "assistant" && + (m.info as MessageV2.Assistant).summary === true && + (m.info as MessageV2.Assistant).finish !== undefined, + ) + .sort((a, b) => a.info.time.created - b.info.time.created) // oldest first + .slice(-limit) // take the N most recent + .map((m) => extractSummaryText(m)) + .filter((text) => text.trim().length > 0) + } } From cd69c84e30a9ff57f693b200887bd4b724f28331 Mon Sep 17 00:00:00 2001 From: ryanwyler Date: Tue, 30 Dec 2025 07:18:59 -0700 Subject: [PATCH 3/4] fix: resolve broken forked sessions with compactions due to missing parent-child message references When forking a session, message IDs are regenerated but parentID references in assistant messages were not updated to point to the new IDs. This broke features that rely on parent-child relationships, such as compaction breakpoints. ## Problem Forking a compacted session would fail with: ``` prompt is too long: 203573 tokens > 200000 maximum ``` The forked session didn't honor the compaction breakpoint because the parent-child message relationships were broken. ## Fix Added an ID mapping that tracks old ID -> new ID, then updates parentID references when cloning assistant messages. ## Changes - packages/opencode/src/session/index.ts - Add ID mapping in fork function to preserve parent-child relationships --- packages/opencode/src/session/index.ts | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/packages/opencode/src/session/index.ts b/packages/opencode/src/session/index.ts index 4285223bc5c9..c64d569235d1 100644 --- a/packages/opencode/src/session/index.ts +++ b/packages/opencode/src/session/index.ts @@ -147,12 +147,19 @@ export namespace Session { directory: Instance.directory, }) const msgs = await messages({ sessionID: input.sessionID }) + const idMap = new Map() + for (const msg of msgs) { if (input.messageID && msg.info.id >= input.messageID) break + const newID = Identifier.ascending("message") + idMap.set(msg.info.id, newID) + + const parentID = msg.info.role === "assistant" && msg.info.parentID ? 
idMap.get(msg.info.parentID) : undefined const cloned = await updateMessage({ ...msg.info, sessionID: session.id, - id: Identifier.ascending("message"), + id: newID, + ...(parentID && { parentID }), }) for (const part of msg.parts) { From 61694c46e040bae922eaedbb59036a621d8e711a Mon Sep 17 00:00:00 2001 From: ryanwyler Date: Tue, 30 Dec 2025 09:57:59 -0700 Subject: [PATCH 4/4] feat: add token budget for previous summaries and improve prompt structure - Calculate available token budget before fetching previous summaries - Use XML-style delimiters for clearer prompt structure - Add debug logging with [BUDGET_CALC] tag for troubleshooting - Prevents context overflow when including previous summaries --- packages/opencode/src/session/compaction.ts | 223 ++++++++++---------- 1 file changed, 113 insertions(+), 110 deletions(-) diff --git a/packages/opencode/src/session/compaction.ts b/packages/opencode/src/session/compaction.ts index 683b899a74e1..886ebbb161a9 100644 --- a/packages/opencode/src/session/compaction.ts +++ b/packages/opencode/src/session/compaction.ts @@ -39,6 +39,33 @@ export namespace SessionCompaction { previousSummaries: 3, // Number of previous summaries to include in collapse } + // Static portion of collapse prompt template for token estimation + const COLLAPSE_PROMPT_TEMPLATE = `You are creating a comprehensive context restoration document from a collapsed conversation segment. This document will serve as the foundation for continued work - it must preserve critical knowledge that would otherwise be lost. + +## Output Structure + +Create a detailed summary with the following sections: + +### 1. Current Task State +### 2. Resolved Code & Lessons Learned +### 3. User Directives +### 4. Custom Utilities & Commands +### 5. Design Decisions & Derived Requirements +### 6. Technical Facts + +## Critical Rules + +- PRESERVE working code verbatim in fenced blocks +- INCLUDE failed approaches with explanations +- Be specific: exact paths, line numbers, function names, config values +- Capture the "why" behind decisions, not just the "what" +- User directives are sacred - never omit explicit user preferences + +## Extracted Context (to distill) +## Recent Context (for reference) + +Generate the context restoration document now:` + /** * Get the compaction method. * Priority: TUI toggle (kv.json) > config file > default @@ -282,18 +309,14 @@ export namespace SessionCompaction { const summaryMaxTokens = config.compaction?.summaryMaxTokens ?? DEFAULTS.summaryMaxTokens const previousSummariesLimit = config.compaction?.previousSummaries ?? DEFAULTS.previousSummaries - // Fetch previous summaries from this session (unfiltered, to get all historical summaries) - const previousSummaries = await getPreviousSummaries(input.sessionID, previousSummariesLimit) - - log.debug("collapse", { - messages: input.messages.length, - extractRatio, - recentRatio, - summaryMaxTokens, - previousSummaries: previousSummaries.length, - }) + // Get the user message to determine which model we'll use + const originalUserMessage = input.messages.findLast((m) => m.info.id === input.parentID)!.info as MessageV2.User + const agent = await Agent.get("compaction") + const model = agent.model + ? 
await Provider.getModel(agent.model.providerID, agent.model.modelID) + : await Provider.getModel(originalUserMessage.model.providerID, originalUserMessage.model.modelID) - // Calculate token counts for messages + // Calculate token counts for messages first const messageTokens: number[] = [] let totalTokens = 0 for (const msg of input.messages) { @@ -350,8 +373,17 @@ export namespace SessionCompaction { const markdownContent = messagesToMarkdown(extractedMessages) const recentContext = messagesToMarkdown(recentReferenceMessages) - // Get the original compaction user message (placeholder created by create()) - const originalUserMessage = input.messages.findLast((m) => m.info.id === input.parentID)!.info as MessageV2.User + // Build base prompt (without previous summaries) to calculate token budget + const markdownTokens = Token.estimate(markdownContent) + const recentTokensEstimate = Token.estimate(recentContext) + const templateTokens = Token.estimate(COLLAPSE_PROMPT_TEMPLATE) + const basePromptTokens = markdownTokens + recentTokensEstimate + templateTokens + const contextLimit = model.limit.context + const outputReserve = SessionPrompt.OUTPUT_TOKEN_MAX + const previousSummaryBudget = Math.max(0, contextLimit - outputReserve - basePromptTokens) + + // Fetch previous summaries that fit within budget + const previousSummaries = await getPreviousSummaries(input.sessionID, previousSummariesLimit, previousSummaryBudget) // Get the last extracted message to determine breakpoint position const lastExtractedMessage = extractedMessages[extractedMessages.length - 1] @@ -386,11 +418,6 @@ export namespace SessionCompaction { auto: input.auto, }) - const agent = await Agent.get("compaction") - const model = agent.model - ? await Provider.getModel(agent.model.providerID, agent.model.modelID) - : await Provider.getModel(originalUserMessage.model.providerID, originalUserMessage.model.modelID) - // Create assistant summary message positioned right after the compaction user message // Use compactionUserId as reference (not lastExtractedId) to ensure assistant sorts immediately after user // This prevents other messages from being created with IDs that sort between user and assistant @@ -435,101 +462,60 @@ export namespace SessionCompaction { { context: [], prompt: undefined }, ) - // Build previous summaries section if available - const previousSummariesSection = - previousSummaries.length > 0 - ? `## Previous Session Summaries - -The following summaries contain critical context from earlier compactions in this session. -You MUST merge and consolidate all relevant information from these summaries into your new summary. -Do not lose any important details - treat previous summaries as authoritative historical record. - -${previousSummaries.map((summary, i) => `### Previous Summary ${i + 1}\n\n${summary}`).join("\n\n---\n\n")} - ---- - -` - : "" - - const collapsePrompt = `You are creating a comprehensive context restoration document from a collapsed conversation segment. This document will serve as the foundation for continued work - it must preserve critical knowledge that would otherwise be lost. - -## Output Structure - -Create a detailed summary (target: approximately ${summaryMaxTokens} tokens) with the following sections: - -### 1. Current Task State -- What is actively being worked on -- Immediate next steps -- Any blockers or open questions - -### 2. 
Resolved Code & Lessons Learned -For each complex or highly iterative area of focus, include: -- The actual working code in markdown fences (this is critical - preserve it verbatim) -- What approaches failed and why -- What finally worked and why -- Insights that would help if revisiting this area -- Any edge cases or gotchas discovered - -Format example: -\`\`\`typescript -// Solution for X problem -// Failed approaches: tried A (failed because...), tried B (failed because...) -// Working solution: C works because... -// Gotcha: watch out for Y when Z - -\`\`\` - -### 3. User Directives -Bullet points of explicit or implicit user preferences: -- Things they want you to always do -- Things they want you to never do -- Coding style preferences -- Communication preferences -- Project-specific rules they've established - -### 4. Custom Utilities & Commands -- Any custom scripts, commands, or workflows established -- Special tool configurations or aliases -- Debugging commands that proved useful -- Project-specific shortcuts or patterns - -### 5. Design Decisions & Derived Requirements -Requirements and decisions that emerged from the conversation but aren't documented elsewhere: -- Architecture decisions made and their rationale -- API contracts or interfaces agreed upon -- Naming conventions established -- File organization patterns -- Integration patterns discovered - -### 6. Technical Facts -- Key file paths and their purposes -- Important function/class names and what they do -- Configuration values that matter -- Environment specifics -- Dependencies or version constraints - -## Critical Rules - -- PRESERVE working code verbatim in fenced blocks - this is essential context that prevents re-solving solved problems -- INCLUDE failed approaches with explanations - this prevents repeating the same mistakes -- Be specific: exact paths, line numbers, function names, config values -- Capture the "why" behind decisions, not just the "what" -- If something was hard-won through iteration, document the full journey -- User directives are sacred - never omit explicit user preferences -- This document should allow work to continue seamlessly as if the conversation never broke -${previousSummaries.length > 0 ? "- MERGE all information from previous summaries - do not lose historical context\n- Consolidate duplicate information but preserve all unique details" : ""} + // Build prompt sections - only include what we have + const sections: string[] = [] + + // Instructions + sections.push(`You are creating a comprehensive context restoration document. This document will serve as the foundation for continued work - it must preserve critical knowledge that would otherwise be lost. + +Create a detailed summary (target: approximately ${summaryMaxTokens} tokens) with these sections: +1. Current Task State - what is being worked on, next steps, blockers +2. Resolved Code & Lessons Learned - working code verbatim, failed approaches, insights +3. User Directives - explicit preferences, style rules, things to always/never do +4. Custom Utilities & Commands - scripts, aliases, debugging commands +5. Design Decisions & Derived Requirements - architecture decisions, API contracts, patterns +6. 
Technical Facts - file paths, function names, config values, environment details + +Critical rules: +- PRESERVE working code verbatim in fenced blocks +- INCLUDE failed approaches with explanations +- Be specific with paths, line numbers, function names +- Capture the "why" behind decisions +- User directives are sacred - never omit them`) + + // Previous summaries + if (previousSummaries.length > 0) { + sections.push(` +IMPORTANT: Merge all information from these previous summaries into your new summary. Do not lose any historical context. + +${previousSummaries.map((summary, i) => `--- Summary ${i + 1} ---\n${summary}`).join("\n\n")} +`) + } -${previousSummariesSection}## Extracted Context (to distill) + // Extracted content + sections.push(` +The following conversation content needs to be distilled into the summary: ${markdownContent} +`) -## Recent Context (for reference - shows current state) + // Recent context + sections.push(` +The following is recent context for reference (shows current state): ${recentContext} +`) -${compacting.context.length > 0 ? "\n## Additional Context\n\n" + compacting.context.join("\n\n") : ""} + // Additional plugin context + if (compacting.context.length > 0) { + sections.push(` +${compacting.context.join("\n\n")} +`) + } -Generate the context restoration document now:` + sections.push("Generate the context restoration document now.") + + const collapsePrompt = sections.join("\n\n") const result = await processor.process({ user: originalUserMessage, @@ -732,12 +718,13 @@ Generate the context restoration document now:` } /** - * Fetch previous compaction summaries from the session (unfiltered) + * Fetch previous compaction summaries from the session (unfiltered). + * Respects token budget to avoid overflowing context window. */ - async function getPreviousSummaries(sessionID: string, limit: number): Promise { + async function getPreviousSummaries(sessionID: string, limit: number, tokenBudget: number): Promise { const allMessages = await Session.messages({ sessionID }) - return allMessages + const summaryMessages = allMessages .filter( (m): m is MessageV2.WithParts & { info: MessageV2.Assistant } => m.info.role === "assistant" && @@ -746,7 +733,23 @@ Generate the context restoration document now:` ) .sort((a, b) => a.info.time.created - b.info.time.created) // oldest first .slice(-limit) // take the N most recent - .map((m) => extractSummaryText(m)) - .filter((text) => text.trim().length > 0) + + // Include summaries only if they fit within token budget + // Start from most recent (end of array) since those are most relevant + const result: string[] = [] + let tokensUsed = 0 + + for (let i = summaryMessages.length - 1; i >= 0; i--) { + const text = extractSummaryText(summaryMessages[i]) + if (!text.trim()) continue + + const estimate = Token.estimate(text) + if (tokensUsed + estimate > tokenBudget) break + + result.unshift(text) // prepend to maintain chronological order + tokensUsed += estimate + } + + return result } }
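Example: opting in to collapse compaction via config. A minimal sketch (assuming an opencode.json config file read through the schema added in patch 1; the values shown are the documented defaults, with method switched to collapse):

```json
{
  "compaction": {
    "method": "collapse",
    "trigger": 0.85,
    "extractRatio": 0.65,
    "recentRatio": 0.15,
    "summaryMaxTokens": 10000,
    "previousSummaries": 3
  }
}
```

The same switch is available at runtime through the TUI toggle, which is persisted to kv.json and takes precedence over the config file.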