From 5707d2242cd5e1c0292a8e55032bd690a4532c9a Mon Sep 17 00:00:00 2001
From: Karthik Vinayan <karthikdoestech@gmail.com>
Date: Thu, 1 Jan 2026 11:23:01 +0530
Subject: [PATCH] fix(session): prevent context overflow by adding safety
 margin to compaction check

- Add a 50K-token overhead buffer to the overflow check to account for system prompts and tool definitions
- Add mid-turn overflow detection in finish-step to catch runaway tool loops
- Truncate MCP tool outputs and webfetch results to 30K characters
- Handle the "compact" result returned by the processor to trigger compaction mid-turn

Fixes #6068
---
 packages/opencode/src/session/compaction.ts |  9 ++--
 packages/opencode/src/session/processor.ts  |  8 +++
 packages/opencode/src/session/prompt.ts     | 17 ++++++-
 packages/opencode/src/tool/webfetch.ts      | 54 ++++-----------------
 4 files changed, 38 insertions(+), 50 deletions(-)

diff --git a/packages/opencode/src/session/compaction.ts b/packages/opencode/src/session/compaction.ts
index 42bab2eb9751..6a4c9b9e1431 100644
--- a/packages/opencode/src/session/compaction.ts
+++ b/packages/opencode/src/session/compaction.ts
@@ -27,6 +27,10 @@ export namespace SessionCompaction {
     ),
   }
 
+  export const PRUNE_MINIMUM = 20_000
+  export const PRUNE_PROTECT = 40_000
+  export const OVERHEAD_BUFFER = 50_000
+
   export async function isOverflow(input: { tokens: MessageV2.Assistant["tokens"]; model: Provider.Model }) {
     const config = await Config.get()
     if (config.compaction?.auto === false) return false
@@ -34,13 +38,10 @@ export namespace SessionCompaction {
     if (context === 0) return false
     const count = input.tokens.input + input.tokens.cache.read + input.tokens.output
     const output = Math.min(input.model.limit.output, SessionPrompt.OUTPUT_TOKEN_MAX) || SessionPrompt.OUTPUT_TOKEN_MAX
-    const usable = context - output
+    const usable = context - output - OVERHEAD_BUFFER
     return count > usable
   }
 
-  export const PRUNE_MINIMUM = 20_000
-  export const PRUNE_PROTECT = 40_000
-
   const PRUNE_PROTECTED_TOOLS = ["skill"]
 
   // goes backwards through parts until there are 40_000 tokens worth of tool
diff --git a/packages/opencode/src/session/processor.ts b/packages/opencode/src/session/processor.ts
index 78871630c65b..567b9647934d 100644
--- a/packages/opencode/src/session/processor.ts
+++ b/packages/opencode/src/session/processor.ts
@@ -13,6 +13,7 @@ import { Plugin } from "@/plugin"
 import type { Provider } from "@/provider/provider"
 import { LLM } from "./llm"
 import { Config } from "@/config/config"
+import { SessionCompaction } from "./compaction"
 
 export namespace SessionProcessor {
   const DOOM_LOOP_THRESHOLD = 3
@@ -31,6 +32,7 @@ export namespace SessionProcessor {
     let snapshot: string | undefined
     let blocked = false
     let attempt = 0
+    let needsCompaction = false
 
     const result = {
       get message() {
@@ -41,6 +43,7 @@ export namespace SessionProcessor {
       },
       async process(streamInput: LLM.StreamInput) {
         log.info("process")
+        needsCompaction = false
         const shouldBreak = (await Config.get()).experimental?.continue_loop_on_deny !== true
         while (true) {
           try {
@@ -279,6 +282,9 @@ export namespace SessionProcessor {
                     sessionID: input.sessionID,
                     messageID: input.assistantMessage.parentID,
                   })
+                  if (await SessionCompaction.isOverflow({ tokens: usage.tokens, model: input.model })) {
+                    needsCompaction = true
+                  }
                   break
 
                 case "text-start":
@@ -339,6 +345,7 @@ export namespace SessionProcessor {
                   })
                   continue
               }
+              if (needsCompaction) break
             }
           } catch (e: any) {
             log.error("process", {
@@ -398,6 +405,7 @@ export namespace SessionProcessor {
           }
           input.assistantMessage.time.completed = Date.now()
           await Session.updateMessage(input.assistantMessage)
+          if (needsCompaction) return "compact"
           if (blocked) return "stop"
           if (input.assistantMessage.error) return "stop"
           return "continue"
diff --git a/packages/opencode/src/session/prompt.ts b/packages/opencode/src/session/prompt.ts
index 595fc746e7f7..8c8861324ff2 100644
--- a/packages/opencode/src/session/prompt.ts
+++ b/packages/opencode/src/session/prompt.ts
@@ -549,6 +549,14 @@ export namespace SessionPrompt {
         model,
       })
       if (result === "stop") break
+      if (result === "compact") {
+        await SessionCompaction.create({
+          sessionID,
+          agent: lastUser.agent,
+          model: lastUser.model,
+          auto: true,
+        })
+      }
       continue
     }
     SessionCompaction.prune({ sessionID })
@@ -696,12 +704,17 @@ export namespace SessionPrompt {
           // Add support for other types if needed
         }
 
+        const text = textParts.join("\n\n")
+        const output = text.length > 30_000
+          ? text.slice(0, 30_000) + `\n\n[MCP output truncated: exceeded 30000 char limit]`
+          : text
+
         return {
           title: "",
           metadata: result.metadata ?? {},
-          output: textParts.join("\n\n"),
+          output,
           attachments,
-          content: result.content, // directly return content to preserve ordering when outputting to model
+          content: result.content,
         }
       }
       item.toModelOutput = (result) => {
diff --git a/packages/opencode/src/tool/webfetch.ts b/packages/opencode/src/tool/webfetch.ts
index cf1940bf8650..764bfa3ebf42 100644
--- a/packages/opencode/src/tool/webfetch.ts
+++ b/packages/opencode/src/tool/webfetch.ts
@@ -93,53 +93,19 @@ export const WebFetchTool = Tool.define("webfetch", {
     const contentType = response.headers.get("content-type") || ""
 
     const title = `${params.url} (${contentType})`
+    const isHtml = contentType.includes("text/html")
 
-    // Handle content based on requested format and actual content type
-    switch (params.format) {
-      case "markdown":
-        if (contentType.includes("text/html")) {
-          const markdown = convertHTMLToMarkdown(content)
-          return {
-            output: markdown,
-            title,
-            metadata: {},
-          }
-        }
-        return {
-          output: content,
-          title,
-          metadata: {},
-        }
+    const raw = params.format === "markdown" && isHtml
+      ? convertHTMLToMarkdown(content)
+      : params.format === "text" && isHtml
+        ? await extractTextFromHTML(content)
+        : content
 
-      case "text":
-        if (contentType.includes("text/html")) {
-          const text = await extractTextFromHTML(content)
-          return {
-            output: text,
-            title,
-            metadata: {},
-          }
-        }
-        return {
-          output: content,
-          title,
-          metadata: {},
-        }
+    const output = raw.length > 30_000
+      ? raw.slice(0, 30_000) + `\n\n[Output truncated: exceeded 30000 char limit]`
+      : raw
 
-      case "html":
-        return {
-          output: content,
-          title,
-          metadata: {},
-        }
-
-      default:
-        return {
-          output: content,
-          title,
-          metadata: {},
-        }
-    }
+    return { output, title, metadata: {} }
   },
 })