From af24910f9f1078975d887b63498b36e065766118 Mon Sep 17 00:00:00 2001
From: mi <mi@mideMacBook-Pro.local>
Date: Tue, 7 Apr 2026 10:11:19 +0800
Subject: [PATCH] fix(agent): auto-handoff when context length exceeds model
 limit
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When tape history exceeds the model's context window, the LLM API
returns a 400 error. Since the model never gets called, it cannot
invoke tape.handoff to compress context — a deadlock.

Add automatic handoff recovery in _agent_loop: detect context-length
errors from the ToolAutoResult, perform tape.handoff to create an
anchor that truncates visible history, then retry with the original
prompt. Limited to one auto-retry per _agent_loop call to prevent
infinite loops.
---
 src/bub/builtin/agent.py | 40 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)

diff --git a/src/bub/builtin/agent.py b/src/bub/builtin/agent.py
index 405cd458..9097ccd2 100644
--- a/src/bub/builtin/agent.py
+++ b/src/bub/builtin/agent.py
@@ -29,6 +29,11 @@
 
 CONTINUE_PROMPT = "Continue the task."
 HINT_RE = re.compile(r"\$([A-Za-z0-9_.-]+)")
+_CONTEXT_LENGTH_PATTERNS = re.compile(  # error-message phrasings treated as a context-length overflow
+    r"context.{0,20}length|maximum.{0,20}context|token.{0,10}limit|prompt.{0,10}too long|tokens? > \d+ maximum",
+    re.IGNORECASE,
+)
+MAX_AUTO_HANDOFF_RETRIES = 1  # initial auto-handoff retry budget for each _agent_loop call
 
 
 class Agent:
@@ -121,6 +126,7 @@ async def _agent_loop(
     ) -> str:
         next_prompt: str | list[dict] = prompt
         display_model = model or self.settings.model
+        auto_handoff_remaining = MAX_AUTO_HANDOFF_RETRIES
         await self.tapes.append_event(
             tape.name,
             "loop.start",
@@ -188,6 +194,35 @@ async def _agent_loop(
                     },
                 )
                 continue
+
+            # Check if this is a context-length error that can be recovered via auto-handoff
+            if auto_handoff_remaining > 0 and _is_context_length_error(outcome.error):
+                auto_handoff_remaining -= 1
+                logger.warning(
+                    "auto_handoff: context length exceeded, performing automatic handoff. tape={} step={}",
+                    tape.name,
+                    step,
+                )
+                await self.tapes.handoff(
+                    tape.name,
+                    name="auto_handoff/context_overflow",
+                    state={"reason": "context_length_exceeded", "error": outcome.error},
+                )
+                await self.tapes.append_event(
+                    tape.name,
+                    "loop.step",
+                    {
+                        "step": step,
+                        "elapsed_ms": elapsed_ms,
+                        "status": "auto_handoff",
+                        "error": outcome.error,
+                        "date": datetime.now(UTC).isoformat(),
+                    },
+                )
+                # Retry with original prompt — the handoff anchor will truncate history
+                next_prompt = prompt
+                continue
+
             await self.tapes.append_event(
                 tape.name,
                 "loop.step",
@@ -318,6 +353,11 @@ def _parse_args(args_tokens: list[str]) -> Args:
     return Args(positional=positional, kwargs=kwargs)
 
 
+def _is_context_length_error(error_msg: str | None) -> bool:
+    """Return True if error_msg matches a known context-length error pattern; None or empty never matches."""
+    return bool(error_msg and _CONTEXT_LENGTH_PATTERNS.search(error_msg))
+
+
 def _extract_text_from_parts(parts: list[dict]) -> str:
     """Extract text content from multimodal content parts."""
     return "\n".join(p.get("text", "") for p in parts if p.get("type") == "text")