docker · dgageot · Mar 24, 2026
@@ -128,6 +128,11 @@ func (r *LocalRuntime) RunStream(ctx context.Context, sess *session.Session) <-c
 		}
 		loopDetector := newToolLoopDetector(loopThreshold)
 
+		// Track whether auto-compaction was already attempted since the last
+		// successful model call. This prevents an infinite loop when
+		// compaction fails to reduce the context enough.
+		compactionAttempted := false
+
 		// toolModelOverride holds the per-toolset model from the most recent
 		// tool calls. It applies for one LLM turn, then resets.
 		var toolModelOverride string
@@ -280,7 +285,9 @@ func (r *LocalRuntime) RunStream(ctx context.Context, sess *session.Session) <-c
 				// Auto-recovery: if the error is a context overflow and
 				// session compaction is enabled, compact the conversation
 				// and retry the request instead of surfacing raw errors.
-				if _, ok := errors.AsType[*modelerrors.ContextOverflowError](err); ok && r.sessionCompaction {
+				// Guard against infinite loops: attempt compaction at most once until a model call succeeds.
+				if _, ok := errors.AsType[*modelerrors.ContextOverflowError](err); ok && r.sessionCompaction && !compactionAttempted {
+					compactionAttempted = true
 					slog.Warn("Context window overflow detected, attempting auto-compaction",
 						"agent", a.Name(),
 						"session_id", sess.ID,
@@ -313,6 +320,11 @@ func (r *LocalRuntime) RunStream(ctx context.Context, sess *session.Session) <-c
 				return
 			}
 
+			// Model call succeeded — reset the compaction guard so that
+			// a future overflow later in the same session can still
+			// trigger auto-compaction.
+			compactionAttempted = false
+
 			if usedModel != nil && usedModel.ID() != model.ID() {
 				slog.Info("Used fallback model", "agent", a.Name(), "primary", model.ID(), "used", usedModel.ID())
 				events <- AgentInfo(a.Name(), usedModel.ID(), a.Description(), a.WelcomeMessage())

@@ -16,6 +16,7 @@ import (
 	"github.com/docker/docker-agent/pkg/chat"
 	"github.com/docker/docker-agent/pkg/config/latest"
 	"github.com/docker/docker-agent/pkg/model/provider/base"
+	"github.com/docker/docker-agent/pkg/modelerrors"
 	"github.com/docker/docker-agent/pkg/modelsdev"
 	"github.com/docker/docker-agent/pkg/permissions"
 	"github.com/docker/docker-agent/pkg/session"
@@ -1825,3 +1826,58 @@ func TestProcessToolCalls_UsesPinnedAgent(t *testing.T) {
 		}
 	}
 }
+
+// overflowProvider is a test provider whose CreateChatCompletionStream always fails with a ContextOverflowError and counts its invocations.
+type overflowProvider struct {
+	mu    sync.Mutex // guards calls
+	calls int        // number of CreateChatCompletionStream invocations so far
+}
+
+func (p *overflowProvider) ID() string { return "test/overflow-model" } // ID returns the fixed identifier of this test provider.
+
+func (p *overflowProvider) CreateChatCompletionStream(context.Context, []chat.Message, []tools.Tool) (chat.MessageStream, error) { // all arguments are ignored
+	p.mu.Lock()
+	p.calls++ // record every invocation so the test can bound the number of model calls
+	p.mu.Unlock()
+	return nil, modelerrors.NewContextOverflowError(errors.New("prompt is too long: 200000 tokens > 128000 maximum")) // never yields a stream
+}
+
+func (p *overflowProvider) BaseConfig() base.Config { return base.Config{} } // BaseConfig returns a zero-value base.Config.
+
+func (p *overflowProvider) MaxTokens() int { return 0 } // MaxTokens always reports 0.
+
+func (p *overflowProvider) CallCount() int { // CallCount reports how many times CreateChatCompletionStream has been called.
+	p.mu.Lock() // take the same lock the writer uses so the read is not racy
+	defer p.mu.Unlock()
+	return p.calls
+}
+
+func TestContextOverflow_NoInfiniteLoop(t *testing.T) {
+	// When the model always returns a ContextOverflowError and compaction
+	// is enabled, the runtime should attempt compaction once and then stop
+	// instead of looping forever.
+	prov := &overflowProvider{}
+	root := agent.New("root", "You are a test agent", agent.WithModel(prov))
+	tm := team.New(team.WithAgents(root))
+
+	rt, err := NewLocalRuntime(tm, WithSessionCompaction(true), WithModelStore(mockModelStoreWithLimit{limit: 100})) // compaction must be on for the auto-recovery path to run
+	require.NoError(t, err)
+
+	sess := session.New(session.WithUserMessage("Hello"))
+	sess.Title = "Overflow Test"
+
+	evCh := rt.RunStream(t.Context(), sess)
+	var errorEvents []*ErrorEvent
+	for ev := range evCh { // drains the stream; the loop only ends once the channel is closed
+		if e, ok := ev.(*ErrorEvent); ok {
+			errorEvents = append(errorEvents, e) // keep only error events for the final assertion
+		}
+	}
+
+	// The provider should be called at most 3 times: once for the initial
+	// request, once for the summarization attempt (which also fails), and
+	// once for the retry after compaction.
+	require.LessOrEqual(t, prov.CallCount(), 3,
+		"expected at most 3 model calls (initial + summarization + retry), got %d", prov.CallCount())
+	require.NotEmpty(t, errorEvents, "expected an error event when compaction cannot resolve overflow") // the overflow must be surfaced, not swallowed
+}