From 890f286f75238394594e35dc155d19d580a9cb63 Mon Sep 17 00:00:00 2001 From: David Gageot Date: Tue, 24 Mar 2026 21:42:50 +0100 Subject: [PATCH] fix: prevent infinite loop when context overflow compaction fails When a ContextOverflowError occurs and session compaction is enabled, the runtime retries after compacting. If compaction fails to reduce the context enough, the same overflow recurs, causing an infinite loop. Add a compactionAttempted guard that allows only one compaction attempt per overflow. The guard resets after a successful model call so future overflows can still trigger compaction. Assisted-By: docker-agent --- pkg/runtime/loop.go | 14 +++++++++- pkg/runtime/runtime_test.go | 56 +++++++++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+), 1 deletion(-) diff --git a/pkg/runtime/loop.go b/pkg/runtime/loop.go index e6a643d8b..3de6f2d92 100644 --- a/pkg/runtime/loop.go +++ b/pkg/runtime/loop.go @@ -128,6 +128,11 @@ func (r *LocalRuntime) RunStream(ctx context.Context, sess *session.Session) <-c } loopDetector := newToolLoopDetector(loopThreshold) + // Track whether auto-compaction has already been attempted for the + // current context overflow. This prevents an infinite loop when + // compaction fails to reduce the context enough. + compactionAttempted := false + // toolModelOverride holds the per-toolset model from the most recent // tool calls. It applies for one LLM turn, then resets. var toolModelOverride string @@ -280,7 +285,9 @@ func (r *LocalRuntime) RunStream(ctx context.Context, sess *session.Session) <-c // Auto-recovery: if the error is a context overflow and // session compaction is enabled, compact the conversation // and retry the request instead of surfacing raw errors. - if _, ok := errors.AsType[*modelerrors.ContextOverflowError](err); ok && r.sessionCompaction { + // Guard against infinite loops: only attempt once per overflow. 
+ if _, ok := errors.AsType[*modelerrors.ContextOverflowError](err); ok && r.sessionCompaction && !compactionAttempted { + compactionAttempted = true slog.Warn("Context window overflow detected, attempting auto-compaction", "agent", a.Name(), "session_id", sess.ID, @@ -313,6 +320,11 @@ func (r *LocalRuntime) RunStream(ctx context.Context, sess *session.Session) <-c return } + // Model call succeeded — reset the compaction guard so that + // a future overflow later in the same session can still + // trigger auto-compaction. + compactionAttempted = false + if usedModel != nil && usedModel.ID() != model.ID() { slog.Info("Used fallback model", "agent", a.Name(), "primary", model.ID(), "used", usedModel.ID()) events <- AgentInfo(a.Name(), usedModel.ID(), a.Description(), a.WelcomeMessage()) diff --git a/pkg/runtime/runtime_test.go b/pkg/runtime/runtime_test.go index 4bb92cb9e..c57bd2e2d 100644 --- a/pkg/runtime/runtime_test.go +++ b/pkg/runtime/runtime_test.go @@ -16,6 +16,7 @@ import ( "github.com/docker/docker-agent/pkg/chat" "github.com/docker/docker-agent/pkg/config/latest" "github.com/docker/docker-agent/pkg/model/provider/base" + "github.com/docker/docker-agent/pkg/modelerrors" "github.com/docker/docker-agent/pkg/modelsdev" "github.com/docker/docker-agent/pkg/permissions" "github.com/docker/docker-agent/pkg/session" @@ -1825,3 +1826,58 @@ func TestProcessToolCalls_UsesPinnedAgent(t *testing.T) { } } } + +// overflowProvider always returns a ContextOverflowError. 
+type overflowProvider struct {
+	mu    sync.Mutex // guards calls
+	calls int        // number of CreateChatCompletionStream invocations
+}
+
+func (p *overflowProvider) ID() string { return "test/overflow-model" }
+
+func (p *overflowProvider) CreateChatCompletionStream(context.Context, []chat.Message, []tools.Tool) (chat.MessageStream, error) {
+	p.mu.Lock()
+	p.calls++ // count every invocation so the test can bound the number of model calls
+	p.mu.Unlock()
+	return nil, modelerrors.NewContextOverflowError(errors.New("prompt is too long: 200000 tokens > 128000 maximum")) // never yields a stream
+}
+
+func (p *overflowProvider) BaseConfig() base.Config { return base.Config{} }
+
+func (p *overflowProvider) MaxTokens() int { return 0 }
+
+func (p *overflowProvider) CallCount() int { // locked read of calls
+	p.mu.Lock()
+	defer p.mu.Unlock()
+	return p.calls
+}
+
+func TestContextOverflow_NoInfiniteLoop(t *testing.T) {
+	// Regression test: with a model that always returns a
+	// ContextOverflowError and session compaction enabled, RunStream must
+	// end with an error event instead of retrying compaction forever.
+	prov := &overflowProvider{}
+	root := agent.New("root", "You are a test agent", agent.WithModel(prov))
+	tm := team.New(team.WithAgents(root))
+
+	rt, err := NewLocalRuntime(tm, WithSessionCompaction(true), WithModelStore(mockModelStoreWithLimit{limit: 100}))
+	require.NoError(t, err)
+
+	sess := session.New(session.WithUserMessage("Hello"))
+	sess.Title = "Overflow Test"
+
+	evCh := rt.RunStream(t.Context(), sess)
+	var errorEvents []*ErrorEvent
+	for ev := range evCh { // drain every event; the loop ends only once the channel is closed
+		if e, ok := ev.(*ErrorEvent); ok {
+			errorEvents = append(errorEvents, e)
+		}
+	}
+
+	// Upper bound of 3 provider calls: the initial request, the
+	// summarization attempt made during compaction (which also fails), and
+	// the single retry after compaction.
+	require.LessOrEqual(t, prov.CallCount(), 3,
+		"expected at most 3 model calls (initial + summarization + retry), got %d", prov.CallCount())
+	require.NotEmpty(t, errorEvents, "expected an error event when compaction cannot resolve overflow")
+}