Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 13 additions & 1 deletion pkg/runtime/loop.go
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,11 @@ func (r *LocalRuntime) RunStream(ctx context.Context, sess *session.Session) <-c
}
loopDetector := newToolLoopDetector(loopThreshold)

// Track whether auto-compaction has already been attempted for a context
// overflow since the last successful model call. This prevents an infinite
// loop when compaction fails to reduce the context enough.
compactionAttempted := false

// toolModelOverride holds the per-toolset model from the most recent
// tool calls. It applies for one LLM turn, then resets.
var toolModelOverride string
Expand Down Expand Up @@ -280,7 +285,9 @@ func (r *LocalRuntime) RunStream(ctx context.Context, sess *session.Session) <-c
// Auto-recovery: if the error is a context overflow and
// session compaction is enabled, compact the conversation
// and retry the request instead of surfacing raw errors.
if _, ok := errors.AsType[*modelerrors.ContextOverflowError](err); ok && r.sessionCompaction {
// Guard against infinite loops: only attempt once per overflow.
if _, ok := errors.AsType[*modelerrors.ContextOverflowError](err); ok && r.sessionCompaction && !compactionAttempted {
compactionAttempted = true
slog.Warn("Context window overflow detected, attempting auto-compaction",
"agent", a.Name(),
"session_id", sess.ID,
Expand Down Expand Up @@ -313,6 +320,11 @@ func (r *LocalRuntime) RunStream(ctx context.Context, sess *session.Session) <-c
return
}

// Model call succeeded — reset the compaction guard so that
// a future overflow later in the same session can still
// trigger auto-compaction.
compactionAttempted = false

if usedModel != nil && usedModel.ID() != model.ID() {
slog.Info("Used fallback model", "agent", a.Name(), "primary", model.ID(), "used", usedModel.ID())
events <- AgentInfo(a.Name(), usedModel.ID(), a.Description(), a.WelcomeMessage())
Expand Down
56 changes: 56 additions & 0 deletions pkg/runtime/runtime_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import (
"github.com/docker/docker-agent/pkg/chat"
"github.com/docker/docker-agent/pkg/config/latest"
"github.com/docker/docker-agent/pkg/model/provider/base"
"github.com/docker/docker-agent/pkg/modelerrors"
"github.com/docker/docker-agent/pkg/modelsdev"
"github.com/docker/docker-agent/pkg/permissions"
"github.com/docker/docker-agent/pkg/session"
Expand Down Expand Up @@ -1825,3 +1826,58 @@ func TestProcessToolCalls_UsesPinnedAgent(t *testing.T) {
}
}
}

// overflowProvider is a test double whose CreateChatCompletionStream always
// fails with a ContextOverflowError. It records how many times it was called
// so tests can assert an upper bound on model requests.
type overflowProvider struct {
// mu guards calls.
mu sync.Mutex
// calls is the number of CreateChatCompletionStream invocations so far.
calls int
}

// ID returns the fixed model identifier used by this test provider.
func (p *overflowProvider) ID() string {
	return "test/overflow-model"
}

// CreateChatCompletionStream records the invocation and then fails
// unconditionally with a ContextOverflowError. All arguments are ignored and
// no stream is ever returned.
func (p *overflowProvider) CreateChatCompletionStream(context.Context, []chat.Message, []tools.Tool) (chat.MessageStream, error) {
	// Bump the counter under the lock; the lock is released before the
	// error value is built.
	func() {
		p.mu.Lock()
		defer p.mu.Unlock()
		p.calls++
	}()

	cause := errors.New("prompt is too long: 200000 tokens > 128000 maximum")
	return nil, modelerrors.NewContextOverflowError(cause)
}

// BaseConfig returns a zero-valued base.Config.
func (p *overflowProvider) BaseConfig() base.Config {
	var cfg base.Config
	return cfg
}

// MaxTokens always reports 0 for this test provider.
func (p *overflowProvider) MaxTokens() int {
	return 0
}

// CallCount reports how many times CreateChatCompletionStream has been
// invoked. The counter is read while holding mu.
func (p *overflowProvider) CallCount() int {
	p.mu.Lock()
	count := p.calls
	p.mu.Unlock()
	return count
}

// TestContextOverflow_NoInfiniteLoop checks that a model which fails every
// request with a ContextOverflowError cannot trap the runtime in an endless
// compact-and-retry cycle while session compaction is enabled: the event
// stream must end, the number of model calls must stay bounded, and an error
// event must be emitted.
func TestContextOverflow_NoInfiniteLoop(t *testing.T) {
	provider := &overflowProvider{}
	rootAgent := agent.New("root", "You are a test agent", agent.WithModel(provider))

	rt, err := NewLocalRuntime(
		team.New(team.WithAgents(rootAgent)),
		WithSessionCompaction(true),
		WithModelStore(mockModelStoreWithLimit{limit: 100}),
	)
	require.NoError(t, err)

	sess := session.New(session.WithUserMessage("Hello"))
	sess.Title = "Overflow Test"

	// Drain the stream until it is closed, keeping only the error events.
	var errorEvents []*ErrorEvent
	for ev := range rt.RunStream(t.Context(), sess) {
		if errEvent, isErr := ev.(*ErrorEvent); isErr {
			errorEvents = append(errorEvents, errEvent)
		}
	}

	// Upper bound on provider calls: the initial request, the summarization
	// attempt (which fails as well), and the retry after compaction.
	calls := provider.CallCount()
	require.LessOrEqual(t, calls, 3,
		"expected at most 3 model calls (initial + summarization + retry), got %d", calls)
	require.NotEmpty(t, errorEvents, "expected an error event when compaction cannot resolve overflow")
}
Loading