5 changes: 5 additions & 0 deletions pkg/fake/proxy.go
@@ -214,6 +214,9 @@ func DefaultMatcher(onError func(err error)) recorder.MatcherFunc {
// Normalize max_tokens/max_output_tokens/maxOutputTokens field (varies based on models.dev
// cache state and provider cloning behavior). Handles both snake_case and camelCase variants.
maxTokensRegex := regexp.MustCompile(`"(?:max_(?:output_)?tokens|maxOutputTokens)":\d+,?`)
// Normalize Gemini thinkingConfig (varies based on provider defaults for the thinking budget).
// This matches the camelCase (API) form of the thinkingConfig field.
thinkingConfigRegex := regexp.MustCompile(`"thinkingConfig":\{[^}]*\},?`)

return func(r *http.Request, i cassette.Request) bool {
if r.Body == nil || r.Body == http.NoBody {
@@ -241,8 +244,10 @@ func DefaultMatcher(onError func(err error)) recorder.MatcherFunc {
// Normalize dynamic fields for matching
normalizedReq := callIDRegex.ReplaceAllString(string(reqBody), "call_ID")
normalizedReq = maxTokensRegex.ReplaceAllString(normalizedReq, "")
normalizedReq = thinkingConfigRegex.ReplaceAllString(normalizedReq, "")
normalizedCassette := callIDRegex.ReplaceAllString(i.Body, "call_ID")
normalizedCassette = maxTokensRegex.ReplaceAllString(normalizedCassette, "")
normalizedCassette = thinkingConfigRegex.ReplaceAllString(normalizedCassette, "")

return normalizedReq == normalizedCassette
}
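For illustration, a minimal standalone sketch (not part of the PR) of what this normalization buys: two bodies that differ only in their thinkingConfig collapse to the same string, so a recorded cassette still matches when provider defaults change. The JSON field names inside thinkingConfig are assumed for the example.

package main

import (
	"fmt"
	"regexp"
)

func main() {
	// Same pattern DefaultMatcher uses for the thinkingConfig field.
	thinkingConfigRegex := regexp.MustCompile(`"thinkingConfig":\{[^}]*\},?`)

	// Hypothetical request bodies: live request vs. recorded cassette.
	live := `{"model":"gemini-3-flash-preview","thinkingConfig":{"includeThoughts":false,"thinkingBudget":0},"contents":[]}`
	recorded := `{"model":"gemini-3-flash-preview","thinkingConfig":{"thinkingLevel":"medium"},"contents":[]}`

	// Stripping the volatile block makes both bodies identical.
	fmt.Println(thinkingConfigRegex.ReplaceAllString(live, "") ==
		thinkingConfigRegex.ReplaceAllString(recorded, "")) // true
}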
26 changes: 25 additions & 1 deletion pkg/model/provider/gemini/client.go
@@ -320,8 +320,32 @@ func (c *Client) buildConfig() *genai.GenerateContentConfig {
// Gemini 3 models use level-based configuration (thinkingLevel):
// - Gemini 3 Pro: "low", "high"
// - Gemini 3 Flash: "minimal", "low", "medium", "high"
if c.ModelConfig.ThinkingBudget != nil {
//
// When thinking is explicitly disabled via ModelOptions (e.g., for title generation),
// we set ThinkingBudget to 0 to disable thinking completely. This is required for
// operations where max_tokens is very low and thinking would cause the request to
// hang or fail. IncludeThoughts=false is also set to ensure no thinking content
// is returned.
if thinking := c.ModelOptions.Thinking(); thinking != nil && !*thinking {
// Explicitly disable thinking - required for operations like title generation
// where max_tokens is very low and thinking would consume the token budget.
// ThinkingBudget=0 disables thinking for both Gemini 2.5 and 3 models.
config.ThinkingConfig = &genai.ThinkingConfig{
IncludeThoughts: false,
ThinkingBudget: genai.Ptr(int32(0)),
}
slog.Debug("Gemini thinking explicitly disabled via ModelOptions",
"model", c.ModelConfig.Model,
"max_output_tokens", config.MaxOutputTokens,
)
} else if c.ModelConfig.ThinkingBudget != nil {
c.applyThinkingConfig(config)
} else {
slog.Debug("Gemini buildConfig: no thinking configuration applied",
"model", c.ModelConfig.Model,
"thinking_option", c.ModelOptions.Thinking(),
"thinking_budget", c.ModelConfig.ThinkingBudget,
)
}

if structuredOutput := c.ModelOptions.StructuredOutput(); structuredOutput != nil {
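As a rough usage sketch (mirroring the tests below, using the same packages imported in client_test.go), this is how a caller such as the title generator would opt a single request out of thinking; the trailing comments follow from the branch added above.

// Disable thinking for one request via ModelOptions.
var opts options.ModelOptions
options.WithThinking(false)(&opts)

client := &Client{
	Config: base.Config{
		ModelConfig: latest.ModelConfig{
			Provider:       "google",
			Model:          "gemini-3-flash-preview",
			ThinkingBudget: &latest.ThinkingBudget{Effort: "medium"}, // overridden by the explicit disable
		},
		ModelOptions: opts,
	},
}

cfg := client.buildConfig()
// cfg.ThinkingConfig.IncludeThoughts == false
// *cfg.ThinkingConfig.ThinkingBudget == 0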
113 changes: 113 additions & 0 deletions pkg/model/provider/gemini/client_test.go
@@ -9,6 +9,7 @@ import (

"github.com/docker/cagent/pkg/config/latest"
"github.com/docker/cagent/pkg/model/provider/base"
"github.com/docker/cagent/pkg/model/provider/options"
)

func TestBuildConfig_Gemini25_ThinkingBudget(t *testing.T) {
@@ -279,6 +280,118 @@ func TestBuildConfig_CaseInsensitiveModel(t *testing.T) {
}
}

func TestBuildConfig_ThinkingExplicitlyDisabled(t *testing.T) {
t.Parallel()

// Test that when ModelOptions.Thinking() returns false, thinking is explicitly disabled.
// This is important for operations like title generation where max_tokens is very low.
tests := []struct {
name string
model string
thinkingBudget *latest.ThinkingBudget // Would normally enable thinking
}{
{
name: "gemini-3-flash-preview with thinking budget but disabled via options",
model: "gemini-3-flash-preview",
thinkingBudget: &latest.ThinkingBudget{Effort: "medium"},
},
{
name: "gemini-2.5-flash with thinking budget but disabled via options",
model: "gemini-2.5-flash",
thinkingBudget: &latest.ThinkingBudget{Tokens: 8192},
},
{
name: "gemini-3-pro with nil thinking budget but disabled via options",
model: "gemini-3-pro",
thinkingBudget: nil, // Even without explicit budget, Gemini 3 may use thinking by default
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
t.Parallel()

// Create ModelOptions with thinking explicitly disabled
var modelOpts options.ModelOptions
options.WithThinking(false)(&modelOpts)

client := &Client{
Config: base.Config{
ModelConfig: latest.ModelConfig{
Provider: "google",
Model: tt.model,
ThinkingBudget: tt.thinkingBudget,
},
ModelOptions: modelOpts,
},
}

config := client.buildConfig()

// ThinkingConfig should be set with IncludeThoughts=false and ThinkingBudget=0
require.NotNil(t, config.ThinkingConfig, "ThinkingConfig should be explicitly set when thinking is disabled")
assert.False(t, config.ThinkingConfig.IncludeThoughts, "IncludeThoughts should be false when thinking is disabled")

// ThinkingBudget should be 0 to disable thinking completely
require.NotNil(t, config.ThinkingConfig.ThinkingBudget, "ThinkingBudget should be set to 0 when thinking is disabled")
assert.Equal(t, int32(0), *config.ThinkingConfig.ThinkingBudget, "ThinkingBudget should be 0 when thinking is disabled")

// ThinkingLevel should be empty/unset
assert.Empty(t, config.ThinkingConfig.ThinkingLevel, "ThinkingLevel should be empty when thinking is disabled")
})
}
}

func TestBuildConfig_ThinkingExplicitlyEnabled(t *testing.T) {
t.Parallel()

// Test that when ModelOptions.Thinking() returns true, thinking is NOT overridden
// and the ThinkingBudget from ModelConfig is used.
var modelOpts options.ModelOptions
options.WithThinking(true)(&modelOpts)

client := &Client{
Config: base.Config{
ModelConfig: latest.ModelConfig{
Provider: "google",
Model: "gemini-3-flash-preview",
ThinkingBudget: &latest.ThinkingBudget{Effort: "medium"},
},
ModelOptions: modelOpts,
},
}

config := client.buildConfig()

// ThinkingConfig should be set with IncludeThoughts=true from applyThinkingConfig
require.NotNil(t, config.ThinkingConfig, "ThinkingConfig should be set")
assert.True(t, config.ThinkingConfig.IncludeThoughts, "IncludeThoughts should be true when thinking is enabled")
assert.Equal(t, genai.ThinkingLevelMedium, config.ThinkingConfig.ThinkingLevel, "ThinkingLevel should be set from ThinkingBudget")
}

func TestBuildConfig_ThinkingNotSet(t *testing.T) {
t.Parallel()

// Test that when ModelOptions.Thinking() is nil (not set), behavior falls back to ThinkingBudget
client := &Client{
Config: base.Config{
ModelConfig: latest.ModelConfig{
Provider: "google",
Model: "gemini-3-flash",
ThinkingBudget: &latest.ThinkingBudget{Effort: "high"},
},
// ModelOptions.Thinking() is nil by default
},
}

config := client.buildConfig()

// ThinkingConfig should be set from ThinkingBudget
require.NotNil(t, config.ThinkingConfig, "ThinkingConfig should be set from ThinkingBudget")
assert.True(t, config.ThinkingConfig.IncludeThoughts, "IncludeThoughts should be true")
assert.Equal(t, genai.ThinkingLevelHigh, config.ThinkingConfig.ThinkingLevel, "ThinkingLevel should match ThinkingBudget")
}

// ptr is a helper to create a pointer to an int32 value.
func ptr(v int32) *int32 {
return &v
3 changes: 3 additions & 0 deletions pkg/model/provider/options/options.go
@@ -94,5 +94,8 @@ func FromModelOptions(m ModelOptions) []Opt {
if len(m.providers) > 0 {
out = append(out, WithProviders(m.providers))
}
if m.thinking != nil {
out = append(out, WithThinking(*m.thinking))
}
return out
}
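A small sketch of why this branch matters, assuming Opt remains a func(*ModelOptions) as the tests suggest: without it, converting ModelOptions back into a []Opt would silently drop an explicit thinking=false, and a cloned provider would revert to its default thinking behavior.

var src options.ModelOptions
options.WithThinking(false)(&src)

// Round-trip through FromModelOptions, e.g. when cloning a provider.
var dst options.ModelOptions
for _, opt := range options.FromModelOptions(src) {
	opt(&dst)
}

// With the new branch, dst.Thinking() reports the same explicit false as
// src.Thinking(); previously it would have come back nil (unset).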
10 changes: 10 additions & 0 deletions pkg/sessiontitle/generator.go
@@ -10,6 +10,7 @@ import (
"io"
"log/slog"
"strings"
"time"

"github.com/docker/cagent/pkg/chat"
"github.com/docker/cagent/pkg/model/provider"
@@ -19,6 +20,11 @@ import (
const (
systemPrompt = "You are a helpful AI assistant that generates concise, descriptive titles for conversations. You will be given up to 2 recent user messages and asked to create a single-line title that captures the main topic. Never use newlines or line breaks in your response."
userPromptFormat = "Based on the following recent user messages from a conversation with an AI assistant, generate a short, descriptive title (maximum 50 characters) that captures the main topic or purpose of the conversation. Return ONLY the title text on a single line, nothing else. Do not include any newlines, explanations, or formatting.\n\nRecent user messages:\n%s\n\n"

// titleGenerationTimeout is the maximum time to wait for title generation.
// Title generation should be quick since we disable thinking and use low max_tokens.
// If the API is slow or hanging (e.g., due to server-side thinking), we should timeout.
titleGenerationTimeout = 30 * time.Second
)

// Generator generates session titles using a one-shot LLM completion.
@@ -42,6 +48,10 @@ func (g *Generator) Generate(ctx context.Context, sessionID string, userMessages
return "", nil
}

// Apply timeout to prevent hanging on slow or unresponsive models
ctx, cancel := context.WithTimeout(ctx, titleGenerationTimeout)
defer cancel()

slog.Debug("Generating title for session", "session_id", sessionID, "message_count", len(userMessages))

// Format messages for the prompt
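As a note on the design choice, context.WithTimeout keeps whichever deadline fires first, so the new 30-second cap never loosens a caller's tighter budget; a minimal sketch:

package main

import (
	"context"
	"fmt"
	"time"
)

func main() {
	// Caller already has a tighter 5s budget.
	parent, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	// Generate applies its own 30s cap on top; the earlier deadline wins.
	ctx, cancel2 := context.WithTimeout(parent, 30*time.Second)
	defer cancel2()

	deadline, _ := ctx.Deadline()
	fmt.Println(time.Until(deadline) <= 5*time.Second) // true
}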