diff --git a/pkg/fake/proxy.go b/pkg/fake/proxy.go index d8185f9fc..46c138e32 100644 --- a/pkg/fake/proxy.go +++ b/pkg/fake/proxy.go @@ -214,6 +214,9 @@ func DefaultMatcher(onError func(err error)) recorder.MatcherFunc { // Normalize max_tokens/max_output_tokens/maxOutputTokens field (varies based on models.dev // cache state and provider cloning behavior). Handles both snake_case and camelCase variants. maxTokensRegex := regexp.MustCompile(`"(?:max_(?:output_)?tokens|maxOutputTokens)":\d+,?`) + // Normalize Gemini thinkingConfig (varies based on provider defaults for thinking budget). + // This handles the camelCase (API) form of the thinkingConfig field; the pattern assumes the object contains no nested braces. + thinkingConfigRegex := regexp.MustCompile(`"thinkingConfig":\{[^}]*\},?`) return func(r *http.Request, i cassette.Request) bool { if r.Body == nil || r.Body == http.NoBody { @@ -241,8 +244,10 @@ func DefaultMatcher(onError func(err error)) recorder.MatcherFunc { // Normalize dynamic fields for matching normalizedReq := callIDRegex.ReplaceAllString(string(reqBody), "call_ID") normalizedReq = maxTokensRegex.ReplaceAllString(normalizedReq, "") + normalizedReq = thinkingConfigRegex.ReplaceAllString(normalizedReq, "") normalizedCassette := callIDRegex.ReplaceAllString(i.Body, "call_ID") normalizedCassette = maxTokensRegex.ReplaceAllString(normalizedCassette, "") + normalizedCassette = thinkingConfigRegex.ReplaceAllString(normalizedCassette, "") return normalizedReq == normalizedCassette } diff --git a/pkg/model/provider/gemini/client.go b/pkg/model/provider/gemini/client.go index 3642d6c0d..84d668459 100644 --- a/pkg/model/provider/gemini/client.go +++ b/pkg/model/provider/gemini/client.go @@ -320,8 +320,32 @@ func (c *Client) buildConfig() *genai.GenerateContentConfig { // Gemini 3 models use level-based configuration (thinkingLevel): // - Gemini 3 Pro: "low", "high" // - Gemini 3 Flash: "minimal", "low", "medium", "high" - if c.ModelConfig.ThinkingBudget != nil { + // + // When thinking is explicitly disabled 
via ModelOptions (e.g., for title generation), + // we set ThinkingBudget to 0 to disable thinking completely. This is required for + // operations where max_tokens is very low and thinking would cause the request to + // hang or fail. IncludeThoughts=false is also set to ensure no thinking content + // is returned. + if thinking := c.ModelOptions.Thinking(); thinking != nil && !*thinking { + // Explicitly disable thinking - required for operations like title generation + // where max_tokens is very low and thinking would consume the token budget. + // ThinkingBudget=0 is intended to disable thinking for both Gemini 2.5 and 3 models. NOTE(review): the Gemini docs list models (e.g. 2.5 Pro, 3 Pro) where thinking cannot be turned off - confirm a zero budget is accepted for them. + config.ThinkingConfig = &genai.ThinkingConfig{ + IncludeThoughts: false, + ThinkingBudget: genai.Ptr(int32(0)), + } + slog.Debug("Gemini thinking explicitly disabled via ModelOptions", + "model", c.ModelConfig.Model, + "max_output_tokens", config.MaxOutputTokens, + ) + } else if c.ModelConfig.ThinkingBudget != nil { c.applyThinkingConfig(config) + } else { + slog.Debug("Gemini buildConfig: no thinking configuration applied", + "model", c.ModelConfig.Model, + "thinking_option", c.ModelOptions.Thinking(), + "thinking_budget", c.ModelConfig.ThinkingBudget, + ) } if structuredOutput := c.ModelOptions.StructuredOutput(); structuredOutput != nil { diff --git a/pkg/model/provider/gemini/client_test.go b/pkg/model/provider/gemini/client_test.go index a30376771..79cf81767 100644 --- a/pkg/model/provider/gemini/client_test.go +++ b/pkg/model/provider/gemini/client_test.go @@ -9,6 +9,7 @@ import ( "github.com/docker/cagent/pkg/config/latest" "github.com/docker/cagent/pkg/model/provider/base" + "github.com/docker/cagent/pkg/model/provider/options" ) func TestBuildConfig_Gemini25_ThinkingBudget(t *testing.T) { @@ -279,6 +280,118 @@ func TestBuildConfig_CaseInsensitiveModel(t *testing.T) { } } +func TestBuildConfig_ThinkingExplicitlyDisabled(t *testing.T) { + t.Parallel() + + // Test that when ModelOptions.Thinking() returns false, thinking is explicitly 
disabled. + // This is important for operations like title generation where max_tokens is very low. + tests := []struct { + name string + model string + thinkingBudget *latest.ThinkingBudget // Would normally enable thinking + }{ + { + name: "gemini-3-flash-preview with thinking budget but disabled via options", + model: "gemini-3-flash-preview", + thinkingBudget: &latest.ThinkingBudget{Effort: "medium"}, + }, + { + name: "gemini-2.5-flash with thinking budget but disabled via options", + model: "gemini-2.5-flash", + thinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, + }, + { + name: "gemini-3-pro with nil thinking budget but disabled via options", + model: "gemini-3-pro", + thinkingBudget: nil, // Even without explicit budget, Gemini 3 may use thinking by default + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + // Create ModelOptions with thinking explicitly disabled + var modelOpts options.ModelOptions + options.WithThinking(false)(&modelOpts) + + client := &Client{ + Config: base.Config{ + ModelConfig: latest.ModelConfig{ + Provider: "google", + Model: tt.model, + ThinkingBudget: tt.thinkingBudget, + }, + ModelOptions: modelOpts, + }, + } + + config := client.buildConfig() + + // ThinkingConfig should be set with IncludeThoughts=false and ThinkingBudget=0 + require.NotNil(t, config.ThinkingConfig, "ThinkingConfig should be explicitly set when thinking is disabled") + assert.False(t, config.ThinkingConfig.IncludeThoughts, "IncludeThoughts should be false when thinking is disabled") + + // ThinkingBudget should be 0 to disable thinking completely + require.NotNil(t, config.ThinkingConfig.ThinkingBudget, "ThinkingBudget should be set to 0 when thinking is disabled") + assert.Equal(t, int32(0), *config.ThinkingConfig.ThinkingBudget, "ThinkingBudget should be 0 when thinking is disabled") + + // ThinkingLevel should be empty/unset + assert.Empty(t, config.ThinkingConfig.ThinkingLevel, "ThinkingLevel should be empty 
when thinking is disabled") + }) + } +} + +func TestBuildConfig_ThinkingExplicitlyEnabled(t *testing.T) { + t.Parallel() + + // Test that when ModelOptions.Thinking() returns true, thinking is NOT overridden + // and the ThinkingBudget from ModelConfig is used. + var modelOpts options.ModelOptions + options.WithThinking(true)(&modelOpts) + + client := &Client{ + Config: base.Config{ + ModelConfig: latest.ModelConfig{ + Provider: "google", + Model: "gemini-3-flash-preview", + ThinkingBudget: &latest.ThinkingBudget{Effort: "medium"}, + }, + ModelOptions: modelOpts, + }, + } + + config := client.buildConfig() + + // ThinkingConfig should be set with IncludeThoughts=true from applyThinkingConfig + require.NotNil(t, config.ThinkingConfig, "ThinkingConfig should be set") + assert.True(t, config.ThinkingConfig.IncludeThoughts, "IncludeThoughts should be true when thinking is enabled") + assert.Equal(t, genai.ThinkingLevelMedium, config.ThinkingConfig.ThinkingLevel, "ThinkingLevel should be set from ThinkingBudget") +} + +func TestBuildConfig_ThinkingNotSet(t *testing.T) { + t.Parallel() + + // Test that when ModelOptions.Thinking() is nil (not set), behavior falls back to ThinkingBudget + client := &Client{ + Config: base.Config{ + ModelConfig: latest.ModelConfig{ + Provider: "google", + Model: "gemini-3-flash", + ThinkingBudget: &latest.ThinkingBudget{Effort: "high"}, + }, + // ModelOptions.Thinking() is nil by default + }, + } + + config := client.buildConfig() + + // ThinkingConfig should be set from ThinkingBudget + require.NotNil(t, config.ThinkingConfig, "ThinkingConfig should be set from ThinkingBudget") + assert.True(t, config.ThinkingConfig.IncludeThoughts, "IncludeThoughts should be true") + assert.Equal(t, genai.ThinkingLevelHigh, config.ThinkingConfig.ThinkingLevel, "ThinkingLevel should match ThinkingBudget") +} + // ptr is a helper to create a pointer to an int32 value. 
func ptr(v int32) *int32 { return &v diff --git a/pkg/model/provider/options/options.go b/pkg/model/provider/options/options.go index 3e033ef45..d08ba33b0 100644 --- a/pkg/model/provider/options/options.go +++ b/pkg/model/provider/options/options.go @@ -94,5 +94,8 @@ func FromModelOptions(m ModelOptions) []Opt { if len(m.providers) > 0 { out = append(out, WithProviders(m.providers)) } + if m.thinking != nil { + out = append(out, WithThinking(*m.thinking)) + } return out } diff --git a/pkg/sessiontitle/generator.go b/pkg/sessiontitle/generator.go index 3661b51a1..a939b784b 100644 --- a/pkg/sessiontitle/generator.go +++ b/pkg/sessiontitle/generator.go @@ -10,6 +10,7 @@ import ( "io" "log/slog" "strings" + "time" "github.com/docker/cagent/pkg/chat" "github.com/docker/cagent/pkg/model/provider" @@ -19,6 +20,11 @@ import ( const ( systemPrompt = "You are a helpful AI assistant that generates concise, descriptive titles for conversations. You will be given up to 2 recent user messages and asked to create a single-line title that captures the main topic. Never use newlines or line breaks in your response." userPromptFormat = "Based on the following recent user messages from a conversation with an AI assistant, generate a short, descriptive title (maximum 50 characters) that captures the main topic or purpose of the conversation. Return ONLY the title text on a single line, nothing else. Do not include any newlines, explanations, or formatting.\n\nRecent user messages:\n%s\n\n" + + // titleGenerationTimeout is the maximum time to wait for title generation. + // Title generation should be quick since we disable thinking and use a low max_tokens. + // If the API is slow or hanging (e.g., due to server-side thinking), we should time out. + titleGenerationTimeout = 30 * time.Second ) // Generator generates session titles using a one-shot LLM completion. 
@@ -42,6 +48,10 @@ func (g *Generator) Generate(ctx context.Context, sessionID string, userMessages return "", nil } + // Apply timeout to prevent hanging on slow or unresponsive models + ctx, cancel := context.WithTimeout(ctx, titleGenerationTimeout) + defer cancel() + slog.Debug("Generating title for session", "session_id", sessionID, "message_count", len(userMessages)) // Format messages for the prompt