5 changes: 5 additions & 0 deletions pkg/fake/proxy.go
@@ -214,6 +214,9 @@ func DefaultMatcher(onError func(err error)) recorder.MatcherFunc {
// Normalize max_tokens/max_output_tokens/maxOutputTokens field (varies based on models.dev
// cache state and provider cloning behavior). Handles both snake_case and camelCase variants.
maxTokensRegex := regexp.MustCompile(`"(?:max_(?:output_)?tokens|maxOutputTokens)":\d+,?`)
// Normalize Gemini thinkingConfig (varies based on provider defaults for the thinking budget).
// This matches the camelCase (API) form of the thinkingConfig field.
thinkingConfigRegex := regexp.MustCompile(`"thinkingConfig":\{[^}]*\},?`)

return func(r *http.Request, i cassette.Request) bool {
if r.Body == nil || r.Body == http.NoBody {
@@ -241,8 +244,10 @@ func DefaultMatcher(onError func(err error)) recorder.MatcherFunc {
// Normalize dynamic fields for matching
normalizedReq := callIDRegex.ReplaceAllString(string(reqBody), "call_ID")
normalizedReq = maxTokensRegex.ReplaceAllString(normalizedReq, "")
normalizedReq = thinkingConfigRegex.ReplaceAllString(normalizedReq, "")
normalizedCassette := callIDRegex.ReplaceAllString(i.Body, "call_ID")
normalizedCassette = maxTokensRegex.ReplaceAllString(normalizedCassette, "")
normalizedCassette = thinkingConfigRegex.ReplaceAllString(normalizedCassette, "")

return normalizedReq == normalizedCassette
}
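For illustration, a minimal standalone sketch (not part of the PR) of what this normalization buys: two bodies that differ only in their thinkingConfig collapse to the same string, so a recorded cassette still matches when provider defaults change. The JSON field names inside thinkingConfig are assumed for the example.

package main

import (
	"fmt"
	"regexp"
)

func main() {
	// Same pattern DefaultMatcher uses for the thinkingConfig field.
	thinkingConfigRegex := regexp.MustCompile(`"thinkingConfig":\{[^}]*\},?`)

	// Hypothetical request bodies: live request vs. recorded cassette.
	live := `{"model":"gemini-3-flash-preview","thinkingConfig":{"includeThoughts":false,"thinkingBudget":0},"contents":[]}`
	recorded := `{"model":"gemini-3-flash-preview","thinkingConfig":{"thinkingLevel":"medium"},"contents":[]}`

	// Stripping the volatile block makes both bodies identical.
	fmt.Println(thinkingConfigRegex.ReplaceAllString(live, "") ==
		thinkingConfigRegex.ReplaceAllString(recorded, "")) // true
}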
26 changes: 25 additions & 1 deletion pkg/model/provider/gemini/client.go
@@ -320,8 +320,32 @@ func (c *Client) buildConfig() *genai.GenerateContentConfig {
// Gemini 3 models use level-based configuration (thinkingLevel):
// - Gemini 3 Pro: "low", "high"
// - Gemini 3 Flash: "minimal", "low", "medium", "high"
if c.ModelConfig.ThinkingBudget != nil {
//
// When thinking is explicitly disabled via ModelOptions (e.g., for title generation),
// we set ThinkingBudget to 0 to disable thinking completely. This is required for
// operations where max_tokens is very low and thinking would cause the request to
// hang or fail. IncludeThoughts=false is also set to ensure no thinking content
// is returned.
if thinking := c.ModelOptions.Thinking(); thinking != nil && !*thinking {
// Explicitly disable thinking - required for operations like title generation
// where max_tokens is very low and thinking would consume the token budget.
// ThinkingBudget=0 disables thinking for both Gemini 2.5 and 3 models.
config.ThinkingConfig = &genai.ThinkingConfig{
IncludeThoughts: false,
ThinkingBudget: genai.Ptr(int32(0)),
}
slog.Debug("Gemini thinking explicitly disabled via ModelOptions",
"model", c.ModelConfig.Model,
"max_output_tokens", config.MaxOutputTokens,
)
} else if c.ModelConfig.ThinkingBudget != nil {
c.applyThinkingConfig(config)
} else {
slog.Debug("Gemini buildConfig: no thinking configuration applied",
"model", c.ModelConfig.Model,
"thinking_option", c.ModelOptions.Thinking(),
"thinking_budget", c.ModelConfig.ThinkingBudget,
)
}

if structuredOutput := c.ModelOptions.StructuredOutput(); structuredOutput != nil {
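As a rough usage sketch (mirroring the tests below, using the same packages imported in client_test.go), this is how a caller such as the title generator would opt a single request out of thinking; the trailing comments follow from the branch added above.

// Disable thinking for one request via ModelOptions.
var opts options.ModelOptions
options.WithThinking(false)(&opts)

client := &Client{
	Config: base.Config{
		ModelConfig: latest.ModelConfig{
			Provider:       "google",
			Model:          "gemini-3-flash-preview",
			ThinkingBudget: &latest.ThinkingBudget{Effort: "medium"}, // overridden by the explicit disable
		},
		ModelOptions: opts,
	},
}

cfg := client.buildConfig()
// cfg.ThinkingConfig.IncludeThoughts == false
// *cfg.ThinkingConfig.ThinkingBudget == 0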
113 changes: 113 additions & 0 deletions pkg/model/provider/gemini/client_test.go
@@ -9,6 +9,7 @@ import (

"github.com/docker/cagent/pkg/config/latest"
"github.com/docker/cagent/pkg/model/provider/base"
"github.com/docker/cagent/pkg/model/provider/options"
)

func TestBuildConfig_Gemini25_ThinkingBudget(t *testing.T) {
@@ -279,6 +280,118 @@ func TestBuildConfig_CaseInsensitiveModel(t *testing.T) {
}
}

func TestBuildConfig_ThinkingExplicitlyDisabled(t *testing.T) {
t.Parallel()

// Test that when ModelOptions.Thinking() returns false, thinking is explicitly disabled.
// This is important for operations like title generation where max_tokens is very low.
tests := []struct {
name string
model string
thinkingBudget *latest.ThinkingBudget // Would normally enable thinking
}{
{
name: "gemini-3-flash-preview with thinking budget but disabled via options",
model: "gemini-3-flash-preview",
thinkingBudget: &latest.ThinkingBudget{Effort: "medium"},
},
{
name: "gemini-2.5-flash with thinking budget but disabled via options",
model: "gemini-2.5-flash",
thinkingBudget: &latest.ThinkingBudget{Tokens: 8192},
},
{
name: "gemini-3-pro with nil thinking budget but disabled via options",
model: "gemini-3-pro",
thinkingBudget: nil, // Even without explicit budget, Gemini 3 may use thinking by default
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
t.Parallel()

// Create ModelOptions with thinking explicitly disabled
var modelOpts options.ModelOptions
options.WithThinking(false)(&modelOpts)

client := &Client{
Config: base.Config{
ModelConfig: latest.ModelConfig{
Provider: "google",
Model: tt.model,
ThinkingBudget: tt.thinkingBudget,
},
ModelOptions: modelOpts,
},
}

config := client.buildConfig()

// ThinkingConfig should be set with IncludeThoughts=false and ThinkingBudget=0
require.NotNil(t, config.ThinkingConfig, "ThinkingConfig should be explicitly set when thinking is disabled")
assert.False(t, config.ThinkingConfig.IncludeThoughts, "IncludeThoughts should be false when thinking is disabled")

// ThinkingBudget should be 0 to disable thinking completely
require.NotNil(t, config.ThinkingConfig.ThinkingBudget, "ThinkingBudget should be set to 0 when thinking is disabled")
assert.Equal(t, int32(0), *config.ThinkingConfig.ThinkingBudget, "ThinkingBudget should be 0 when thinking is disabled")

// ThinkingLevel should be empty/unset
assert.Empty(t, config.ThinkingConfig.ThinkingLevel, "ThinkingLevel should be empty when thinking is disabled")
})
}
}

func TestBuildConfig_ThinkingExplicitlyEnabled(t *testing.T) {
t.Parallel()

// Test that when ModelOptions.Thinking() returns true, thinking is NOT overridden
// and the ThinkingBudget from ModelConfig is used.
var modelOpts options.ModelOptions
options.WithThinking(true)(&modelOpts)

client := &Client{
Config: base.Config{
ModelConfig: latest.ModelConfig{
Provider: "google",
Model: "gemini-3-flash-preview",
ThinkingBudget: &latest.ThinkingBudget{Effort: "medium"},
},
ModelOptions: modelOpts,
},
}

config := client.buildConfig()

// ThinkingConfig should be set with IncludeThoughts=true from applyThinkingConfig
require.NotNil(t, config.ThinkingConfig, "ThinkingConfig should be set")
assert.True(t, config.ThinkingConfig.IncludeThoughts, "IncludeThoughts should be true when thinking is enabled")
assert.Equal(t, genai.ThinkingLevelMedium, config.ThinkingConfig.ThinkingLevel, "ThinkingLevel should be set from ThinkingBudget")
}

func TestBuildConfig_ThinkingNotSet(t *testing.T) {
t.Parallel()

// Test that when ModelOptions.Thinking() is nil (not set), behavior falls back to ThinkingBudget
client := &Client{
Config: base.Config{
ModelConfig: latest.ModelConfig{
Provider: "google",
Model: "gemini-3-flash",
ThinkingBudget: &latest.ThinkingBudget{Effort: "high"},
},
// ModelOptions.Thinking() is nil by default
},
}

config := client.buildConfig()

// ThinkingConfig should be set from ThinkingBudget
require.NotNil(t, config.ThinkingConfig, "ThinkingConfig should be set from ThinkingBudget")
assert.True(t, config.ThinkingConfig.IncludeThoughts, "IncludeThoughts should be true")
assert.Equal(t, genai.ThinkingLevelHigh, config.ThinkingConfig.ThinkingLevel, "ThinkingLevel should match ThinkingBudget")
}

// ptr is a helper to create a pointer to an int32 value.
func ptr(v int32) *int32 {
return &v
3 changes: 3 additions & 0 deletions pkg/model/provider/options/options.go
@@ -94,5 +94,8 @@ func FromModelOptions(m ModelOptions) []Opt {
if len(m.providers) > 0 {
out = append(out, WithProviders(m.providers))
}
if m.thinking != nil {
out = append(out, WithThinking(*m.thinking))
}
return out
}
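A small sketch of why this branch matters, assuming Opt remains a func(*ModelOptions) as the tests suggest: without it, converting ModelOptions back into a []Opt would silently drop an explicit thinking=false, and a cloned provider would revert to its default thinking behavior.

var src options.ModelOptions
options.WithThinking(false)(&src)

// Round-trip through FromModelOptions, e.g. when cloning a provider.
var dst options.ModelOptions
for _, opt := range options.FromModelOptions(src) {
	opt(&dst)
}

// With the new branch, dst.Thinking() reports the same explicit false as
// src.Thinking(); previously it would have come back nil (unset).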
10 changes: 10 additions & 0 deletions pkg/sessiontitle/generator.go
@@ -10,6 +10,7 @@ import (
"io"
"log/slog"
"strings"
"time"

"github.com/docker/cagent/pkg/chat"
"github.com/docker/cagent/pkg/model/provider"
@@ -19,6 +20,11 @@ import (
const (
systemPrompt = "You are a helpful AI assistant that generates concise, descriptive titles for conversations. You will be given up to 2 recent user messages and asked to create a single-line title that captures the main topic. Never use newlines or line breaks in your response."
userPromptFormat = "Based on the following recent user messages from a conversation with an AI assistant, generate a short, descriptive title (maximum 50 characters) that captures the main topic or purpose of the conversation. Return ONLY the title text on a single line, nothing else. Do not include any newlines, explanations, or formatting.\n\nRecent user messages:\n%s\n\n"

// titleGenerationTimeout is the maximum time to wait for title generation.
// Title generation should be quick since we disable thinking and use low max_tokens.
// If the API is slow or hanging (e.g., due to server-side thinking), we should timeout.
titleGenerationTimeout = 30 * time.Second
)

// Generator generates session titles using a one-shot LLM completion.
@@ -42,6 +48,10 @@ func (g *Generator) Generate(ctx context.Context, sessionID string, userMessages
return "", nil
}

// Apply timeout to prevent hanging on slow or unresponsive models
ctx, cancel := context.WithTimeout(ctx, titleGenerationTimeout)
defer cancel()

slog.Debug("Generating title for session", "session_id", sessionID, "message_count", len(userMessages))

// Format messages for the prompt
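As a note on the design choice, context.WithTimeout keeps whichever deadline fires first, so the new 30-second cap never loosens a caller's tighter budget; a minimal sketch:

package main

import (
	"context"
	"fmt"
	"time"
)

func main() {
	// Caller already has a tighter 5s budget.
	parent, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	// Generate applies its own 30s cap on top; the earlier deadline wins.
	ctx, cancel2 := context.WithTimeout(parent, 30*time.Second)
	defer cancel2()

	deadline, _ := ctx.Deadline()
	fmt.Println(time.Until(deadline) <= 5*time.Second) // true
}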