diff --git a/agent-schema.json b/agent-schema.json index b106a8509..2e1d76307 100644 --- a/agent-schema.json +++ b/agent-schema.json @@ -535,7 +535,7 @@ "description": "Whether to track usage" }, "thinking_budget": { - "description": "Controls reasoning effort/budget. Use 'none' or 0 to disable thinking. OpenAI: string levels ('minimal','low','medium','high'), default 'medium'. Anthropic: integer token budget (1024-32768), default 8192. Amazon Bedrock (Claude): same as Anthropic. Google Gemini 2.5: integer token budget (-1 for dynamic, 0 to disable, 24576 max), default -1. Google Gemini 3: string levels ('minimal' Flash only,'low','medium','high'), default 'high' for Pro, 'medium' for Flash.", + "description": "Controls reasoning effort/budget. Use 'none' or 0 to disable thinking. OpenAI: string levels ('minimal','low','medium','high'). Anthropic: integer token budget (1024-32768), 'adaptive' (lets the model decide), or effort levels ('low','medium','high','max'; 'minimal' is treated as 'low') which use adaptive thinking with the given effort. Amazon Bedrock (Claude): integer token budget or effort levels ('minimal','low','medium','high') mapped to token budgets. Google Gemini 2.5: integer token budget (-1 for dynamic, 0 to disable, 24576 max). Google Gemini 3: string levels ('minimal' Flash only,'low','medium','high'). Thinking is only enabled when explicitly configured.", "oneOf": [ { "type": "string", @@ -544,9 +544,11 @@ "minimal", "low", "medium", - "high" + "high", + "max", + "adaptive" ], - "description": "Reasoning effort level (OpenAI, Gemini 3). Use 'none' to disable thinking." + "description": "Reasoning effort level. 'adaptive'/'max' are Anthropic-specific. Use 'none' to disable thinking."
}, { "type": "integer", @@ -562,6 +564,8 @@ "low", "medium", "high", + "max", + "adaptive", -1, 1024, 8192, diff --git a/examples/thinking_budget.yaml b/examples/thinking_budget.yaml index 80e83b3b4..8c906e7c8 100644 --- a/examples/thinking_budget.yaml +++ b/examples/thinking_budget.yaml @@ -6,7 +6,7 @@ agents: root: model: gpt-5-mini-min # <- try with gpt-5-mini-high - # model: claude-4-5-sonnet-min # <- try with claude-4-5-sonnet-high + # model: claude-4-5-sonnet-min # <- try with claude-4-5-sonnet-high or claude-opus-4-6-adaptive # model: gemini-2-5-flash-dynamic-thinking # <- try with -no-thinking, -low or -high variants description: a helpful assistant that thinks instruction: you are a helpful assistant who can also use tools, but only if you need to @@ -29,15 +29,25 @@ models: claude-4-5-sonnet-min: provider: anthropic model: claude-sonnet-4-5-20250929 - thinking_budget: 1024 # <- tokens, 1024 is the minimum + thinking_budget: 1024 # <- explicit token budget (1024-32768) for older models claude-4-5-sonnet-high: provider: anthropic model: claude-sonnet-4-5-20250929 - thinking_budget: 32768 # <- tokens, 32768 is the Anthropic suggested maximum without batching + thinking_budget: 32768 # <- explicit token budget (32768 is the Anthropic suggested maximum) provider_opts: interleaved_thinking: true # <- enables interleaved thinking, aka tool calling during model reasoning + claude-opus-4-6-adaptive: + provider: anthropic + model: claude-opus-4-6 + thinking_budget: adaptive # <- lets the model decide when and how much to think (recommended for 4.6) + + claude-opus-4-6-low: + provider: anthropic + model: claude-opus-4-6 + thinking_budget: low # <- adaptive thinking with low effort: "low", "medium", "high", "max" + gemini-2-5-flash-dynamic-thinking: provider: google model: gemini-2.5-flash diff --git a/pkg/config/latest/types.go b/pkg/config/latest/types.go index e3ff0dc6c..1c8f3558c 100644 --- a/pkg/config/latest/types.go +++ b/pkg/config/latest/types.go @@ -397,7 
+397,10 @@ type ModelConfig struct { TrackUsage *bool `json:"track_usage,omitempty"` // ThinkingBudget controls reasoning effort/budget: // - For OpenAI: accepts string levels "minimal", "low", "medium", "high" - // - For Anthropic: accepts integer token budget (1024-32000) + // - For Anthropic: accepts integer token budget (1024-32768), "adaptive", + // or string levels "low", "medium", "high", "max" (uses adaptive thinking with effort) + // - For Bedrock Claude: accepts integer token budget or string levels + // "minimal", "low", "medium", "high" (mapped to token budgets via EffortTokens) // - For other providers: may be ignored ThinkingBudget *ThinkingBudget `json:"thinking_budget,omitempty"` // Routing defines rules for routing requests to different models. @@ -670,6 +673,7 @@ func (d DeferConfig) MarshalYAML() (any, error) { // ThinkingBudget represents reasoning budget configuration. // It accepts either a string effort level or an integer token budget: // - String: "minimal", "low", "medium", "high" (for OpenAI) +// - String: "adaptive" or "max" (Anthropic only: adaptive thinking, or adaptive thinking with maximum effort) // - Integer: token count (for Anthropic, range 1024-32768) type ThinkingBudget struct { // Effort stores string-based reasoning effort levels @@ -717,6 +721,7 @@ func (t ThinkingBudget) MarshalYAML() (any, error) { // NOT disabled when: // - Tokens > 0 or Tokens == -1 (explicit token budget) // - Effort is a real level like "medium" or "high" +// - Effort is "adaptive" func (t *ThinkingBudget) IsDisabled() bool { if t == nil { return false @@ -724,7 +729,42 @@ func (t *ThinkingBudget) IsDisabled() bool { if t.Tokens == 0 && t.Effort == "" { return true } - return t.Effort == "none" + return strings.EqualFold(t.Effort, "none") +} + +// IsAdaptive returns true if the thinking budget is set to adaptive mode. +// Adaptive thinking lets the model decide how much thinking to do.
+func (t *ThinkingBudget) IsAdaptive() bool { + if t == nil { + return false + } + return strings.EqualFold(t.Effort, "adaptive") +} + +// EffortTokens maps a string effort level to a token budget for providers +// that only support token-based thinking (e.g. Bedrock Claude). +// +// The Anthropic direct API uses adaptive thinking + output_config.effort +// for string levels instead; see anthropicEffort in the anthropic package. +// +// Returns (tokens, true) when a mapping exists, or (0, false) when +// the budget uses an explicit token count or an unrecognised effort string. +func (t *ThinkingBudget) EffortTokens() (int, bool) { + if t == nil || t.Effort == "" { + return 0, false + } + switch strings.ToLower(strings.TrimSpace(t.Effort)) { + case "minimal": + return 1024, true + case "low": + return 2048, true + case "medium": + return 8192, true + case "high": + return 16384, true + default: + return 0, false + } } // MarshalJSON implements custom marshaling to output simple string or int format diff --git a/pkg/config/latest/types_test.go b/pkg/config/latest/types_test.go index e503d3cbb..6aede16cb 100644 --- a/pkg/config/latest/types_test.go +++ b/pkg/config/latest/types_test.go @@ -121,6 +121,77 @@ func TestThinkingBudget_MarshalUnmarshal_Zero(t *testing.T) { require.Equal(t, "thinking_budget: 0\n", string(output)) } +func TestThinkingBudget_IsDisabled(t *testing.T) { + t.Parallel() + + for _, tt := range []struct { + name string + b *ThinkingBudget + want bool + }{ + {"nil", nil, false}, + {"zero tokens", &ThinkingBudget{Tokens: 0}, true}, + {"none effort", &ThinkingBudget{Effort: "none"}, true}, + {"positive tokens", &ThinkingBudget{Tokens: 8192}, false}, + {"medium effort", &ThinkingBudget{Effort: "medium"}, false}, + {"adaptive effort", &ThinkingBudget{Effort: "adaptive"}, false}, + {"negative tokens (dynamic)", &ThinkingBudget{Tokens: -1}, false}, + } { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + require.Equal(t, tt.want, tt.b.IsDisabled()) + 
}) + } +} + +func TestThinkingBudget_IsAdaptive(t *testing.T) { + t.Parallel() + + for _, tt := range []struct { + name string + b *ThinkingBudget + want bool + }{ + {"nil", nil, false}, + {"adaptive", &ThinkingBudget{Effort: "adaptive"}, true}, + {"medium", &ThinkingBudget{Effort: "medium"}, false}, + {"tokens", &ThinkingBudget{Tokens: 8192}, false}, + } { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + require.Equal(t, tt.want, tt.b.IsAdaptive()) + }) + } +} + +func TestThinkingBudget_EffortTokens(t *testing.T) { + t.Parallel() + + for _, tt := range []struct { + name string + b *ThinkingBudget + wantTokens int + wantOK bool + }{ + {"nil", nil, 0, false}, + {"minimal", &ThinkingBudget{Effort: "minimal"}, 1024, true}, + {"low", &ThinkingBudget{Effort: "low"}, 2048, true}, + {"medium", &ThinkingBudget{Effort: "medium"}, 8192, true}, + {"high", &ThinkingBudget{Effort: "high"}, 16384, true}, + {"adaptive", &ThinkingBudget{Effort: "adaptive"}, 0, false}, + {"none", &ThinkingBudget{Effort: "none"}, 0, false}, + {"explicit tokens", &ThinkingBudget{Tokens: 4096}, 0, false}, + {"empty effort", &ThinkingBudget{}, 0, false}, + } { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + tokens, ok := tt.b.EffortTokens() + require.Equal(t, tt.wantOK, ok) + require.Equal(t, tt.wantTokens, tokens) + }) + } +} + func TestAgents_UnmarshalYAML_RejectsUnknownFields(t *testing.T) { t.Parallel() diff --git a/pkg/model/provider/anthropic/beta_client.go b/pkg/model/provider/anthropic/beta_client.go index 55b5aa274..ae9286ec9 100644 --- a/pkg/model/provider/anthropic/beta_client.go +++ b/pkg/model/provider/anthropic/beta_client.go @@ -95,20 +95,38 @@ func (c *Client) createBetaStream( // For interleaved thinking to make sense, we use a default of 16384 tokens for the thinking budget thinkingEnabled := c.ModelOptions.Thinking() == nil || *c.ModelOptions.Thinking() if thinkingEnabled { - thinkingTokens := int64(16384) - if c.ModelConfig.ThinkingBudget != nil { - thinkingTokens = 
int64(c.ModelConfig.ThinkingBudget.Tokens) + if c.ModelConfig.ThinkingBudget != nil && c.ModelConfig.ThinkingBudget.IsAdaptive() { + // Adaptive thinking: let the model decide how much thinking to do + adaptive := anthropic.NewBetaThinkingConfigAdaptiveParam() + params.Thinking = anthropic.BetaThinkingConfigParamUnion{ + OfAdaptive: &adaptive, + } + slog.Debug("Anthropic Beta API using adaptive thinking") + } else if effort, ok := anthropicEffort(c.ModelConfig.ThinkingBudget); ok { + // Effort level: use adaptive thinking + output_config.effort + adaptive := anthropic.NewBetaThinkingConfigAdaptiveParam() + params.Thinking = anthropic.BetaThinkingConfigParamUnion{ + OfAdaptive: &adaptive, + } + params.OutputConfig.Effort = anthropic.BetaOutputConfigEffort(effort) + slog.Debug("Anthropic Beta API using adaptive thinking with effort", + "effort", effort) } else { - slog.Info("Anthropic Beta API using default thinking_budget with interleaved thinking", "budget_tokens", thinkingTokens) - } - switch { - case thinkingTokens >= 1024 && thinkingTokens < maxTokens: - params.Thinking = anthropic.BetaThinkingConfigParamOfEnabled(thinkingTokens) - slog.Debug("Anthropic Beta API using thinking_budget with interleaved thinking", "budget_tokens", thinkingTokens) - case thinkingTokens >= maxTokens: - slog.Warn("Anthropic Beta API thinking_budget must be less than max_tokens, ignoring", "tokens", thinkingTokens, "max_tokens", maxTokens) - default: - slog.Warn("Anthropic Beta API thinking_budget below minimum (1024), ignoring", "tokens", thinkingTokens) + thinkingTokens := int64(16384) + if c.ModelConfig.ThinkingBudget != nil { + thinkingTokens = int64(c.ModelConfig.ThinkingBudget.Tokens) + } else { + slog.Info("Anthropic Beta API using default thinking_budget with interleaved thinking", "budget_tokens", thinkingTokens) + } + switch { + case thinkingTokens >= 1024 && thinkingTokens < maxTokens: + params.Thinking = anthropic.BetaThinkingConfigParamOfEnabled(thinkingTokens) + 
slog.Debug("Anthropic Beta API using thinking_budget with interleaved thinking", "budget_tokens", thinkingTokens) + case thinkingTokens >= maxTokens: + slog.Warn("Anthropic Beta API thinking_budget must be less than max_tokens, ignoring", "tokens", thinkingTokens, "max_tokens", maxTokens) + default: + slog.Warn("Anthropic Beta API thinking_budget below minimum (1024), ignoring", "tokens", thinkingTokens) + } } } else { slog.Debug("Anthropic Beta API: Thinking disabled via /think command") diff --git a/pkg/model/provider/anthropic/client.go b/pkg/model/provider/anthropic/client.go index 10e05b701..8678d8437 100644 --- a/pkg/model/provider/anthropic/client.go +++ b/pkg/model/provider/anthropic/client.go @@ -50,12 +50,23 @@ func (c *Client) getResponseTrailer() http.Header { // adjustMaxTokensForThinking checks if max_tokens needs adjustment for thinking_budget. // Anthropic's max_tokens represents the combined budget for thinking + output tokens. // Returns the adjusted maxTokens value and an error if user-set max_tokens is too low. +// +// This only applies to fixed token budgets. Adaptive thinking and effort-based +// budgets don't need adjustment since the model manages its own thinking allocation. func (c *Client) adjustMaxTokensForThinking(maxTokens int64) (int64, error) { - if c.ModelConfig.ThinkingBudget == nil || c.ModelConfig.ThinkingBudget.Tokens <= 0 { + if c.ModelConfig.ThinkingBudget == nil || c.ModelConfig.ThinkingBudget.IsAdaptive() { + return maxTokens, nil + } + // Effort-based budgets use adaptive thinking — no token adjustment needed. 
+ if _, ok := anthropicEffort(c.ModelConfig.ThinkingBudget); ok { return maxTokens, nil } thinkingTokens := int64(c.ModelConfig.ThinkingBudget.Tokens) + if thinkingTokens <= 0 { + return maxTokens, nil + } + minRequired := thinkingTokens + 1024 // configured thinking budget + minimum output buffer if maxTokens <= thinkingTokens { @@ -297,7 +308,25 @@ func (c *Client) CreateChatCompletionStream( // Apply thinking budget first, as it affects whether we can set temperature thinkingEnabled := false - if c.ModelConfig.ThinkingBudget != nil && c.ModelConfig.ThinkingBudget.Tokens > 0 { + if c.ModelConfig.ThinkingBudget != nil && c.ModelConfig.ThinkingBudget.IsAdaptive() { + // Adaptive thinking: let the model decide how much thinking to do + adaptive := anthropic.NewThinkingConfigAdaptiveParam() + params.Thinking = anthropic.ThinkingConfigParamUnion{ + OfAdaptive: &adaptive, + } + thinkingEnabled = true + slog.Debug("Anthropic API using adaptive thinking (standard messages)") + } else if effort, ok := anthropicEffort(c.ModelConfig.ThinkingBudget); ok { + // Effort level: use adaptive thinking + output_config.effort + adaptive := anthropic.NewThinkingConfigAdaptiveParam() + params.Thinking = anthropic.ThinkingConfigParamUnion{ + OfAdaptive: &adaptive, + } + params.OutputConfig.Effort = anthropic.OutputConfigEffort(effort) + thinkingEnabled = true + slog.Debug("Anthropic API using adaptive thinking with effort", + "effort", effort) + } else if c.ModelConfig.ThinkingBudget != nil && c.ModelConfig.ThinkingBudget.Tokens > 0 { thinkingTokens := int64(c.ModelConfig.ThinkingBudget.Tokens) switch { case thinkingTokens >= 1024 && thinkingTokens < maxTokens: @@ -895,6 +924,29 @@ func differenceIDs(a, b map[string]struct{}) []string { return missing } +// anthropicEffort maps a ThinkingBudget effort string to an Anthropic API +// effort level ("low", "medium", "high", "max"). Returns ("", false) when +// the budget uses token counts, adaptive mode, or an unrecognised string. 
+func anthropicEffort(b *latest.ThinkingBudget) (string, bool) { + if b == nil { + return "", false + } + switch strings.ToLower(strings.TrimSpace(b.Effort)) { + case "low": + return "low", true + case "minimal": // "minimal" is not in the Anthropic API; map to closest + return "low", true + case "medium": + return "medium", true + case "high": + return "high", true + case "max": + return "max", true + default: + return "", false + } +} + // anthropicContextLimit returns a reasonable default context window for Anthropic models. // We default to 200k tokens, which is what 3.5-4.5 models support; adjust as needed over time. func anthropicContextLimit(model string) int64 { diff --git a/pkg/model/provider/bedrock/client.go b/pkg/model/provider/bedrock/client.go index 85e815372..135659049 100644 --- a/pkg/model/provider/bedrock/client.go +++ b/pkg/model/provider/bedrock/client.go @@ -275,16 +275,23 @@ func (c *Client) buildInferenceConfig() *types.InferenceConfiguration { return cfg } +// resolveThinkingTokens returns the effective token budget for thinking. +// It handles both explicit token counts and effort-level strings. +// Returns 0 when no budget is set; efforts without a token mapping (e.g. "adaptive", "max") fall back to the Tokens field. +func (c *Client) resolveThinkingTokens() int { + if c.ModelConfig.ThinkingBudget == nil { + return 0 + } + if tokens, ok := c.ModelConfig.ThinkingBudget.EffortTokens(); ok { + return tokens + } + return c.ModelConfig.ThinkingBudget.Tokens +} + // isThinkingEnabled mirrors the validation in buildAdditionalModelRequestFields // to determine if thinking params will affect inference config (temp/topP constraints).
func (c *Client) isThinkingEnabled() bool { - if c.ModelConfig.ThinkingBudget == nil || c.ModelConfig.ThinkingBudget.Tokens <= 0 { - return false - } - - tokens := c.ModelConfig.ThinkingBudget.Tokens - - // Check minimum (Claude requires at least 1024 tokens for thinking) + tokens := c.resolveThinkingTokens() if tokens < 1024 { return false } @@ -310,12 +317,11 @@ func (c *Client) promptCachingEnabled() bool { // buildAdditionalModelRequestFields configures Claude's extended thinking (reasoning) mode. func (c *Client) buildAdditionalModelRequestFields() document.Interface { - if c.ModelConfig.ThinkingBudget == nil || c.ModelConfig.ThinkingBudget.Tokens <= 0 { + tokens := c.resolveThinkingTokens() + if tokens <= 0 { return nil } - tokens := c.ModelConfig.ThinkingBudget.Tokens - // Validate minimum (Claude requires at least 1024 tokens for thinking) if tokens < 1024 { slog.Warn("Bedrock thinking_budget below minimum (1024), ignoring", diff --git a/pkg/model/provider/custom_provider_test.go b/pkg/model/provider/custom_provider_test.go index cc8b9aa8c..5aa276cab 100644 --- a/pkg/model/provider/custom_provider_test.go +++ b/pkg/model/provider/custom_provider_test.go @@ -472,7 +472,7 @@ func TestResolveProviderTypeFromConfig(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { t.Parallel() - assert.Equal(t, tt.expected, resolveProviderTypeFromConfig(tt.config)) + assert.Equal(t, tt.expected, resolveProviderType(tt.config)) }) } } diff --git a/pkg/model/provider/model_defaults_test.go b/pkg/model/provider/model_defaults_test.go index a1524b586..f55f0bf2e 100644 --- a/pkg/model/provider/model_defaults_test.go +++ b/pkg/model/provider/model_defaults_test.go @@ -1,7 +1,6 @@ package provider import ( - "maps" "testing" "github.com/stretchr/testify/assert" @@ -10,452 +9,143 @@ import ( "github.com/docker/docker-agent/pkg/config/latest" ) -// TestApplyModelDefaults_OpenAI tests that OpenAI models get the correct default thinking_budget. 
-func TestApplyModelDefaults_OpenAI(t *testing.T) { +func TestApplyModelDefaults(t *testing.T) { t.Parallel() + boolPtr := func(v bool) *bool { return &v } + tests := []struct { - name string - config *latest.ModelConfig - expectThinkingBudget *latest.ThinkingBudget - expectProviderOptsKeys []string + name string + config *latest.ModelConfig + wantBudget *latest.ThinkingBudget // nil means no thinking + wantInterleaved *bool // nil means key must not exist }{ + // --- OpenAI: only o-series gets defaults --- { - name: "openai provider gets medium thinking_budget default", - config: &latest.ModelConfig{ - Provider: "openai", - Model: "gpt-4o", - }, - expectThinkingBudget: &latest.ThinkingBudget{Effort: "medium"}, + name: "openai/gpt-4o: no default thinking", + config: &latest.ModelConfig{Provider: "openai", Model: "gpt-4o"}, }, { - name: "openai_chatcompletions api_type gets medium thinking_budget default", - config: &latest.ModelConfig{ - Provider: "custom", - Model: "custom-model", - ProviderOpts: map[string]any{"api_type": "openai_chatcompletions"}, - }, - expectThinkingBudget: &latest.ThinkingBudget{Effort: "medium"}, + name: "openai/gpt-5: no default thinking", + config: &latest.ModelConfig{Provider: "openai", Model: "gpt-5"}, }, { - name: "openai_responses api_type gets medium thinking_budget default", - config: &latest.ModelConfig{ - Provider: "custom", - Model: "custom-model", - ProviderOpts: map[string]any{"api_type": "openai_responses"}, - }, - expectThinkingBudget: &latest.ThinkingBudget{Effort: "medium"}, + name: "openai/o3-mini: thinking-only model gets default", + config: &latest.ModelConfig{Provider: "openai", Model: "o3-mini"}, + wantBudget: &latest.ThinkingBudget{Effort: "medium"}, }, { - name: "mistral alias (openai) gets medium thinking_budget default", - config: &latest.ModelConfig{ - Provider: "mistral", - Model: "mistral-large-latest", - }, - expectThinkingBudget: &latest.ThinkingBudget{Effort: "medium"}, + name: "openai/o1: thinking-only model 
gets default", + config: &latest.ModelConfig{Provider: "openai", Model: "o1"}, + wantBudget: &latest.ThinkingBudget{Effort: "medium"}, }, { - name: "xai alias (openai) gets medium thinking_budget default", - config: &latest.ModelConfig{ - Provider: "xai", - Model: "grok-2", - }, - expectThinkingBudget: &latest.ThinkingBudget{Effort: "medium"}, + name: "openai/o4-mini: thinking-only model gets default", + config: &latest.ModelConfig{Provider: "openai", Model: "o4-mini"}, + wantBudget: &latest.ThinkingBudget{Effort: "medium"}, }, { - name: "explicit thinking_budget is preserved", - config: &latest.ModelConfig{ - Provider: "openai", - Model: "gpt-4o", - ThinkingBudget: &latest.ThinkingBudget{Effort: "high"}, - }, - expectThinkingBudget: &latest.ThinkingBudget{Effort: "high"}, + name: "openai/o3-mini: explicit budget overrides default", + config: &latest.ModelConfig{Provider: "openai", Model: "o3-mini", ThinkingBudget: &latest.ThinkingBudget{Effort: "high"}}, + wantBudget: &latest.ThinkingBudget{Effort: "high"}, }, { - name: "explicit thinking_budget with tokens is preserved", - config: &latest.ModelConfig{ - Provider: "openai", - Model: "gpt-4o", - ThinkingBudget: &latest.ThinkingBudget{Tokens: 5000}, - }, - expectThinkingBudget: &latest.ThinkingBudget{Tokens: 5000}, + name: "openai/gpt-4o: explicit budget preserved", + config: &latest.ModelConfig{Provider: "openai", Model: "gpt-4o", ThinkingBudget: &latest.ThinkingBudget{Effort: "high"}}, + wantBudget: &latest.ThinkingBudget{Effort: "high"}, }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - t.Parallel() - - // Apply defaults - applyModelDefaults(tt.config) - - // Verify thinking budget - require.NotNil(t, tt.config.ThinkingBudget, "ThinkingBudget should be set") - assert.Equal(t, tt.expectThinkingBudget.Effort, tt.config.ThinkingBudget.Effort, "Effort should match") - assert.Equal(t, tt.expectThinkingBudget.Tokens, tt.config.ThinkingBudget.Tokens, "Tokens should match") - }) - } -} - -// 
TestApplyModelDefaults_Anthropic tests that Anthropic models get the correct defaults. -func TestApplyModelDefaults_Anthropic(t *testing.T) { - t.Parallel() - tests := []struct { - name string - config *latest.ModelConfig - expectThinkingBudget *latest.ThinkingBudget - expectInterleavedThinking bool - expectExplicitInterleaved bool // true if we expect an explicit value in ProviderOpts - }{ - { - name: "anthropic provider gets 8192 thinking_budget default", - config: &latest.ModelConfig{ - Provider: "anthropic", - Model: "claude-sonnet-4-0", - }, - expectThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, - expectInterleavedThinking: true, - expectExplicitInterleaved: true, - }, - { - name: "anthropic provider with no initial ProviderOpts", - config: &latest.ModelConfig{ - Provider: "anthropic", - Model: "claude-opus-4-0", - }, - expectThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, - expectInterleavedThinking: true, - expectExplicitInterleaved: true, - }, + // --- Aliases (resolve to openai) — no default thinking --- { - name: "explicit thinking_budget is preserved", - config: &latest.ModelConfig{ - Provider: "anthropic", - Model: "claude-sonnet-4-0", - ThinkingBudget: &latest.ThinkingBudget{Tokens: 16384}, - }, - expectThinkingBudget: &latest.ThinkingBudget{Tokens: 16384}, - expectInterleavedThinking: true, - expectExplicitInterleaved: true, + name: "mistral: no default thinking", + config: &latest.ModelConfig{Provider: "mistral", Model: "mistral-large-latest"}, }, { - name: "explicit interleaved_thinking false is preserved", - config: &latest.ModelConfig{ - Provider: "anthropic", - Model: "claude-sonnet-4-0", - ProviderOpts: map[string]any{"interleaved_thinking": false}, - }, - expectThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, - expectInterleavedThinking: false, - expectExplicitInterleaved: true, + name: "xai: no default thinking", + config: &latest.ModelConfig{Provider: "xai", Model: "grok-2"}, }, { - name: "explicit interleaved_thinking true 
is preserved", - config: &latest.ModelConfig{ - Provider: "anthropic", - Model: "claude-sonnet-4-0", - ProviderOpts: map[string]any{"interleaved_thinking": true}, - }, - expectThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, - expectInterleavedThinking: true, - expectExplicitInterleaved: true, - }, - { - name: "existing ProviderOpts are preserved", - config: &latest.ModelConfig{ - Provider: "anthropic", - Model: "claude-sonnet-4-0", - ProviderOpts: map[string]any{"some_other_option": "value"}, - }, - expectThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, - expectInterleavedThinking: true, - expectExplicitInterleaved: true, + name: "custom openai_chatcompletions: no default thinking", + config: &latest.ModelConfig{Provider: "custom", Model: "custom-model", ProviderOpts: map[string]any{"api_type": "openai_chatcompletions"}}, }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - t.Parallel() - - // Save original ProviderOpts keys to check preservation - originalOpts := make(map[string]any) - if tt.config.ProviderOpts != nil { - maps.Copy(originalOpts, tt.config.ProviderOpts) - } - // Apply defaults - applyModelDefaults(tt.config) - - // Verify thinking budget - require.NotNil(t, tt.config.ThinkingBudget, "ThinkingBudget should be set") - assert.Equal(t, tt.expectThinkingBudget.Tokens, tt.config.ThinkingBudget.Tokens, "Tokens should match") - - // Verify interleaved_thinking - if tt.expectExplicitInterleaved { - require.NotNil(t, tt.config.ProviderOpts, "ProviderOpts should be set") - val, exists := tt.config.ProviderOpts["interleaved_thinking"] - require.True(t, exists, "interleaved_thinking should be set in ProviderOpts") - assert.Equal(t, tt.expectInterleavedThinking, val, "interleaved_thinking should match expected value") - } - - // Verify original ProviderOpts are preserved - for k, v := range originalOpts { - if k != "interleaved_thinking" { - assert.Equal(t, v, tt.config.ProviderOpts[k], "original ProviderOpts key %s should be 
preserved", k) - } - } - }) - } -} - -// TestApplyModelDefaults_Google tests that Google Gemini models get the correct defaults. -func TestApplyModelDefaults_Google(t *testing.T) { - t.Parallel() - - tests := []struct { - name string - config *latest.ModelConfig - expectThinkingBudget *latest.ThinkingBudget - expectNoDefault bool // true if no default should be applied - }{ - { - name: "gemini-2.5-flash gets dynamic thinking default (-1)", - config: &latest.ModelConfig{ - Provider: "google", - Model: "gemini-2.5-flash", - }, - expectThinkingBudget: &latest.ThinkingBudget{Tokens: -1}, - }, - { - name: "gemini-2.5-pro gets dynamic thinking default (-1)", - config: &latest.ModelConfig{ - Provider: "google", - Model: "gemini-2.5-pro", - }, - expectThinkingBudget: &latest.ThinkingBudget{Tokens: -1}, - }, - { - name: "gemini-2.5-flash-lite gets dynamic thinking default (-1)", - config: &latest.ModelConfig{ - Provider: "google", - Model: "gemini-2.5-flash-lite", - }, - expectThinkingBudget: &latest.ThinkingBudget{Tokens: -1}, - }, - { - name: "gemini-3-pro gets high thinking level default", - config: &latest.ModelConfig{ - Provider: "google", - Model: "gemini-3-pro", - }, - expectThinkingBudget: &latest.ThinkingBudget{Effort: "high"}, - }, - { - name: "gemini-3-pro-preview gets high thinking level default", - config: &latest.ModelConfig{ - Provider: "google", - Model: "gemini-3-pro-preview", - }, - expectThinkingBudget: &latest.ThinkingBudget{Effort: "high"}, - }, - { - name: "gemini-3.1-pro-preview gets high thinking level default", - config: &latest.ModelConfig{ - Provider: "google", - Model: "gemini-3.1-pro-preview", - }, - expectThinkingBudget: &latest.ThinkingBudget{Effort: "high"}, - }, - { - name: "gemini-3-flash gets medium thinking level default", - config: &latest.ModelConfig{ - Provider: "google", - Model: "gemini-3-flash", - }, - expectThinkingBudget: &latest.ThinkingBudget{Effort: "medium"}, - }, + // --- Anthropic: no default, but interleaved_thinking when 
budget set --- { - name: "gemini-3-flash-preview gets medium thinking level default", - config: &latest.ModelConfig{ - Provider: "google", - Model: "gemini-3-flash-preview", - }, - expectThinkingBudget: &latest.ThinkingBudget{Effort: "medium"}, + name: "anthropic: no default thinking", + config: &latest.ModelConfig{Provider: "anthropic", Model: "claude-sonnet-4-0"}, }, { - name: "gemini-3.1-flash-preview gets medium thinking level default", - config: &latest.ModelConfig{ - Provider: "google", - Model: "gemini-3.1-flash-preview", - }, - expectThinkingBudget: &latest.ThinkingBudget{Effort: "medium"}, + name: "anthropic: explicit budget enables interleaved_thinking", + config: &latest.ModelConfig{Provider: "anthropic", Model: "claude-sonnet-4-0", ThinkingBudget: &latest.ThinkingBudget{Tokens: 16384}}, + wantBudget: &latest.ThinkingBudget{Tokens: 16384}, + wantInterleaved: boolPtr(true), }, { - name: "gemini-2.0-flash is not affected (old model)", - config: &latest.ModelConfig{ - Provider: "google", - Model: "gemini-2.0-flash", - }, - expectNoDefault: true, + name: "anthropic: adaptive budget enables interleaved_thinking", + config: &latest.ModelConfig{Provider: "anthropic", Model: "claude-opus-4-6", ThinkingBudget: &latest.ThinkingBudget{Effort: "adaptive"}}, + wantBudget: &latest.ThinkingBudget{Effort: "adaptive"}, + wantInterleaved: boolPtr(true), }, { - name: "gemini-1.5-pro is not affected (old model)", - config: &latest.ModelConfig{ - Provider: "google", - Model: "gemini-1.5-pro", - }, - expectNoDefault: true, + name: "anthropic: explicit interleaved_thinking=false is preserved", + config: &latest.ModelConfig{Provider: "anthropic", Model: "claude-sonnet-4-0", ThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, ProviderOpts: map[string]any{"interleaved_thinking": false}}, + wantBudget: &latest.ThinkingBudget{Tokens: 8192}, + wantInterleaved: boolPtr(false), }, + + // --- Google: no default thinking --- { - name: "explicit thinking_budget is preserved for 
gemini-2.5", - config: &latest.ModelConfig{ - Provider: "google", - Model: "gemini-2.5-flash", - ThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, - }, - expectThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, + name: "google/gemini-2.5-flash: no default thinking", + config: &latest.ModelConfig{Provider: "google", Model: "gemini-2.5-flash"}, }, { - name: "explicit thinking_budget is preserved for gemini-3", - config: &latest.ModelConfig{ - Provider: "google", - Model: "gemini-3-pro", - ThinkingBudget: &latest.ThinkingBudget{Effort: "low"}, - }, - expectThinkingBudget: &latest.ThinkingBudget{Effort: "low"}, + name: "google/gemini-3-pro: no default thinking", + config: &latest.ModelConfig{Provider: "google", Model: "gemini-3-pro"}, }, { - name: "thinking_budget 0 disables thinking completely (nil)", - config: &latest.ModelConfig{ - Provider: "google", - Model: "gemini-2.5-flash", - ThinkingBudget: &latest.ThinkingBudget{Tokens: 0}, - }, - expectNoDefault: true, // thinking_budget: 0 means disable thinking entirely + name: "google: explicit budget preserved", + config: &latest.ModelConfig{Provider: "google", Model: "gemini-2.5-flash", ThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}}, + wantBudget: &latest.ThinkingBudget{Tokens: 8192}, }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - t.Parallel() - - // Apply defaults - applyModelDefaults(tt.config) - - if tt.expectNoDefault { - assert.Nil(t, tt.config.ThinkingBudget, "ThinkingBudget should not be set for old Gemini model") - return - } - - // Verify thinking budget - require.NotNil(t, tt.config.ThinkingBudget, "ThinkingBudget should be set") - assert.Equal(t, tt.expectThinkingBudget.Effort, tt.config.ThinkingBudget.Effort, "Effort should match") - assert.Equal(t, tt.expectThinkingBudget.Tokens, tt.config.ThinkingBudget.Tokens, "Tokens should match") - }) - } -} - -// TestApplyModelDefaults_Bedrock tests that Amazon Bedrock Claude models get the correct defaults. 
-func TestApplyModelDefaults_Bedrock(t *testing.T) { - t.Parallel() - tests := []struct { - name string - config *latest.ModelConfig - expectThinkingBudget *latest.ThinkingBudget - expectInterleavedThinking bool - expectExplicitInterleaved bool // true if we expect an explicit value in ProviderOpts - expectNoDefault bool // true if no default should be applied - }{ - { - name: "bedrock claude model gets defaults", - config: &latest.ModelConfig{ - Provider: "amazon-bedrock", - Model: "anthropic.claude-3-sonnet", - }, - expectThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, - expectInterleavedThinking: true, - expectExplicitInterleaved: true, - }, + // --- Bedrock: no default thinking, interleaved_thinking when budget set on Claude --- { - name: "bedrock claude-sonnet-4 model gets defaults", - config: &latest.ModelConfig{ - Provider: "amazon-bedrock", - Model: "anthropic.claude-sonnet-4-20250514-v1:0", - }, - expectThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, - expectInterleavedThinking: true, - expectExplicitInterleaved: true, + name: "bedrock claude: no default thinking", + config: &latest.ModelConfig{Provider: "amazon-bedrock", Model: "anthropic.claude-3-sonnet"}, }, { - name: "bedrock global claude model gets defaults", - config: &latest.ModelConfig{ - Provider: "amazon-bedrock", - Model: "global.anthropic.claude-sonnet-4-5-20250929-v1:0", - }, - expectThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, - expectInterleavedThinking: true, - expectExplicitInterleaved: true, + name: "bedrock global claude: no default thinking", + config: &latest.ModelConfig{Provider: "amazon-bedrock", Model: "global.anthropic.claude-sonnet-4-5-20250929-v1:0"}, }, { - name: "bedrock claude opus model gets defaults", - config: &latest.ModelConfig{ - Provider: "amazon-bedrock", - Model: "anthropic.claude-opus-4-0", - }, - expectThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, - expectInterleavedThinking: true, - expectExplicitInterleaved: true, + name: "bedrock 
claude: explicit budget enables interleaved_thinking", + config: &latest.ModelConfig{Provider: "amazon-bedrock", Model: "anthropic.claude-3-sonnet", ThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}}, + wantBudget: &latest.ThinkingBudget{Tokens: 8192}, + wantInterleaved: boolPtr(true), }, { - name: "bedrock non-claude model is not affected", - config: &latest.ModelConfig{ - Provider: "amazon-bedrock", - Model: "amazon.titan-text-express-v1", - }, - expectNoDefault: true, + name: "bedrock non-claude: not affected", + config: &latest.ModelConfig{Provider: "amazon-bedrock", Model: "amazon.titan-text-express-v1"}, }, + + // --- Disabled thinking normalised to nil --- { - name: "bedrock mistral model is not affected", - config: &latest.ModelConfig{ - Provider: "amazon-bedrock", - Model: "mistral.mistral-large-latest", - }, - expectNoDefault: true, + name: "thinking_budget: 0 becomes nil", + config: &latest.ModelConfig{Provider: "anthropic", Model: "claude-sonnet-4-0", ThinkingBudget: &latest.ThinkingBudget{Tokens: 0}}, }, { - name: "explicit thinking_budget is preserved", - config: &latest.ModelConfig{ - Provider: "amazon-bedrock", - Model: "anthropic.claude-3-sonnet", - ThinkingBudget: &latest.ThinkingBudget{Tokens: 16384}, - }, - expectThinkingBudget: &latest.ThinkingBudget{Tokens: 16384}, - expectInterleavedThinking: true, - expectExplicitInterleaved: true, + name: "thinking_budget: none becomes nil", + config: &latest.ModelConfig{Provider: "openai", Model: "gpt-4o", ThinkingBudget: &latest.ThinkingBudget{Effort: "none"}}, }, + + // --- Unknown / other providers: no effect --- { - name: "explicit interleaved_thinking false is preserved", - config: &latest.ModelConfig{ - Provider: "amazon-bedrock", - Model: "anthropic.claude-3-sonnet", - ProviderOpts: map[string]any{"interleaved_thinking": false}, - }, - expectThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, - expectInterleavedThinking: false, - expectExplicitInterleaved: true, + name: "unknown provider: no 
effect", + config: &latest.ModelConfig{Provider: "unknown", Model: "some-model"}, }, { - name: "existing ProviderOpts are preserved", - config: &latest.ModelConfig{ - Provider: "amazon-bedrock", - Model: "anthropic.claude-3-sonnet", - ProviderOpts: map[string]any{"region": "us-west-2"}, - }, - expectThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, - expectInterleavedThinking: true, - expectExplicitInterleaved: true, + name: "dmr: no effect", + config: &latest.ModelConfig{Provider: "dmr", Model: "ai/llama3.2"}, }, } @@ -463,201 +153,68 @@ func TestApplyModelDefaults_Bedrock(t *testing.T) { t.Run(tt.name, func(t *testing.T) { t.Parallel() - // Save original ProviderOpts keys to check preservation - originalOpts := make(map[string]any) - if tt.config.ProviderOpts != nil { - maps.Copy(originalOpts, tt.config.ProviderOpts) - } - - // Apply defaults applyModelDefaults(tt.config) - if tt.expectNoDefault { - assert.Nil(t, tt.config.ThinkingBudget, "ThinkingBudget should not be set for non-Claude Bedrock model") - if tt.config.ProviderOpts != nil { - _, exists := tt.config.ProviderOpts["interleaved_thinking"] - assert.False(t, exists, "interleaved_thinking should not be set for non-Claude Bedrock model") - } - return - } - - // Verify thinking budget - require.NotNil(t, tt.config.ThinkingBudget, "ThinkingBudget should be set") - assert.Equal(t, tt.expectThinkingBudget.Tokens, tt.config.ThinkingBudget.Tokens, "Tokens should match") - - // Verify interleaved_thinking - if tt.expectExplicitInterleaved { - require.NotNil(t, tt.config.ProviderOpts, "ProviderOpts should be set") - val, exists := tt.config.ProviderOpts["interleaved_thinking"] - require.True(t, exists, "interleaved_thinking should be set in ProviderOpts") - assert.Equal(t, tt.expectInterleavedThinking, val, "interleaved_thinking should match expected value") + // Check thinking budget. 
+ if tt.wantBudget == nil { + assert.Nil(t, tt.config.ThinkingBudget) + } else { + require.NotNil(t, tt.config.ThinkingBudget) + assert.Equal(t, *tt.wantBudget, *tt.config.ThinkingBudget) } - // Verify original ProviderOpts are preserved - for k, v := range originalOpts { - if k != "interleaved_thinking" { - assert.Equal(t, v, tt.config.ProviderOpts[k], "original ProviderOpts key %s should be preserved", k) + // Check interleaved_thinking. + if tt.wantInterleaved == nil { + if tt.config.ProviderOpts != nil { + _, exists := tt.config.ProviderOpts["interleaved_thinking"] + assert.False(t, exists, "interleaved_thinking should not be set") } + } else { + require.NotNil(t, tt.config.ProviderOpts) + assert.Equal(t, *tt.wantInterleaved, tt.config.ProviderOpts["interleaved_thinking"]) } }) } } -// TestApplyModelDefaults_NonAffectedProviders tests that other providers are not affected. -func TestApplyModelDefaults_NonAffectedProviders(t *testing.T) { +func TestApplyProviderDefaults(t *testing.T) { t.Parallel() - tests := []struct { - name string - config *latest.ModelConfig - }{ - { - name: "google gemini-2.0-flash is not affected (old model)", - config: &latest.ModelConfig{ - Provider: "google", - Model: "gemini-2.0-flash", - }, - }, - { - name: "dmr provider is not affected", - config: &latest.ModelConfig{ - Provider: "dmr", - Model: "ai/llama3.2", - }, - }, - { - name: "amazon-bedrock non-claude model is not affected", - config: &latest.ModelConfig{ - Provider: "amazon-bedrock", - Model: "amazon.titan-text-express-v1", - }, - }, - { - name: "unknown provider is not affected", - config: &latest.ModelConfig{ - Provider: "unknown", - Model: "some-model", - }, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - t.Parallel() - - // Apply defaults - applyModelDefaults(tt.config) - - // Verify thinking_budget is NOT set - assert.Nil(t, tt.config.ThinkingBudget, "ThinkingBudget should not be set for non-affected provider") - - // Verify 
interleaved_thinking is NOT set - if tt.config.ProviderOpts != nil { - _, exists := tt.config.ProviderOpts["interleaved_thinking"] - assert.False(t, exists, "interleaved_thinking should not be set for non-affected provider") - } - }) - } -} - -// TestApplyProviderDefaults_IncludesModelDefaults tests that applyProviderDefaults -// also applies model-specific defaults via applyModelDefaults. -func TestApplyProviderDefaults_IncludesModelDefaults(t *testing.T) { - t.Parallel() + boolPtr := func(v bool) *bool { return &v } tests := []struct { - name string - config *latest.ModelConfig - customProviders map[string]latest.ProviderConfig - expectThinkingBudget *latest.ThinkingBudget - expectInterleavedThinking *bool + name string + config *latest.ModelConfig + customProviders map[string]latest.ProviderConfig + wantBudget *latest.ThinkingBudget + wantInterleaved *bool }{ { - name: "openai model from config gets defaults", - config: &latest.ModelConfig{ - Provider: "openai", - Model: "gpt-4o", - }, - expectThinkingBudget: &latest.ThinkingBudget{Effort: "medium"}, - }, - { - name: "anthropic model from config gets defaults", - config: &latest.ModelConfig{ - Provider: "anthropic", - Model: "claude-sonnet-4-0", - }, - expectThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, - expectInterleavedThinking: new(true), - }, - { - name: "google gemini-2.5 model gets defaults", - config: &latest.ModelConfig{ - Provider: "google", - Model: "gemini-2.5-flash", - }, - expectThinkingBudget: &latest.ThinkingBudget{Tokens: -1}, - }, - { - name: "google gemini-3-pro model gets defaults", - config: &latest.ModelConfig{ - Provider: "google", - Model: "gemini-3-pro", - }, - expectThinkingBudget: &latest.ThinkingBudget{Effort: "high"}, - }, - { - name: "google gemini-3-flash model gets defaults", - config: &latest.ModelConfig{ - Provider: "google", - Model: "gemini-3-flash", - }, - expectThinkingBudget: &latest.ThinkingBudget{Effort: "medium"}, + name: "openai o3-mini: thinking-only gets 
default through provider defaults", + config: &latest.ModelConfig{Provider: "openai", Model: "o3-mini"}, + wantBudget: &latest.ThinkingBudget{Effort: "medium"}, }, { - name: "bedrock claude model gets defaults", - config: &latest.ModelConfig{ - Provider: "amazon-bedrock", - Model: "anthropic.claude-3-sonnet", - }, - expectThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, - expectInterleavedThinking: new(true), + name: "openai gpt-4o: no default through provider defaults", + config: &latest.ModelConfig{Provider: "openai", Model: "gpt-4o"}, }, { - name: "bedrock global claude model gets defaults", - config: &latest.ModelConfig{ - Provider: "amazon-bedrock", - Model: "global.anthropic.claude-sonnet-4-5-20250929-v1:0", - }, - expectThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, - expectInterleavedThinking: new(true), + name: "anthropic with explicit budget gets interleaved through provider defaults", + config: &latest.ModelConfig{Provider: "anthropic", Model: "claude-sonnet-4-0", ThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}}, + wantBudget: &latest.ThinkingBudget{Tokens: 8192}, + wantInterleaved: boolPtr(true), }, { - name: "custom provider with openai api_type gets openai defaults", - config: &latest.ModelConfig{ - Provider: "my_gateway", - Model: "gpt-4o", - }, + name: "custom openai provider: no default thinking", + config: &latest.ModelConfig{Provider: "my_gateway", Model: "gpt-4o"}, customProviders: map[string]latest.ProviderConfig{ - "my_gateway": { - APIType: "openai_chatcompletions", - BaseURL: "https://api.example.com/v1", - TokenKey: "MY_KEY", - }, + "my_gateway": {APIType: "openai_chatcompletions", BaseURL: "https://api.example.com/v1", TokenKey: "MY_KEY"}, }, - expectThinkingBudget: &latest.ThinkingBudget{Effort: "medium"}, }, { - name: "custom provider with anthropic api_type gets anthropic defaults", - config: &latest.ModelConfig{ - Provider: "my_anthropic_gateway", - Model: "claude-sonnet-4-0", - ProviderOpts: map[string]any{ - 
"api_type": "anthropic", - }, - }, - customProviders: nil, - expectThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, - expectInterleavedThinking: new(true), + name: "explicit thinking preserved unchanged", + config: &latest.ModelConfig{Provider: "openai", Model: "gpt-4o", ThinkingBudget: &latest.ThinkingBudget{Effort: "high"}}, + wantBudget: &latest.ThinkingBudget{Effort: "high"}, }, } @@ -667,153 +224,42 @@ func TestApplyProviderDefaults_IncludesModelDefaults(t *testing.T) { result := applyProviderDefaults(tt.config, tt.customProviders) - // Verify thinking budget - if tt.expectThinkingBudget != nil { - require.NotNil(t, result.ThinkingBudget, "ThinkingBudget should be set") - assert.Equal(t, tt.expectThinkingBudget.Effort, result.ThinkingBudget.Effort, "Effort should match") - assert.Equal(t, tt.expectThinkingBudget.Tokens, result.ThinkingBudget.Tokens, "Tokens should match") + if tt.wantBudget == nil { + assert.Nil(t, result.ThinkingBudget) + } else { + require.NotNil(t, result.ThinkingBudget) + assert.Equal(t, *tt.wantBudget, *result.ThinkingBudget) } - // Verify interleaved_thinking for Anthropic - if tt.expectInterleavedThinking != nil { - require.NotNil(t, result.ProviderOpts, "ProviderOpts should be set") - val, exists := result.ProviderOpts["interleaved_thinking"] - require.True(t, exists, "interleaved_thinking should be set") - assert.Equal(t, *tt.expectInterleavedThinking, val, "interleaved_thinking should match") - } - }) - } -} - -// TestApplyProviderDefaults_ThinkingDefaultsApplied tests that thinking defaults -// are always applied when the config doesn't have an explicit thinking budget. 
-func TestApplyProviderDefaults_ThinkingDefaultsApplied(t *testing.T) { - t.Parallel() - - tests := []struct { - name string - config *latest.ModelConfig - expectThinkingBudget *latest.ThinkingBudget - expectInterleavedThinking bool - }{ - { - name: "OpenAI gets default thinking_budget", - config: &latest.ModelConfig{ - Provider: "openai", - Model: "gpt-4o", - }, - expectThinkingBudget: &latest.ThinkingBudget{Effort: "medium"}, - }, - { - name: "Anthropic gets default thinking_budget and interleaved_thinking", - config: &latest.ModelConfig{ - Provider: "anthropic", - Model: "claude-sonnet-4-0", - }, - expectThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, - expectInterleavedThinking: true, - }, - { - name: "Google Gemini 2.5 gets default thinking_budget", - config: &latest.ModelConfig{ - Provider: "google", - Model: "gemini-2.5-pro", - }, - expectThinkingBudget: &latest.ThinkingBudget{Tokens: -1}, - }, - { - name: "Google Gemini 3 Pro gets default thinking_budget", - config: &latest.ModelConfig{ - Provider: "google", - Model: "gemini-3-pro", - }, - expectThinkingBudget: &latest.ThinkingBudget{Effort: "high"}, - }, - { - name: "Bedrock Claude gets default thinking_budget and interleaved_thinking", - config: &latest.ModelConfig{ - Provider: "amazon-bedrock", - Model: "anthropic.claude-3-sonnet", - }, - expectThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, - expectInterleavedThinking: true, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - t.Parallel() - - // Apply provider defaults - result := applyProviderDefaults(tt.config, nil) - - // Verify default thinking budget was applied - require.NotNil(t, result.ThinkingBudget, "ThinkingBudget should be set") - assert.Equal(t, tt.expectThinkingBudget.Effort, result.ThinkingBudget.Effort, "Effort should match") - assert.Equal(t, tt.expectThinkingBudget.Tokens, result.ThinkingBudget.Tokens, "Tokens should match") - - // Verify interleaved_thinking for Anthropic/Bedrock - if 
tt.expectInterleavedThinking { - require.NotNil(t, result.ProviderOpts, "ProviderOpts should be set") - val, exists := result.ProviderOpts["interleaved_thinking"] - require.True(t, exists, "interleaved_thinking should be set") - assert.Equal(t, true, val, "interleaved_thinking should be true") + if tt.wantInterleaved != nil { + require.NotNil(t, result.ProviderOpts) + assert.Equal(t, *tt.wantInterleaved, result.ProviderOpts["interleaved_thinking"]) } }) } } -// TestApplyProviderDefaults_ExplicitThinkingPreserved tests that explicitly set -// thinking options are preserved and not overwritten by defaults. -func TestApplyProviderDefaults_ExplicitThinkingPreserved(t *testing.T) { - t.Parallel() - - config := &latest.ModelConfig{ - Provider: "openai", - Model: "gpt-4o", - ThinkingBudget: &latest.ThinkingBudget{Effort: "high"}, - } - - result := applyProviderDefaults(config, nil) - - require.NotNil(t, result.ThinkingBudget, "ThinkingBudget should be preserved") - assert.Equal(t, "high", result.ThinkingBudget.Effort, "Effort should be preserved") -} - -// TestApplyProviderDefaults_DisabledThinkingBecomesNil tests that explicitly disabled -// thinking (thinking_budget: 0 or thinking_budget: none) results in nil ThinkingBudget. 
-func TestApplyProviderDefaults_DisabledThinkingBecomesNil(t *testing.T) { +func TestIsOpenAIThinkingOnlyModel(t *testing.T) { t.Parallel() - tests := []struct { - name string - config *latest.ModelConfig + for _, tt := range []struct { + model string + want bool }{ - { - name: "thinking_budget 0 becomes nil", - config: &latest.ModelConfig{ - Provider: "anthropic", - Model: "claude-sonnet-4-0", - ThinkingBudget: &latest.ThinkingBudget{Tokens: 0}, - }, - }, - { - name: "thinking_budget none becomes nil", - config: &latest.ModelConfig{ - Provider: "openai", - Model: "gpt-4o", - ThinkingBudget: &latest.ThinkingBudget{Effort: "none"}, - }, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { + {"o1", true}, + {"o1-preview", true}, + {"o1-mini", true}, + {"o3", true}, + {"o3-mini", true}, + {"o4-mini", true}, + {"gpt-4o", false}, + {"gpt-4.1", false}, + {"gpt-5", false}, + {"custom-model", false}, + } { + t.Run(tt.model, func(t *testing.T) { t.Parallel() - - result := applyProviderDefaults(tt.config, nil) - - assert.Nil(t, result.ThinkingBudget, "ThinkingBudget should be nil when explicitly disabled") + assert.Equal(t, tt.want, isOpenAIThinkingOnlyModel(tt.model)) }) } } diff --git a/pkg/model/provider/override_test.go b/pkg/model/provider/override_test.go index 3da941c7d..5839f8e67 100644 --- a/pkg/model/provider/override_test.go +++ b/pkg/model/provider/override_test.go @@ -10,127 +10,79 @@ import ( "github.com/docker/docker-agent/pkg/model/provider/options" ) -// TestApplyOverrides_Thinking tests that applyOverrides correctly clears -// thinking configuration when Thinking is set to false (disabled). 
-func TestApplyOverrides_Thinking(t *testing.T) { +func TestApplyOverrides(t *testing.T) { t.Parallel() + boolPtr := func(v bool) *bool { return &v } + tests := []struct { - name string - config *latest.ModelConfig - thinkingEnabled *bool // nil means no override, true means enabled, false means disabled - expectThinkingBudget *latest.ThinkingBudget - expectInterleavedThinking *bool // nil means key should not exist + name string + config *latest.ModelConfig + thinking *bool // nil = no override + wantBudget *latest.ThinkingBudget + wantInterleaved *bool // nil = key must not exist }{ + // --- Disable clears everything --- { - name: "clears explicit thinking_budget when disabled", - config: &latest.ModelConfig{ - Provider: "anthropic", - Model: "claude-sonnet-4-0", - ThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, - }, - thinkingEnabled: new(false), - expectThinkingBudget: nil, - }, - { - name: "clears interleaved_thinking when disabled", - config: &latest.ModelConfig{ - Provider: "anthropic", - Model: "claude-sonnet-4-0", - ThinkingBudget: &latest.ThinkingBudget{Tokens: 16384}, - ProviderOpts: map[string]any{"interleaved_thinking": true}, - }, - thinkingEnabled: new(false), - expectThinkingBudget: nil, - expectInterleavedThinking: nil, // key should be removed + name: "disable: clears thinking_budget", + config: &latest.ModelConfig{Provider: "anthropic", Model: "claude-sonnet-4-0", ThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}}, + thinking: boolPtr(false), }, { - name: "preserves thinking_budget when enabled", - config: &latest.ModelConfig{ - Provider: "anthropic", - Model: "claude-sonnet-4-0", - ThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, - }, - thinkingEnabled: new(true), - expectThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, + name: "disable: clears interleaved_thinking", + config: &latest.ModelConfig{Provider: "anthropic", Model: "claude-sonnet-4-0", ThinkingBudget: &latest.ThinkingBudget{Tokens: 16384}, ProviderOpts: 
map[string]any{"interleaved_thinking": true}}, + thinking: boolPtr(false), }, + + // --- Enable preserves existing budget --- { - name: "preserves interleaved_thinking when enabled", - config: &latest.ModelConfig{ - Provider: "anthropic", - Model: "claude-sonnet-4-0", - ThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, - ProviderOpts: map[string]any{"interleaved_thinking": true}, - }, - thinkingEnabled: new(true), - expectThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, - expectInterleavedThinking: new(true), + name: "enable: preserves existing budget", + config: &latest.ModelConfig{Provider: "anthropic", Model: "claude-sonnet-4-0", ThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}}, + thinking: boolPtr(true), + wantBudget: &latest.ThinkingBudget{Tokens: 8192}, }, { - name: "preserves other ProviderOpts when clearing thinking", - config: &latest.ModelConfig{ - Provider: "anthropic", - Model: "claude-sonnet-4-0", - ThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, - ProviderOpts: map[string]any{ - "interleaved_thinking": true, - "other_option": "preserved", - }, - }, - thinkingEnabled: new(false), - expectThinkingBudget: nil, + name: "enable: preserves existing budget + interleaved", + config: &latest.ModelConfig{Provider: "anthropic", Model: "claude-sonnet-4-0", ThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, ProviderOpts: map[string]any{"interleaved_thinking": true}}, + thinking: boolPtr(true), + wantBudget: &latest.ThinkingBudget{Tokens: 8192}, + wantInterleaved: boolPtr(true), }, + + // --- Enable applies defaults when no budget --- { - name: "nil options is a no-op", - config: &latest.ModelConfig{ - Provider: "anthropic", - Model: "claude-sonnet-4-0", - ThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, - }, - thinkingEnabled: nil, // Will pass nil opts - expectThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, + name: "enable: OpenAI gets medium default", + config: &latest.ModelConfig{Provider: "openai", Model: "gpt-4o"}, + 
thinking: boolPtr(true), + wantBudget: &latest.ThinkingBudget{Effort: "medium"}, }, { - name: "applies defaults when enabled and ThinkingBudget is nil (OpenAI)", - config: &latest.ModelConfig{ - Provider: "openai", - Model: "gpt-4o", - ThinkingBudget: nil, // No thinking configured - }, - thinkingEnabled: new(true), - expectThinkingBudget: &latest.ThinkingBudget{Effort: "medium"}, // OpenAI default + name: "enable: Anthropic gets 8192 + interleaved", + config: &latest.ModelConfig{Provider: "anthropic", Model: "claude-sonnet-4-0"}, + thinking: boolPtr(true), + wantBudget: &latest.ThinkingBudget{Tokens: 8192}, + wantInterleaved: boolPtr(true), }, { - name: "applies defaults when enabled and ThinkingBudget is nil (Anthropic)", - config: &latest.ModelConfig{ - Provider: "anthropic", - Model: "claude-sonnet-4-0", - ThinkingBudget: nil, // No thinking configured - }, - thinkingEnabled: new(true), - expectThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, // Anthropic default - expectInterleavedThinking: new(true), // Anthropic default + name: "enable: restores from tokens=0", + config: &latest.ModelConfig{Provider: "openai", Model: "gpt-4o", ThinkingBudget: &latest.ThinkingBudget{Tokens: 0}}, + thinking: boolPtr(true), + wantBudget: &latest.ThinkingBudget{Effort: "medium"}, }, { - name: "restores defaults when /think used with tokens=0", - config: &latest.ModelConfig{ - Provider: "openai", - Model: "gpt-4o", - ThinkingBudget: &latest.ThinkingBudget{Tokens: 0}, // User had thinking disabled - }, - thinkingEnabled: new(true), // User runs /think - expectThinkingBudget: &latest.ThinkingBudget{Effort: "medium"}, // Apply OpenAI default + name: "enable: restores from effort=none", + config: &latest.ModelConfig{Provider: "anthropic", Model: "claude-sonnet-4-0", ThinkingBudget: &latest.ThinkingBudget{Effort: "none"}}, + thinking: boolPtr(true), + wantBudget: &latest.ThinkingBudget{Tokens: 8192}, + wantInterleaved: boolPtr(true), }, + + // --- No override = no-op --- { - 
name: "restores defaults when /think used with effort=none", - config: &latest.ModelConfig{ - Provider: "anthropic", - Model: "claude-sonnet-4-0", - ThinkingBudget: &latest.ThinkingBudget{Effort: "none"}, // User had thinking disabled - }, - thinkingEnabled: new(true), // User runs /think - expectThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, // Apply Anthropic default - expectInterleavedThinking: new(true), + name: "nil opts: config unchanged", + config: &latest.ModelConfig{Provider: "anthropic", Model: "claude-sonnet-4-0", ThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}}, + thinking: nil, + wantBudget: &latest.ThinkingBudget{Tokens: 8192}, }, } @@ -138,370 +90,178 @@ func TestApplyOverrides_Thinking(t *testing.T) { t.Run(tt.name, func(t *testing.T) { t.Parallel() - // Build options var opts *options.ModelOptions - if tt.thinkingEnabled != nil { - mo := options.ModelOptions{} - options.WithThinking(*tt.thinkingEnabled)(&mo) - opts = &mo + if tt.thinking != nil { + o := options.ModelOptions{} + options.WithThinking(*tt.thinking)(&o) + opts = &o } - // Save original other options for preservation check - var originalOtherOpts map[string]any - if tt.config.ProviderOpts != nil { - originalOtherOpts = make(map[string]any) - for k, v := range tt.config.ProviderOpts { - if k != "interleaved_thinking" { - originalOtherOpts[k] = v - } - } - } - - // Apply overrides result := applyOverrides(tt.config, opts) - // Verify thinking budget - if tt.expectThinkingBudget == nil { - assert.Nil(t, result.ThinkingBudget, "ThinkingBudget should be nil") + // Budget + if tt.wantBudget == nil { + assert.Nil(t, result.ThinkingBudget) } else { - require.NotNil(t, result.ThinkingBudget, "ThinkingBudget should be set") - assert.Equal(t, tt.expectThinkingBudget.Tokens, result.ThinkingBudget.Tokens) - assert.Equal(t, tt.expectThinkingBudget.Effort, result.ThinkingBudget.Effort) + require.NotNil(t, result.ThinkingBudget) + assert.Equal(t, *tt.wantBudget, *result.ThinkingBudget) } - 
// Verify interleaved_thinking - if tt.expectInterleavedThinking == nil && tt.thinkingEnabled != nil && !*tt.thinkingEnabled { - // Key should be removed when thinking is disabled + // interleaved_thinking + if tt.wantInterleaved == nil && tt.thinking != nil && !*tt.thinking { if result.ProviderOpts != nil { _, exists := result.ProviderOpts["interleaved_thinking"] assert.False(t, exists, "interleaved_thinking should be removed") } - } else if tt.expectInterleavedThinking != nil { + } else if tt.wantInterleaved != nil { require.NotNil(t, result.ProviderOpts) - val, exists := result.ProviderOpts["interleaved_thinking"] - require.True(t, exists, "interleaved_thinking should exist") - assert.Equal(t, *tt.expectInterleavedThinking, val) - } - - // Verify other ProviderOpts are preserved - for k, v := range originalOtherOpts { - require.NotNil(t, result.ProviderOpts, "ProviderOpts should exist for preserved keys") - assert.Equal(t, v, result.ProviderOpts[k], "other ProviderOpts key %s should be preserved", k) + assert.Equal(t, *tt.wantInterleaved, result.ProviderOpts["interleaved_thinking"]) } }) } } -// TestApplyOverrides_AllProviders tests that thinking override works for all providers. -func TestApplyOverrides_AllProviders(t *testing.T) { +// TestApplyOverrides_DoesNotModifyOriginal verifies that applyOverrides creates +// a proper copy: neither the struct fields, the ProviderOpts map, nor the +// ThinkingBudget pointer of the original config are mutated. 
+func TestApplyOverrides_DoesNotModifyOriginal(t *testing.T) { t.Parallel() - providers := []struct { - name string - provider string - model string - }{ - {"OpenAI", "openai", "gpt-4o"}, - {"Anthropic", "anthropic", "claude-sonnet-4-0"}, - {"Google", "google", "gemini-2.5-flash"}, - {"Bedrock Claude", "amazon-bedrock", "global.anthropic.claude-sonnet-4-5-20250929-v1:0"}, - {"Mistral (alias)", "mistral", "mistral-large-latest"}, - {"xAI (alias)", "xai", "grok-2"}, + original := &latest.ModelConfig{ + Provider: "anthropic", + Model: "claude-sonnet-4-0", + ThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, + ProviderOpts: map[string]any{"interleaved_thinking": true, "other": "value"}, } - for _, p := range providers { - t.Run(p.name, func(t *testing.T) { - t.Parallel() - - // Create config with thinking budget - config := &latest.ModelConfig{ - Provider: p.provider, - Model: p.model, - ThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, - } + o := options.ModelOptions{} + options.WithThinking(false)(&o) + result := applyOverrides(original, &o) - // Apply override with thinking disabled - mo := options.ModelOptions{} - options.WithThinking(false)(&mo) - result := applyOverrides(config, &mo) - - // Thinking should be cleared for all providers - assert.Nil(t, result.ThinkingBudget, - "ThinkingBudget should be cleared for provider %s", p.provider) - }) - } -} - -// TestDefaultsThenOverrides tests the full flow: defaults applied first, then overrides. 
-func TestDefaultsThenOverrides(t *testing.T) { - t.Parallel() - - tests := []struct { - name string - config *latest.ModelConfig - thinkingEnabled bool - expectThinkingBudget *latest.ThinkingBudget - }{ - { - name: "OpenAI: defaults applied, then cleared by override", - config: &latest.ModelConfig{ - Provider: "openai", - Model: "gpt-4o", - // No ThinkingBudget set - defaults will apply - }, - thinkingEnabled: false, - expectThinkingBudget: nil, // Override clears the default - }, - { - name: "OpenAI: defaults applied, preserved when enabled", - config: &latest.ModelConfig{ - Provider: "openai", - Model: "gpt-4o", - }, - thinkingEnabled: true, - expectThinkingBudget: &latest.ThinkingBudget{Effort: "medium"}, // Default preserved - }, - { - name: "Anthropic: defaults applied, then cleared by override", - config: &latest.ModelConfig{ - Provider: "anthropic", - Model: "claude-sonnet-4-0", - }, - thinkingEnabled: false, - expectThinkingBudget: nil, - }, - { - name: "Anthropic: defaults applied, preserved when enabled", - config: &latest.ModelConfig{ - Provider: "anthropic", - Model: "claude-sonnet-4-0", - }, - thinkingEnabled: true, - expectThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, - }, - { - name: "Google Gemini 2.5: defaults applied, then cleared by override", - config: &latest.ModelConfig{ - Provider: "google", - Model: "gemini-2.5-flash", - }, - thinkingEnabled: false, - expectThinkingBudget: nil, - }, - { - name: "Google Gemini 3 Pro: defaults applied, then cleared by override", - config: &latest.ModelConfig{ - Provider: "google", - Model: "gemini-3-pro", - }, - thinkingEnabled: false, - expectThinkingBudget: nil, - }, - { - name: "Bedrock Claude: defaults applied, then cleared by override", - config: &latest.ModelConfig{ - Provider: "amazon-bedrock", - Model: "anthropic.claude-3-sonnet", - }, - thinkingEnabled: false, - expectThinkingBudget: nil, - }, - { - name: "Explicit budget cleared by override", - config: &latest.ModelConfig{ - Provider: 
"anthropic", - Model: "claude-sonnet-4-0", - ThinkingBudget: &latest.ThinkingBudget{Tokens: 32000}, // Explicit - }, - thinkingEnabled: false, - expectThinkingBudget: nil, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - t.Parallel() + // Result should have thinking cleared. + assert.Nil(t, result.ThinkingBudget, "result ThinkingBudget should be nil") - // Step 1: Apply defaults (simulating createDirectProvider flow) - result := applyProviderDefaults(tt.config, nil) + // Original ThinkingBudget must be untouched. + require.NotNil(t, original.ThinkingBudget, "original ThinkingBudget must survive") + assert.Equal(t, 8192, original.ThinkingBudget.Tokens) - // Step 2: Apply overrides - mo := options.ModelOptions{} - options.WithThinking(tt.thinkingEnabled)(&mo) - result = applyOverrides(result, &mo) + // Original ProviderOpts map must still have interleaved_thinking. + val, exists := original.ProviderOpts["interleaved_thinking"] + require.True(t, exists, "original ProviderOpts must still contain interleaved_thinking") + assert.Equal(t, true, val) - // Verify result - if tt.expectThinkingBudget == nil { - assert.Nil(t, result.ThinkingBudget, "ThinkingBudget should be nil after override") - } else { - require.NotNil(t, result.ThinkingBudget, "ThinkingBudget should be set") - assert.Equal(t, tt.expectThinkingBudget.Tokens, result.ThinkingBudget.Tokens) - assert.Equal(t, tt.expectThinkingBudget.Effort, result.ThinkingBudget.Effort) - } - }) - } + // Other keys must survive in both original and result. + assert.Equal(t, "value", original.ProviderOpts["other"]) + require.NotNil(t, result.ProviderOpts) + assert.Equal(t, "value", result.ProviderOpts["other"]) } -// TestApplyOverrides_NilOpts tests that nil options returns config unchanged. -func TestApplyOverrides_NilOpts(t *testing.T) { +// TestApplyOverrides_DisablePreservesOtherProviderOpts verifies that disabling +// thinking only removes "interleaved_thinking" and leaves other keys intact. 
+func TestApplyOverrides_DisablePreservesOtherProviderOpts(t *testing.T) { t.Parallel() config := &latest.ModelConfig{ Provider: "anthropic", Model: "claude-sonnet-4-0", ThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, - ProviderOpts: map[string]any{"interleaved_thinking": true}, + ProviderOpts: map[string]any{"interleaved_thinking": true, "custom_key": "preserved"}, } - result := applyOverrides(config, nil) + o := options.ModelOptions{} + options.WithThinking(false)(&o) + result := applyOverrides(config, &o) - // Should be unchanged - require.NotNil(t, result.ThinkingBudget) - assert.Equal(t, 8192, result.ThinkingBudget.Tokens) - assert.Equal(t, true, result.ProviderOpts["interleaved_thinking"]) -} + // Thinking should be cleared. + assert.Nil(t, result.ThinkingBudget) -// TestApplyOverrides_DoesNotModifyOriginal tests that applyOverrides creates a copy. -func TestApplyOverrides_DoesNotModifyOriginal(t *testing.T) { - t.Parallel() + // interleaved_thinking should be removed. + _, exists := result.ProviderOpts["interleaved_thinking"] + assert.False(t, exists, "interleaved_thinking should be removed from result") - original := &latest.ModelConfig{ - Provider: "anthropic", - Model: "claude-sonnet-4-0", - ThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, - ProviderOpts: map[string]any{"interleaved_thinking": true}, - } - - mo := options.ModelOptions{} - options.WithThinking(false)(&mo) - result := applyOverrides(original, &mo) - - // Original should be unchanged - require.NotNil(t, original.ThinkingBudget, "Original ThinkingBudget should be unchanged") - assert.Equal(t, 8192, original.ThinkingBudget.Tokens) - - // Result should have changes - assert.Nil(t, result.ThinkingBudget, "Result ThinkingBudget should be nil") + // Other keys must survive. 
+ assert.Equal(t, "preserved", result.ProviderOpts["custom_key"]) } -// TestApplyOverrides_RestoresDefaultsFromDisabled tests that using /think when -// the config has thinking explicitly disabled (Tokens=0 or Effort="none") applies -// provider defaults. This is the key behavior that makes /think work when YAML -// starts with thinking_budget: 0 or thinking_budget: none. -// -// Note: applyProviderDefaults now converts disabled thinking (Tokens=0 or Effort="none") -// to nil ThinkingBudget. The /think command (applyOverrides with Thinking=true) then -// applies provider defaults since ThinkingBudget is nil. -func TestApplyOverrides_RestoresDefaultsFromDisabled(t *testing.T) { +// TestDefaultsThenOverrides tests the complete flow: provider defaults → overrides. +func TestDefaultsThenOverrides(t *testing.T) { t.Parallel() tests := []struct { - name string - config *latest.ModelConfig - expectThinkingBudget *latest.ThinkingBudget + name string + config *latest.ModelConfig + thinking bool + wantBudget *latest.ThinkingBudget }{ - { - name: "Anthropic: /think with Tokens=0 applies default 8192", - config: &latest.ModelConfig{ - Provider: "anthropic", - Model: "claude-sonnet-4-0", - ThinkingBudget: &latest.ThinkingBudget{Tokens: 0}, - }, - expectThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, - }, - { - name: "Anthropic: /think with Effort=none applies default 8192", - config: &latest.ModelConfig{ - Provider: "anthropic", - Model: "claude-sonnet-4-0", - ThinkingBudget: &latest.ThinkingBudget{Effort: "none"}, - }, - expectThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, - }, - { - name: "OpenAI: /think with Tokens=0 applies default medium", - config: &latest.ModelConfig{ - Provider: "openai", - Model: "gpt-4o", - ThinkingBudget: &latest.ThinkingBudget{Tokens: 0}, - }, - expectThinkingBudget: &latest.ThinkingBudget{Effort: "medium"}, - }, - { - name: "OpenAI: /think with Effort=none applies default medium", - config: &latest.ModelConfig{ - Provider: "openai", - 
Model: "gpt-4o", - ThinkingBudget: &latest.ThinkingBudget{Effort: "none"}, - }, - expectThinkingBudget: &latest.ThinkingBudget{Effort: "medium"}, - }, - { - name: "Gemini 2.5: /think with Tokens=0 applies default -1 (dynamic)", - config: &latest.ModelConfig{ - Provider: "google", - Model: "gemini-2.5-flash", - ThinkingBudget: &latest.ThinkingBudget{Tokens: 0}, - }, - expectThinkingBudget: &latest.ThinkingBudget{Tokens: -1}, - }, - { - name: "Bedrock Claude: /think with Tokens=0 applies default 8192", - config: &latest.ModelConfig{ - Provider: "amazon-bedrock", - Model: "anthropic.claude-3-sonnet", - ThinkingBudget: &latest.ThinkingBudget{Tokens: 0}, - }, - expectThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, - }, + // Disable on models without defaults — already nil, stays nil. + {"gpt-4o /think off", &latest.ModelConfig{Provider: "openai", Model: "gpt-4o"}, false, nil}, + {"anthropic /think off", &latest.ModelConfig{Provider: "anthropic", Model: "claude-sonnet-4-0"}, false, nil}, + + // Enable on models without defaults — applies provider defaults. 
+ {"gpt-4o /think on", &latest.ModelConfig{Provider: "openai", Model: "gpt-4o"}, true, &latest.ThinkingBudget{Effort: "medium"}}, + {"anthropic /think on", &latest.ModelConfig{Provider: "anthropic", Model: "claude-sonnet-4-0"}, true, &latest.ThinkingBudget{Tokens: 8192}}, + {"gemini-2.5 /think on", &latest.ModelConfig{Provider: "google", Model: "gemini-2.5-flash"}, true, &latest.ThinkingBudget{Tokens: -1}}, + {"gemini-3-pro /think on", &latest.ModelConfig{Provider: "google", Model: "gemini-3-pro"}, true, &latest.ThinkingBudget{Effort: "high"}}, + {"gemini-3-flash /think on", &latest.ModelConfig{Provider: "google", Model: "gemini-3-flash"}, true, &latest.ThinkingBudget{Effort: "medium"}}, + {"bedrock claude /think on", &latest.ModelConfig{Provider: "amazon-bedrock", Model: "anthropic.claude-3-sonnet"}, true, &latest.ThinkingBudget{Tokens: 8192}}, + + // Old Gemini model that doesn't support thinking — /think should be a no-op. + {"gemini-2.0 /think on (no thinking support)", &latest.ModelConfig{Provider: "google", Model: "gemini-2.0-flash"}, true, nil}, + + // Thinking-only model defaults preserved when enabled, cleared when disabled. + {"o3-mini /think on", &latest.ModelConfig{Provider: "openai", Model: "o3-mini"}, true, &latest.ThinkingBudget{Effort: "medium"}}, + {"o3-mini /think off", &latest.ModelConfig{Provider: "openai", Model: "o3-mini"}, false, nil}, + + // Explicit budget cleared by disable. + {"explicit cleared", &latest.ModelConfig{Provider: "anthropic", Model: "claude-sonnet-4-0", ThinkingBudget: &latest.ThinkingBudget{Tokens: 32000}}, false, nil}, + + // Restore from disabled (thinking_budget: 0) via /think on. 
+ {"restore from 0", &latest.ModelConfig{Provider: "openai", Model: "gpt-4o", ThinkingBudget: &latest.ThinkingBudget{Tokens: 0}}, true, &latest.ThinkingBudget{Effort: "medium"}}, + {"restore from none", &latest.ModelConfig{Provider: "anthropic", Model: "claude-sonnet-4-0", ThinkingBudget: &latest.ThinkingBudget{Effort: "none"}}, true, &latest.ThinkingBudget{Tokens: 8192}}, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { t.Parallel() - // Step 1: Apply provider defaults (simulating createDirectProvider flow) - // This now converts disabled thinking (Tokens=0 or Effort="none") to nil result := applyProviderDefaults(tt.config, nil) - // Verify thinking is disabled (nil) after provider defaults - assert.Nil(t, result.ThinkingBudget, - "ThinkingBudget should be nil after applyProviderDefaults when explicitly disabled") + o := options.ModelOptions{} + options.WithThinking(tt.thinking)(&o) + result = applyOverrides(result, &o) - // Step 2: Apply override with thinking explicitly enabled (simulates /think toggle) - mo := options.ModelOptions{} - options.WithThinking(true)(&mo) - result = applyOverrides(result, &mo) - - // Verify defaults were applied - /think enables thinking with provider defaults - require.NotNil(t, result.ThinkingBudget, "ThinkingBudget should be set after /think") - assert.Equal(t, tt.expectThinkingBudget.Tokens, result.ThinkingBudget.Tokens, "Tokens should match default") - assert.Equal(t, tt.expectThinkingBudget.Effort, result.ThinkingBudget.Effort, "Effort should match default") + if tt.wantBudget == nil { + assert.Nil(t, result.ThinkingBudget) + } else { + require.NotNil(t, result.ThinkingBudget) + assert.Equal(t, *tt.wantBudget, *result.ThinkingBudget) + } }) } } -// TestIsThinkingBudgetDisabled tests the helper function. -func TestIsThinkingBudgetDisabled(t *testing.T) { +// TestApplyProviderDefaults_DoesNotModifyOriginal verifies that applyProviderDefaults +// does not mutate the input config's ProviderOpts map. 
+func TestApplyProviderDefaults_DoesNotModifyOriginal(t *testing.T) { t.Parallel() - tests := []struct { - name string - budget *latest.ThinkingBudget - expected bool - }{ - {"nil budget", nil, false}, - {"Tokens=0", &latest.ThinkingBudget{Tokens: 0}, true}, - {"Effort=none", &latest.ThinkingBudget{Effort: "none"}, true}, - {"Tokens=8192", &latest.ThinkingBudget{Tokens: 8192}, false}, - {"Effort=medium", &latest.ThinkingBudget{Effort: "medium"}, false}, - {"Tokens=-1 (dynamic)", &latest.ThinkingBudget{Tokens: -1}, false}, + original := &latest.ModelConfig{ + Provider: "anthropic", + Model: "claude-sonnet-4-0", + ThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, + ProviderOpts: map[string]any{"custom_key": "original_value"}, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - t.Parallel() - assert.Equal(t, tt.expected, tt.budget.IsDisabled()) - }) - } + result := applyProviderDefaults(original, nil) + + // Result should have interleaved_thinking set (because thinking_budget is set). + require.NotNil(t, result.ProviderOpts) + assert.Equal(t, true, result.ProviderOpts["interleaved_thinking"]) + + // Original must NOT have interleaved_thinking added. + _, exists := original.ProviderOpts["interleaved_thinking"] + assert.False(t, exists, "original ProviderOpts must not be mutated by applyProviderDefaults") + + // Original custom key must still be there. 
+ assert.Equal(t, "original_value", original.ProviderOpts["custom_key"]) } diff --git a/pkg/model/provider/provider.go b/pkg/model/provider/provider.go index f43d49c1e..50c1ff00c 100644 --- a/pkg/model/provider/provider.go +++ b/pkg/model/provider/provider.go @@ -228,84 +228,61 @@ func createDirectProvider(ctx context.Context, cfg *latest.ModelConfig, env envi // Apply defaults from custom providers (from config) or built-in aliases enhancedCfg := applyProviderDefaults(cfg, globalOptions.Providers()) - if thinking := globalOptions.Thinking(); thinking != nil && !*thinking { - enhancedCfg.ThinkingBudget = nil - - // with thinking explicitly disabled, also remove the interleaved_thinking provider option - if enhancedCfg.ProviderOpts != nil { - // Copy to avoid mutating shared ProviderOpts in the original config - optsCopy := make(map[string]any, len(enhancedCfg.ProviderOpts)) - maps.Copy(optsCopy, enhancedCfg.ProviderOpts) - delete(optsCopy, "interleaved_thinking") - enhancedCfg.ProviderOpts = optsCopy - } - } - // Apply overrides (e.g., disable thinking if requested by session) + // Apply overrides (e.g., disable/enable thinking via /think command) enhancedCfg = applyOverrides(enhancedCfg, &globalOptions) - // Resolve the provider type with priority: - // 1. cfg.ProviderOpts["api_type"] (from custom provider or model override) - // 2. built-in alias APIType - // 3. provider name itself - providerType := resolveProviderTypeFromConfig(enhancedCfg) + providerType := resolveProviderType(enhancedCfg) switch providerType { case "openai", "openai_chatcompletions", "openai_responses": return openai.NewClient(ctx, enhancedCfg, env, opts...) - case "anthropic": return anthropic.NewClient(ctx, enhancedCfg, env, opts...) - case "google": return gemini.NewClient(ctx, enhancedCfg, env, opts...) - case "dmr": return dmr.NewClient(ctx, enhancedCfg, opts...) - case "amazon-bedrock": return bedrock.NewClient(ctx, enhancedCfg, env, opts...) 
- default: slog.Error("Unknown provider type", "type", providerType) return nil, fmt.Errorf("unknown provider type: %s", providerType) } } -// resolveProviderTypeFromConfig determines the provider type to use based on config. -// Priority: -// 1. cfg.ProviderOpts["api_type"] (from custom provider or model-level override) -// 2. built-in alias APIType (e.g., "mistral" -> "openai") -// 3. provider name itself (e.g., "openai", "anthropic") -func resolveProviderTypeFromConfig(cfg *latest.ModelConfig) string { - // Check for api_type in ProviderOpts (set by custom providers or model override) +// --------------------------------------------------------------------------- +// Provider-type resolution +// --------------------------------------------------------------------------- + +// resolveProviderType determines the effective API type for a config. +// Priority: ProviderOpts["api_type"] > built-in alias > provider name. +func resolveProviderType(cfg *latest.ModelConfig) string { if cfg.ProviderOpts != nil { if apiType, ok := cfg.ProviderOpts["api_type"].(string); ok && apiType != "" { - slog.Debug("Using api_type from provider config", - "provider", cfg.Provider, - "model", cfg.Model, - "api_type", apiType, - "base_url", cfg.BaseURL, - ) return apiType } } - - // Check built-in alias if alias, exists := Aliases[cfg.Provider]; exists && alias.APIType != "" { return alias.APIType } - - // Fall back to provider name return cfg.Provider } +// --------------------------------------------------------------------------- +// Provider defaults +// --------------------------------------------------------------------------- + // applyProviderDefaults applies default configuration from custom providers or built-in aliases. // Custom providers (from config) take precedence over built-in aliases. // This sets default base URLs, token keys, api_type, and model-specific defaults (like thinking budget). 
+// +// The returned config is a deep-enough copy: the caller's ModelConfig, ProviderOpts map, +// and ThinkingBudget pointer are never mutated. func applyProviderDefaults(cfg *latest.ModelConfig, customProviders map[string]latest.ProviderConfig) *latest.ModelConfig { - // Create a copy to avoid modifying the original - enhancedCfg := *cfg + // Create a copy to avoid modifying the original. + // cloneModelConfig also deep-copies ProviderOpts so writes are safe. + enhancedCfg := cloneModelConfig(cfg) if customProviders != nil { if providerCfg, exists := customProviders[cfg.Provider]; exists { @@ -334,8 +311,8 @@ func applyProviderDefaults(cfg *latest.ModelConfig, customProviders map[string]l enhancedCfg.ProviderOpts["api_type"] = apiType } - applyModelDefaults(&enhancedCfg) - return &enhancedCfg + applyModelDefaults(enhancedCfg) + return enhancedCfg } } @@ -352,184 +329,176 @@ func applyProviderDefaults(cfg *latest.ModelConfig, customProviders map[string]l } // Apply model-specific defaults (e.g., thinking budget for Claude/GPT models) - applyModelDefaults(&enhancedCfg) - return &enhancedCfg + applyModelDefaults(enhancedCfg) + return enhancedCfg +} + +// --------------------------------------------------------------------------- +// Thinking defaults and overrides +// --------------------------------------------------------------------------- + +// applyModelDefaults applies provider-specific default values for model configuration. +// +// Thinking defaults policy: +// - thinking_budget: 0 or thinking_budget: none → thinking is off (nil). +// - thinking_budget explicitly set to a real value → kept as-is; interleaved_thinking +// is auto-enabled for Anthropic/Bedrock-Claude. +// - thinking_budget NOT set: +// - Thinking-only models (OpenAI o-series) get "medium". +// - All other models get no thinking. +// +// NOTE: max_tokens is NOT set here; see teamloader and runtime/model_switcher. 
+func applyModelDefaults(cfg *latest.ModelConfig) { + // Explicitly disabled → normalise to nil so providers never see it. + if cfg.ThinkingBudget.IsDisabled() { + cfg.ThinkingBudget = nil + slog.Debug("Thinking explicitly disabled", + "provider", cfg.Provider, "model", cfg.Model) + return + } + + providerType := resolveProviderType(cfg) + + // User already set a real thinking_budget — just apply side-effects. + if cfg.ThinkingBudget != nil { + ensureInterleavedThinking(cfg, providerType) + return + } + + // No thinking_budget configured — only thinking-only models get a default. + switch providerType { + case "openai", "openai_chatcompletions", "openai_responses": + if isOpenAIThinkingOnlyModel(cfg.Model) { + cfg.ThinkingBudget = &latest.ThinkingBudget{Effort: "medium"} + slog.Debug("Applied default thinking for thinking-only OpenAI model", + "provider", cfg.Provider, "model", cfg.Model) + } + } } -// applyOverrides applies session-level or request-level overrides to the configuration. -// This is called AFTER defaults are applied, allowing overrides to clear or modify default values. +// applyOverrides applies session-level overrides to the configuration (e.g. /think toggle). +// When a thinking override is present the result is a clone of cfg; with nil opts or no override, cfg itself is returned. func applyOverrides(cfg *latest.ModelConfig, opts *options.ModelOptions) *latest.ModelConfig { if opts == nil { return cfg } - // Create a copy to avoid modifying the original - enhancedCfg := *cfg - t := opts.Thinking() if t == nil { - return &enhancedCfg + return cfg } - // If thinking is explicitly disabled (e.g., via /think command), clear thinking configuration + enhancedCfg := cloneModelConfig(cfg) + + // /think OFF — clear everything.
if !*t { enhancedCfg.ThinkingBudget = nil - if enhancedCfg.ProviderOpts != nil { - delete(enhancedCfg.ProviderOpts, "interleaved_thinking") - } - slog.Debug("Override: thinking disabled - cleared thinking configuration", - "provider", cfg.Provider, - "model", cfg.Model, - ) - return &enhancedCfg + delete(enhancedCfg.ProviderOpts, "interleaved_thinking") + slog.Debug("Override: thinking disabled", + "provider", cfg.Provider, "model", cfg.Model) + return enhancedCfg } - // If thinking is explicitly enabled (e.g., via /think command), ensure thinking is configured. - // This handles two cases: - // 1. ThinkingBudget is nil (not configured) - apply defaults to enable thinking - // 2. ThinkingBudget is explicitly disabled (Tokens == 0 or Effort == "none") - clear and re-apply defaults - // This allows /think to enable thinking with provider defaults even when config had thinking_budget: 0 + // /think ON — make sure there is a sensible budget. if enhancedCfg.ThinkingBudget == nil || enhancedCfg.ThinkingBudget.IsDisabled() { enhancedCfg.ThinkingBudget = nil - applyModelDefaults(&enhancedCfg) - slog.Debug("Override: thinking enabled - applied default thinking configuration", - "provider", cfg.Provider, - "model", cfg.Model, - "thinking_budget", enhancedCfg.ThinkingBudget, - ) + setThinkingDefaults(enhancedCfg) + slog.Debug("Override: thinking enabled with defaults", + "provider", cfg.Provider, "model", cfg.Model, + "thinking_budget", enhancedCfg.ThinkingBudget) } - return &enhancedCfg + return enhancedCfg } -// applyModelDefaults applies provider-specific default values for model configuration. -// These defaults are applied only if the user hasn't explicitly set the values. -// -// NOTE: max_tokens is NOT set here because: -// 1. Different providers read it differently (ModelConfig vs ModelOptions) -// 2. Runtime can do modelsdev lookups for model-specific limits -// 3. 
Providers have their own fallbacks (e.g., Anthropic defaults to 8192) -// max_tokens defaults are handled in teamloader and runtime/model_switcher via options. -// -// Config-level defaults (set here): -// - OpenAI: thinking_budget = "medium" -// - Anthropic: thinking_budget = 8192, interleaved_thinking = true -// - Google: Gemini 2.5 → thinking_budget = -1 (dynamic), Gemini 3 Pro → "high", Gemini 3 Flash → "medium" -// - Amazon Bedrock (Claude models only): thinking_budget = 8192, interleaved_thinking = true -func applyModelDefaults(cfg *latest.ModelConfig) { - // If thinking is explicitly disabled (thinking_budget: 0 or thinking_budget: none), - // set ThinkingBudget to nil to completely disable thinking. - // This ensures no thinking config is sent to the provider. - if cfg.ThinkingBudget.IsDisabled() { - cfg.ThinkingBudget = nil - slog.Debug("Thinking explicitly disabled via thinking_budget: 0 or none", - "provider", cfg.Provider, - "model", cfg.Model, - ) - return // Don't apply any provider defaults for thinking - } - - // Resolve the actual provider type (handling aliases like mistral -> openai) - providerType := cfg.Provider - if alias, exists := Aliases[cfg.Provider]; exists && alias.APIType != "" { - providerType = alias.APIType - } - // Also check for api_type override in ProviderOpts - if cfg.ProviderOpts != nil { - if apiType, ok := cfg.ProviderOpts["api_type"].(string); ok && apiType != "" { - providerType = apiType - } - } +// setThinkingDefaults assigns a sensible default thinking budget for /think ON. +// Unlike applyModelDefaults this covers every provider family it recognises (not just thinking-only models) +// because the user explicitly asked for thinking; unrecognised providers and models get no budget.
+func setThinkingDefaults(cfg *latest.ModelConfig) { + providerType := resolveProviderType(cfg) switch providerType { case "openai", "openai_chatcompletions", "openai_responses": - applyOpenAIDefaults(cfg) + cfg.ThinkingBudget = &latest.ThinkingBudget{Effort: "medium"} case "anthropic": - applyAnthropicDefaults(cfg) + cfg.ThinkingBudget = &latest.ThinkingBudget{Tokens: 8192} + ensureInterleavedThinking(cfg, providerType) case "google": - applyGoogleDefaults(cfg) + cfg.ThinkingBudget = defaultGoogleThinkingBudget(cfg.Model) case "amazon-bedrock": - applyBedrockDefaults(cfg) + if isBedrockClaudeModel(cfg.Model) { + cfg.ThinkingBudget = &latest.ThinkingBudget{Tokens: 8192} + ensureInterleavedThinking(cfg, providerType) + } } } -// applyOpenAIDefaults applies default configuration for OpenAI models. -func applyOpenAIDefaults(cfg *latest.ModelConfig) { - // Default thinking_budget to "medium" if not set - if cfg.ThinkingBudget == nil { - cfg.ThinkingBudget = &latest.ThinkingBudget{Effort: "medium"} - slog.Debug("Applied default thinking_budget for OpenAI", - "provider", cfg.Provider, - "model", cfg.Model, - "thinking_budget", "medium", - ) +// defaultGoogleThinkingBudget returns a sensible thinking budget for a Google model. +// Returns nil for models that don't have a known thinking capability. +func defaultGoogleThinkingBudget(model string) *latest.ThinkingBudget { + m := strings.ToLower(model) + switch { + case strings.HasPrefix(m, "gemini-2.5-"): + return &latest.ThinkingBudget{Tokens: -1} + case isGeminiProModel(m): + return &latest.ThinkingBudget{Effort: "high"} + case isGeminiFlashModel(m): + return &latest.ThinkingBudget{Effort: "medium"} + default: + // Unknown or older Gemini models (e.g. gemini-2.0-*): don't enable + // thinking since the API may reject it. + return nil } } -// applyAnthropicDefaults applies default configuration for Anthropic models. 
-func applyAnthropicDefaults(cfg *latest.ModelConfig) { - // Default thinking_budget to 8192 tokens if not set - if cfg.ThinkingBudget == nil { - cfg.ThinkingBudget = &latest.ThinkingBudget{Tokens: 8192} - slog.Debug("Applied default thinking_budget for Anthropic", - "provider", cfg.Provider, - "model", cfg.Model, - "thinking_budget", 8192, - ) +// --------------------------------------------------------------------------- +// Shared helpers +// --------------------------------------------------------------------------- + +// cloneModelConfig returns a shallow copy of cfg whose ProviderOpts map is +// itself copied one level deep, so callers can add or delete map keys and +// reassign pointer fields; pointed-to values (e.g. ThinkingBudget) stay shared. +func cloneModelConfig(cfg *latest.ModelConfig) *latest.ModelConfig { + c := *cfg + if cfg.ProviderOpts != nil { + c.ProviderOpts = make(map[string]any, len(cfg.ProviderOpts)) + maps.Copy(c.ProviderOpts, cfg.ProviderOpts) } + return &c +} - // Default interleaved_thinking to true if not set +// ensureInterleavedThinking sets interleaved_thinking=true in ProviderOpts +// for Anthropic and Bedrock-Claude models, unless the user already set it. +func ensureInterleavedThinking(cfg *latest.ModelConfig, providerType string) { + needsInterleaved := providerType == "anthropic" || + (providerType == "amazon-bedrock" && isBedrockClaudeModel(cfg.Model)) + if !needsInterleaved { + return + } if cfg.ProviderOpts == nil { cfg.ProviderOpts = make(map[string]any) } if _, has := cfg.ProviderOpts["interleaved_thinking"]; !has { cfg.ProviderOpts["interleaved_thinking"] = true - slog.Debug("Applied default interleaved_thinking for Anthropic", - "provider", cfg.Provider, - "model", cfg.Model, - "interleaved_thinking", true, - ) + slog.Debug("Auto-enabled interleaved_thinking", + "provider", cfg.Provider, "model", cfg.Model) } } -// applyGoogleDefaults applies default configuration for Google Gemini models.
-// - Gemini 2.5 models: thinking_budget = -1 (dynamic thinking) -// - Gemini 3+ Pro models: thinking_budget effort = "high" -// - Gemini 3+ Flash models: thinking_budget effort = "medium" -func applyGoogleDefaults(cfg *latest.ModelConfig) { - if cfg.ThinkingBudget != nil { - return // User explicitly set thinking_budget - } - - model := strings.ToLower(cfg.Model) +// isOpenAIThinkingOnlyModel returns true for OpenAI models that require thinking +// to function properly (o-series reasoning models). +func isOpenAIThinkingOnlyModel(model string) bool { + m := strings.ToLower(model) + return strings.HasPrefix(m, "o1") || + strings.HasPrefix(m, "o3") || + strings.HasPrefix(m, "o4") +} - switch { - case strings.HasPrefix(model, "gemini-2.5-"): - // Gemini 2.5 models use token-based thinking budget (-1 = dynamic) - cfg.ThinkingBudget = &latest.ThinkingBudget{Tokens: -1} - slog.Debug("Applied default thinking_budget for Google Gemini 2.5", - "provider", cfg.Provider, - "model", cfg.Model, - "thinking_budget", -1, - ) - case isGeminiProModel(model): - // Gemini 3+ Pro models use level-based thinking (high) - cfg.ThinkingBudget = &latest.ThinkingBudget{Effort: "high"} - slog.Debug("Applied default thinking_budget for Google Gemini 3+ Pro", - "provider", cfg.Provider, - "model", cfg.Model, - "thinking_budget", "high", - ) - case isGeminiFlashModel(model): - // Gemini 3+ Flash models use level-based thinking (medium) - cfg.ThinkingBudget = &latest.ThinkingBudget{Effort: "medium"} - slog.Debug("Applied default thinking_budget for Google Gemini 3+ Flash", - "provider", cfg.Provider, - "model", cfg.Model, - "thinking_budget", "medium", - ) - } - // For other Gemini models (e.g., gemini-2.0-*), leave unchanged +// isBedrockClaudeModel returns true if the model ID is a Claude model on Bedrock. +// Claude model IDs on Bedrock start with "anthropic.claude-" or "global.anthropic.claude-". 
+func isBedrockClaudeModel(model string) bool { + m := strings.ToLower(model) + return strings.HasPrefix(m, "anthropic.claude-") || strings.HasPrefix(m, "global.anthropic.claude-") } // gemini3Family extracts the model family (e.g. "pro", "flash") from a @@ -568,42 +537,3 @@ func isGeminiProModel(model string) bool { func isGeminiFlashModel(model string) bool { return strings.HasPrefix(gemini3Family(model), "flash") } - -// applyBedrockDefaults applies default configuration for Amazon Bedrock models. -// Only applies to Claude models (anthropic.claude-* or global.anthropic.claude-*). -func applyBedrockDefaults(cfg *latest.ModelConfig) { - // Only apply defaults for Claude models on Bedrock - if !isBedrockClaudeModel(cfg.Model) { - return - } - - // Default thinking_budget to 8192 tokens if not set - if cfg.ThinkingBudget == nil { - cfg.ThinkingBudget = &latest.ThinkingBudget{Tokens: 8192} - slog.Debug("Applied default thinking_budget for Bedrock Claude", - "provider", cfg.Provider, - "model", cfg.Model, - "thinking_budget", 8192, - ) - } - - // Default interleaved_thinking to true if not set - if cfg.ProviderOpts == nil { - cfg.ProviderOpts = make(map[string]any) - } - if _, has := cfg.ProviderOpts["interleaved_thinking"]; !has { - cfg.ProviderOpts["interleaved_thinking"] = true - slog.Debug("Applied default interleaved_thinking for Bedrock Claude", - "provider", cfg.Provider, - "model", cfg.Model, - "interleaved_thinking", true, - ) - } -} - -// isBedrockClaudeModel returns true if the model ID is a Claude model on Bedrock. -// Claude model IDs on Bedrock start with "anthropic.claude-" or "global.anthropic.claude-". -func isBedrockClaudeModel(model string) bool { - m := strings.ToLower(model) - return strings.HasPrefix(m, "anthropic.claude-") || strings.HasPrefix(m, "global.anthropic.claude-") -}