From 0b7d92e280f2ff798177a24d482a7f3062df1a67 Mon Sep 17 00:00:00 2001 From: David Gageot Date: Mon, 16 Mar 2026 11:59:48 +0100 Subject: [PATCH] Rework thinking budget: opt-in by default, adaptive thinking, effort levels MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thinking was unconditionally enabled for all models with provider-specific defaults (e.g. 'medium' for OpenAI, 8192 tokens for Anthropic). This meant every model paid the latency and cost of thinking even when the user never asked for it. This commit makes thinking opt-in: it is only enabled when the user sets thinking_budget in their YAML config, with one exception — thinking-only models (OpenAI o-series) still get a default of 'medium' since they cannot function without it. New features: - Adaptive thinking for Anthropic (thinking_budget: adaptive). Uses thinking.type=adaptive which lets the model decide when and how much to think. Recommended for Claude 4.6 models. - Effort-level strings for Anthropic (thinking_budget: low/medium/high/max). Translated to adaptive thinking + output_config.effort in the API. Previously these strings were silently ignored because the Anthropic client only checked for token-based budgets. - Effort-level strings for Bedrock Claude. Mapped to token budgets via EffortTokens() since the Bedrock API does not support adaptive thinking natively. Bug fixes: - Anthropic/Bedrock clients silently ignored string effort levels (minimal/low/medium/high). A config with thinking_budget: high produced no thinking at all because the code only checked .Tokens > 0. - applyOverrides and applyProviderDefaults used shallow struct copies that shared the underlying ProviderOpts map. Disabling thinking via /think deleted interleaved_thinking from the original config's map. Introduced cloneModelConfig() to deep-copy the map. - /think on a Gemini 2.0 model (which does not support thinking) returned a 'medium' budget that caused API errors. 
The default case now returns nil for unknown/older Gemini models. Code quality: - Extracted resolveProviderType() to replace three copies of the same provider-type resolution logic. - Extracted ensureInterleavedThinking() to replace four copies of the same ProviderOpts write pattern. - Separated setThinkingDefaults (used by /think toggle, generous) from applyModelDefaults (used at config load, conservative). - Removed empty applyGoogleDefaults, merged applyAnthropicDefaults and applyBedrockDefaults into shared helpers. - Consolidated test files from 8+ test functions into compact table-driven tests with a unified assertion pattern. - Moved ThinkingBudget method tests (IsDisabled, IsAdaptive, EffortTokens) to pkg/config/latest where the type lives. Schema and examples updated to document adaptive, max, and effort levels. Assisted-By: docker-agent --- agent-schema.json | 10 +- examples/thinking_budget.yaml | 16 +- pkg/config/latest/types.go | 44 +- pkg/config/latest/types_test.go | 71 ++ pkg/model/provider/anthropic/beta_client.go | 44 +- pkg/model/provider/anthropic/client.go | 56 +- pkg/model/provider/bedrock/client.go | 26 +- pkg/model/provider/custom_provider_test.go | 2 +- pkg/model/provider/model_defaults_test.go | 838 ++++---------------- pkg/model/provider/override_test.go | 568 ++++--------- pkg/model/provider/provider.go | 364 ++++----- 11 files changed, 688 insertions(+), 1351 deletions(-) diff --git a/agent-schema.json b/agent-schema.json index b106a8509..2e1d76307 100644 --- a/agent-schema.json +++ b/agent-schema.json @@ -535,7 +535,7 @@ "description": "Whether to track usage" }, "thinking_budget": { - "description": "Controls reasoning effort/budget. Use 'none' or 0 to disable thinking. OpenAI: string levels ('minimal','low','medium','high'), default 'medium'. Anthropic: integer token budget (1024-32768), default 8192. Amazon Bedrock (Claude): same as Anthropic. 
Google Gemini 2.5: integer token budget (-1 for dynamic, 0 to disable, 24576 max), default -1. Google Gemini 3: string levels ('minimal' Flash only,'low','medium','high'), default 'high' for Pro, 'medium' for Flash.", + "description": "Controls reasoning effort/budget. Use 'none' or 0 to disable thinking. OpenAI: string levels ('minimal','low','medium','high'). Anthropic: integer token budget (1024-32768), 'adaptive' (lets the model decide), or effort levels ('low','medium','high','max') which use adaptive thinking with the given effort. Amazon Bedrock (Claude): integer token budget or effort levels ('minimal','low','medium','high') mapped to token budgets. Google Gemini 2.5: integer token budget (-1 for dynamic, 0 to disable, 24576 max). Google Gemini 3: string levels ('minimal' Flash only,'low','medium','high'). Thinking is only enabled when explicitly configured, except for thinking-only models (OpenAI o-series), which default to 'medium'.", "oneOf": [ { "type": "string", @@ -544,9 +544,11 @@ "minimal", "low", "medium", - "high" + "high", + "max", + "adaptive" ], - "description": "Reasoning effort level (OpenAI, Gemini 3). Use 'none' to disable thinking." + "description": "Reasoning effort level. 'adaptive'/'max' are Anthropic-specific. Use 'none' to disable thinking." 
}, { "type": "integer", @@ -562,6 +564,8 @@ "low", "medium", "high", + "max", + "adaptive", -1, 1024, 8192, diff --git a/examples/thinking_budget.yaml b/examples/thinking_budget.yaml index 80e83b3b4..8c906e7c8 100644 --- a/examples/thinking_budget.yaml +++ b/examples/thinking_budget.yaml @@ -6,7 +6,7 @@ agents: root: model: gpt-5-mini-min # <- try with gpt-5-mini-high - # model: claude-4-5-sonnet-min # <- try with claude-4-5-sonnet-high + # model: claude-4-5-sonnet-min # <- try with claude-4-5-sonnet-high or claude-opus-4-6-adaptive # model: gemini-2-5-flash-dynamic-thinking # <- try with -no-thinking, -low or -high variants description: a helpful assistant that thinks instruction: you are a helpful assistant who can also use tools, but only if you need to @@ -29,15 +29,25 @@ models: claude-4-5-sonnet-min: provider: anthropic model: claude-sonnet-4-5-20250929 - thinking_budget: 1024 # <- tokens, 1024 is the minimum + thinking_budget: 1024 # <- explicit token budget (1024-32768) for older models claude-4-5-sonnet-high: provider: anthropic model: claude-sonnet-4-5-20250929 - thinking_budget: 32768 # <- tokens, 32768 is the Anthropic suggested maximum without batching + thinking_budget: 32768 # <- explicit token budget (32768 is the Anthropic suggested maximum) provider_opts: interleaved_thinking: true # <- enables interleaved thinking, aka tool calling during model reasoning + claude-opus-4-6-adaptive: + provider: anthropic + model: claude-opus-4-6 + thinking_budget: adaptive # <- lets the model decide when and how much to think (recommended for 4.6) + + claude-opus-4-6-low: + provider: anthropic + model: claude-opus-4-6 + thinking_budget: low # <- adaptive thinking with low effort: "low", "medium", "high", "max" + gemini-2-5-flash-dynamic-thinking: provider: google model: gemini-2.5-flash diff --git a/pkg/config/latest/types.go b/pkg/config/latest/types.go index e3ff0dc6c..1c8f3558c 100644 --- a/pkg/config/latest/types.go +++ b/pkg/config/latest/types.go @@ -397,7 
+397,10 @@ type ModelConfig struct { TrackUsage *bool `json:"track_usage,omitempty"` // ThinkingBudget controls reasoning effort/budget: // - For OpenAI: accepts string levels "minimal", "low", "medium", "high" - // - For Anthropic: accepts integer token budget (1024-32000) + // - For Anthropic: accepts integer token budget (1024-32768), "adaptive", + // or string levels "low", "medium", "high", "max" (uses adaptive thinking with effort) + // - For Bedrock Claude: accepts integer token budget or string levels + // "minimal", "low", "medium", "high" (mapped to token budgets via EffortTokens) // - For other providers: may be ignored ThinkingBudget *ThinkingBudget `json:"thinking_budget,omitempty"` // Routing defines rules for routing requests to different models. @@ -670,6 +673,7 @@ func (d DeferConfig) MarshalYAML() (any, error) { // ThinkingBudget represents reasoning budget configuration. // It accepts either a string effort level or an integer token budget: // - String: "minimal", "low", "medium", "high" (for OpenAI) +// - String: "adaptive" (for Anthropic models that support adaptive thinking) // - Integer: token count (for Anthropic, range 1024-32768) type ThinkingBudget struct { // Effort stores string-based reasoning effort levels @@ -717,6 +721,7 @@ func (t ThinkingBudget) MarshalYAML() (any, error) { // NOT disabled when: // - Tokens > 0 or Tokens == -1 (explicit token budget) // - Effort is a real level like "medium" or "high" +// - Effort is "adaptive" func (t *ThinkingBudget) IsDisabled() bool { if t == nil { return false @@ -724,7 +729,42 @@ func (t *ThinkingBudget) IsDisabled() bool { if t.Tokens == 0 && t.Effort == "" { return true } - return t.Effort == "none" + return strings.EqualFold(t.Effort, "none") +} + +// IsAdaptive returns true if the thinking budget is set to adaptive mode. +// Adaptive thinking lets the model decide how much thinking to do. 
+func (t *ThinkingBudget) IsAdaptive() bool { + if t == nil { + return false + } + return strings.EqualFold(t.Effort, "adaptive") +} + +// EffortTokens maps a string effort level to a token budget for providers +// that only support token-based thinking (e.g. Bedrock Claude). +// +// The Anthropic direct API uses adaptive thinking + output_config.effort +// for string levels instead; see anthropicEffort in the anthropic package. +// +// Returns (tokens, true) when a mapping exists, or (0, false) when +// the budget uses an explicit token count or an unrecognised effort string. +func (t *ThinkingBudget) EffortTokens() (int, bool) { + if t == nil || t.Effort == "" { + return 0, false + } + switch strings.ToLower(strings.TrimSpace(t.Effort)) { + case "minimal": + return 1024, true + case "low": + return 2048, true + case "medium": + return 8192, true + case "high": + return 16384, true + default: + return 0, false + } } // MarshalJSON implements custom marshaling to output simple string or int format diff --git a/pkg/config/latest/types_test.go b/pkg/config/latest/types_test.go index e503d3cbb..6aede16cb 100644 --- a/pkg/config/latest/types_test.go +++ b/pkg/config/latest/types_test.go @@ -121,6 +121,77 @@ func TestThinkingBudget_MarshalUnmarshal_Zero(t *testing.T) { require.Equal(t, "thinking_budget: 0\n", string(output)) } +func TestThinkingBudget_IsDisabled(t *testing.T) { + t.Parallel() + + for _, tt := range []struct { + name string + b *ThinkingBudget + want bool + }{ + {"nil", nil, false}, + {"zero tokens", &ThinkingBudget{Tokens: 0}, true}, + {"none effort", &ThinkingBudget{Effort: "none"}, true}, + {"positive tokens", &ThinkingBudget{Tokens: 8192}, false}, + {"medium effort", &ThinkingBudget{Effort: "medium"}, false}, + {"adaptive effort", &ThinkingBudget{Effort: "adaptive"}, false}, + {"negative tokens (dynamic)", &ThinkingBudget{Tokens: -1}, false}, + } { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + require.Equal(t, tt.want, tt.b.IsDisabled()) + 
}) + } +} + +func TestThinkingBudget_IsAdaptive(t *testing.T) { + t.Parallel() + + for _, tt := range []struct { + name string + b *ThinkingBudget + want bool + }{ + {"nil", nil, false}, + {"adaptive", &ThinkingBudget{Effort: "adaptive"}, true}, + {"medium", &ThinkingBudget{Effort: "medium"}, false}, + {"tokens", &ThinkingBudget{Tokens: 8192}, false}, + } { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + require.Equal(t, tt.want, tt.b.IsAdaptive()) + }) + } +} + +func TestThinkingBudget_EffortTokens(t *testing.T) { + t.Parallel() + + for _, tt := range []struct { + name string + b *ThinkingBudget + wantTokens int + wantOK bool + }{ + {"nil", nil, 0, false}, + {"minimal", &ThinkingBudget{Effort: "minimal"}, 1024, true}, + {"low", &ThinkingBudget{Effort: "low"}, 2048, true}, + {"medium", &ThinkingBudget{Effort: "medium"}, 8192, true}, + {"high", &ThinkingBudget{Effort: "high"}, 16384, true}, + {"adaptive", &ThinkingBudget{Effort: "adaptive"}, 0, false}, + {"none", &ThinkingBudget{Effort: "none"}, 0, false}, + {"explicit tokens", &ThinkingBudget{Tokens: 4096}, 0, false}, + {"empty effort", &ThinkingBudget{}, 0, false}, + } { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + tokens, ok := tt.b.EffortTokens() + require.Equal(t, tt.wantOK, ok) + require.Equal(t, tt.wantTokens, tokens) + }) + } +} + func TestAgents_UnmarshalYAML_RejectsUnknownFields(t *testing.T) { t.Parallel() diff --git a/pkg/model/provider/anthropic/beta_client.go b/pkg/model/provider/anthropic/beta_client.go index 55b5aa274..ae9286ec9 100644 --- a/pkg/model/provider/anthropic/beta_client.go +++ b/pkg/model/provider/anthropic/beta_client.go @@ -95,20 +95,38 @@ func (c *Client) createBetaStream( // For interleaved thinking to make sense, we use a default of 16384 tokens for the thinking budget thinkingEnabled := c.ModelOptions.Thinking() == nil || *c.ModelOptions.Thinking() if thinkingEnabled { - thinkingTokens := int64(16384) - if c.ModelConfig.ThinkingBudget != nil { - thinkingTokens = 
int64(c.ModelConfig.ThinkingBudget.Tokens) + if c.ModelConfig.ThinkingBudget != nil && c.ModelConfig.ThinkingBudget.IsAdaptive() { + // Adaptive thinking: let the model decide how much thinking to do + adaptive := anthropic.NewBetaThinkingConfigAdaptiveParam() + params.Thinking = anthropic.BetaThinkingConfigParamUnion{ + OfAdaptive: &adaptive, + } + slog.Debug("Anthropic Beta API using adaptive thinking") + } else if effort, ok := anthropicEffort(c.ModelConfig.ThinkingBudget); ok { + // Effort level: use adaptive thinking + output_config.effort + adaptive := anthropic.NewBetaThinkingConfigAdaptiveParam() + params.Thinking = anthropic.BetaThinkingConfigParamUnion{ + OfAdaptive: &adaptive, + } + params.OutputConfig.Effort = anthropic.BetaOutputConfigEffort(effort) + slog.Debug("Anthropic Beta API using adaptive thinking with effort", + "effort", effort) } else { - slog.Info("Anthropic Beta API using default thinking_budget with interleaved thinking", "budget_tokens", thinkingTokens) - } - switch { - case thinkingTokens >= 1024 && thinkingTokens < maxTokens: - params.Thinking = anthropic.BetaThinkingConfigParamOfEnabled(thinkingTokens) - slog.Debug("Anthropic Beta API using thinking_budget with interleaved thinking", "budget_tokens", thinkingTokens) - case thinkingTokens >= maxTokens: - slog.Warn("Anthropic Beta API thinking_budget must be less than max_tokens, ignoring", "tokens", thinkingTokens, "max_tokens", maxTokens) - default: - slog.Warn("Anthropic Beta API thinking_budget below minimum (1024), ignoring", "tokens", thinkingTokens) + thinkingTokens := int64(16384) + if c.ModelConfig.ThinkingBudget != nil { + thinkingTokens = int64(c.ModelConfig.ThinkingBudget.Tokens) + } else { + slog.Info("Anthropic Beta API using default thinking_budget with interleaved thinking", "budget_tokens", thinkingTokens) + } + switch { + case thinkingTokens >= 1024 && thinkingTokens < maxTokens: + params.Thinking = anthropic.BetaThinkingConfigParamOfEnabled(thinkingTokens) + 
slog.Debug("Anthropic Beta API using thinking_budget with interleaved thinking", "budget_tokens", thinkingTokens) + case thinkingTokens >= maxTokens: + slog.Warn("Anthropic Beta API thinking_budget must be less than max_tokens, ignoring", "tokens", thinkingTokens, "max_tokens", maxTokens) + default: + slog.Warn("Anthropic Beta API thinking_budget below minimum (1024), ignoring", "tokens", thinkingTokens) + } } } else { slog.Debug("Anthropic Beta API: Thinking disabled via /think command") diff --git a/pkg/model/provider/anthropic/client.go b/pkg/model/provider/anthropic/client.go index 10e05b701..8678d8437 100644 --- a/pkg/model/provider/anthropic/client.go +++ b/pkg/model/provider/anthropic/client.go @@ -50,12 +50,23 @@ func (c *Client) getResponseTrailer() http.Header { // adjustMaxTokensForThinking checks if max_tokens needs adjustment for thinking_budget. // Anthropic's max_tokens represents the combined budget for thinking + output tokens. // Returns the adjusted maxTokens value and an error if user-set max_tokens is too low. +// +// This only applies to fixed token budgets. Adaptive thinking and effort-based +// budgets don't need adjustment since the model manages its own thinking allocation. func (c *Client) adjustMaxTokensForThinking(maxTokens int64) (int64, error) { - if c.ModelConfig.ThinkingBudget == nil || c.ModelConfig.ThinkingBudget.Tokens <= 0 { + if c.ModelConfig.ThinkingBudget == nil || c.ModelConfig.ThinkingBudget.IsAdaptive() { + return maxTokens, nil + } + // Effort-based budgets use adaptive thinking — no token adjustment needed. 
+ if _, ok := anthropicEffort(c.ModelConfig.ThinkingBudget); ok { return maxTokens, nil } thinkingTokens := int64(c.ModelConfig.ThinkingBudget.Tokens) + if thinkingTokens <= 0 { + return maxTokens, nil + } + minRequired := thinkingTokens + 1024 // configured thinking budget + minimum output buffer if maxTokens <= thinkingTokens { @@ -297,7 +308,25 @@ func (c *Client) CreateChatCompletionStream( // Apply thinking budget first, as it affects whether we can set temperature thinkingEnabled := false - if c.ModelConfig.ThinkingBudget != nil && c.ModelConfig.ThinkingBudget.Tokens > 0 { + if c.ModelConfig.ThinkingBudget != nil && c.ModelConfig.ThinkingBudget.IsAdaptive() { + // Adaptive thinking: let the model decide how much thinking to do + adaptive := anthropic.NewThinkingConfigAdaptiveParam() + params.Thinking = anthropic.ThinkingConfigParamUnion{ + OfAdaptive: &adaptive, + } + thinkingEnabled = true + slog.Debug("Anthropic API using adaptive thinking (standard messages)") + } else if effort, ok := anthropicEffort(c.ModelConfig.ThinkingBudget); ok { + // Effort level: use adaptive thinking + output_config.effort + adaptive := anthropic.NewThinkingConfigAdaptiveParam() + params.Thinking = anthropic.ThinkingConfigParamUnion{ + OfAdaptive: &adaptive, + } + params.OutputConfig.Effort = anthropic.OutputConfigEffort(effort) + thinkingEnabled = true + slog.Debug("Anthropic API using adaptive thinking with effort", + "effort", effort) + } else if c.ModelConfig.ThinkingBudget != nil && c.ModelConfig.ThinkingBudget.Tokens > 0 { thinkingTokens := int64(c.ModelConfig.ThinkingBudget.Tokens) switch { case thinkingTokens >= 1024 && thinkingTokens < maxTokens: @@ -895,6 +924,29 @@ func differenceIDs(a, b map[string]struct{}) []string { return missing } +// anthropicEffort maps a ThinkingBudget effort string to an Anthropic API +// effort level ("low", "medium", "high", "max"). Returns ("", false) when +// the budget uses token counts, adaptive mode, or an unrecognised string. 
+func anthropicEffort(b *latest.ThinkingBudget) (string, bool) { + if b == nil { + return "", false + } + switch strings.ToLower(strings.TrimSpace(b.Effort)) { + case "low": + return "low", true + case "minimal": // "minimal" is not in the Anthropic API; map to closest + return "low", true + case "medium": + return "medium", true + case "high": + return "high", true + case "max": + return "max", true + default: + return "", false + } +} + // anthropicContextLimit returns a reasonable default context window for Anthropic models. // We default to 200k tokens, which is what 3.5-4.5 models support; adjust as needed over time. func anthropicContextLimit(model string) int64 { diff --git a/pkg/model/provider/bedrock/client.go b/pkg/model/provider/bedrock/client.go index 85e815372..135659049 100644 --- a/pkg/model/provider/bedrock/client.go +++ b/pkg/model/provider/bedrock/client.go @@ -275,16 +275,23 @@ func (c *Client) buildInferenceConfig() *types.InferenceConfiguration { return cfg } +// resolveThinkingTokens returns the effective token budget for thinking. +// It handles both explicit token counts and effort-level strings. +// Returns 0 if no valid thinking budget is configured. +func (c *Client) resolveThinkingTokens() int { + if c.ModelConfig.ThinkingBudget == nil { + return 0 + } + if tokens, ok := c.ModelConfig.ThinkingBudget.EffortTokens(); ok { + return tokens + } + return c.ModelConfig.ThinkingBudget.Tokens +} + // isThinkingEnabled mirrors the validation in buildAdditionalModelRequestFields // to determine if thinking params will affect inference config (temp/topP constraints). 
func (c *Client) isThinkingEnabled() bool { - if c.ModelConfig.ThinkingBudget == nil || c.ModelConfig.ThinkingBudget.Tokens <= 0 { - return false - } - - tokens := c.ModelConfig.ThinkingBudget.Tokens - - // Check minimum (Claude requires at least 1024 tokens for thinking) + tokens := c.resolveThinkingTokens() if tokens < 1024 { return false } @@ -310,12 +317,11 @@ func (c *Client) promptCachingEnabled() bool { // buildAdditionalModelRequestFields configures Claude's extended thinking (reasoning) mode. func (c *Client) buildAdditionalModelRequestFields() document.Interface { - if c.ModelConfig.ThinkingBudget == nil || c.ModelConfig.ThinkingBudget.Tokens <= 0 { + tokens := c.resolveThinkingTokens() + if tokens <= 0 { return nil } - tokens := c.ModelConfig.ThinkingBudget.Tokens - // Validate minimum (Claude requires at least 1024 tokens for thinking) if tokens < 1024 { slog.Warn("Bedrock thinking_budget below minimum (1024), ignoring", diff --git a/pkg/model/provider/custom_provider_test.go b/pkg/model/provider/custom_provider_test.go index cc8b9aa8c..5aa276cab 100644 --- a/pkg/model/provider/custom_provider_test.go +++ b/pkg/model/provider/custom_provider_test.go @@ -472,7 +472,7 @@ func TestResolveProviderTypeFromConfig(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { t.Parallel() - assert.Equal(t, tt.expected, resolveProviderTypeFromConfig(tt.config)) + assert.Equal(t, tt.expected, resolveProviderType(tt.config)) }) } } diff --git a/pkg/model/provider/model_defaults_test.go b/pkg/model/provider/model_defaults_test.go index a1524b586..f55f0bf2e 100644 --- a/pkg/model/provider/model_defaults_test.go +++ b/pkg/model/provider/model_defaults_test.go @@ -1,7 +1,6 @@ package provider import ( - "maps" "testing" "github.com/stretchr/testify/assert" @@ -10,452 +9,143 @@ import ( "github.com/docker/docker-agent/pkg/config/latest" ) -// TestApplyModelDefaults_OpenAI tests that OpenAI models get the correct default thinking_budget. 
-func TestApplyModelDefaults_OpenAI(t *testing.T) { +func TestApplyModelDefaults(t *testing.T) { t.Parallel() + boolPtr := func(v bool) *bool { return &v } + tests := []struct { - name string - config *latest.ModelConfig - expectThinkingBudget *latest.ThinkingBudget - expectProviderOptsKeys []string + name string + config *latest.ModelConfig + wantBudget *latest.ThinkingBudget // nil means no thinking + wantInterleaved *bool // nil means key must not exist }{ + // --- OpenAI: only o-series gets defaults --- { - name: "openai provider gets medium thinking_budget default", - config: &latest.ModelConfig{ - Provider: "openai", - Model: "gpt-4o", - }, - expectThinkingBudget: &latest.ThinkingBudget{Effort: "medium"}, + name: "openai/gpt-4o: no default thinking", + config: &latest.ModelConfig{Provider: "openai", Model: "gpt-4o"}, }, { - name: "openai_chatcompletions api_type gets medium thinking_budget default", - config: &latest.ModelConfig{ - Provider: "custom", - Model: "custom-model", - ProviderOpts: map[string]any{"api_type": "openai_chatcompletions"}, - }, - expectThinkingBudget: &latest.ThinkingBudget{Effort: "medium"}, + name: "openai/gpt-5: no default thinking", + config: &latest.ModelConfig{Provider: "openai", Model: "gpt-5"}, }, { - name: "openai_responses api_type gets medium thinking_budget default", - config: &latest.ModelConfig{ - Provider: "custom", - Model: "custom-model", - ProviderOpts: map[string]any{"api_type": "openai_responses"}, - }, - expectThinkingBudget: &latest.ThinkingBudget{Effort: "medium"}, + name: "openai/o3-mini: thinking-only model gets default", + config: &latest.ModelConfig{Provider: "openai", Model: "o3-mini"}, + wantBudget: &latest.ThinkingBudget{Effort: "medium"}, }, { - name: "mistral alias (openai) gets medium thinking_budget default", - config: &latest.ModelConfig{ - Provider: "mistral", - Model: "mistral-large-latest", - }, - expectThinkingBudget: &latest.ThinkingBudget{Effort: "medium"}, + name: "openai/o1: thinking-only model 
gets default", + config: &latest.ModelConfig{Provider: "openai", Model: "o1"}, + wantBudget: &latest.ThinkingBudget{Effort: "medium"}, }, { - name: "xai alias (openai) gets medium thinking_budget default", - config: &latest.ModelConfig{ - Provider: "xai", - Model: "grok-2", - }, - expectThinkingBudget: &latest.ThinkingBudget{Effort: "medium"}, + name: "openai/o4-mini: thinking-only model gets default", + config: &latest.ModelConfig{Provider: "openai", Model: "o4-mini"}, + wantBudget: &latest.ThinkingBudget{Effort: "medium"}, }, { - name: "explicit thinking_budget is preserved", - config: &latest.ModelConfig{ - Provider: "openai", - Model: "gpt-4o", - ThinkingBudget: &latest.ThinkingBudget{Effort: "high"}, - }, - expectThinkingBudget: &latest.ThinkingBudget{Effort: "high"}, + name: "openai/o3-mini: explicit budget overrides default", + config: &latest.ModelConfig{Provider: "openai", Model: "o3-mini", ThinkingBudget: &latest.ThinkingBudget{Effort: "high"}}, + wantBudget: &latest.ThinkingBudget{Effort: "high"}, }, { - name: "explicit thinking_budget with tokens is preserved", - config: &latest.ModelConfig{ - Provider: "openai", - Model: "gpt-4o", - ThinkingBudget: &latest.ThinkingBudget{Tokens: 5000}, - }, - expectThinkingBudget: &latest.ThinkingBudget{Tokens: 5000}, + name: "openai/gpt-4o: explicit budget preserved", + config: &latest.ModelConfig{Provider: "openai", Model: "gpt-4o", ThinkingBudget: &latest.ThinkingBudget{Effort: "high"}}, + wantBudget: &latest.ThinkingBudget{Effort: "high"}, }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - t.Parallel() - - // Apply defaults - applyModelDefaults(tt.config) - - // Verify thinking budget - require.NotNil(t, tt.config.ThinkingBudget, "ThinkingBudget should be set") - assert.Equal(t, tt.expectThinkingBudget.Effort, tt.config.ThinkingBudget.Effort, "Effort should match") - assert.Equal(t, tt.expectThinkingBudget.Tokens, tt.config.ThinkingBudget.Tokens, "Tokens should match") - }) - } -} - -// 
TestApplyModelDefaults_Anthropic tests that Anthropic models get the correct defaults. -func TestApplyModelDefaults_Anthropic(t *testing.T) { - t.Parallel() - tests := []struct { - name string - config *latest.ModelConfig - expectThinkingBudget *latest.ThinkingBudget - expectInterleavedThinking bool - expectExplicitInterleaved bool // true if we expect an explicit value in ProviderOpts - }{ - { - name: "anthropic provider gets 8192 thinking_budget default", - config: &latest.ModelConfig{ - Provider: "anthropic", - Model: "claude-sonnet-4-0", - }, - expectThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, - expectInterleavedThinking: true, - expectExplicitInterleaved: true, - }, - { - name: "anthropic provider with no initial ProviderOpts", - config: &latest.ModelConfig{ - Provider: "anthropic", - Model: "claude-opus-4-0", - }, - expectThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, - expectInterleavedThinking: true, - expectExplicitInterleaved: true, - }, + // --- Aliases (resolve to openai) — no default thinking --- { - name: "explicit thinking_budget is preserved", - config: &latest.ModelConfig{ - Provider: "anthropic", - Model: "claude-sonnet-4-0", - ThinkingBudget: &latest.ThinkingBudget{Tokens: 16384}, - }, - expectThinkingBudget: &latest.ThinkingBudget{Tokens: 16384}, - expectInterleavedThinking: true, - expectExplicitInterleaved: true, + name: "mistral: no default thinking", + config: &latest.ModelConfig{Provider: "mistral", Model: "mistral-large-latest"}, }, { - name: "explicit interleaved_thinking false is preserved", - config: &latest.ModelConfig{ - Provider: "anthropic", - Model: "claude-sonnet-4-0", - ProviderOpts: map[string]any{"interleaved_thinking": false}, - }, - expectThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, - expectInterleavedThinking: false, - expectExplicitInterleaved: true, + name: "xai: no default thinking", + config: &latest.ModelConfig{Provider: "xai", Model: "grok-2"}, }, { - name: "explicit interleaved_thinking true 
is preserved", - config: &latest.ModelConfig{ - Provider: "anthropic", - Model: "claude-sonnet-4-0", - ProviderOpts: map[string]any{"interleaved_thinking": true}, - }, - expectThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, - expectInterleavedThinking: true, - expectExplicitInterleaved: true, - }, - { - name: "existing ProviderOpts are preserved", - config: &latest.ModelConfig{ - Provider: "anthropic", - Model: "claude-sonnet-4-0", - ProviderOpts: map[string]any{"some_other_option": "value"}, - }, - expectThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, - expectInterleavedThinking: true, - expectExplicitInterleaved: true, + name: "custom openai_chatcompletions: no default thinking", + config: &latest.ModelConfig{Provider: "custom", Model: "custom-model", ProviderOpts: map[string]any{"api_type": "openai_chatcompletions"}}, }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - t.Parallel() - - // Save original ProviderOpts keys to check preservation - originalOpts := make(map[string]any) - if tt.config.ProviderOpts != nil { - maps.Copy(originalOpts, tt.config.ProviderOpts) - } - // Apply defaults - applyModelDefaults(tt.config) - - // Verify thinking budget - require.NotNil(t, tt.config.ThinkingBudget, "ThinkingBudget should be set") - assert.Equal(t, tt.expectThinkingBudget.Tokens, tt.config.ThinkingBudget.Tokens, "Tokens should match") - - // Verify interleaved_thinking - if tt.expectExplicitInterleaved { - require.NotNil(t, tt.config.ProviderOpts, "ProviderOpts should be set") - val, exists := tt.config.ProviderOpts["interleaved_thinking"] - require.True(t, exists, "interleaved_thinking should be set in ProviderOpts") - assert.Equal(t, tt.expectInterleavedThinking, val, "interleaved_thinking should match expected value") - } - - // Verify original ProviderOpts are preserved - for k, v := range originalOpts { - if k != "interleaved_thinking" { - assert.Equal(t, v, tt.config.ProviderOpts[k], "original ProviderOpts key %s should be 
preserved", k) - } - } - }) - } -} - -// TestApplyModelDefaults_Google tests that Google Gemini models get the correct defaults. -func TestApplyModelDefaults_Google(t *testing.T) { - t.Parallel() - - tests := []struct { - name string - config *latest.ModelConfig - expectThinkingBudget *latest.ThinkingBudget - expectNoDefault bool // true if no default should be applied - }{ - { - name: "gemini-2.5-flash gets dynamic thinking default (-1)", - config: &latest.ModelConfig{ - Provider: "google", - Model: "gemini-2.5-flash", - }, - expectThinkingBudget: &latest.ThinkingBudget{Tokens: -1}, - }, - { - name: "gemini-2.5-pro gets dynamic thinking default (-1)", - config: &latest.ModelConfig{ - Provider: "google", - Model: "gemini-2.5-pro", - }, - expectThinkingBudget: &latest.ThinkingBudget{Tokens: -1}, - }, - { - name: "gemini-2.5-flash-lite gets dynamic thinking default (-1)", - config: &latest.ModelConfig{ - Provider: "google", - Model: "gemini-2.5-flash-lite", - }, - expectThinkingBudget: &latest.ThinkingBudget{Tokens: -1}, - }, - { - name: "gemini-3-pro gets high thinking level default", - config: &latest.ModelConfig{ - Provider: "google", - Model: "gemini-3-pro", - }, - expectThinkingBudget: &latest.ThinkingBudget{Effort: "high"}, - }, - { - name: "gemini-3-pro-preview gets high thinking level default", - config: &latest.ModelConfig{ - Provider: "google", - Model: "gemini-3-pro-preview", - }, - expectThinkingBudget: &latest.ThinkingBudget{Effort: "high"}, - }, - { - name: "gemini-3.1-pro-preview gets high thinking level default", - config: &latest.ModelConfig{ - Provider: "google", - Model: "gemini-3.1-pro-preview", - }, - expectThinkingBudget: &latest.ThinkingBudget{Effort: "high"}, - }, - { - name: "gemini-3-flash gets medium thinking level default", - config: &latest.ModelConfig{ - Provider: "google", - Model: "gemini-3-flash", - }, - expectThinkingBudget: &latest.ThinkingBudget{Effort: "medium"}, - }, + // --- Anthropic: no default, but interleaved_thinking when 
budget set --- { - name: "gemini-3-flash-preview gets medium thinking level default", - config: &latest.ModelConfig{ - Provider: "google", - Model: "gemini-3-flash-preview", - }, - expectThinkingBudget: &latest.ThinkingBudget{Effort: "medium"}, + name: "anthropic: no default thinking", + config: &latest.ModelConfig{Provider: "anthropic", Model: "claude-sonnet-4-0"}, }, { - name: "gemini-3.1-flash-preview gets medium thinking level default", - config: &latest.ModelConfig{ - Provider: "google", - Model: "gemini-3.1-flash-preview", - }, - expectThinkingBudget: &latest.ThinkingBudget{Effort: "medium"}, + name: "anthropic: explicit budget enables interleaved_thinking", + config: &latest.ModelConfig{Provider: "anthropic", Model: "claude-sonnet-4-0", ThinkingBudget: &latest.ThinkingBudget{Tokens: 16384}}, + wantBudget: &latest.ThinkingBudget{Tokens: 16384}, + wantInterleaved: boolPtr(true), }, { - name: "gemini-2.0-flash is not affected (old model)", - config: &latest.ModelConfig{ - Provider: "google", - Model: "gemini-2.0-flash", - }, - expectNoDefault: true, + name: "anthropic: adaptive budget enables interleaved_thinking", + config: &latest.ModelConfig{Provider: "anthropic", Model: "claude-opus-4-6", ThinkingBudget: &latest.ThinkingBudget{Effort: "adaptive"}}, + wantBudget: &latest.ThinkingBudget{Effort: "adaptive"}, + wantInterleaved: boolPtr(true), }, { - name: "gemini-1.5-pro is not affected (old model)", - config: &latest.ModelConfig{ - Provider: "google", - Model: "gemini-1.5-pro", - }, - expectNoDefault: true, + name: "anthropic: explicit interleaved_thinking=false is preserved", + config: &latest.ModelConfig{Provider: "anthropic", Model: "claude-sonnet-4-0", ThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, ProviderOpts: map[string]any{"interleaved_thinking": false}}, + wantBudget: &latest.ThinkingBudget{Tokens: 8192}, + wantInterleaved: boolPtr(false), }, + + // --- Google: no default thinking --- { - name: "explicit thinking_budget is preserved for 
gemini-2.5", - config: &latest.ModelConfig{ - Provider: "google", - Model: "gemini-2.5-flash", - ThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, - }, - expectThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, + name: "google/gemini-2.5-flash: no default thinking", + config: &latest.ModelConfig{Provider: "google", Model: "gemini-2.5-flash"}, }, { - name: "explicit thinking_budget is preserved for gemini-3", - config: &latest.ModelConfig{ - Provider: "google", - Model: "gemini-3-pro", - ThinkingBudget: &latest.ThinkingBudget{Effort: "low"}, - }, - expectThinkingBudget: &latest.ThinkingBudget{Effort: "low"}, + name: "google/gemini-3-pro: no default thinking", + config: &latest.ModelConfig{Provider: "google", Model: "gemini-3-pro"}, }, { - name: "thinking_budget 0 disables thinking completely (nil)", - config: &latest.ModelConfig{ - Provider: "google", - Model: "gemini-2.5-flash", - ThinkingBudget: &latest.ThinkingBudget{Tokens: 0}, - }, - expectNoDefault: true, // thinking_budget: 0 means disable thinking entirely + name: "google: explicit budget preserved", + config: &latest.ModelConfig{Provider: "google", Model: "gemini-2.5-flash", ThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}}, + wantBudget: &latest.ThinkingBudget{Tokens: 8192}, }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - t.Parallel() - - // Apply defaults - applyModelDefaults(tt.config) - - if tt.expectNoDefault { - assert.Nil(t, tt.config.ThinkingBudget, "ThinkingBudget should not be set for old Gemini model") - return - } - - // Verify thinking budget - require.NotNil(t, tt.config.ThinkingBudget, "ThinkingBudget should be set") - assert.Equal(t, tt.expectThinkingBudget.Effort, tt.config.ThinkingBudget.Effort, "Effort should match") - assert.Equal(t, tt.expectThinkingBudget.Tokens, tt.config.ThinkingBudget.Tokens, "Tokens should match") - }) - } -} - -// TestApplyModelDefaults_Bedrock tests that Amazon Bedrock Claude models get the correct defaults. 
-func TestApplyModelDefaults_Bedrock(t *testing.T) { - t.Parallel() - tests := []struct { - name string - config *latest.ModelConfig - expectThinkingBudget *latest.ThinkingBudget - expectInterleavedThinking bool - expectExplicitInterleaved bool // true if we expect an explicit value in ProviderOpts - expectNoDefault bool // true if no default should be applied - }{ - { - name: "bedrock claude model gets defaults", - config: &latest.ModelConfig{ - Provider: "amazon-bedrock", - Model: "anthropic.claude-3-sonnet", - }, - expectThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, - expectInterleavedThinking: true, - expectExplicitInterleaved: true, - }, + // --- Bedrock: no default thinking, interleaved_thinking when budget set on Claude --- { - name: "bedrock claude-sonnet-4 model gets defaults", - config: &latest.ModelConfig{ - Provider: "amazon-bedrock", - Model: "anthropic.claude-sonnet-4-20250514-v1:0", - }, - expectThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, - expectInterleavedThinking: true, - expectExplicitInterleaved: true, + name: "bedrock claude: no default thinking", + config: &latest.ModelConfig{Provider: "amazon-bedrock", Model: "anthropic.claude-3-sonnet"}, }, { - name: "bedrock global claude model gets defaults", - config: &latest.ModelConfig{ - Provider: "amazon-bedrock", - Model: "global.anthropic.claude-sonnet-4-5-20250929-v1:0", - }, - expectThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, - expectInterleavedThinking: true, - expectExplicitInterleaved: true, + name: "bedrock global claude: no default thinking", + config: &latest.ModelConfig{Provider: "amazon-bedrock", Model: "global.anthropic.claude-sonnet-4-5-20250929-v1:0"}, }, { - name: "bedrock claude opus model gets defaults", - config: &latest.ModelConfig{ - Provider: "amazon-bedrock", - Model: "anthropic.claude-opus-4-0", - }, - expectThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, - expectInterleavedThinking: true, - expectExplicitInterleaved: true, + name: "bedrock 
claude: explicit budget enables interleaved_thinking", + config: &latest.ModelConfig{Provider: "amazon-bedrock", Model: "anthropic.claude-3-sonnet", ThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}}, + wantBudget: &latest.ThinkingBudget{Tokens: 8192}, + wantInterleaved: boolPtr(true), }, { - name: "bedrock non-claude model is not affected", - config: &latest.ModelConfig{ - Provider: "amazon-bedrock", - Model: "amazon.titan-text-express-v1", - }, - expectNoDefault: true, + name: "bedrock non-claude: not affected", + config: &latest.ModelConfig{Provider: "amazon-bedrock", Model: "amazon.titan-text-express-v1"}, }, + + // --- Disabled thinking normalised to nil --- { - name: "bedrock mistral model is not affected", - config: &latest.ModelConfig{ - Provider: "amazon-bedrock", - Model: "mistral.mistral-large-latest", - }, - expectNoDefault: true, + name: "thinking_budget: 0 becomes nil", + config: &latest.ModelConfig{Provider: "anthropic", Model: "claude-sonnet-4-0", ThinkingBudget: &latest.ThinkingBudget{Tokens: 0}}, }, { - name: "explicit thinking_budget is preserved", - config: &latest.ModelConfig{ - Provider: "amazon-bedrock", - Model: "anthropic.claude-3-sonnet", - ThinkingBudget: &latest.ThinkingBudget{Tokens: 16384}, - }, - expectThinkingBudget: &latest.ThinkingBudget{Tokens: 16384}, - expectInterleavedThinking: true, - expectExplicitInterleaved: true, + name: "thinking_budget: none becomes nil", + config: &latest.ModelConfig{Provider: "openai", Model: "gpt-4o", ThinkingBudget: &latest.ThinkingBudget{Effort: "none"}}, }, + + // --- Unknown / other providers: no effect --- { - name: "explicit interleaved_thinking false is preserved", - config: &latest.ModelConfig{ - Provider: "amazon-bedrock", - Model: "anthropic.claude-3-sonnet", - ProviderOpts: map[string]any{"interleaved_thinking": false}, - }, - expectThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, - expectInterleavedThinking: false, - expectExplicitInterleaved: true, + name: "unknown provider: no 
effect", + config: &latest.ModelConfig{Provider: "unknown", Model: "some-model"}, }, { - name: "existing ProviderOpts are preserved", - config: &latest.ModelConfig{ - Provider: "amazon-bedrock", - Model: "anthropic.claude-3-sonnet", - ProviderOpts: map[string]any{"region": "us-west-2"}, - }, - expectThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, - expectInterleavedThinking: true, - expectExplicitInterleaved: true, + name: "dmr: no effect", + config: &latest.ModelConfig{Provider: "dmr", Model: "ai/llama3.2"}, }, } @@ -463,201 +153,68 @@ func TestApplyModelDefaults_Bedrock(t *testing.T) { t.Run(tt.name, func(t *testing.T) { t.Parallel() - // Save original ProviderOpts keys to check preservation - originalOpts := make(map[string]any) - if tt.config.ProviderOpts != nil { - maps.Copy(originalOpts, tt.config.ProviderOpts) - } - - // Apply defaults applyModelDefaults(tt.config) - if tt.expectNoDefault { - assert.Nil(t, tt.config.ThinkingBudget, "ThinkingBudget should not be set for non-Claude Bedrock model") - if tt.config.ProviderOpts != nil { - _, exists := tt.config.ProviderOpts["interleaved_thinking"] - assert.False(t, exists, "interleaved_thinking should not be set for non-Claude Bedrock model") - } - return - } - - // Verify thinking budget - require.NotNil(t, tt.config.ThinkingBudget, "ThinkingBudget should be set") - assert.Equal(t, tt.expectThinkingBudget.Tokens, tt.config.ThinkingBudget.Tokens, "Tokens should match") - - // Verify interleaved_thinking - if tt.expectExplicitInterleaved { - require.NotNil(t, tt.config.ProviderOpts, "ProviderOpts should be set") - val, exists := tt.config.ProviderOpts["interleaved_thinking"] - require.True(t, exists, "interleaved_thinking should be set in ProviderOpts") - assert.Equal(t, tt.expectInterleavedThinking, val, "interleaved_thinking should match expected value") + // Check thinking budget. 
+ if tt.wantBudget == nil { + assert.Nil(t, tt.config.ThinkingBudget) + } else { + require.NotNil(t, tt.config.ThinkingBudget) + assert.Equal(t, *tt.wantBudget, *tt.config.ThinkingBudget) } - // Verify original ProviderOpts are preserved - for k, v := range originalOpts { - if k != "interleaved_thinking" { - assert.Equal(t, v, tt.config.ProviderOpts[k], "original ProviderOpts key %s should be preserved", k) + // Check interleaved_thinking. + if tt.wantInterleaved == nil { + if tt.config.ProviderOpts != nil { + _, exists := tt.config.ProviderOpts["interleaved_thinking"] + assert.False(t, exists, "interleaved_thinking should not be set") } + } else { + require.NotNil(t, tt.config.ProviderOpts) + assert.Equal(t, *tt.wantInterleaved, tt.config.ProviderOpts["interleaved_thinking"]) } }) } } -// TestApplyModelDefaults_NonAffectedProviders tests that other providers are not affected. -func TestApplyModelDefaults_NonAffectedProviders(t *testing.T) { +func TestApplyProviderDefaults(t *testing.T) { t.Parallel() - tests := []struct { - name string - config *latest.ModelConfig - }{ - { - name: "google gemini-2.0-flash is not affected (old model)", - config: &latest.ModelConfig{ - Provider: "google", - Model: "gemini-2.0-flash", - }, - }, - { - name: "dmr provider is not affected", - config: &latest.ModelConfig{ - Provider: "dmr", - Model: "ai/llama3.2", - }, - }, - { - name: "amazon-bedrock non-claude model is not affected", - config: &latest.ModelConfig{ - Provider: "amazon-bedrock", - Model: "amazon.titan-text-express-v1", - }, - }, - { - name: "unknown provider is not affected", - config: &latest.ModelConfig{ - Provider: "unknown", - Model: "some-model", - }, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - t.Parallel() - - // Apply defaults - applyModelDefaults(tt.config) - - // Verify thinking_budget is NOT set - assert.Nil(t, tt.config.ThinkingBudget, "ThinkingBudget should not be set for non-affected provider") - - // Verify 
interleaved_thinking is NOT set - if tt.config.ProviderOpts != nil { - _, exists := tt.config.ProviderOpts["interleaved_thinking"] - assert.False(t, exists, "interleaved_thinking should not be set for non-affected provider") - } - }) - } -} - -// TestApplyProviderDefaults_IncludesModelDefaults tests that applyProviderDefaults -// also applies model-specific defaults via applyModelDefaults. -func TestApplyProviderDefaults_IncludesModelDefaults(t *testing.T) { - t.Parallel() + boolPtr := func(v bool) *bool { return &v } tests := []struct { - name string - config *latest.ModelConfig - customProviders map[string]latest.ProviderConfig - expectThinkingBudget *latest.ThinkingBudget - expectInterleavedThinking *bool + name string + config *latest.ModelConfig + customProviders map[string]latest.ProviderConfig + wantBudget *latest.ThinkingBudget + wantInterleaved *bool }{ { - name: "openai model from config gets defaults", - config: &latest.ModelConfig{ - Provider: "openai", - Model: "gpt-4o", - }, - expectThinkingBudget: &latest.ThinkingBudget{Effort: "medium"}, - }, - { - name: "anthropic model from config gets defaults", - config: &latest.ModelConfig{ - Provider: "anthropic", - Model: "claude-sonnet-4-0", - }, - expectThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, - expectInterleavedThinking: new(true), - }, - { - name: "google gemini-2.5 model gets defaults", - config: &latest.ModelConfig{ - Provider: "google", - Model: "gemini-2.5-flash", - }, - expectThinkingBudget: &latest.ThinkingBudget{Tokens: -1}, - }, - { - name: "google gemini-3-pro model gets defaults", - config: &latest.ModelConfig{ - Provider: "google", - Model: "gemini-3-pro", - }, - expectThinkingBudget: &latest.ThinkingBudget{Effort: "high"}, - }, - { - name: "google gemini-3-flash model gets defaults", - config: &latest.ModelConfig{ - Provider: "google", - Model: "gemini-3-flash", - }, - expectThinkingBudget: &latest.ThinkingBudget{Effort: "medium"}, + name: "openai o3-mini: thinking-only gets 
default through provider defaults", + config: &latest.ModelConfig{Provider: "openai", Model: "o3-mini"}, + wantBudget: &latest.ThinkingBudget{Effort: "medium"}, }, { - name: "bedrock claude model gets defaults", - config: &latest.ModelConfig{ - Provider: "amazon-bedrock", - Model: "anthropic.claude-3-sonnet", - }, - expectThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, - expectInterleavedThinking: new(true), + name: "openai gpt-4o: no default through provider defaults", + config: &latest.ModelConfig{Provider: "openai", Model: "gpt-4o"}, }, { - name: "bedrock global claude model gets defaults", - config: &latest.ModelConfig{ - Provider: "amazon-bedrock", - Model: "global.anthropic.claude-sonnet-4-5-20250929-v1:0", - }, - expectThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, - expectInterleavedThinking: new(true), + name: "anthropic with explicit budget gets interleaved through provider defaults", + config: &latest.ModelConfig{Provider: "anthropic", Model: "claude-sonnet-4-0", ThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}}, + wantBudget: &latest.ThinkingBudget{Tokens: 8192}, + wantInterleaved: boolPtr(true), }, { - name: "custom provider with openai api_type gets openai defaults", - config: &latest.ModelConfig{ - Provider: "my_gateway", - Model: "gpt-4o", - }, + name: "custom openai provider: no default thinking", + config: &latest.ModelConfig{Provider: "my_gateway", Model: "gpt-4o"}, customProviders: map[string]latest.ProviderConfig{ - "my_gateway": { - APIType: "openai_chatcompletions", - BaseURL: "https://api.example.com/v1", - TokenKey: "MY_KEY", - }, + "my_gateway": {APIType: "openai_chatcompletions", BaseURL: "https://api.example.com/v1", TokenKey: "MY_KEY"}, }, - expectThinkingBudget: &latest.ThinkingBudget{Effort: "medium"}, }, { - name: "custom provider with anthropic api_type gets anthropic defaults", - config: &latest.ModelConfig{ - Provider: "my_anthropic_gateway", - Model: "claude-sonnet-4-0", - ProviderOpts: map[string]any{ - 
"api_type": "anthropic", - }, - }, - customProviders: nil, - expectThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, - expectInterleavedThinking: new(true), + name: "explicit thinking preserved unchanged", + config: &latest.ModelConfig{Provider: "openai", Model: "gpt-4o", ThinkingBudget: &latest.ThinkingBudget{Effort: "high"}}, + wantBudget: &latest.ThinkingBudget{Effort: "high"}, }, } @@ -667,153 +224,42 @@ func TestApplyProviderDefaults_IncludesModelDefaults(t *testing.T) { result := applyProviderDefaults(tt.config, tt.customProviders) - // Verify thinking budget - if tt.expectThinkingBudget != nil { - require.NotNil(t, result.ThinkingBudget, "ThinkingBudget should be set") - assert.Equal(t, tt.expectThinkingBudget.Effort, result.ThinkingBudget.Effort, "Effort should match") - assert.Equal(t, tt.expectThinkingBudget.Tokens, result.ThinkingBudget.Tokens, "Tokens should match") + if tt.wantBudget == nil { + assert.Nil(t, result.ThinkingBudget) + } else { + require.NotNil(t, result.ThinkingBudget) + assert.Equal(t, *tt.wantBudget, *result.ThinkingBudget) } - // Verify interleaved_thinking for Anthropic - if tt.expectInterleavedThinking != nil { - require.NotNil(t, result.ProviderOpts, "ProviderOpts should be set") - val, exists := result.ProviderOpts["interleaved_thinking"] - require.True(t, exists, "interleaved_thinking should be set") - assert.Equal(t, *tt.expectInterleavedThinking, val, "interleaved_thinking should match") - } - }) - } -} - -// TestApplyProviderDefaults_ThinkingDefaultsApplied tests that thinking defaults -// are always applied when the config doesn't have an explicit thinking budget. 
-func TestApplyProviderDefaults_ThinkingDefaultsApplied(t *testing.T) { - t.Parallel() - - tests := []struct { - name string - config *latest.ModelConfig - expectThinkingBudget *latest.ThinkingBudget - expectInterleavedThinking bool - }{ - { - name: "OpenAI gets default thinking_budget", - config: &latest.ModelConfig{ - Provider: "openai", - Model: "gpt-4o", - }, - expectThinkingBudget: &latest.ThinkingBudget{Effort: "medium"}, - }, - { - name: "Anthropic gets default thinking_budget and interleaved_thinking", - config: &latest.ModelConfig{ - Provider: "anthropic", - Model: "claude-sonnet-4-0", - }, - expectThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, - expectInterleavedThinking: true, - }, - { - name: "Google Gemini 2.5 gets default thinking_budget", - config: &latest.ModelConfig{ - Provider: "google", - Model: "gemini-2.5-pro", - }, - expectThinkingBudget: &latest.ThinkingBudget{Tokens: -1}, - }, - { - name: "Google Gemini 3 Pro gets default thinking_budget", - config: &latest.ModelConfig{ - Provider: "google", - Model: "gemini-3-pro", - }, - expectThinkingBudget: &latest.ThinkingBudget{Effort: "high"}, - }, - { - name: "Bedrock Claude gets default thinking_budget and interleaved_thinking", - config: &latest.ModelConfig{ - Provider: "amazon-bedrock", - Model: "anthropic.claude-3-sonnet", - }, - expectThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, - expectInterleavedThinking: true, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - t.Parallel() - - // Apply provider defaults - result := applyProviderDefaults(tt.config, nil) - - // Verify default thinking budget was applied - require.NotNil(t, result.ThinkingBudget, "ThinkingBudget should be set") - assert.Equal(t, tt.expectThinkingBudget.Effort, result.ThinkingBudget.Effort, "Effort should match") - assert.Equal(t, tt.expectThinkingBudget.Tokens, result.ThinkingBudget.Tokens, "Tokens should match") - - // Verify interleaved_thinking for Anthropic/Bedrock - if 
tt.expectInterleavedThinking { - require.NotNil(t, result.ProviderOpts, "ProviderOpts should be set") - val, exists := result.ProviderOpts["interleaved_thinking"] - require.True(t, exists, "interleaved_thinking should be set") - assert.Equal(t, true, val, "interleaved_thinking should be true") + if tt.wantInterleaved != nil { + require.NotNil(t, result.ProviderOpts) + assert.Equal(t, *tt.wantInterleaved, result.ProviderOpts["interleaved_thinking"]) } }) } } -// TestApplyProviderDefaults_ExplicitThinkingPreserved tests that explicitly set -// thinking options are preserved and not overwritten by defaults. -func TestApplyProviderDefaults_ExplicitThinkingPreserved(t *testing.T) { - t.Parallel() - - config := &latest.ModelConfig{ - Provider: "openai", - Model: "gpt-4o", - ThinkingBudget: &latest.ThinkingBudget{Effort: "high"}, - } - - result := applyProviderDefaults(config, nil) - - require.NotNil(t, result.ThinkingBudget, "ThinkingBudget should be preserved") - assert.Equal(t, "high", result.ThinkingBudget.Effort, "Effort should be preserved") -} - -// TestApplyProviderDefaults_DisabledThinkingBecomesNil tests that explicitly disabled -// thinking (thinking_budget: 0 or thinking_budget: none) results in nil ThinkingBudget. 
-func TestApplyProviderDefaults_DisabledThinkingBecomesNil(t *testing.T) { +func TestIsOpenAIThinkingOnlyModel(t *testing.T) { t.Parallel() - tests := []struct { - name string - config *latest.ModelConfig + for _, tt := range []struct { + model string + want bool }{ - { - name: "thinking_budget 0 becomes nil", - config: &latest.ModelConfig{ - Provider: "anthropic", - Model: "claude-sonnet-4-0", - ThinkingBudget: &latest.ThinkingBudget{Tokens: 0}, - }, - }, - { - name: "thinking_budget none becomes nil", - config: &latest.ModelConfig{ - Provider: "openai", - Model: "gpt-4o", - ThinkingBudget: &latest.ThinkingBudget{Effort: "none"}, - }, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { + {"o1", true}, + {"o1-preview", true}, + {"o1-mini", true}, + {"o3", true}, + {"o3-mini", true}, + {"o4-mini", true}, + {"gpt-4o", false}, + {"gpt-4.1", false}, + {"gpt-5", false}, + {"custom-model", false}, + } { + t.Run(tt.model, func(t *testing.T) { t.Parallel() - - result := applyProviderDefaults(tt.config, nil) - - assert.Nil(t, result.ThinkingBudget, "ThinkingBudget should be nil when explicitly disabled") + assert.Equal(t, tt.want, isOpenAIThinkingOnlyModel(tt.model)) }) } } diff --git a/pkg/model/provider/override_test.go b/pkg/model/provider/override_test.go index 3da941c7d..5839f8e67 100644 --- a/pkg/model/provider/override_test.go +++ b/pkg/model/provider/override_test.go @@ -10,127 +10,79 @@ import ( "github.com/docker/docker-agent/pkg/model/provider/options" ) -// TestApplyOverrides_Thinking tests that applyOverrides correctly clears -// thinking configuration when Thinking is set to false (disabled). 
-func TestApplyOverrides_Thinking(t *testing.T) { +func TestApplyOverrides(t *testing.T) { t.Parallel() + boolPtr := func(v bool) *bool { return &v } + tests := []struct { - name string - config *latest.ModelConfig - thinkingEnabled *bool // nil means no override, true means enabled, false means disabled - expectThinkingBudget *latest.ThinkingBudget - expectInterleavedThinking *bool // nil means key should not exist + name string + config *latest.ModelConfig + thinking *bool // nil = no override + wantBudget *latest.ThinkingBudget + wantInterleaved *bool // nil = key must not exist }{ + // --- Disable clears everything --- { - name: "clears explicit thinking_budget when disabled", - config: &latest.ModelConfig{ - Provider: "anthropic", - Model: "claude-sonnet-4-0", - ThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, - }, - thinkingEnabled: new(false), - expectThinkingBudget: nil, - }, - { - name: "clears interleaved_thinking when disabled", - config: &latest.ModelConfig{ - Provider: "anthropic", - Model: "claude-sonnet-4-0", - ThinkingBudget: &latest.ThinkingBudget{Tokens: 16384}, - ProviderOpts: map[string]any{"interleaved_thinking": true}, - }, - thinkingEnabled: new(false), - expectThinkingBudget: nil, - expectInterleavedThinking: nil, // key should be removed + name: "disable: clears thinking_budget", + config: &latest.ModelConfig{Provider: "anthropic", Model: "claude-sonnet-4-0", ThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}}, + thinking: boolPtr(false), }, { - name: "preserves thinking_budget when enabled", - config: &latest.ModelConfig{ - Provider: "anthropic", - Model: "claude-sonnet-4-0", - ThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, - }, - thinkingEnabled: new(true), - expectThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, + name: "disable: clears interleaved_thinking", + config: &latest.ModelConfig{Provider: "anthropic", Model: "claude-sonnet-4-0", ThinkingBudget: &latest.ThinkingBudget{Tokens: 16384}, ProviderOpts: 
map[string]any{"interleaved_thinking": true}}, + thinking: boolPtr(false), }, + + // --- Enable preserves existing budget --- { - name: "preserves interleaved_thinking when enabled", - config: &latest.ModelConfig{ - Provider: "anthropic", - Model: "claude-sonnet-4-0", - ThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, - ProviderOpts: map[string]any{"interleaved_thinking": true}, - }, - thinkingEnabled: new(true), - expectThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, - expectInterleavedThinking: new(true), + name: "enable: preserves existing budget", + config: &latest.ModelConfig{Provider: "anthropic", Model: "claude-sonnet-4-0", ThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}}, + thinking: boolPtr(true), + wantBudget: &latest.ThinkingBudget{Tokens: 8192}, }, { - name: "preserves other ProviderOpts when clearing thinking", - config: &latest.ModelConfig{ - Provider: "anthropic", - Model: "claude-sonnet-4-0", - ThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, - ProviderOpts: map[string]any{ - "interleaved_thinking": true, - "other_option": "preserved", - }, - }, - thinkingEnabled: new(false), - expectThinkingBudget: nil, + name: "enable: preserves existing budget + interleaved", + config: &latest.ModelConfig{Provider: "anthropic", Model: "claude-sonnet-4-0", ThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, ProviderOpts: map[string]any{"interleaved_thinking": true}}, + thinking: boolPtr(true), + wantBudget: &latest.ThinkingBudget{Tokens: 8192}, + wantInterleaved: boolPtr(true), }, + + // --- Enable applies defaults when no budget --- { - name: "nil options is a no-op", - config: &latest.ModelConfig{ - Provider: "anthropic", - Model: "claude-sonnet-4-0", - ThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, - }, - thinkingEnabled: nil, // Will pass nil opts - expectThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, + name: "enable: OpenAI gets medium default", + config: &latest.ModelConfig{Provider: "openai", Model: "gpt-4o"}, + 
thinking: boolPtr(true), + wantBudget: &latest.ThinkingBudget{Effort: "medium"}, }, { - name: "applies defaults when enabled and ThinkingBudget is nil (OpenAI)", - config: &latest.ModelConfig{ - Provider: "openai", - Model: "gpt-4o", - ThinkingBudget: nil, // No thinking configured - }, - thinkingEnabled: new(true), - expectThinkingBudget: &latest.ThinkingBudget{Effort: "medium"}, // OpenAI default + name: "enable: Anthropic gets 8192 + interleaved", + config: &latest.ModelConfig{Provider: "anthropic", Model: "claude-sonnet-4-0"}, + thinking: boolPtr(true), + wantBudget: &latest.ThinkingBudget{Tokens: 8192}, + wantInterleaved: boolPtr(true), }, { - name: "applies defaults when enabled and ThinkingBudget is nil (Anthropic)", - config: &latest.ModelConfig{ - Provider: "anthropic", - Model: "claude-sonnet-4-0", - ThinkingBudget: nil, // No thinking configured - }, - thinkingEnabled: new(true), - expectThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, // Anthropic default - expectInterleavedThinking: new(true), // Anthropic default + name: "enable: restores from tokens=0", + config: &latest.ModelConfig{Provider: "openai", Model: "gpt-4o", ThinkingBudget: &latest.ThinkingBudget{Tokens: 0}}, + thinking: boolPtr(true), + wantBudget: &latest.ThinkingBudget{Effort: "medium"}, }, { - name: "restores defaults when /think used with tokens=0", - config: &latest.ModelConfig{ - Provider: "openai", - Model: "gpt-4o", - ThinkingBudget: &latest.ThinkingBudget{Tokens: 0}, // User had thinking disabled - }, - thinkingEnabled: new(true), // User runs /think - expectThinkingBudget: &latest.ThinkingBudget{Effort: "medium"}, // Apply OpenAI default + name: "enable: restores from effort=none", + config: &latest.ModelConfig{Provider: "anthropic", Model: "claude-sonnet-4-0", ThinkingBudget: &latest.ThinkingBudget{Effort: "none"}}, + thinking: boolPtr(true), + wantBudget: &latest.ThinkingBudget{Tokens: 8192}, + wantInterleaved: boolPtr(true), }, + + // --- No override = no-op --- { - 
name: "restores defaults when /think used with effort=none", - config: &latest.ModelConfig{ - Provider: "anthropic", - Model: "claude-sonnet-4-0", - ThinkingBudget: &latest.ThinkingBudget{Effort: "none"}, // User had thinking disabled - }, - thinkingEnabled: new(true), // User runs /think - expectThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, // Apply Anthropic default - expectInterleavedThinking: new(true), + name: "nil opts: config unchanged", + config: &latest.ModelConfig{Provider: "anthropic", Model: "claude-sonnet-4-0", ThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}}, + thinking: nil, + wantBudget: &latest.ThinkingBudget{Tokens: 8192}, }, } @@ -138,370 +90,178 @@ func TestApplyOverrides_Thinking(t *testing.T) { t.Run(tt.name, func(t *testing.T) { t.Parallel() - // Build options var opts *options.ModelOptions - if tt.thinkingEnabled != nil { - mo := options.ModelOptions{} - options.WithThinking(*tt.thinkingEnabled)(&mo) - opts = &mo + if tt.thinking != nil { + o := options.ModelOptions{} + options.WithThinking(*tt.thinking)(&o) + opts = &o } - // Save original other options for preservation check - var originalOtherOpts map[string]any - if tt.config.ProviderOpts != nil { - originalOtherOpts = make(map[string]any) - for k, v := range tt.config.ProviderOpts { - if k != "interleaved_thinking" { - originalOtherOpts[k] = v - } - } - } - - // Apply overrides result := applyOverrides(tt.config, opts) - // Verify thinking budget - if tt.expectThinkingBudget == nil { - assert.Nil(t, result.ThinkingBudget, "ThinkingBudget should be nil") + // Budget + if tt.wantBudget == nil { + assert.Nil(t, result.ThinkingBudget) } else { - require.NotNil(t, result.ThinkingBudget, "ThinkingBudget should be set") - assert.Equal(t, tt.expectThinkingBudget.Tokens, result.ThinkingBudget.Tokens) - assert.Equal(t, tt.expectThinkingBudget.Effort, result.ThinkingBudget.Effort) + require.NotNil(t, result.ThinkingBudget) + assert.Equal(t, *tt.wantBudget, *result.ThinkingBudget) } - 
// Verify interleaved_thinking - if tt.expectInterleavedThinking == nil && tt.thinkingEnabled != nil && !*tt.thinkingEnabled { - // Key should be removed when thinking is disabled + // interleaved_thinking + if tt.wantInterleaved == nil && tt.thinking != nil && !*tt.thinking { if result.ProviderOpts != nil { _, exists := result.ProviderOpts["interleaved_thinking"] assert.False(t, exists, "interleaved_thinking should be removed") } - } else if tt.expectInterleavedThinking != nil { + } else if tt.wantInterleaved != nil { require.NotNil(t, result.ProviderOpts) - val, exists := result.ProviderOpts["interleaved_thinking"] - require.True(t, exists, "interleaved_thinking should exist") - assert.Equal(t, *tt.expectInterleavedThinking, val) - } - - // Verify other ProviderOpts are preserved - for k, v := range originalOtherOpts { - require.NotNil(t, result.ProviderOpts, "ProviderOpts should exist for preserved keys") - assert.Equal(t, v, result.ProviderOpts[k], "other ProviderOpts key %s should be preserved", k) + assert.Equal(t, *tt.wantInterleaved, result.ProviderOpts["interleaved_thinking"]) } }) } } -// TestApplyOverrides_AllProviders tests that thinking override works for all providers. -func TestApplyOverrides_AllProviders(t *testing.T) { +// TestApplyOverrides_DoesNotModifyOriginal verifies that applyOverrides creates +// a proper copy: neither the struct fields, the ProviderOpts map, nor the +// ThinkingBudget pointer of the original config are mutated. 
+func TestApplyOverrides_DoesNotModifyOriginal(t *testing.T) { t.Parallel() - providers := []struct { - name string - provider string - model string - }{ - {"OpenAI", "openai", "gpt-4o"}, - {"Anthropic", "anthropic", "claude-sonnet-4-0"}, - {"Google", "google", "gemini-2.5-flash"}, - {"Bedrock Claude", "amazon-bedrock", "global.anthropic.claude-sonnet-4-5-20250929-v1:0"}, - {"Mistral (alias)", "mistral", "mistral-large-latest"}, - {"xAI (alias)", "xai", "grok-2"}, + original := &latest.ModelConfig{ + Provider: "anthropic", + Model: "claude-sonnet-4-0", + ThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, + ProviderOpts: map[string]any{"interleaved_thinking": true, "other": "value"}, } - for _, p := range providers { - t.Run(p.name, func(t *testing.T) { - t.Parallel() - - // Create config with thinking budget - config := &latest.ModelConfig{ - Provider: p.provider, - Model: p.model, - ThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, - } + o := options.ModelOptions{} + options.WithThinking(false)(&o) + result := applyOverrides(original, &o) - // Apply override with thinking disabled - mo := options.ModelOptions{} - options.WithThinking(false)(&mo) - result := applyOverrides(config, &mo) - - // Thinking should be cleared for all providers - assert.Nil(t, result.ThinkingBudget, - "ThinkingBudget should be cleared for provider %s", p.provider) - }) - } -} - -// TestDefaultsThenOverrides tests the full flow: defaults applied first, then overrides. 
-func TestDefaultsThenOverrides(t *testing.T) { - t.Parallel() - - tests := []struct { - name string - config *latest.ModelConfig - thinkingEnabled bool - expectThinkingBudget *latest.ThinkingBudget - }{ - { - name: "OpenAI: defaults applied, then cleared by override", - config: &latest.ModelConfig{ - Provider: "openai", - Model: "gpt-4o", - // No ThinkingBudget set - defaults will apply - }, - thinkingEnabled: false, - expectThinkingBudget: nil, // Override clears the default - }, - { - name: "OpenAI: defaults applied, preserved when enabled", - config: &latest.ModelConfig{ - Provider: "openai", - Model: "gpt-4o", - }, - thinkingEnabled: true, - expectThinkingBudget: &latest.ThinkingBudget{Effort: "medium"}, // Default preserved - }, - { - name: "Anthropic: defaults applied, then cleared by override", - config: &latest.ModelConfig{ - Provider: "anthropic", - Model: "claude-sonnet-4-0", - }, - thinkingEnabled: false, - expectThinkingBudget: nil, - }, - { - name: "Anthropic: defaults applied, preserved when enabled", - config: &latest.ModelConfig{ - Provider: "anthropic", - Model: "claude-sonnet-4-0", - }, - thinkingEnabled: true, - expectThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, - }, - { - name: "Google Gemini 2.5: defaults applied, then cleared by override", - config: &latest.ModelConfig{ - Provider: "google", - Model: "gemini-2.5-flash", - }, - thinkingEnabled: false, - expectThinkingBudget: nil, - }, - { - name: "Google Gemini 3 Pro: defaults applied, then cleared by override", - config: &latest.ModelConfig{ - Provider: "google", - Model: "gemini-3-pro", - }, - thinkingEnabled: false, - expectThinkingBudget: nil, - }, - { - name: "Bedrock Claude: defaults applied, then cleared by override", - config: &latest.ModelConfig{ - Provider: "amazon-bedrock", - Model: "anthropic.claude-3-sonnet", - }, - thinkingEnabled: false, - expectThinkingBudget: nil, - }, - { - name: "Explicit budget cleared by override", - config: &latest.ModelConfig{ - Provider: 
"anthropic", - Model: "claude-sonnet-4-0", - ThinkingBudget: &latest.ThinkingBudget{Tokens: 32000}, // Explicit - }, - thinkingEnabled: false, - expectThinkingBudget: nil, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - t.Parallel() + // Result should have thinking cleared. + assert.Nil(t, result.ThinkingBudget, "result ThinkingBudget should be nil") - // Step 1: Apply defaults (simulating createDirectProvider flow) - result := applyProviderDefaults(tt.config, nil) + // Original ThinkingBudget must be untouched. + require.NotNil(t, original.ThinkingBudget, "original ThinkingBudget must survive") + assert.Equal(t, 8192, original.ThinkingBudget.Tokens) - // Step 2: Apply overrides - mo := options.ModelOptions{} - options.WithThinking(tt.thinkingEnabled)(&mo) - result = applyOverrides(result, &mo) + // Original ProviderOpts map must still have interleaved_thinking. + val, exists := original.ProviderOpts["interleaved_thinking"] + require.True(t, exists, "original ProviderOpts must still contain interleaved_thinking") + assert.Equal(t, true, val) - // Verify result - if tt.expectThinkingBudget == nil { - assert.Nil(t, result.ThinkingBudget, "ThinkingBudget should be nil after override") - } else { - require.NotNil(t, result.ThinkingBudget, "ThinkingBudget should be set") - assert.Equal(t, tt.expectThinkingBudget.Tokens, result.ThinkingBudget.Tokens) - assert.Equal(t, tt.expectThinkingBudget.Effort, result.ThinkingBudget.Effort) - } - }) - } + // Other keys must survive in both original and result. + assert.Equal(t, "value", original.ProviderOpts["other"]) + require.NotNil(t, result.ProviderOpts) + assert.Equal(t, "value", result.ProviderOpts["other"]) } -// TestApplyOverrides_NilOpts tests that nil options returns config unchanged. -func TestApplyOverrides_NilOpts(t *testing.T) { +// TestApplyOverrides_DisablePreservesOtherProviderOpts verifies that disabling +// thinking only removes "interleaved_thinking" and leaves other keys intact. 
+func TestApplyOverrides_DisablePreservesOtherProviderOpts(t *testing.T) { t.Parallel() config := &latest.ModelConfig{ Provider: "anthropic", Model: "claude-sonnet-4-0", ThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, - ProviderOpts: map[string]any{"interleaved_thinking": true}, + ProviderOpts: map[string]any{"interleaved_thinking": true, "custom_key": "preserved"}, } - result := applyOverrides(config, nil) + o := options.ModelOptions{} + options.WithThinking(false)(&o) + result := applyOverrides(config, &o) - // Should be unchanged - require.NotNil(t, result.ThinkingBudget) - assert.Equal(t, 8192, result.ThinkingBudget.Tokens) - assert.Equal(t, true, result.ProviderOpts["interleaved_thinking"]) -} + // Thinking should be cleared. + assert.Nil(t, result.ThinkingBudget) -// TestApplyOverrides_DoesNotModifyOriginal tests that applyOverrides creates a copy. -func TestApplyOverrides_DoesNotModifyOriginal(t *testing.T) { - t.Parallel() + // interleaved_thinking should be removed. + _, exists := result.ProviderOpts["interleaved_thinking"] + assert.False(t, exists, "interleaved_thinking should be removed from result") - original := &latest.ModelConfig{ - Provider: "anthropic", - Model: "claude-sonnet-4-0", - ThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, - ProviderOpts: map[string]any{"interleaved_thinking": true}, - } - - mo := options.ModelOptions{} - options.WithThinking(false)(&mo) - result := applyOverrides(original, &mo) - - // Original should be unchanged - require.NotNil(t, original.ThinkingBudget, "Original ThinkingBudget should be unchanged") - assert.Equal(t, 8192, original.ThinkingBudget.Tokens) - - // Result should have changes - assert.Nil(t, result.ThinkingBudget, "Result ThinkingBudget should be nil") + // Other keys must survive. 
+ assert.Equal(t, "preserved", result.ProviderOpts["custom_key"]) } -// TestApplyOverrides_RestoresDefaultsFromDisabled tests that using /think when -// the config has thinking explicitly disabled (Tokens=0 or Effort="none") applies -// provider defaults. This is the key behavior that makes /think work when YAML -// starts with thinking_budget: 0 or thinking_budget: none. -// -// Note: applyProviderDefaults now converts disabled thinking (Tokens=0 or Effort="none") -// to nil ThinkingBudget. The /think command (applyOverrides with Thinking=true) then -// applies provider defaults since ThinkingBudget is nil. -func TestApplyOverrides_RestoresDefaultsFromDisabled(t *testing.T) { +// TestDefaultsThenOverrides tests the complete flow: provider defaults → overrides. +func TestDefaultsThenOverrides(t *testing.T) { t.Parallel() tests := []struct { - name string - config *latest.ModelConfig - expectThinkingBudget *latest.ThinkingBudget + name string + config *latest.ModelConfig + thinking bool + wantBudget *latest.ThinkingBudget }{ - { - name: "Anthropic: /think with Tokens=0 applies default 8192", - config: &latest.ModelConfig{ - Provider: "anthropic", - Model: "claude-sonnet-4-0", - ThinkingBudget: &latest.ThinkingBudget{Tokens: 0}, - }, - expectThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, - }, - { - name: "Anthropic: /think with Effort=none applies default 8192", - config: &latest.ModelConfig{ - Provider: "anthropic", - Model: "claude-sonnet-4-0", - ThinkingBudget: &latest.ThinkingBudget{Effort: "none"}, - }, - expectThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, - }, - { - name: "OpenAI: /think with Tokens=0 applies default medium", - config: &latest.ModelConfig{ - Provider: "openai", - Model: "gpt-4o", - ThinkingBudget: &latest.ThinkingBudget{Tokens: 0}, - }, - expectThinkingBudget: &latest.ThinkingBudget{Effort: "medium"}, - }, - { - name: "OpenAI: /think with Effort=none applies default medium", - config: &latest.ModelConfig{ - Provider: "openai", - 
Model: "gpt-4o", - ThinkingBudget: &latest.ThinkingBudget{Effort: "none"}, - }, - expectThinkingBudget: &latest.ThinkingBudget{Effort: "medium"}, - }, - { - name: "Gemini 2.5: /think with Tokens=0 applies default -1 (dynamic)", - config: &latest.ModelConfig{ - Provider: "google", - Model: "gemini-2.5-flash", - ThinkingBudget: &latest.ThinkingBudget{Tokens: 0}, - }, - expectThinkingBudget: &latest.ThinkingBudget{Tokens: -1}, - }, - { - name: "Bedrock Claude: /think with Tokens=0 applies default 8192", - config: &latest.ModelConfig{ - Provider: "amazon-bedrock", - Model: "anthropic.claude-3-sonnet", - ThinkingBudget: &latest.ThinkingBudget{Tokens: 0}, - }, - expectThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, - }, + // Disable on models without defaults — already nil, stays nil. + {"gpt-4o /think off", &latest.ModelConfig{Provider: "openai", Model: "gpt-4o"}, false, nil}, + {"anthropic /think off", &latest.ModelConfig{Provider: "anthropic", Model: "claude-sonnet-4-0"}, false, nil}, + + // Enable on models without defaults — applies provider defaults. 
+ {"gpt-4o /think on", &latest.ModelConfig{Provider: "openai", Model: "gpt-4o"}, true, &latest.ThinkingBudget{Effort: "medium"}}, + {"anthropic /think on", &latest.ModelConfig{Provider: "anthropic", Model: "claude-sonnet-4-0"}, true, &latest.ThinkingBudget{Tokens: 8192}}, + {"gemini-2.5 /think on", &latest.ModelConfig{Provider: "google", Model: "gemini-2.5-flash"}, true, &latest.ThinkingBudget{Tokens: -1}}, + {"gemini-3-pro /think on", &latest.ModelConfig{Provider: "google", Model: "gemini-3-pro"}, true, &latest.ThinkingBudget{Effort: "high"}}, + {"gemini-3-flash /think on", &latest.ModelConfig{Provider: "google", Model: "gemini-3-flash"}, true, &latest.ThinkingBudget{Effort: "medium"}}, + {"bedrock claude /think on", &latest.ModelConfig{Provider: "amazon-bedrock", Model: "anthropic.claude-3-sonnet"}, true, &latest.ThinkingBudget{Tokens: 8192}}, + + // Old Gemini model that doesn't support thinking — /think should be a no-op. + {"gemini-2.0 /think on (no thinking support)", &latest.ModelConfig{Provider: "google", Model: "gemini-2.0-flash"}, true, nil}, + + // Thinking-only model defaults preserved when enabled, cleared when disabled. + {"o3-mini /think on", &latest.ModelConfig{Provider: "openai", Model: "o3-mini"}, true, &latest.ThinkingBudget{Effort: "medium"}}, + {"o3-mini /think off", &latest.ModelConfig{Provider: "openai", Model: "o3-mini"}, false, nil}, + + // Explicit budget cleared by disable. + {"explicit cleared", &latest.ModelConfig{Provider: "anthropic", Model: "claude-sonnet-4-0", ThinkingBudget: &latest.ThinkingBudget{Tokens: 32000}}, false, nil}, + + // Restore from disabled (thinking_budget: 0) via /think on. 
+ {"restore from 0", &latest.ModelConfig{Provider: "openai", Model: "gpt-4o", ThinkingBudget: &latest.ThinkingBudget{Tokens: 0}}, true, &latest.ThinkingBudget{Effort: "medium"}}, + {"restore from none", &latest.ModelConfig{Provider: "anthropic", Model: "claude-sonnet-4-0", ThinkingBudget: &latest.ThinkingBudget{Effort: "none"}}, true, &latest.ThinkingBudget{Tokens: 8192}}, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { t.Parallel() - // Step 1: Apply provider defaults (simulating createDirectProvider flow) - // This now converts disabled thinking (Tokens=0 or Effort="none") to nil result := applyProviderDefaults(tt.config, nil) - // Verify thinking is disabled (nil) after provider defaults - assert.Nil(t, result.ThinkingBudget, - "ThinkingBudget should be nil after applyProviderDefaults when explicitly disabled") + o := options.ModelOptions{} + options.WithThinking(tt.thinking)(&o) + result = applyOverrides(result, &o) - // Step 2: Apply override with thinking explicitly enabled (simulates /think toggle) - mo := options.ModelOptions{} - options.WithThinking(true)(&mo) - result = applyOverrides(result, &mo) - - // Verify defaults were applied - /think enables thinking with provider defaults - require.NotNil(t, result.ThinkingBudget, "ThinkingBudget should be set after /think") - assert.Equal(t, tt.expectThinkingBudget.Tokens, result.ThinkingBudget.Tokens, "Tokens should match default") - assert.Equal(t, tt.expectThinkingBudget.Effort, result.ThinkingBudget.Effort, "Effort should match default") + if tt.wantBudget == nil { + assert.Nil(t, result.ThinkingBudget) + } else { + require.NotNil(t, result.ThinkingBudget) + assert.Equal(t, *tt.wantBudget, *result.ThinkingBudget) + } }) } } -// TestIsThinkingBudgetDisabled tests the helper function. -func TestIsThinkingBudgetDisabled(t *testing.T) { +// TestApplyProviderDefaults_DoesNotModifyOriginal verifies that applyProviderDefaults +// does not mutate the input config's ProviderOpts map. 
+func TestApplyProviderDefaults_DoesNotModifyOriginal(t *testing.T) { t.Parallel() - tests := []struct { - name string - budget *latest.ThinkingBudget - expected bool - }{ - {"nil budget", nil, false}, - {"Tokens=0", &latest.ThinkingBudget{Tokens: 0}, true}, - {"Effort=none", &latest.ThinkingBudget{Effort: "none"}, true}, - {"Tokens=8192", &latest.ThinkingBudget{Tokens: 8192}, false}, - {"Effort=medium", &latest.ThinkingBudget{Effort: "medium"}, false}, - {"Tokens=-1 (dynamic)", &latest.ThinkingBudget{Tokens: -1}, false}, + original := &latest.ModelConfig{ + Provider: "anthropic", + Model: "claude-sonnet-4-0", + ThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, + ProviderOpts: map[string]any{"custom_key": "original_value"}, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - t.Parallel() - assert.Equal(t, tt.expected, tt.budget.IsDisabled()) - }) - } + result := applyProviderDefaults(original, nil) + + // Result should have interleaved_thinking set (because thinking_budget is set). + require.NotNil(t, result.ProviderOpts) + assert.Equal(t, true, result.ProviderOpts["interleaved_thinking"]) + + // Original must NOT have interleaved_thinking added. + _, exists := original.ProviderOpts["interleaved_thinking"] + assert.False(t, exists, "original ProviderOpts must not be mutated by applyProviderDefaults") + + // Original custom key must still be there. 
+ assert.Equal(t, "original_value", original.ProviderOpts["custom_key"]) } diff --git a/pkg/model/provider/provider.go b/pkg/model/provider/provider.go index f43d49c1e..50c1ff00c 100644 --- a/pkg/model/provider/provider.go +++ b/pkg/model/provider/provider.go @@ -228,84 +228,61 @@ func createDirectProvider(ctx context.Context, cfg *latest.ModelConfig, env envi // Apply defaults from custom providers (from config) or built-in aliases enhancedCfg := applyProviderDefaults(cfg, globalOptions.Providers()) - if thinking := globalOptions.Thinking(); thinking != nil && !*thinking { - enhancedCfg.ThinkingBudget = nil - - // with thinking explicitly disabled, also remove the interleaved_thinking provider option - if enhancedCfg.ProviderOpts != nil { - // Copy to avoid mutating shared ProviderOpts in the original config - optsCopy := make(map[string]any, len(enhancedCfg.ProviderOpts)) - maps.Copy(optsCopy, enhancedCfg.ProviderOpts) - delete(optsCopy, "interleaved_thinking") - enhancedCfg.ProviderOpts = optsCopy - } - } - // Apply overrides (e.g., disable thinking if requested by session) + // Apply overrides (e.g., disable/enable thinking via /think command) enhancedCfg = applyOverrides(enhancedCfg, &globalOptions) - // Resolve the provider type with priority: - // 1. cfg.ProviderOpts["api_type"] (from custom provider or model override) - // 2. built-in alias APIType - // 3. provider name itself - providerType := resolveProviderTypeFromConfig(enhancedCfg) + providerType := resolveProviderType(enhancedCfg) switch providerType { case "openai", "openai_chatcompletions", "openai_responses": return openai.NewClient(ctx, enhancedCfg, env, opts...) - case "anthropic": return anthropic.NewClient(ctx, enhancedCfg, env, opts...) - case "google": return gemini.NewClient(ctx, enhancedCfg, env, opts...) - case "dmr": return dmr.NewClient(ctx, enhancedCfg, opts...) - case "amazon-bedrock": return bedrock.NewClient(ctx, enhancedCfg, env, opts...) 
- default: slog.Error("Unknown provider type", "type", providerType) return nil, fmt.Errorf("unknown provider type: %s", providerType) } } -// resolveProviderTypeFromConfig determines the provider type to use based on config. -// Priority: -// 1. cfg.ProviderOpts["api_type"] (from custom provider or model-level override) -// 2. built-in alias APIType (e.g., "mistral" -> "openai") -// 3. provider name itself (e.g., "openai", "anthropic") -func resolveProviderTypeFromConfig(cfg *latest.ModelConfig) string { - // Check for api_type in ProviderOpts (set by custom providers or model override) +// --------------------------------------------------------------------------- +// Provider-type resolution +// --------------------------------------------------------------------------- + +// resolveProviderType determines the effective API type for a config. +// Priority: ProviderOpts["api_type"] > built-in alias > provider name. +func resolveProviderType(cfg *latest.ModelConfig) string { if cfg.ProviderOpts != nil { if apiType, ok := cfg.ProviderOpts["api_type"].(string); ok && apiType != "" { - slog.Debug("Using api_type from provider config", - "provider", cfg.Provider, - "model", cfg.Model, - "api_type", apiType, - "base_url", cfg.BaseURL, - ) return apiType } } - - // Check built-in alias if alias, exists := Aliases[cfg.Provider]; exists && alias.APIType != "" { return alias.APIType } - - // Fall back to provider name return cfg.Provider } +// --------------------------------------------------------------------------- +// Provider defaults +// --------------------------------------------------------------------------- + // applyProviderDefaults applies default configuration from custom providers or built-in aliases. // Custom providers (from config) take precedence over built-in aliases. // This sets default base URLs, token keys, api_type, and model-specific defaults (like thinking budget). 
+// +// The returned config is a deep-enough copy: the caller's ModelConfig, ProviderOpts map, +// and ThinkingBudget pointer are never mutated. func applyProviderDefaults(cfg *latest.ModelConfig, customProviders map[string]latest.ProviderConfig) *latest.ModelConfig { - // Create a copy to avoid modifying the original - enhancedCfg := *cfg + // Create a copy to avoid modifying the original. + // cloneModelConfig also deep-copies ProviderOpts so writes are safe. + enhancedCfg := cloneModelConfig(cfg) if customProviders != nil { if providerCfg, exists := customProviders[cfg.Provider]; exists { @@ -334,8 +311,8 @@ func applyProviderDefaults(cfg *latest.ModelConfig, customProviders map[string]l enhancedCfg.ProviderOpts["api_type"] = apiType } - applyModelDefaults(&enhancedCfg) - return &enhancedCfg + applyModelDefaults(enhancedCfg) + return enhancedCfg } } @@ -352,184 +329,176 @@ func applyProviderDefaults(cfg *latest.ModelConfig, customProviders map[string]l } // Apply model-specific defaults (e.g., thinking budget for Claude/GPT models) - applyModelDefaults(&enhancedCfg) - return &enhancedCfg + applyModelDefaults(enhancedCfg) + return enhancedCfg +} + +// --------------------------------------------------------------------------- +// Thinking defaults and overrides +// --------------------------------------------------------------------------- + +// applyModelDefaults applies provider-specific default values for model configuration. +// +// Thinking defaults policy: +// - thinking_budget: 0 or thinking_budget: none → thinking is off (nil). +// - thinking_budget explicitly set to a real value → kept as-is; interleaved_thinking +// is auto-enabled for Anthropic/Bedrock-Claude. +// - thinking_budget NOT set: +// - Thinking-only models (OpenAI o-series) get "medium". +// - All other models get no thinking. +// +// NOTE: max_tokens is NOT set here; see teamloader and runtime/model_switcher. 
+func applyModelDefaults(cfg *latest.ModelConfig) { + // Explicitly disabled → normalise to nil so providers never see it. + if cfg.ThinkingBudget.IsDisabled() { + cfg.ThinkingBudget = nil + slog.Debug("Thinking explicitly disabled", + "provider", cfg.Provider, "model", cfg.Model) + return + } + + providerType := resolveProviderType(cfg) + + // User already set a real thinking_budget — just apply side-effects. + if cfg.ThinkingBudget != nil { + ensureInterleavedThinking(cfg, providerType) + return + } + + // No thinking_budget configured — only thinking-only models get a default. + switch providerType { + case "openai", "openai_chatcompletions", "openai_responses": + if isOpenAIThinkingOnlyModel(cfg.Model) { + cfg.ThinkingBudget = &latest.ThinkingBudget{Effort: "medium"} + slog.Debug("Applied default thinking for thinking-only OpenAI model", + "provider", cfg.Provider, "model", cfg.Model) + } + } } -// applyOverrides applies session-level or request-level overrides to the configuration. -// This is called AFTER defaults are applied, allowing overrides to clear or modify default values. +// applyOverrides applies session-level overrides to the configuration (e.g. /think toggle). +// When no thinking override is set (opts or opts.Thinking() is nil) cfg itself is returned; otherwise the result is a clone with its own ProviderOpts map. func applyOverrides(cfg *latest.ModelConfig, opts *options.ModelOptions) *latest.ModelConfig { if opts == nil { return cfg } - // Create a copy to avoid modifying the original - enhancedCfg := *cfg - t := opts.Thinking() if t == nil { - return &enhancedCfg + return cfg } - // If thinking is explicitly disabled (e.g., via /think command), clear thinking configuration + enhancedCfg := cloneModelConfig(cfg) + + // /think OFF — clear everything. 
if !*t { enhancedCfg.ThinkingBudget = nil - if enhancedCfg.ProviderOpts != nil { - delete(enhancedCfg.ProviderOpts, "interleaved_thinking") - } - slog.Debug("Override: thinking disabled - cleared thinking configuration", - "provider", cfg.Provider, - "model", cfg.Model, - ) - return &enhancedCfg + delete(enhancedCfg.ProviderOpts, "interleaved_thinking") + slog.Debug("Override: thinking disabled", + "provider", cfg.Provider, "model", cfg.Model) + return enhancedCfg } - // If thinking is explicitly enabled (e.g., via /think command), ensure thinking is configured. - // This handles two cases: - // 1. ThinkingBudget is nil (not configured) - apply defaults to enable thinking - // 2. ThinkingBudget is explicitly disabled (Tokens == 0 or Effort == "none") - clear and re-apply defaults - // This allows /think to enable thinking with provider defaults even when config had thinking_budget: 0 + // /think ON — make sure there is a sensible budget. if enhancedCfg.ThinkingBudget == nil || enhancedCfg.ThinkingBudget.IsDisabled() { enhancedCfg.ThinkingBudget = nil - applyModelDefaults(&enhancedCfg) - slog.Debug("Override: thinking enabled - applied default thinking configuration", - "provider", cfg.Provider, - "model", cfg.Model, - "thinking_budget", enhancedCfg.ThinkingBudget, - ) + setThinkingDefaults(enhancedCfg) + slog.Debug("Override: thinking enabled with defaults", + "provider", cfg.Provider, "model", cfg.Model, + "thinking_budget", enhancedCfg.ThinkingBudget) } - return &enhancedCfg + return enhancedCfg } -// applyModelDefaults applies provider-specific default values for model configuration. -// These defaults are applied only if the user hasn't explicitly set the values. -// -// NOTE: max_tokens is NOT set here because: -// 1. Different providers read it differently (ModelConfig vs ModelOptions) -// 2. Runtime can do modelsdev lookups for model-specific limits -// 3. 
Providers have their own fallbacks (e.g., Anthropic defaults to 8192) -// max_tokens defaults are handled in teamloader and runtime/model_switcher via options. -// -// Config-level defaults (set here): -// - OpenAI: thinking_budget = "medium" -// - Anthropic: thinking_budget = 8192, interleaved_thinking = true -// - Google: Gemini 2.5 → thinking_budget = -1 (dynamic), Gemini 3 Pro → "high", Gemini 3 Flash → "medium" -// - Amazon Bedrock (Claude models only): thinking_budget = 8192, interleaved_thinking = true -func applyModelDefaults(cfg *latest.ModelConfig) { - // If thinking is explicitly disabled (thinking_budget: 0 or thinking_budget: none), - // set ThinkingBudget to nil to completely disable thinking. - // This ensures no thinking config is sent to the provider. - if cfg.ThinkingBudget.IsDisabled() { - cfg.ThinkingBudget = nil - slog.Debug("Thinking explicitly disabled via thinking_budget: 0 or none", - "provider", cfg.Provider, - "model", cfg.Model, - ) - return // Don't apply any provider defaults for thinking - } - - // Resolve the actual provider type (handling aliases like mistral -> openai) - providerType := cfg.Provider - if alias, exists := Aliases[cfg.Provider]; exists && alias.APIType != "" { - providerType = alias.APIType - } - // Also check for api_type override in ProviderOpts - if cfg.ProviderOpts != nil { - if apiType, ok := cfg.ProviderOpts["api_type"].(string); ok && apiType != "" { - providerType = apiType - } - } +// setThinkingDefaults assigns a sensible default thinking budget for /think ON. +// Unlike applyModelDefaults this applies to every provider (not just thinking-only models) +// because the user explicitly asked for thinking. 
+func setThinkingDefaults(cfg *latest.ModelConfig) { + providerType := resolveProviderType(cfg) switch providerType { case "openai", "openai_chatcompletions", "openai_responses": - applyOpenAIDefaults(cfg) + cfg.ThinkingBudget = &latest.ThinkingBudget{Effort: "medium"} case "anthropic": - applyAnthropicDefaults(cfg) + cfg.ThinkingBudget = &latest.ThinkingBudget{Tokens: 8192} + ensureInterleavedThinking(cfg, providerType) case "google": - applyGoogleDefaults(cfg) + cfg.ThinkingBudget = defaultGoogleThinkingBudget(cfg.Model) case "amazon-bedrock": - applyBedrockDefaults(cfg) + if isBedrockClaudeModel(cfg.Model) { + cfg.ThinkingBudget = &latest.ThinkingBudget{Tokens: 8192} + ensureInterleavedThinking(cfg, providerType) + } } } -// applyOpenAIDefaults applies default configuration for OpenAI models. -func applyOpenAIDefaults(cfg *latest.ModelConfig) { - // Default thinking_budget to "medium" if not set - if cfg.ThinkingBudget == nil { - cfg.ThinkingBudget = &latest.ThinkingBudget{Effort: "medium"} - slog.Debug("Applied default thinking_budget for OpenAI", - "provider", cfg.Provider, - "model", cfg.Model, - "thinking_budget", "medium", - ) +// defaultGoogleThinkingBudget returns a sensible thinking budget for a Google model. +// Returns nil for models that don't have a known thinking capability. +func defaultGoogleThinkingBudget(model string) *latest.ThinkingBudget { + m := strings.ToLower(model) + switch { + case strings.HasPrefix(m, "gemini-2.5-"): + return &latest.ThinkingBudget{Tokens: -1} + case isGeminiProModel(m): + return &latest.ThinkingBudget{Effort: "high"} + case isGeminiFlashModel(m): + return &latest.ThinkingBudget{Effort: "medium"} + default: + // Unknown or older Gemini models (e.g. gemini-2.0-*): don't enable + // thinking since the API may reject it. + return nil } } -// applyAnthropicDefaults applies default configuration for Anthropic models. 
-func applyAnthropicDefaults(cfg *latest.ModelConfig) { - // Default thinking_budget to 8192 tokens if not set - if cfg.ThinkingBudget == nil { - cfg.ThinkingBudget = &latest.ThinkingBudget{Tokens: 8192} - slog.Debug("Applied default thinking_budget for Anthropic", - "provider", cfg.Provider, - "model", cfg.Model, - "thinking_budget", 8192, - ) +// --------------------------------------------------------------------------- +// Shared helpers +// --------------------------------------------------------------------------- + +// cloneModelConfig returns a shallow copy of cfg with its own one-level copy of +// ProviderOpts so that callers can add or delete map keys and reassign pointer +// fields on the returned config without affecting the original. +func cloneModelConfig(cfg *latest.ModelConfig) *latest.ModelConfig { + c := *cfg + if cfg.ProviderOpts != nil { + c.ProviderOpts = make(map[string]any, len(cfg.ProviderOpts)) + maps.Copy(c.ProviderOpts, cfg.ProviderOpts) } + return &c +} - // Default interleaved_thinking to true if not set +// ensureInterleavedThinking sets interleaved_thinking=true in ProviderOpts +// for Anthropic and Bedrock-Claude models, unless the user already set it. +func ensureInterleavedThinking(cfg *latest.ModelConfig, providerType string) { + needsInterleaved := providerType == "anthropic" || + (providerType == "amazon-bedrock" && isBedrockClaudeModel(cfg.Model)) + if !needsInterleaved { + return + } if cfg.ProviderOpts == nil { cfg.ProviderOpts = make(map[string]any) } if _, has := cfg.ProviderOpts["interleaved_thinking"]; !has { cfg.ProviderOpts["interleaved_thinking"] = true - slog.Debug("Applied default interleaved_thinking for Anthropic", - "provider", cfg.Provider, - "model", cfg.Model, - "interleaved_thinking", true, - ) + slog.Debug("Auto-enabled interleaved_thinking", + "provider", cfg.Provider, "model", cfg.Model) } } -// applyGoogleDefaults applies default configuration for Google Gemini models. 
-// - Gemini 2.5 models: thinking_budget = -1 (dynamic thinking) -// - Gemini 3+ Pro models: thinking_budget effort = "high" -// - Gemini 3+ Flash models: thinking_budget effort = "medium" -func applyGoogleDefaults(cfg *latest.ModelConfig) { - if cfg.ThinkingBudget != nil { - return // User explicitly set thinking_budget - } - - model := strings.ToLower(cfg.Model) +// isOpenAIThinkingOnlyModel returns true for OpenAI models that require thinking +// to function properly (o-series reasoning models). +func isOpenAIThinkingOnlyModel(model string) bool { + m := strings.ToLower(model) + return strings.HasPrefix(m, "o1") || + strings.HasPrefix(m, "o3") || + strings.HasPrefix(m, "o4") +} - switch { - case strings.HasPrefix(model, "gemini-2.5-"): - // Gemini 2.5 models use token-based thinking budget (-1 = dynamic) - cfg.ThinkingBudget = &latest.ThinkingBudget{Tokens: -1} - slog.Debug("Applied default thinking_budget for Google Gemini 2.5", - "provider", cfg.Provider, - "model", cfg.Model, - "thinking_budget", -1, - ) - case isGeminiProModel(model): - // Gemini 3+ Pro models use level-based thinking (high) - cfg.ThinkingBudget = &latest.ThinkingBudget{Effort: "high"} - slog.Debug("Applied default thinking_budget for Google Gemini 3+ Pro", - "provider", cfg.Provider, - "model", cfg.Model, - "thinking_budget", "high", - ) - case isGeminiFlashModel(model): - // Gemini 3+ Flash models use level-based thinking (medium) - cfg.ThinkingBudget = &latest.ThinkingBudget{Effort: "medium"} - slog.Debug("Applied default thinking_budget for Google Gemini 3+ Flash", - "provider", cfg.Provider, - "model", cfg.Model, - "thinking_budget", "medium", - ) - } - // For other Gemini models (e.g., gemini-2.0-*), leave unchanged +// isBedrockClaudeModel returns true if the model ID is a Claude model on Bedrock. +// Claude model IDs on Bedrock start with "anthropic.claude-" or "global.anthropic.claude-". 
+func isBedrockClaudeModel(model string) bool { + m := strings.ToLower(model) + return strings.HasPrefix(m, "anthropic.claude-") || strings.HasPrefix(m, "global.anthropic.claude-") } // gemini3Family extracts the model family (e.g. "pro", "flash") from a @@ -568,42 +537,3 @@ func isGeminiProModel(model string) bool { func isGeminiFlashModel(model string) bool { return strings.HasPrefix(gemini3Family(model), "flash") } - -// applyBedrockDefaults applies default configuration for Amazon Bedrock models. -// Only applies to Claude models (anthropic.claude-* or global.anthropic.claude-*). -func applyBedrockDefaults(cfg *latest.ModelConfig) { - // Only apply defaults for Claude models on Bedrock - if !isBedrockClaudeModel(cfg.Model) { - return - } - - // Default thinking_budget to 8192 tokens if not set - if cfg.ThinkingBudget == nil { - cfg.ThinkingBudget = &latest.ThinkingBudget{Tokens: 8192} - slog.Debug("Applied default thinking_budget for Bedrock Claude", - "provider", cfg.Provider, - "model", cfg.Model, - "thinking_budget", 8192, - ) - } - - // Default interleaved_thinking to true if not set - if cfg.ProviderOpts == nil { - cfg.ProviderOpts = make(map[string]any) - } - if _, has := cfg.ProviderOpts["interleaved_thinking"]; !has { - cfg.ProviderOpts["interleaved_thinking"] = true - slog.Debug("Applied default interleaved_thinking for Bedrock Claude", - "provider", cfg.Provider, - "model", cfg.Model, - "interleaved_thinking", true, - ) - } -} - -// isBedrockClaudeModel returns true if the model ID is a Claude model on Bedrock. -// Claude model IDs on Bedrock start with "anthropic.claude-" or "global.anthropic.claude-". -func isBedrockClaudeModel(model string) bool { - m := strings.ToLower(model) - return strings.HasPrefix(m, "anthropic.claude-") || strings.HasPrefix(m, "global.anthropic.claude-") -}