From cb75233c77460218bcbbf57c38bc86ef215db52c Mon Sep 17 00:00:00 2001
From: David Gageot
Date: Fri, 6 Feb 2026 12:42:13 +0100
Subject: [PATCH] Add debug title command for session title generation

Add a 'cagent debug title <agent-file> <question>' command that generates
a session title using the same sessiontitle.Generator code path as the
TUI, making it easy to debug title generation behavior.

Assisted-By: cagent
---
 cmd/root/debug.go                                  | 99 +++++++++++++++----
 e2e/cagent_debug_test.go                           | 30 +-----
 e2e/cagent_debug_title_test.go                     | 41 ++++++++
 e2e/cagent_exec_test.go                            | 64 +++---------
 e2e/helpers_test.go                                | 48 +++++++++
 .../cassettes/TestDebug_Title/Anthropic.yaml       | 50 ++++++++++
 .../TestDebug_Title/Anthropic_Opus46.yaml          | 53 ++++++++++
 .../TestDebug_Title/Anthropic_Sonnet45.yaml        | 47 +++++++++
 .../Google_Gemini25FlashLite.yaml                  | 27 +++++
 .../Google_Gemini3ProPreview.yaml                  | 27 +++++
 .../cassettes/TestDebug_Title/OpenAI.yaml          | 41 ++++++++
 .../TestDebug_Title/OpenAI_gpt52codex.yaml         | 65 ++++++++++++
 .../TestDebug_Title/OpenAI_gpt52pro.yaml           | 50 ++++++++++
 pkg/model/provider/gemini/client.go                | 38 +++++--
 pkg/model/provider/gemini/client_test.go           | 53 ++++++----
 pkg/model/provider/openai/client.go                | 16 ++-
 16 files changed, 624 insertions(+), 125 deletions(-)
 create mode 100644 e2e/cagent_debug_title_test.go
 create mode 100644 e2e/helpers_test.go
 create mode 100644 e2e/testdata/cassettes/TestDebug_Title/Anthropic.yaml
 create mode 100644 e2e/testdata/cassettes/TestDebug_Title/Anthropic_Opus46.yaml
 create mode 100644 e2e/testdata/cassettes/TestDebug_Title/Anthropic_Sonnet45.yaml
 create mode 100644 e2e/testdata/cassettes/TestDebug_Title/Google_Gemini25FlashLite.yaml
 create mode 100644 e2e/testdata/cassettes/TestDebug_Title/Google_Gemini3ProPreview.yaml
 create mode 100644 e2e/testdata/cassettes/TestDebug_Title/OpenAI.yaml
 create mode 100644 e2e/testdata/cassettes/TestDebug_Title/OpenAI_gpt52codex.yaml
 create mode 100644 e2e/testdata/cassettes/TestDebug_Title/OpenAI_gpt52pro.yaml

diff --git a/cmd/root/debug.go b/cmd/root/debug.go
index 26f73e0ed..ea9994a1a 100644
--- a/cmd/root/debug.go
+++ b/cmd/root/debug.go
@@ -1,6 +1,8 @@
 package root
 
 import (
+	"context"
+	"fmt"
 	"log/slog"
 
 	"github.com/goccy/go-yaml"
@@ -8,12 +10,15 @@ import (
 
 	"github.com/docker/cagent/pkg/cli"
 	"github.com/docker/cagent/pkg/config"
+	"github.com/docker/cagent/pkg/sessiontitle"
+	"github.com/docker/cagent/pkg/team"
 	"github.com/docker/cagent/pkg/teamloader"
 	"github.com/docker/cagent/pkg/telemetry"
 )
 
 type debugFlags struct {
-	runConfig config.RuntimeConfig
+	modelOverrides []string
+	runConfig      config.RuntimeConfig
 }
 
 func newDebugCmd() *cobra.Command {
@@ -37,24 +42,56 @@ func newDebugCmd() *cobra.Command {
 		Args: cobra.ExactArgs(1),
 		RunE: flags.runDebugToolsetsCommand,
 	})
+	titleCmd := &cobra.Command{
+		Use:   "title <agent-file>|<agent-name> <question>",
+		Short: "Generate a session title from a question",
+		Args:  cobra.ExactArgs(2),
+		RunE:  flags.runDebugTitleCommand,
+	}
+	titleCmd.Flags().StringArrayVar(&flags.modelOverrides, "model", nil, "Override agent model: [agent=]provider/model (repeatable)")
+	cmd.AddCommand(titleCmd)
 
 	addRuntimeConfigFlags(cmd, &flags.runConfig)
 
 	return cmd
 }
 
+// resolveSource resolves an agent file reference to a config source.
+func (f *debugFlags) resolveSource(agentFilename string) (config.Source, error) {
+	return config.Resolve(agentFilename, f.runConfig.EnvProvider())
+}
+
+// loadTeam loads an agent team from the given agent file and returns
+// a cleanup function that must be deferred by the caller.
+func (f *debugFlags) loadTeam(ctx context.Context, agentFilename string, opts ...teamloader.Opt) (*team.Team, func(), error) { + agentSource, err := f.resolveSource(agentFilename) + if err != nil { + return nil, nil, err + } + + t, err := teamloader.Load(ctx, agentSource, &f.runConfig, opts...) + if err != nil { + return nil, nil, err + } + + cleanup := func() { + if err := t.StopToolSets(ctx); err != nil { + slog.Error("Failed to stop tool sets", "error", err) + } + } + + return t, cleanup, nil +} + func (f *debugFlags) runDebugConfigCommand(cmd *cobra.Command, args []string) error { telemetry.TrackCommand("debug", append([]string{"config"}, args...)) - ctx := cmd.Context() - agentFilename := args[0] - - agentSource, err := config.Resolve(agentFilename, f.runConfig.EnvProvider()) + agentSource, err := f.resolveSource(args[0]) if err != nil { return err } - cfg, err := config.Load(ctx, agentSource) + cfg, err := config.Load(cmd.Context(), agentSource) if err != nil { return err } @@ -66,21 +103,17 @@ func (f *debugFlags) runDebugToolsetsCommand(cmd *cobra.Command, args []string) telemetry.TrackCommand("debug", append([]string{"toolsets"}, args...)) ctx := cmd.Context() - agentFilename := args[0] - out := cli.NewPrinter(cmd.OutOrStdout()) - agentSource, err := config.Resolve(agentFilename, f.runConfig.EnvProvider()) + t, cleanup, err := f.loadTeam(ctx, args[0]) if err != nil { return err } + defer cleanup() - team, err := teamloader.Load(ctx, agentSource, &f.runConfig) - if err != nil { - return err - } + out := cli.NewPrinter(cmd.OutOrStdout()) - for _, name := range team.AgentNames() { - agent, err := team.Agent(name) + for _, name := range t.AgentNames() { + agent, err := t.Agent(name) if err != nil { slog.Error("Failed to get agent", "name", name, "error", err) continue @@ -103,9 +136,39 @@ func (f *debugFlags) runDebugToolsetsCommand(cmd *cobra.Command, args []string) } } - if err := team.StopToolSets(ctx); err != nil { - slog.Error("Failed to stop tool sets", "error", err) + return nil +} + +func (f *debugFlags) runDebugTitleCommand(cmd *cobra.Command, args []string) error { + telemetry.TrackCommand("debug", append([]string{"title"}, args...)) + + ctx := cmd.Context() + + t, cleanup, err := f.loadTeam(ctx, args[0], teamloader.WithModelOverrides(f.modelOverrides)) + if err != nil { + return err } + defer cleanup() + + agent, err := t.DefaultAgent() + if err != nil { + return err + } + + model := agent.Model() + if model == nil { + return fmt.Errorf("agent %q has no model configured", agent.Name()) + } + + // Use the same title generation code path as the TUI (see runTUI in new.go) + gen := sessiontitle.New(model, agent.FallbackModels()...) 
+
+	title, err := gen.Generate(ctx, "debug", []string{args[1]})
+	if err != nil {
+		return fmt.Errorf("generating title: %w", err)
+	}
+
+	fmt.Fprintln(cmd.OutOrStdout(), title)
 
-	return err
+	return nil
 }
diff --git a/e2e/cagent_debug_test.go b/e2e/cagent_debug_test.go
index 0c73d5ef5..20a229337 100644
--- a/e2e/cagent_debug_test.go
+++ b/e2e/cagent_debug_test.go
@@ -1,21 +1,15 @@
 package e2e_test
 
 import (
-	"bytes"
-	"io"
-	"os"
-	"path/filepath"
 	"testing"
 
 	"github.com/stretchr/testify/require"
-
-	"github.com/docker/cagent/cmd/root"
 )
 
 func TestDebug_Toolsets_None(t *testing.T) {
 	t.Parallel()
 
-	output := cagentDebug(t, "toolsets", "testdata/no_tools.yaml")
+	output := cagent(t, "debug", "toolsets", "testdata/no_tools.yaml")
 
 	require.Equal(t, "No tools for root\n", output)
 }
 
@@ -23,27 +17,7 @@ func TestDebug_Toolsets_None(t *testing.T) {
 func TestDebug_Toolsets_Todo(t *testing.T) {
 	t.Parallel()
 
-	output := cagentDebug(t, "toolsets", "testdata/todo_tools.yaml")
+	output := cagent(t, "debug", "toolsets", "testdata/todo_tools.yaml")
 
 	require.Equal(t, "2 tool(s) for root:\n + create_todo - Create a new todo item with a description\n + list_todos - List all current todos with their status\n", output)
 }
-
-func cagentDebug(t *testing.T, moreArgs ...string) string {
-	t.Helper()
-
-	// `cagent debug ...`
-	args := []string{"debug"}
-
-	// Use .env file to set DUMMY OPENAI key
-	dotEnv := filepath.Join(t.TempDir(), ".env")
-	err := os.WriteFile(dotEnv, []byte("OPENAI_API_KEY=DUMMY"), 0o644)
-	require.NoError(t, err)
-	args = append(args, "--env-from-file", dotEnv)
-
-	// Run cagent debug
-	var stdout bytes.Buffer
-	err = root.Execute(t.Context(), nil, &stdout, io.Discard, append(args, moreArgs...)...)
-	require.NoError(t, err)
-
-	return stdout.String()
-}
diff --git a/e2e/cagent_debug_title_test.go b/e2e/cagent_debug_title_test.go
new file mode 100644
index 000000000..1e7a01669
--- /dev/null
+++ b/e2e/cagent_debug_title_test.go
@@ -0,0 +1,41 @@
+package e2e_test
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+)
+
+func TestDebug_Title(t *testing.T) {
+	t.Parallel()
+
+	tests := []struct {
+		name  string
+		model string
+		want  string
+	}{
+		// OpenAI
+		{"OpenAI", "openai/gpt-4o", "Exploring AI Capabilities\n"},
+		{"OpenAI_gpt52pro", "openai/gpt-5.2-pro", "Assistant Capabilities Overview\n"},
+		{"OpenAI_gpt52codex", "openai/gpt-5.2-codex", "AI Assistant Capabilities\n"},
+
+		// Anthropic
+		{"Anthropic", "anthropic/claude-haiku-4-5", "AI Assistant Capabilities Overview\n"},
+		{"Anthropic_Sonnet45", "anthropic/claude-sonnet-4-5", "What can you do?\n"},
+		{"Anthropic_Opus46", "anthropic/claude-opus-4-6", "AI Assistant Capabilities Overview\n"},
+
+		// Google
+		{"Google_Gemini25FlashLite", "google/gemini-2.5-flash-lite", "AI Capabilities Overview\n"},
+		{"Google_Gemini3ProPreview", "google/gemini-3-pro-preview", "AI Capabilities Inquiry\n"},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			t.Parallel()
+
+			title := cagent(t, "debug", "title", "testdata/basic.yaml", "--model="+tt.model, "What can you do?")
+
+			assert.Equal(t, tt.want, title)
+		})
+	}
+}
diff --git a/e2e/cagent_exec_test.go b/e2e/cagent_exec_test.go
index ba3263506..b93014e05 100644
--- a/e2e/cagent_exec_test.go
+++ b/e2e/cagent_exec_test.go
@@ -1,19 +1,13 @@
 package e2e_test
 
 import (
-	"bytes"
-	"io"
-	"os"
-	"path/filepath"
 	"testing"
 
 	"github.com/stretchr/testify/require"
-
-	"github.com/docker/cagent/cmd/root"
 )
 
 func TestExec_OpenAI(t *testing.T) {
-	out := cagentExec(t, "testdata/basic.yaml",
"What's 2+2?") + out := cagent(t, "exec", "testdata/basic.yaml", "What's 2+2?") require.Equal(t, "\n--- Agent: root ---\n2 + 2 equals 4.", out) } @@ -21,7 +15,7 @@ func TestExec_OpenAI(t *testing.T) { // TestExec_OpenAI_V3Config tests that v3 configs work correctly with thinking disabled by default. // This uses gpt-5 with a v3 config file to verify thinking is disabled for old config versions. func TestExec_OpenAI_V3Config(t *testing.T) { - out := cagentExec(t, "testdata/basic_v3.yaml", "What's 2+2?") + out := cagent(t, "exec", "testdata/basic_v3.yaml", "What's 2+2?") // v3 config with gpt-5 should work correctly (thinking disabled by default for old configs) require.Equal(t, "\n--- Agent: root ---\n4", out) @@ -30,7 +24,7 @@ func TestExec_OpenAI_V3Config(t *testing.T) { // TestExec_OpenAI_WithThinkingBudget tests that when thinking_budget is explicitly configured // in the YAML, thinking is enabled by default (without needing /think command). func TestExec_OpenAI_WithThinkingBudget(t *testing.T) { - out := cagentExec(t, "testdata/basic_with_thinking.yaml", "What's 2+2?") + out := cagent(t, "exec", "testdata/basic_with_thinking.yaml", "What's 2+2?") // With thinking_budget explicitly configured, response should include reasoning // The output format includes the reasoning summary when thinking is enabled @@ -39,19 +33,19 @@ func TestExec_OpenAI_WithThinkingBudget(t *testing.T) { } func TestExec_OpenAI_ToolCall(t *testing.T) { - out := cagentExec(t, "testdata/fs_tools.yaml", "How many files in testdata/working_dir? Only output the number.") + out := cagent(t, "exec", "testdata/fs_tools.yaml", "How many files in testdata/working_dir? Only output the number.") require.Equal(t, "\n--- Agent: root ---\n\nCalling list_directory(path: \"testdata/working_dir\")\n\nlist_directory response → \"FILE README.me\\n\"\n1", out) } func TestExec_OpenAI_HideToolCalls(t *testing.T) { - out := cagentExec(t, "testdata/fs_tools.yaml", "--hide-tool-calls", "How many files in testdata/working_dir? Only output the number.") + out := cagent(t, "exec", "testdata/fs_tools.yaml", "--hide-tool-calls", "How many files in testdata/working_dir? 
Only output the number.") require.Equal(t, "\n--- Agent: root ---\n1", out) } func TestExec_OpenAI_gpt5(t *testing.T) { - out := cagentExec(t, "testdata/basic.yaml", "--model=openai/gpt-5", "What's 2+2?") + out := cagent(t, "exec", "testdata/basic.yaml", "--model=openai/gpt-5", "What's 2+2?") // With thinking enabled by default, response may include reasoning summary require.Contains(t, out, "--- Agent: root ---") @@ -59,13 +53,13 @@ func TestExec_OpenAI_gpt5(t *testing.T) { } func TestExec_OpenAI_gpt5_1(t *testing.T) { - out := cagentExec(t, "testdata/basic.yaml", "--model=openai/gpt-5.1", "What's 2+2?") + out := cagent(t, "exec", "testdata/basic.yaml", "--model=openai/gpt-5.1", "What's 2+2?") require.Equal(t, "\n--- Agent: root ---\n2 + 2 = 4.", out) } func TestExec_OpenAI_gpt5_codex(t *testing.T) { - out := cagentExec(t, "testdata/basic.yaml", "--model=openai/gpt-5-codex", "What's 2+2?") + out := cagent(t, "exec", "testdata/basic.yaml", "--model=openai/gpt-5-codex", "What's 2+2?") // Model reasoning summary varies, just check for the core response require.Contains(t, out, "--- Agent: root ---") @@ -73,7 +67,7 @@ func TestExec_OpenAI_gpt5_codex(t *testing.T) { } func TestExec_Anthropic(t *testing.T) { - out := cagentExec(t, "testdata/basic.yaml", "--model=anthropic/claude-sonnet-4-0", "What's 2+2?") + out := cagent(t, "exec", "testdata/basic.yaml", "--model=anthropic/claude-sonnet-4-0", "What's 2+2?") // With interleaved thinking enabled by default, Anthropic responses include thinking content require.Contains(t, out, "--- Agent: root ---") @@ -81,7 +75,7 @@ func TestExec_Anthropic(t *testing.T) { } func TestExec_Anthropic_ToolCall(t *testing.T) { - out := cagentExec(t, "testdata/fs_tools.yaml", "--model=anthropic/claude-sonnet-4-0", "How many files in testdata/working_dir? Only output the number.") + out := cagent(t, "exec", "testdata/fs_tools.yaml", "--model=anthropic/claude-sonnet-4-0", "How many files in testdata/working_dir? Only output the number.") // With interleaved thinking enabled by default, Anthropic responses include thinking content require.Contains(t, out, "--- Agent: root ---") @@ -92,7 +86,7 @@ func TestExec_Anthropic_ToolCall(t *testing.T) { } func TestExec_Anthropic_AgentsMd(t *testing.T) { - out := cagentExec(t, "testdata/agents-md.yaml", "--model=anthropic/claude-sonnet-4-0", "What's 2+2?") + out := cagent(t, "exec", "testdata/agents-md.yaml", "--model=anthropic/claude-sonnet-4-0", "What's 2+2?") // With interleaved thinking enabled by default, Anthropic responses include thinking content require.Contains(t, out, "--- Agent: root ---") @@ -100,7 +94,7 @@ func TestExec_Anthropic_AgentsMd(t *testing.T) { } func TestExec_Gemini(t *testing.T) { - out := cagentExec(t, "testdata/basic.yaml", "--model=google/gemini-2.5-flash", "What's 2+2?") + out := cagent(t, "exec", "testdata/basic.yaml", "--model=google/gemini-2.5-flash", "What's 2+2?") // With thinking enabled by default (dynamic thinking for Gemini 2.5), responses may include thinking content require.Contains(t, out, "--- Agent: root ---") @@ -109,7 +103,7 @@ func TestExec_Gemini(t *testing.T) { } func TestExec_Gemini_ToolCall(t *testing.T) { - out := cagentExec(t, "testdata/fs_tools.yaml", "--model=google/gemini-2.5-flash", "How many files in testdata/working_dir? Only output the number.") + out := cagent(t, "exec", "testdata/fs_tools.yaml", "--model=google/gemini-2.5-flash", "How many files in testdata/working_dir? 
Only output the number.") // With thinking enabled by default (dynamic thinking for Gemini 2.5), responses include thinking content require.Contains(t, out, "--- Agent: root ---") @@ -120,13 +114,13 @@ func TestExec_Gemini_ToolCall(t *testing.T) { } func TestExec_Mistral(t *testing.T) { - out := cagentExec(t, "testdata/basic.yaml", "--model=mistral/mistral-small", "What's 2+2?") + out := cagent(t, "exec", "testdata/basic.yaml", "--model=mistral/mistral-small", "What's 2+2?") require.Equal(t, "\n--- Agent: root ---\nThe sum of 2 + 2 is 4.", out) } func TestExec_Mistral_ToolCall(t *testing.T) { - out := cagentExec(t, "testdata/fs_tools.yaml", "--model=mistral/mistral-small", "How many files in testdata/working_dir? Only output the number.") + out := cagent(t, "exec", "testdata/fs_tools.yaml", "--model=mistral/mistral-small", "How many files in testdata/working_dir? Only output the number.") // NOTE: If you look at the LLM response, Mistral says it sees 2 files, yours truly got tired of re-running this test to get it to say "1". // For now, just update the expected output @@ -134,33 +128,7 @@ func TestExec_Mistral_ToolCall(t *testing.T) { } func TestExec_ToolCallsNeedAcceptance(t *testing.T) { - out := cagentExec(t, "testdata/file_writer.yaml", "Create a hello.txt file with \"Hello, World!\" content. Try only once. On error, exit without further message.") + out := cagent(t, "exec", "testdata/file_writer.yaml", "Create a hello.txt file with \"Hello, World!\" content. Try only once. On error, exit without further message.") require.Contains(t, out, `Can I run this tool? ([y]es/[a]ll/[n]o)`) } - -func cagentExec(t *testing.T, moreArgs ...string) string { - t.Helper() - - // `cagent exec ...` - args := []string{"exec"} - - // Use a dummy .env file to avoid using real JWT. Our proxy server doesn't need it. - dotEnv := filepath.Join(t.TempDir(), ".env") - err := os.WriteFile(dotEnv, []byte("DOCKER_TOKEN=DUMMY"), 0o644) - require.NoError(t, err) - args = append(args, "--env-from-file", dotEnv) - - // Start a recording AI proxy to record and replay traffic. - svr, _ := startRecordingAIProxy(t) - // Use a unique session DB path per test to avoid conflicts when tests run in parallel - sessionDB := filepath.Join(t.TempDir(), "session.db") - args = append(args, "--models-gateway", svr.URL, "--session-db", sessionDB) - - // Run cagent exec - var stdout bytes.Buffer - err = root.Execute(t.Context(), nil, &stdout, io.Discard, append(args, moreArgs...)...) - require.NoError(t, err) - - return stdout.String() -} diff --git a/e2e/helpers_test.go b/e2e/helpers_test.go new file mode 100644 index 000000000..76d57e85c --- /dev/null +++ b/e2e/helpers_test.go @@ -0,0 +1,48 @@ +package e2e_test + +import ( + "bytes" + "io" + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/require" + + "github.com/docker/cagent/cmd/root" +) + +// cagent runs a cagent CLI command and returns its stdout. +// The first argument is the command name ("exec", "debug", etc.). +// Commands that talk to an AI model ("exec", "debug title") automatically +// get a recording AI proxy. The "exec" command also gets a unique session DB. +func cagent(t *testing.T, command string, moreArgs ...string) string { + t.Helper() + + args := []string{command} + + // Use .env file to set dummy env vars so config loading doesn't fail. 
+ dotEnv := filepath.Join(t.TempDir(), ".env") + err := os.WriteFile(dotEnv, []byte("OPENAI_API_KEY=DUMMY\nDOCKER_TOKEN=DUMMY"), 0o644) + require.NoError(t, err) + args = append(args, "--env-from-file", dotEnv) + + // Commands that talk to an AI model need a recording AI proxy. + needsProxy := command == "exec" || (command == "debug" && len(moreArgs) > 0 && moreArgs[0] == "title") + if needsProxy { + svr, _ := startRecordingAIProxy(t) + args = append(args, "--models-gateway", svr.URL) + } + + // The exec command needs a unique session DB per test. + if command == "exec" { + sessionDB := filepath.Join(t.TempDir(), "session.db") + args = append(args, "--session-db", sessionDB) + } + + var stdout bytes.Buffer + err = root.Execute(t.Context(), nil, &stdout, io.Discard, append(args, moreArgs...)...) + require.NoError(t, err) + + return stdout.String() +} diff --git a/e2e/testdata/cassettes/TestDebug_Title/Anthropic.yaml b/e2e/testdata/cassettes/TestDebug_Title/Anthropic.yaml new file mode 100644 index 000000000..021ca0be9 --- /dev/null +++ b/e2e/testdata/cassettes/TestDebug_Title/Anthropic.yaml @@ -0,0 +1,50 @@ +--- +version: 2 +interactions: + - id: 0 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + host: api.anthropic.com + body: '{"max_tokens":20,"messages":[{"content":[{"text":"Based on the following recent user messages from a conversation with an AI assistant, generate a short, descriptive title (maximum 50 characters) that captures the main topic or purpose of the conversation. Return ONLY the title text on a single line, nothing else. Do not include any newlines, explanations, or formatting.\n\nRecent user messages:\n1. What can you do?","cache_control":{"type":"ephemeral"},"type":"text"}],"role":"user"}],"model":"claude-haiku-4-5","system":[{"text":"You are a helpful AI assistant that generates concise, descriptive titles for conversations. You will be given up to 2 recent user messages and asked to create a single-line title that captures the main topic. 
Never use newlines or line breaks in your response.","type":"text"}],"tools":[],"stream":true}' + url: https://api.anthropic.com/v1/messages + method: POST + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + content_length: -1 + body: |+ + event: message_start + data: {"type":"message_start","message":{"model":"claude-haiku-4-5-20251001","id":"msg_01SGrZj4Fy5RvpAaNZZom6rJ","type":"message","role":"assistant","content":[],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":142,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":1,"service_tier":"standard","inference_geo":"not_available"}} } + + event: content_block_start + data: {"type":"content_block_start","index":0,"content_block":{"type":"text","text":""} } + + event: ping + data: {"type": "ping"} + + event: content_block_delta + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"AI"} } + + event: content_block_delta + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" Assistant"} } + + event: content_block_delta + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" Capabilities Overview"}} + + event: content_block_stop + data: {"type":"content_block_stop","index":0 } + + event: message_delta + data: {"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"input_tokens":142,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"output_tokens":8} } + + event: message_stop + data: {"type":"message_stop" } + + headers: {} + status: 200 OK + code: 200 + duration: 523.576583ms diff --git a/e2e/testdata/cassettes/TestDebug_Title/Anthropic_Opus46.yaml b/e2e/testdata/cassettes/TestDebug_Title/Anthropic_Opus46.yaml new file mode 100644 index 000000000..49ee8f6f5 --- /dev/null +++ b/e2e/testdata/cassettes/TestDebug_Title/Anthropic_Opus46.yaml @@ -0,0 +1,53 @@ +--- +version: 2 +interactions: + - id: 0 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + host: api.anthropic.com + body: '{"max_tokens":20,"messages":[{"content":[{"text":"Based on the following recent user messages from a conversation with an AI assistant, generate a short, descriptive title (maximum 50 characters) that captures the main topic or purpose of the conversation. Return ONLY the title text on a single line, nothing else. Do not include any newlines, explanations, or formatting.\n\nRecent user messages:\n1. What can you do?","cache_control":{"type":"ephemeral"},"type":"text"}],"role":"user"}],"model":"claude-opus-4-6","system":[{"text":"You are a helpful AI assistant that generates concise, descriptive titles for conversations. You will be given up to 2 recent user messages and asked to create a single-line title that captures the main topic. 
Never use newlines or line breaks in your response.","type":"text"}],"tools":[],"stream":true}' + url: https://api.anthropic.com/v1/messages + method: POST + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + content_length: -1 + body: |+ + event: message_start + data: {"type":"message_start","message":{"model":"claude-opus-4-6","id":"msg_01QRM9s1dKM1QPLwWoEXCKSS","type":"message","role":"assistant","content":[],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":143,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":1,"service_tier":"standard","inference_geo":"global"}} } + + event: content_block_start + data: {"type":"content_block_start","index":0,"content_block":{"type":"text","text":""} } + + event: ping + data: {"type": "ping"} + + event: content_block_delta + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"AI"} } + + event: content_block_delta + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" Assistant"} } + + event: content_block_delta + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" Capabilities"} } + + event: content_block_delta + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" Overview"} } + + event: content_block_stop + data: {"type":"content_block_stop","index":0 } + + event: message_delta + data: {"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"input_tokens":143,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"output_tokens":8} } + + event: message_stop + data: {"type":"message_stop" } + + headers: {} + status: 200 OK + code: 200 + duration: 1.419557417s diff --git a/e2e/testdata/cassettes/TestDebug_Title/Anthropic_Sonnet45.yaml b/e2e/testdata/cassettes/TestDebug_Title/Anthropic_Sonnet45.yaml new file mode 100644 index 000000000..e69124052 --- /dev/null +++ b/e2e/testdata/cassettes/TestDebug_Title/Anthropic_Sonnet45.yaml @@ -0,0 +1,47 @@ +--- +version: 2 +interactions: + - id: 0 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + host: api.anthropic.com + body: '{"max_tokens":20,"messages":[{"content":[{"text":"Based on the following recent user messages from a conversation with an AI assistant, generate a short, descriptive title (maximum 50 characters) that captures the main topic or purpose of the conversation. Return ONLY the title text on a single line, nothing else. Do not include any newlines, explanations, or formatting.\n\nRecent user messages:\n1. What can you do?","cache_control":{"type":"ephemeral"},"type":"text"}],"role":"user"}],"model":"claude-sonnet-4-5","system":[{"text":"You are a helpful AI assistant that generates concise, descriptive titles for conversations. You will be given up to 2 recent user messages and asked to create a single-line title that captures the main topic. 
Never use newlines or line breaks in your response.","type":"text"}],"tools":[],"stream":true}' + url: https://api.anthropic.com/v1/messages + method: POST + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + content_length: -1 + body: |+ + event: message_start + data: {"type":"message_start","message":{"model":"claude-sonnet-4-5-20250929","id":"msg_01XwrCT2Bq52dCe3HLLTBWrV","type":"message","role":"assistant","content":[],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":142,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":1,"service_tier":"standard","inference_geo":"not_available"}} } + + event: content_block_start + data: {"type":"content_block_start","index":0,"content_block":{"type":"text","text":""} } + + event: ping + data: {"type": "ping"} + + event: content_block_delta + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"What"} } + + event: content_block_delta + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" can you do?"} } + + event: content_block_stop + data: {"type":"content_block_stop","index":0 } + + event: message_delta + data: {"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"input_tokens":142,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"output_tokens":8} } + + event: message_stop + data: {"type":"message_stop" } + + headers: {} + status: 200 OK + code: 200 + duration: 1.213171709s diff --git a/e2e/testdata/cassettes/TestDebug_Title/Google_Gemini25FlashLite.yaml b/e2e/testdata/cassettes/TestDebug_Title/Google_Gemini25FlashLite.yaml new file mode 100644 index 000000000..e824f41d2 --- /dev/null +++ b/e2e/testdata/cassettes/TestDebug_Title/Google_Gemini25FlashLite.yaml @@ -0,0 +1,27 @@ +--- +version: 2 +interactions: + - id: 0 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + host: generativelanguage.googleapis.com + body: | + {"contents":[{"parts":[{"text":"You are a helpful AI assistant that generates concise, descriptive titles for conversations. You will be given up to 2 recent user messages and asked to create a single-line title that captures the main topic. Never use newlines or line breaks in your response."}],"role":"user"},{"parts":[{"text":"Based on the following recent user messages from a conversation with an AI assistant, generate a short, descriptive title (maximum 50 characters) that captures the main topic or purpose of the conversation. Return ONLY the title text on a single line, nothing else. Do not include any newlines, explanations, or formatting.\n\nRecent user messages:\n1. 
What can you do?\n\n\n"}],"role":"user"}],"generationConfig":{"maxOutputTokens":20,"thinkingConfig":{"thinkingBudget":0}}} + form: + alt: + - sse + url: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash-lite:streamGenerateContent?alt=sse + method: POST + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + content_length: -1 + body: "data: {\"candidates\": [{\"content\": {\"parts\": [{\"text\": \"AI Capabilities\"}],\"role\": \"model\"},\"index\": 0}],\"usageMetadata\": {\"promptTokenCount\": 132,\"candidatesTokenCount\": 2,\"totalTokenCount\": 134,\"promptTokensDetails\": [{\"modality\": \"TEXT\",\"tokenCount\": 132}]},\"modelVersion\": \"gemini-2.5-flash-lite\",\"responseId\": \"wt6FacKuBcWnkdUP39X94Ag\"}\r\n\r\ndata: {\"candidates\": [{\"content\": {\"parts\": [{\"text\": \" Overview\"}],\"role\": \"model\"},\"finishReason\": \"STOP\",\"index\": 0}],\"usageMetadata\": {\"promptTokenCount\": 132,\"candidatesTokenCount\": 3,\"totalTokenCount\": 135,\"promptTokensDetails\": [{\"modality\": \"TEXT\",\"tokenCount\": 132}]},\"modelVersion\": \"gemini-2.5-flash-lite\",\"responseId\": \"wt6FacKuBcWnkdUP39X94Ag\"}\r\n\r\n" + headers: {} + status: 200 OK + code: 200 + duration: 361.108ms diff --git a/e2e/testdata/cassettes/TestDebug_Title/Google_Gemini3ProPreview.yaml b/e2e/testdata/cassettes/TestDebug_Title/Google_Gemini3ProPreview.yaml new file mode 100644 index 000000000..ddeeabfbc --- /dev/null +++ b/e2e/testdata/cassettes/TestDebug_Title/Google_Gemini3ProPreview.yaml @@ -0,0 +1,27 @@ +--- +version: 2 +interactions: + - id: 0 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + host: generativelanguage.googleapis.com + body: | + {"contents":[{"parts":[{"text":"You are a helpful AI assistant that generates concise, descriptive titles for conversations. You will be given up to 2 recent user messages and asked to create a single-line title that captures the main topic. Never use newlines or line breaks in your response."}],"role":"user"},{"parts":[{"text":"Based on the following recent user messages from a conversation with an AI assistant, generate a short, descriptive title (maximum 50 characters) that captures the main topic or purpose of the conversation. Return ONLY the title text on a single line, nothing else. Do not include any newlines, explanations, or formatting.\n\nRecent user messages:\n1. 
What can you do?\n\n\n"}],"role":"user"}],"generationConfig":{"maxOutputTokens":200,"thinkingConfig":{"thinkingLevel":"LOW"}}} + form: + alt: + - sse + url: https://generativelanguage.googleapis.com/v1beta/models/gemini-3-pro-preview:streamGenerateContent?alt=sse + method: POST + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + content_length: -1 + body: "data: {\"candidates\": [{\"content\": {\"parts\": [{\"text\": \"AI Capabilities Inquiry\"}],\"role\": \"model\"},\"index\": 0}],\"usageMetadata\": {\"promptTokenCount\": 132,\"candidatesTokenCount\": 3,\"totalTokenCount\": 256,\"promptTokensDetails\": [{\"modality\": \"TEXT\",\"tokenCount\": 132}],\"thoughtsTokenCount\": 121},\"modelVersion\": \"gemini-3-pro-preview\",\"responseId\": \"HueFaaqiMbK0vdIPhoCD-A8\"}\r\n\r\ndata: {\"candidates\": [{\"content\": {\"parts\": [{\"text\": \"\",\"thoughtSignature\": \"EuIECt8EAb4+9vu1obEirrLzTDws71xGqNAvZgVSzDyLUZWQvnuxrWpclzTD4zk14DXgytloMMU89nBt6AZLsYX6aurN6mty8dLi0bXBXvFqbytr+6kTT1XKx6PxkP4646Yw5/1DAJ6adz3v++WZAWNXO4towvVJ69l3xcJkAjmITU2vbYQTRcxrn09oOpZWMDNzg/gC7YJAbzEdUfIF0VEaAPucALqyYHjzkr+FGy8HCYNxU2R3/I8BLHtxobsLWaaSnL3qBLJ9RICBDHC5mIoNGurIUulH7g2rMITMLsixcg1xiGyM6mkKu69/x9MRA7NjJ9zQdbFYx3hi8AvBVvFqwCqTUlr/tvkWSVU5XY52eCRiVLBZsHipaYdZDvCifGUaLvtS/iu59T7LlLq9yDAa+Q0SuxULcr/g8HTFlyRtlUZDAOlbGOdKorZrW5tZ1ftOdaKmGV1OY6LiuY0kRyzYBhv8BXzFTLiosqBYECHdHIVuQfS9Apy22Sbghk7WUGF8kZjirqNTnoPg1XW26bXpdRPitKV7xC8a5DG9wNLKNpAKgco+B08AdjcyMc9uhTzWXB3aJWr0Biy1glZSlF+dAV+b7HrvmmKUppaHkdlitgstTEPcL86dqSEzLIQCLwtxGzb4KDFTaQv8Jm4MW/B3v5PtgO7uWlPHQ4jOpE3h2Gi5bskpi7gdZG70RtsSAeINlcxgrCnEqrys1IdC075H1Y+FDDknU6SPF4mkxxF7grNduKxTiy5SO4djUJJ+yK4FRNycp5w6zjqZuG/iGpS/TgHged5BfQEG4kVioQhqZPZ13w==\"}],\"role\": \"model\"},\"finishReason\": \"STOP\",\"index\": 0}],\"usageMetadata\": {\"promptTokenCount\": 132,\"candidatesTokenCount\": 3,\"totalTokenCount\": 256,\"promptTokensDetails\": [{\"modality\": \"TEXT\",\"tokenCount\": 132}],\"thoughtsTokenCount\": 121},\"modelVersion\": \"gemini-3-pro-preview\",\"responseId\": \"HueFaaqiMbK0vdIPhoCD-A8\"}\r\n\r\n" + headers: {} + status: 200 OK + code: 200 + duration: 3.353562s diff --git a/e2e/testdata/cassettes/TestDebug_Title/OpenAI.yaml b/e2e/testdata/cassettes/TestDebug_Title/OpenAI.yaml new file mode 100644 index 000000000..dc30feeae --- /dev/null +++ b/e2e/testdata/cassettes/TestDebug_Title/OpenAI.yaml @@ -0,0 +1,41 @@ +--- +version: 2 +interactions: + - id: 0 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + host: api.openai.com + body: '{"messages":[{"content":"You are a helpful AI assistant that generates concise, descriptive titles for conversations. You will be given up to 2 recent user messages and asked to create a single-line title that captures the main topic. Never use newlines or line breaks in your response.","role":"system"},{"content":"Based on the following recent user messages from a conversation with an AI assistant, generate a short, descriptive title (maximum 50 characters) that captures the main topic or purpose of the conversation. Return ONLY the title text on a single line, nothing else. Do not include any newlines, explanations, or formatting.\n\nRecent user messages:\n1. 
What can you do?\n\n\n","role":"user"}],"model":"gpt-4o","max_tokens":20,"stream_options":{"include_usage":true},"stream":true}' + url: https://api.openai.com/v1/chat/completions + method: POST + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + content_length: -1 + body: |+ + data: {"id":"chatcmpl-D6FG6ZjGi5JLJkFUBgSG6za7wTjXm","object":"chat.completion.chunk","created":1770380994,"model":"gpt-4o-2024-08-06","service_tier":"default","system_fingerprint":"fp_ad98c18a04","choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"47RDV4Or0pcUlE"} + + data: {"id":"chatcmpl-D6FG6ZjGi5JLJkFUBgSG6za7wTjXm","object":"chat.completion.chunk","created":1770380994,"model":"gpt-4o-2024-08-06","service_tier":"default","system_fingerprint":"fp_ad98c18a04","choices":[{"index":0,"delta":{"content":"Expl"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"u3Nt48up4kC4"} + + data: {"id":"chatcmpl-D6FG6ZjGi5JLJkFUBgSG6za7wTjXm","object":"chat.completion.chunk","created":1770380994,"model":"gpt-4o-2024-08-06","service_tier":"default","system_fingerprint":"fp_ad98c18a04","choices":[{"index":0,"delta":{"content":"oring"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"eBswDvPwWiW"} + + data: {"id":"chatcmpl-D6FG6ZjGi5JLJkFUBgSG6za7wTjXm","object":"chat.completion.chunk","created":1770380994,"model":"gpt-4o-2024-08-06","service_tier":"default","system_fingerprint":"fp_ad98c18a04","choices":[{"index":0,"delta":{"content":" AI"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"Ngs5pHM5KjJ3C"} + + data: {"id":"chatcmpl-D6FG6ZjGi5JLJkFUBgSG6za7wTjXm","object":"chat.completion.chunk","created":1770380994,"model":"gpt-4o-2024-08-06","service_tier":"default","system_fingerprint":"fp_ad98c18a04","choices":[{"index":0,"delta":{"content":" Cap"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"Lbila85cKi94"} + + data: {"id":"chatcmpl-D6FG6ZjGi5JLJkFUBgSG6za7wTjXm","object":"chat.completion.chunk","created":1770380994,"model":"gpt-4o-2024-08-06","service_tier":"default","system_fingerprint":"fp_ad98c18a04","choices":[{"index":0,"delta":{"content":"abilities"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"3SK8jBH"} + + data: {"id":"chatcmpl-D6FG6ZjGi5JLJkFUBgSG6za7wTjXm","object":"chat.completion.chunk","created":1770380994,"model":"gpt-4o-2024-08-06","service_tier":"default","system_fingerprint":"fp_ad98c18a04","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null,"obfuscation":"7TyACIGxK8"} + + data: {"id":"chatcmpl-D6FG6ZjGi5JLJkFUBgSG6za7wTjXm","object":"chat.completion.chunk","created":1770380994,"model":"gpt-4o-2024-08-06","service_tier":"default","system_fingerprint":"fp_ad98c18a04","choices":[],"usage":{"prompt_tokens":136,"completion_tokens":5,"total_tokens":141,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}},"obfuscation":"loBiUSCHJDLiDD"} + + data: [DONE] + + headers: {} + status: 200 OK + code: 200 + duration: 794.179417ms diff --git a/e2e/testdata/cassettes/TestDebug_Title/OpenAI_gpt52codex.yaml b/e2e/testdata/cassettes/TestDebug_Title/OpenAI_gpt52codex.yaml new file mode 100644 index 000000000..4c357edda --- /dev/null +++ b/e2e/testdata/cassettes/TestDebug_Title/OpenAI_gpt52codex.yaml @@ -0,0 +1,65 @@ +--- +version: 2 +interactions: + - id: 0 + 
request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + host: api.openai.com + body: '{"max_output_tokens":200,"input":[{"content":[{"text":"You are a helpful AI assistant that generates concise, descriptive titles for conversations. You will be given up to 2 recent user messages and asked to create a single-line title that captures the main topic. Never use newlines or line breaks in your response.","type":"input_text"}],"role":"system"},{"content":"Based on the following recent user messages from a conversation with an AI assistant, generate a short, descriptive title (maximum 50 characters) that captures the main topic or purpose of the conversation. Return ONLY the title text on a single line, nothing else. Do not include any newlines, explanations, or formatting.\n\nRecent user messages:\n1. What can you do?\n\n\n","role":"user"}],"model":"gpt-5.2-codex","stream":true}' + url: https://api.openai.com/v1/responses + method: POST + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + content_length: -1 + body: |+ + event: response.created + data: {"type":"response.created","response":{"id":"resp_03e30bfd1331da3f006985e1caeb288195843499f6b6ae69f2","object":"response","created_at":1770381770,"status":"in_progress","background":false,"completed_at":null,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":200,"max_tool_calls":null,"model":"gpt-5.2-codex","output":[],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":null,"reasoning":{"effort":"medium","summary":null},"safety_identifier":null,"service_tier":"auto","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[],"top_logprobs":0,"top_p":0.98,"truncation":"disabled","usage":null,"user":null,"metadata":{}},"sequence_number":0} + + event: response.in_progress + data: {"type":"response.in_progress","response":{"id":"resp_03e30bfd1331da3f006985e1caeb288195843499f6b6ae69f2","object":"response","created_at":1770381770,"status":"in_progress","background":false,"completed_at":null,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":200,"max_tool_calls":null,"model":"gpt-5.2-codex","output":[],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":null,"reasoning":{"effort":"medium","summary":null},"safety_identifier":null,"service_tier":"auto","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[],"top_logprobs":0,"top_p":0.98,"truncation":"disabled","usage":null,"user":null,"metadata":{}},"sequence_number":1} + + event: response.output_item.added + data: {"type":"response.output_item.added","item":{"id":"rs_03e30bfd1331da3f006985e1cb766c8195bd9e07440182f2f5","type":"reasoning","summary":[]},"output_index":0,"sequence_number":2} + + event: response.output_item.done + data: {"type":"response.output_item.done","item":{"id":"rs_03e30bfd1331da3f006985e1cb766c8195bd9e07440182f2f5","type":"reasoning","summary":[]},"output_index":0,"sequence_number":3} + + event: response.output_item.added + data: {"type":"response.output_item.added","item":{"id":"msg_03e30bfd1331da3f006985e1cbaf1881959706d0351af38cb3","type":"message","status":"in_progress","content":[],"role":"assistant"},"output_index":1,"sequence_number":4} + + event: 
response.content_part.added + data: {"type":"response.content_part.added","content_index":0,"item_id":"msg_03e30bfd1331da3f006985e1cbaf1881959706d0351af38cb3","output_index":1,"part":{"type":"output_text","annotations":[],"logprobs":[],"text":""},"sequence_number":5} + + event: response.output_text.delta + data: {"type":"response.output_text.delta","content_index":0,"delta":"AI","item_id":"msg_03e30bfd1331da3f006985e1cbaf1881959706d0351af38cb3","logprobs":[],"obfuscation":"lTOLvzgepkzKRT","output_index":1,"sequence_number":6} + + event: response.output_text.delta + data: {"type":"response.output_text.delta","content_index":0,"delta":" Assistant","item_id":"msg_03e30bfd1331da3f006985e1cbaf1881959706d0351af38cb3","logprobs":[],"obfuscation":"WXixJu","output_index":1,"sequence_number":7} + + event: response.output_text.delta + data: {"type":"response.output_text.delta","content_index":0,"delta":" Cap","item_id":"msg_03e30bfd1331da3f006985e1cbaf1881959706d0351af38cb3","logprobs":[],"obfuscation":"IdMMtNM8Tyrk","output_index":1,"sequence_number":8} + + event: response.output_text.delta + data: {"type":"response.output_text.delta","content_index":0,"delta":"abilities","item_id":"msg_03e30bfd1331da3f006985e1cbaf1881959706d0351af38cb3","logprobs":[],"obfuscation":"rRHOu5z","output_index":1,"sequence_number":9} + + event: response.output_text.done + data: {"type":"response.output_text.done","content_index":0,"item_id":"msg_03e30bfd1331da3f006985e1cbaf1881959706d0351af38cb3","logprobs":[],"output_index":1,"sequence_number":10,"text":"AI Assistant Capabilities"} + + event: response.content_part.done + data: {"type":"response.content_part.done","content_index":0,"item_id":"msg_03e30bfd1331da3f006985e1cbaf1881959706d0351af38cb3","output_index":1,"part":{"type":"output_text","annotations":[],"logprobs":[],"text":"AI Assistant Capabilities"},"sequence_number":11} + + event: response.output_item.done + data: {"type":"response.output_item.done","item":{"id":"msg_03e30bfd1331da3f006985e1cbaf1881959706d0351af38cb3","type":"message","status":"completed","content":[{"type":"output_text","annotations":[],"logprobs":[],"text":"AI Assistant Capabilities"}],"role":"assistant"},"output_index":1,"sequence_number":12} + + event: response.completed + data: {"type":"response.completed","response":{"id":"resp_03e30bfd1331da3f006985e1caeb288195843499f6b6ae69f2","object":"response","created_at":1770381770,"status":"completed","background":false,"completed_at":1770381771,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":200,"max_tool_calls":null,"model":"gpt-5.2-codex","output":[{"id":"rs_03e30bfd1331da3f006985e1cb766c8195bd9e07440182f2f5","type":"reasoning","summary":[]},{"id":"msg_03e30bfd1331da3f006985e1cbaf1881959706d0351af38cb3","type":"message","status":"completed","content":[{"type":"output_text","annotations":[],"logprobs":[],"text":"AI Assistant 
Capabilities"}],"role":"assistant"}],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":null,"reasoning":{"effort":"medium","summary":null},"safety_identifier":null,"service_tier":"default","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[],"top_logprobs":0,"top_p":0.98,"truncation":"disabled","usage":{"input_tokens":135,"input_tokens_details":{"cached_tokens":0},"output_tokens":27,"output_tokens_details":{"reasoning_tokens":0},"total_tokens":162},"user":null,"metadata":{}},"sequence_number":13} + + headers: {} + status: 200 OK + code: 200 + duration: 368.565292ms diff --git a/e2e/testdata/cassettes/TestDebug_Title/OpenAI_gpt52pro.yaml b/e2e/testdata/cassettes/TestDebug_Title/OpenAI_gpt52pro.yaml new file mode 100644 index 000000000..318e291ca --- /dev/null +++ b/e2e/testdata/cassettes/TestDebug_Title/OpenAI_gpt52pro.yaml @@ -0,0 +1,50 @@ +--- +version: 2 +interactions: + - id: 0 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + host: api.openai.com + body: '{"max_output_tokens":20,"input":[{"content":[{"text":"You are a helpful AI assistant that generates concise, descriptive titles for conversations. You will be given up to 2 recent user messages and asked to create a single-line title that captures the main topic. Never use newlines or line breaks in your response.","type":"input_text"}],"role":"system"},{"content":"Based on the following recent user messages from a conversation with an AI assistant, generate a short, descriptive title (maximum 50 characters) that captures the main topic or purpose of the conversation. Return ONLY the title text on a single line, nothing else. Do not include any newlines, explanations, or formatting.\n\nRecent user messages:\n1. 
What can you do?\n\n\n","role":"user"}],"model":"gpt-5.2-pro","stream":true}' + url: https://api.openai.com/v1/responses + method: POST + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + content_length: -1 + body: |+ + event: response.created + data: {"type":"response.created","response":{"id":"resp_0d2bb1f9a07c98c6006985dec235dc819ab451fb14ee7c8feb","object":"response","created_at":1770380994,"status":"in_progress","background":false,"completed_at":null,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":20,"max_tool_calls":null,"model":"gpt-5.2-pro-2025-12-11","output":[],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":null,"reasoning":{"effort":"medium","summary":null},"safety_identifier":null,"service_tier":"auto","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[],"top_logprobs":0,"top_p":0.98,"truncation":"disabled","usage":null,"user":null,"metadata":{}},"sequence_number":0} + + event: response.in_progress + data: {"type":"response.in_progress","response":{"id":"resp_0d2bb1f9a07c98c6006985dec235dc819ab451fb14ee7c8feb","object":"response","created_at":1770380994,"status":"in_progress","background":false,"completed_at":null,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":20,"max_tool_calls":null,"model":"gpt-5.2-pro-2025-12-11","output":[],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":null,"reasoning":{"effort":"medium","summary":null},"safety_identifier":null,"service_tier":"auto","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[],"top_logprobs":0,"top_p":0.98,"truncation":"disabled","usage":null,"user":null,"metadata":{}},"sequence_number":1} + + event: response.output_item.added + data: {"type":"response.output_item.added","item":{"id":"msg_0d2bb1f9a07c98c6006985dec914cc819a96a2fed8dbf74777","type":"message","status":"completed","content":[],"role":"assistant"},"output_index":0,"sequence_number":2} + + event: response.content_part.added + data: {"type":"response.content_part.added","content_index":0,"item_id":"msg_0d2bb1f9a07c98c6006985dec914cc819a96a2fed8dbf74777","output_index":0,"part":{"type":"output_text","annotations":[],"logprobs":[],"text":""},"sequence_number":3} + + event: response.output_text.delta + data: {"type":"response.output_text.delta","content_index":0,"delta":"Assistant Capabilities Overview","item_id":"msg_0d2bb1f9a07c98c6006985dec914cc819a96a2fed8dbf74777","logprobs":[],"obfuscation":"4","output_index":0,"sequence_number":4} + + event: response.output_text.done + data: {"type":"response.output_text.done","content_index":0,"item_id":"msg_0d2bb1f9a07c98c6006985dec914cc819a96a2fed8dbf74777","logprobs":[],"output_index":0,"sequence_number":5,"text":"Assistant Capabilities Overview"} + + event: response.content_part.done + data: {"type":"response.content_part.done","content_index":0,"item_id":"msg_0d2bb1f9a07c98c6006985dec914cc819a96a2fed8dbf74777","output_index":0,"part":{"type":"output_text","annotations":[],"logprobs":[],"text":"Assistant Capabilities Overview"},"sequence_number":6} + + event: response.output_item.done + data: 
{"type":"response.output_item.done","item":{"id":"msg_0d2bb1f9a07c98c6006985dec914cc819a96a2fed8dbf74777","type":"message","status":"completed","content":[{"type":"output_text","annotations":[],"logprobs":[],"text":"Assistant Capabilities Overview"}],"role":"assistant"},"output_index":0,"sequence_number":7} + + event: response.completed + data: {"type":"response.completed","response":{"id":"resp_0d2bb1f9a07c98c6006985dec235dc819ab451fb14ee7c8feb","object":"response","created_at":1770380994,"status":"completed","background":false,"completed_at":1770381001,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":20,"max_tool_calls":null,"model":"gpt-5.2-pro-2025-12-11","output":[{"id":"msg_0d2bb1f9a07c98c6006985dec914cc819a96a2fed8dbf74777","type":"message","status":"completed","content":[{"type":"output_text","annotations":[],"logprobs":[],"text":"Assistant Capabilities Overview"}],"role":"assistant"}],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":null,"reasoning":{"effort":"medium","summary":null},"safety_identifier":null,"service_tier":"default","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[],"top_logprobs":0,"top_p":0.98,"truncation":"disabled","usage":{"input_tokens":135,"input_tokens_details":{"cached_tokens":0},"output_tokens":8,"output_tokens_details":{"reasoning_tokens":0},"total_tokens":143},"user":null,"metadata":{}},"sequence_number":8} + + headers: {} + status: 200 OK + code: 200 + duration: 569.464833ms diff --git a/pkg/model/provider/gemini/client.go b/pkg/model/provider/gemini/client.go index 84d668459..2527218c5 100644 --- a/pkg/model/provider/gemini/client.go +++ b/pkg/model/provider/gemini/client.go @@ -327,17 +327,35 @@ func (c *Client) buildConfig() *genai.GenerateContentConfig { // hang or fail. IncludeThoughts=false is also set to ensure no thinking content // is returned. if thinking := c.ModelOptions.Thinking(); thinking != nil && !*thinking { - // Explicitly disable thinking - required for operations like title generation - // where max_tokens is very low and thinking would consume the token budget. - // ThinkingBudget=0 disables thinking for both Gemini 2.5 and 3 models. - config.ThinkingConfig = &genai.ThinkingConfig{ - IncludeThoughts: false, - ThinkingBudget: genai.Ptr(int32(0)), + model := strings.ToLower(c.ModelConfig.Model) + if strings.HasPrefix(model, "gemini-3-") { + // Gemini 3 models require thinking — they reject ThinkingBudget=0. + // Use the lowest level instead and bump MaxOutputTokens so that + // even a tiny caller budget (e.g. 20 for title generation) leaves + // room for the model's internal reasoning. + config.ThinkingConfig = &genai.ThinkingConfig{ + IncludeThoughts: false, + ThinkingLevel: genai.ThinkingLevelLow, + } + const minOutputTokens int32 = 200 + if config.MaxOutputTokens < minOutputTokens { + config.MaxOutputTokens = minOutputTokens + } + slog.Debug("Gemini 3 thinking reduced to low (cannot be fully disabled)", + "model", c.ModelConfig.Model, + "max_output_tokens", config.MaxOutputTokens, + ) + } else { + // Gemini 2.5 and older: ThinkingBudget=0 disables thinking. 
+ config.ThinkingConfig = &genai.ThinkingConfig{ + IncludeThoughts: false, + ThinkingBudget: genai.Ptr(int32(0)), + } + slog.Debug("Gemini thinking explicitly disabled via ModelOptions", + "model", c.ModelConfig.Model, + "max_output_tokens", config.MaxOutputTokens, + ) } - slog.Debug("Gemini thinking explicitly disabled via ModelOptions", - "model", c.ModelConfig.Model, - "max_output_tokens", config.MaxOutputTokens, - ) } else if c.ModelConfig.ThinkingBudget != nil { c.applyThinkingConfig(config) } else { diff --git a/pkg/model/provider/gemini/client_test.go b/pkg/model/provider/gemini/client_test.go index 79cf81767..7c476ec35 100644 --- a/pkg/model/provider/gemini/client_test.go +++ b/pkg/model/provider/gemini/client_test.go @@ -286,24 +286,32 @@ func TestBuildConfig_ThinkingExplicitlyDisabled(t *testing.T) { // Test that when ModelOptions.Thinking() returns false, thinking is explicitly disabled. // This is important for operations like title generation where max_tokens is very low. tests := []struct { - name string - model string - thinkingBudget *latest.ThinkingBudget // Would normally enable thinking + name string + model string + thinkingBudget *latest.ThinkingBudget // Would normally enable thinking + expectBudgetZero bool // Gemini 2.5: ThinkingBudget=0 + expectLevelLow bool // Gemini 3: ThinkingLevelLow (cannot fully disable) + expectMinMaxTokens int32 // Gemini 3: bumped MaxOutputTokens }{ { - name: "gemini-3-flash-preview with thinking budget but disabled via options", - model: "gemini-3-flash-preview", - thinkingBudget: &latest.ThinkingBudget{Effort: "medium"}, + name: "gemini-3-flash-preview with thinking budget but disabled via options", + model: "gemini-3-flash-preview", + thinkingBudget: &latest.ThinkingBudget{Effort: "medium"}, + expectLevelLow: true, + expectMinMaxTokens: 200, }, { - name: "gemini-2.5-flash with thinking budget but disabled via options", - model: "gemini-2.5-flash", - thinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, + name: "gemini-2.5-flash with thinking budget but disabled via options", + model: "gemini-2.5-flash", + thinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, + expectBudgetZero: true, }, { - name: "gemini-3-pro with nil thinking budget but disabled via options", - model: "gemini-3-pro", - thinkingBudget: nil, // Even without explicit budget, Gemini 3 may use thinking by default + name: "gemini-3-pro with nil thinking budget but disabled via options", + model: "gemini-3-pro", + thinkingBudget: nil, // Even without explicit budget, Gemini 3 may use thinking by default + expectLevelLow: true, + expectMinMaxTokens: 200, }, } @@ -328,16 +336,23 @@ func TestBuildConfig_ThinkingExplicitlyDisabled(t *testing.T) { config := client.buildConfig() - // ThinkingConfig should be set with IncludeThoughts=false and ThinkingBudget=0 require.NotNil(t, config.ThinkingConfig, "ThinkingConfig should be explicitly set when thinking is disabled") - assert.False(t, config.ThinkingConfig.IncludeThoughts, "IncludeThoughts should be false when thinking is disabled") - // ThinkingBudget should be 0 to disable thinking completely - require.NotNil(t, config.ThinkingConfig.ThinkingBudget, "ThinkingBudget should be set to 0 when thinking is disabled") - assert.Equal(t, int32(0), *config.ThinkingConfig.ThinkingBudget, "ThinkingBudget should be 0 when thinking is disabled") + if tt.expectBudgetZero { + // Gemini 2.5: fully disabled via ThinkingBudget=0 + assert.False(t, config.ThinkingConfig.IncludeThoughts, "IncludeThoughts should be false") + require.NotNil(t, 
config.ThinkingConfig.ThinkingBudget, "ThinkingBudget should be set to 0")
+				assert.Equal(t, int32(0), *config.ThinkingConfig.ThinkingBudget, "ThinkingBudget should be 0")
+				assert.Empty(t, config.ThinkingConfig.ThinkingLevel, "ThinkingLevel should be empty")
+			}
 
-			// ThinkingLevel should be empty/unset
-			assert.Empty(t, config.ThinkingConfig.ThinkingLevel, "ThinkingLevel should be empty when thinking is disabled")
+			if tt.expectLevelLow {
+				// Gemini 3: cannot fully disable, use lowest level
+				assert.False(t, config.ThinkingConfig.IncludeThoughts, "IncludeThoughts should be false")
+				assert.Equal(t, genai.ThinkingLevelLow, config.ThinkingConfig.ThinkingLevel, "ThinkingLevel should be low")
+				assert.Nil(t, config.ThinkingConfig.ThinkingBudget, "ThinkingBudget should not be set for Gemini 3")
+				assert.GreaterOrEqual(t, config.MaxOutputTokens, tt.expectMinMaxTokens, "MaxOutputTokens should be bumped")
+			}
 		})
 	}
 }
diff --git a/pkg/model/provider/openai/client.go b/pkg/model/provider/openai/client.go
index 309b163c1..1c557ba4a 100644
--- a/pkg/model/provider/openai/client.go
+++ b/pkg/model/provider/openai/client.go
@@ -324,8 +324,20 @@ func (c *Client) CreateResponseStream(
 	}
 
 	if maxToken := c.ModelConfig.MaxTokens; maxToken != nil && *maxToken > 0 {
-		params.MaxOutputTokens = param.NewOpt(*maxToken)
-		slog.Debug("OpenAI responses request configured with max output tokens", "max_output_tokens", maxToken)
+		maxTokens := *maxToken
+
+		// Reasoning models consume output tokens on internal reasoning even when
+		// thinking is explicitly disabled. Bump small budgets so the model has
+		// headroom for both reasoning and the actual text output.
+		thinkingEnabled := c.ModelOptions.Thinking() == nil || *c.ModelOptions.Thinking()
+		if isOpenAIReasoningModel(c.ModelConfig.Model) && !thinkingEnabled && maxTokens < 200 {
+			slog.Debug("Bumping max_output_tokens for reasoning model with thinking disabled",
+				"model", c.ModelConfig.Model, "original", maxTokens, "adjusted", 200)
+			maxTokens = 200
+		}
+
+		params.MaxOutputTokens = param.NewOpt(maxTokens)
+		slog.Debug("OpenAI responses request configured with max output tokens", "max_output_tokens", maxTokens)
 	}
 
 	if len(requestTools) > 0 {
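
A note on the Gemini hunk above: the disable-thinking branch now splits on model family, and the decision is easy to check in isolation. The following sketch mirrors it using hypothetical local stand-ins for the genai SDK types (the real code uses genai.ThinkingConfig, genai.ThinkingLevelLow, and genai.Ptr, as the hunk shows); it illustrates the branch rather than reproducing the implementation.

package main

import (
	"fmt"
	"strings"
)

// thinkingConfig is a hypothetical stand-in for genai.ThinkingConfig;
// only the fields the patch touches are mirrored here.
type thinkingConfig struct {
	ThinkingBudget *int32 // 0 disables thinking on Gemini 2.5 and older
	ThinkingLevel  string // Gemini 3 rejects budget 0, so "LOW" is the floor
}

// disabledThinking mirrors the branch added to buildConfig: Gemini 3
// models cannot turn thinking off, so they get the lowest thinking level
// plus a 200-token output floor; older models get an explicit zero budget.
func disabledThinking(model string, maxOutputTokens int32) (thinkingConfig, int32) {
	if strings.HasPrefix(strings.ToLower(model), "gemini-3-") {
		if maxOutputTokens < 200 {
			maxOutputTokens = 200
		}
		return thinkingConfig{ThinkingLevel: "LOW"}, maxOutputTokens
	}
	return thinkingConfig{ThinkingBudget: new(int32)}, maxOutputTokens
}

func main() {
	cfg, budget := disabledThinking("gemini-3-pro-preview", 20)
	fmt.Println(cfg.ThinkingLevel, budget) // LOW 200

	cfg, budget = disabledThinking("gemini-2.5-flash-lite", 20)
	fmt.Println(*cfg.ThinkingBudget, budget) // 0 20
}

This matches the two recorded cassettes: the Gemini 3 request carries thinkingLevel "LOW" with maxOutputTokens 200, while the 2.5 request carries thinkingBudget 0 with the caller's budget of 20.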
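
The OpenAI hunk relies on two easy-to-miss details: ModelOptions.Thinking() is a tri-state *bool where nil means thinking stays enabled, and the 200-token floor applies only when a reasoning model has thinking turned off. A minimal standalone sketch of that logic follows (the function names are hypothetical; the real check lives inline in CreateResponseStream and delegates the model test to isOpenAIReasoningModel):

package main

import "fmt"

// thinkingEnabled mirrors the tri-state check in the patch: a nil option
// means "not overridden", which the client treats as thinking enabled.
func thinkingEnabled(opt *bool) bool {
	return opt == nil || *opt
}

// effectiveMaxOutputTokens applies the same floor the patch adds for
// reasoning models: tiny caller budgets (e.g. 20 for title generation)
// are raised to 200 only when thinking is explicitly disabled.
func effectiveMaxOutputTokens(requested int64, reasoningModel bool, thinking *bool) int64 {
	if reasoningModel && !thinkingEnabled(thinking) && requested < 200 {
		return 200
	}
	return requested
}

func main() {
	off := false
	fmt.Println(effectiveMaxOutputTokens(20, true, &off))  // 200: floor applied
	fmt.Println(effectiveMaxOutputTokens(20, true, nil))   // 20: nil means thinking on, no floor
	fmt.Println(effectiveMaxOutputTokens(20, false, &off)) // 20: non-reasoning model untouched
}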