-
Notifications
You must be signed in to change notification settings - Fork 310
Fix: Turns always 0 and ToolCalls always null in run summaries #22106
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,136 @@ | ||
| //go:build !integration | ||
|
|
||
| package cli | ||
|
|
||
| import ( | ||
| "os" | ||
| "path/filepath" | ||
| "testing" | ||
|
|
||
| "github.com/stretchr/testify/assert" | ||
| "github.com/stretchr/testify/require" | ||
| ) | ||
|
|
||
| // TestCopilotDebugLogTurnsExtraction verifies that Turns are correctly counted from | ||
| // [DEBUG] data: blocks in the Copilot CLI debug log format. | ||
| // | ||
| // This is a regression test for the bug where Turns was always 0 because the parser | ||
| // counted "User:"/"Human:"/"Query:" patterns that do not appear in Copilot CLI debug logs. | ||
| // The fix counts each "[DEBUG] data:" block as one API response (one turn). | ||
| func TestCopilotDebugLogTurnsExtraction(t *testing.T) { | ||
| tempDir := t.TempDir() | ||
|
|
||
| awInfoContent := `{"engine_id": "copilot"}` | ||
| require.NoError(t, os.WriteFile(filepath.Join(tempDir, "aw_info.json"), []byte(awInfoContent), 0644)) | ||
|
|
||
| // Debug log with 2 API responses (2 data blocks) and "Executing tool: bash" between them. | ||
| // The JSON blocks have empty tool_calls (common in production Copilot CLI logs). | ||
| logContent := `2025-09-26T11:13:11.798Z [DEBUG] Starting Copilot CLI: 0.0.400 | ||
| 2025-09-26T11:13:12.575Z [DEBUG] data: | ||
| 2025-09-26T11:13:12.575Z [DEBUG] { | ||
| 2025-09-26T11:13:12.575Z [DEBUG] "choices": [{ | ||
| 2025-09-26T11:13:12.575Z [DEBUG] "message": { | ||
| 2025-09-26T11:13:12.575Z [DEBUG] "role": "assistant", | ||
| 2025-09-26T11:13:12.575Z [DEBUG] "content": null, | ||
| 2025-09-26T11:13:12.575Z [DEBUG] "tool_calls": [] | ||
| 2025-09-26T11:13:12.575Z [DEBUG] } | ||
| 2025-09-26T11:13:12.575Z [DEBUG] }], | ||
| 2025-09-26T11:13:12.575Z [DEBUG] "usage": { | ||
| 2025-09-26T11:13:12.575Z [DEBUG] "prompt_tokens": 1000, | ||
| 2025-09-26T11:13:12.575Z [DEBUG] "completion_tokens": 50, | ||
| 2025-09-26T11:13:12.575Z [DEBUG] "total_tokens": 1050 | ||
| 2025-09-26T11:13:12.575Z [DEBUG] } | ||
| 2025-09-26T11:13:12.575Z [DEBUG] } | ||
| 2025-09-26T11:13:13.000Z [DEBUG] Executing tool: bash | ||
| 2025-09-26T11:13:13.500Z [DEBUG] Tool execution completed | ||
| 2025-09-26T11:13:14.000Z [DEBUG] data: | ||
| 2025-09-26T11:13:14.000Z [DEBUG] { | ||
| 2025-09-26T11:13:14.000Z [DEBUG] "choices": [{ | ||
| 2025-09-26T11:13:14.000Z [DEBUG] "message": { | ||
| 2025-09-26T11:13:14.000Z [DEBUG] "role": "assistant", | ||
| 2025-09-26T11:13:14.000Z [DEBUG] "content": "Task done.", | ||
| 2025-09-26T11:13:14.000Z [DEBUG] "tool_calls": [] | ||
| 2025-09-26T11:13:14.000Z [DEBUG] } | ||
| 2025-09-26T11:13:14.000Z [DEBUG] }], | ||
| 2025-09-26T11:13:14.000Z [DEBUG] "usage": { | ||
| 2025-09-26T11:13:14.000Z [DEBUG] "prompt_tokens": 1200, | ||
| 2025-09-26T11:13:14.000Z [DEBUG] "completion_tokens": 20, | ||
| 2025-09-26T11:13:14.000Z [DEBUG] "total_tokens": 1220 | ||
| 2025-09-26T11:13:14.000Z [DEBUG] } | ||
| 2025-09-26T11:13:14.000Z [DEBUG] } | ||
| 2025-09-26T11:13:14.500Z [DEBUG] Workflow completed` | ||
|
|
||
| require.NoError(t, os.WriteFile(filepath.Join(tempDir, "agent.log"), []byte(logContent), 0644)) | ||
|
|
||
| metrics, err := extractLogMetrics(tempDir, false) | ||
| require.NoError(t, err) | ||
|
|
||
| // Tokens should be accumulated from both data blocks | ||
| assert.Equal(t, 2270, metrics.TokenUsage, "Should accumulate tokens from both data blocks: 1050 + 1220 = 2270") | ||
|
|
||
| // Turns should be 2 (one per [DEBUG] data: block) | ||
| assert.Equal(t, 2, metrics.Turns, | ||
| "Turns should count [DEBUG] data: blocks; got %d", metrics.Turns) | ||
|
|
||
| // Tool calls should be extracted from "Executing tool: bash" line | ||
| assert.NotEmpty(t, metrics.ToolCalls, "ToolCalls should not be null when 'Executing tool:' is present") | ||
| if len(metrics.ToolCalls) > 0 { | ||
| found := false | ||
| for _, tc := range metrics.ToolCalls { | ||
| if tc.Name == "bash" { | ||
| found = true | ||
| assert.Equal(t, 1, tc.CallCount, "bash should have been called once") | ||
| } | ||
| } | ||
| assert.True(t, found, "Expected to find 'bash' in tool calls") | ||
| } | ||
| } | ||
|
|
||
| // TestCopilotDebugLogMultipleToolCalls verifies that multiple "Executing tool:" lines | ||
| // produce correct call counts in ToolCalls. | ||
| func TestCopilotDebugLogMultipleToolCalls(t *testing.T) { | ||
| tempDir := t.TempDir() | ||
|
|
||
| awInfoContent := `{"engine_id": "copilot"}` | ||
| require.NoError(t, os.WriteFile(filepath.Join(tempDir, "aw_info.json"), []byte(awInfoContent), 0644)) | ||
|
|
||
| // Three API response blocks with different tool executions | ||
| logContent := `2025-09-26T11:13:11.798Z [DEBUG] Starting Copilot CLI: 0.0.400 | ||
| 2025-09-26T11:13:12.000Z [DEBUG] data: | ||
| 2025-09-26T11:13:12.000Z [DEBUG] { | ||
| 2025-09-26T11:13:12.000Z [DEBUG] "choices": [{"message": {"tool_calls": []}}], | ||
| 2025-09-26T11:13:12.000Z [DEBUG] "usage": {"prompt_tokens": 500, "completion_tokens": 10, "total_tokens": 510} | ||
| 2025-09-26T11:13:12.000Z [DEBUG] } | ||
| 2025-09-26T11:13:12.500Z [DEBUG] Executing tool: bash | ||
| 2025-09-26T11:13:13.000Z [DEBUG] data: | ||
| 2025-09-26T11:13:13.000Z [DEBUG] { | ||
| 2025-09-26T11:13:13.000Z [DEBUG] "choices": [{"message": {"tool_calls": []}}], | ||
| 2025-09-26T11:13:13.000Z [DEBUG] "usage": {"prompt_tokens": 600, "completion_tokens": 10, "total_tokens": 610} | ||
| 2025-09-26T11:13:13.000Z [DEBUG] } | ||
| 2025-09-26T11:13:13.500Z [DEBUG] Executing tool: bash | ||
| 2025-09-26T11:13:14.000Z [DEBUG] data: | ||
| 2025-09-26T11:13:14.000Z [DEBUG] { | ||
| 2025-09-26T11:13:14.000Z [DEBUG] "choices": [{"message": {"tool_calls": []}}], | ||
| 2025-09-26T11:13:14.000Z [DEBUG] "usage": {"prompt_tokens": 700, "completion_tokens": 20, "total_tokens": 720} | ||
| 2025-09-26T11:13:14.000Z [DEBUG] } | ||
| 2025-09-26T11:13:14.500Z [DEBUG] Executing tool: mcp_github | ||
| 2025-09-26T11:13:15.000Z [DEBUG] Workflow done` | ||
|
|
||
| require.NoError(t, os.WriteFile(filepath.Join(tempDir, "agent.log"), []byte(logContent), 0644)) | ||
|
|
||
| metrics, err := extractLogMetrics(tempDir, false) | ||
| require.NoError(t, err) | ||
|
|
||
| // Turns: 3 data blocks | ||
| assert.Equal(t, 3, metrics.Turns, "Should count 3 turns from 3 data blocks") | ||
|
|
||
| // Tool calls: bash x2, mcp_github x1 | ||
| assert.NotEmpty(t, metrics.ToolCalls, "ToolCalls should not be null") | ||
|
|
||
| toolCounts := make(map[string]int) | ||
| for _, tc := range metrics.ToolCalls { | ||
| toolCounts[tc.Name] = tc.CallCount | ||
| } | ||
| assert.Equal(t, 2, toolCounts["bash"], "bash should have 2 calls") | ||
| assert.Equal(t, 1, toolCounts["mcp_github"], "mcp_github should have 1 call") | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -49,6 +49,7 @@ func (e *CopilotEngine) parseSessionJSONL(logContent string, verbose bool) (LogM | |
| toolCallMap := make(map[string]*ToolCallInfo) | ||
| var currentSequence []string | ||
| turns := 0 | ||
| assistantMessageCount := 0 // fallback: count assistant messages when num_turns is absent | ||
|
|
||
| lines := strings.Split(logContent, "\n") | ||
| foundSessionEntry := false | ||
|
|
@@ -78,6 +79,9 @@ func (e *CopilotEngine) parseSessionJSONL(logContent string, verbose bool) (LogM | |
| } | ||
|
|
||
| case "assistant": | ||
| // Each assistant message represents one LLM turn | ||
| assistantMessageCount++ | ||
|
|
||
| // Assistant message with potential tool calls | ||
| if entry.Message != nil { | ||
| for _, content := range entry.Message.Content { | ||
|
|
@@ -153,6 +157,14 @@ func (e *CopilotEngine) parseSessionJSONL(logContent string, verbose bool) (LogM | |
| } | ||
| } | ||
|
|
||
| // If turns was not set from num_turns (0 or absent), fall back to counting assistant messages. | ||
| // The Copilot CLI may omit num_turns from the result entry; each assistant message represents | ||
| // one LLM conversation turn. | ||
| if turns == 0 && assistantMessageCount > 0 { | ||
| turns = assistantMessageCount | ||
| copilotLogsLog.Printf("num_turns not available in result entry, using assistant message count as turns: %d", turns) | ||
| } | ||
|
|
||
| // If we found no session entries, return false to indicate fallback needed | ||
| if !foundSessionEntry { | ||
| return metrics, false | ||
|
|
@@ -227,6 +239,15 @@ func (e *CopilotEngine) ParseLogMetrics(logContent string, verbose bool) LogMetr | |
| if strings.Contains(line, "[DEBUG] data:") { | ||
| inDataBlock = true | ||
| currentJSONLines = []string{} | ||
| // Each API response data block represents one LLM conversation turn. | ||
| // Copilot CLI debug logs don't have "User:"/"Human:" patterns, so we | ||
| // count turns based on the number of API responses (data blocks). | ||
| turns++ | ||
| // Save previous sequence before starting new turn | ||
| if len(currentSequence) > 0 { | ||
| metrics.ToolSequences = append(metrics.ToolSequences, currentSequence) | ||
| currentSequence = []string{} | ||
| } | ||
| continue | ||
| } | ||
|
|
||
|
|
@@ -282,17 +303,10 @@ func (e *CopilotEngine) ParseLogMetrics(logContent string, verbose bool) LogMetr | |
| } | ||
| } | ||
|
|
||
| // Count turns based on interaction patterns (adjust based on actual Copilot CLI output) | ||
| if strings.Contains(line, "User:") || strings.Contains(line, "Human:") || strings.Contains(line, "Query:") { | ||
| turns++ | ||
| // Start of a new turn, save previous sequence if any | ||
| if len(currentSequence) > 0 { | ||
| metrics.ToolSequences = append(metrics.ToolSequences, currentSequence) | ||
| currentSequence = []string{} | ||
| } | ||
| } | ||
|
|
||
| // Extract tool calls and add to sequence (adjust based on actual Copilot CLI output format) | ||
| // Extract tool calls and add to sequence and toolCallMap | ||
| // "Executing tool: <name>" lines confirm tool execution and are used to populate | ||
| // both the tool sequence and tool call statistics. This handles the common case where | ||
| // Copilot CLI JSON blocks have empty tool_calls arrays but emit execution log lines. | ||
| if toolName := e.parseCopilotToolCallsWithSequence(line, toolCallMap); toolName != "" { | ||
| currentSequence = append(currentSequence, toolName) | ||
| } | ||
|
|
@@ -403,19 +417,29 @@ func (e *CopilotEngine) processToolCalls(toolCalls []any, toolCallMap map[string | |
| } | ||
| } | ||
|
|
||
| // parseCopilotToolCallsWithSequence extracts tool call information from Copilot CLI log lines and returns tool name | ||
| // parseCopilotToolCallsWithSequence extracts tool call information from Copilot CLI log lines and returns tool name. | ||
| // It also updates toolCallMap with the tool execution count for statistics tracking. | ||
| func (e *CopilotEngine) parseCopilotToolCallsWithSequence(line string, toolCallMap map[string]*ToolCallInfo) string { | ||
| // This method handles simple tool execution log lines for sequence tracking | ||
| // Tool size extraction is now handled by extractToolCallSizes which parses JSON | ||
|
|
||
| // Look for "Executing tool:" pattern in Copilot logs | ||
| if strings.Contains(line, "Executing tool:") { | ||
| // Extract tool name from "Executing tool: <name>" format | ||
| parts := strings.Split(line, "Executing tool:") | ||
| if len(parts) > 1 { | ||
| toolName := strings.TrimSpace(parts[1]) | ||
| // Return the tool name for sequence tracking | ||
| // Size information is handled separately by extractToolCallSizes | ||
| if toolName == "" { | ||
| return "" | ||
| } | ||
| // Update toolCallMap: this captures tool calls from execution log lines. | ||
| // This is the primary source of tool call data in the Copilot CLI debug log | ||
| // format, since JSON response blocks often have empty tool_calls arrays. | ||
| if toolInfo, exists := toolCallMap[toolName]; exists { | ||
| toolInfo.CallCount++ | ||
| } else { | ||
| toolCallMap[toolName] = &ToolCallInfo{ | ||
| Name: toolName, | ||
| CallCount: 1, | ||
| } | ||
| } | ||
|
Comment on lines
+432
to
+442
|
||
| return toolName | ||
| } | ||
| } | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
When a new "[DEBUG] data:" marker is encountered, the parser resets
currentJSONLineswithout first finalizing/parsing any in-progress JSON block. If Copilot debug logs emit back-to-backdata:blocks (i.e., the nextdata:line immediately follows the previous JSON’s closing}), the previous block’s token usage/tool sizes will be dropped, causing undercounted metrics. Treat a newdata:marker as an implicit end-of-previous-block: ifinDataBlock && len(currentJSONLines)>0, parse/accumulate metrics before resetting, then start the new block and increment turns.