Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 52 additions & 0 deletions pkg/cli/audit_agentic_analysis_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -324,3 +324,55 @@ func TestBuildAuditDataToolUsageMatchesBuildToolUsageInfo(t *testing.T) {

require.Equal(t, expected, auditData.ToolUsage, "buildAuditData tool usage should match buildToolUsageInfo output")
}

// TestDeriveRunAgenticAnalysisFingerprintConsistency checks how the fingerprint
// returned by deriveRunAgenticAnalysis depends on Run.Turns. It is a regression
// guard for the bug in logs_orchestrator.go where the fingerprint was computed
// before result.Run.Turns had been copied from the extracted metrics, which made
// the logs and audit tools report different fingerprints for the same run.
func TestDeriveRunAgenticAnalysisFingerprintConsistency(t *testing.T) {
	const (
		metricsTurns = 12
		runDuration  = 20 * time.Minute
	)

	// Both scenarios share the same extracted metrics; only Run.Turns differs.
	metrics := LogMetrics{
		Turns: metricsTurns,
		ToolCalls: []workflow.ToolCallInfo{
			{Name: "bash", CallCount: 5},
			{Name: "github_issue_read", CallCount: 3},
		},
	}

	// Post-fix scenario: Run.Turns has been copied from the metrics before
	// deriveRunAgenticAnalysis runs, which is what audit.go does.
	var syncedRun ProcessedRun
	syncedRun.Run.Turns = metricsTurns // set from metrics.Turns
	syncedRun.Run.Duration = runDuration
	_, _, _, _, syncedFP, _ := deriveRunAgenticAnalysis(syncedRun, metrics)

	require.NotNil(t, syncedFP, "fingerprint should not be nil")
	assert.Equal(t, "exploratory", syncedFP.ExecutionStyle, "12 turns should produce exploratory execution style")
	assert.Equal(t, "heavy", syncedFP.ResourceProfile, "20 min duration should produce heavy resource profile")
	assert.Greater(t, syncedFP.AgenticFraction, 0.0, "agentic fraction should be positive when turns > 0")

	// Pre-fix scenario: the orchestrator computed the fingerprint while
	// Run.Turns still held its zero value.
	var staleRun ProcessedRun
	staleRun.Run.Turns = 0 // stale: NOT updated from metrics.Turns
	staleRun.Run.Duration = runDuration
	_, _, _, _, staleFP, _ := deriveRunAgenticAnalysis(staleRun, metrics)

	require.NotNil(t, staleFP, "fingerprint should not be nil even with zero turns")
	assert.Equal(t, "directed", staleFP.ExecutionStyle, "zero turns should produce directed execution style")
	assert.InDelta(t, 0.0, staleFP.AgenticFraction, 0.001, "agentic fraction should be zero when Run.Turns is zero")

	// The two fingerprints must disagree: that mismatch is the inconsistency
	// the fix removes.
	assert.NotEqual(t, syncedFP.ExecutionStyle, staleFP.ExecutionStyle,
		"fingerprints should differ when Run.Turns is stale vs. correctly set from metrics")
	assert.NotEqual(t, syncedFP.AgenticFraction, staleFP.AgenticFraction,
		"agentic fraction should differ when Run.Turns is stale vs. correctly set from metrics")
}
15 changes: 15 additions & 0 deletions pkg/cli/logs_orchestrator.go
Original file line number Diff line number Diff line change
Expand Up @@ -683,6 +683,21 @@ func downloadRunArtifactsConcurrent(ctx context.Context, runs []WorkflowRun, out
}
result.Metrics = metrics

// Update run with metrics so fingerprint computation uses the same data
// as the audit tool, which also derives these fields from extracted log metrics.
result.Run.TokenUsage = metrics.TokenUsage
result.Run.EstimatedCost = metrics.EstimatedCost
result.Run.Turns = metrics.Turns
result.Run.LogsPath = runOutputDir

Comment on lines +686 to +692
Copy link

Copilot AI Mar 29, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This “sync run fields from extracted metrics” mapping is now duplicated in several places (e.g., pkg/cli/audit.go:266-277, pkg/cli/audit_comparison.go:175-181, and here). Consider centralizing it into a helper to reduce the chance of future drift between tools (this bug was caused by a divergence like that).

Copilot uses AI. Check for mistakes.
// Calculate duration and billable minutes from GitHub API timestamps.
// The Duration computation mirrors the one in audit.go so that
// processedRun.Run.Duration is consistent across both tools. ActionMinutes
// is derived here as well; audit.go does not set it.
if !result.Run.StartedAt.IsZero() && !result.Run.UpdatedAt.IsZero() {
result.Run.Duration = result.Run.UpdatedAt.Sub(result.Run.StartedAt)
result.Run.ActionMinutes = math.Ceil(result.Run.Duration.Minutes())
}
Comment on lines +693 to +699
Copy link

Copilot AI Mar 29, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The comment says this duration + billable minutes computation “mirrors the identical computation in audit.go”, but audit.go only sets run.Duration (it does not set ActionMinutes). Please adjust the comment to match reality (or add the missing ActionMinutes calculation to audit if parity is intended) so future readers aren’t misled about cross-tool consistency.

Copilot uses AI. Check for mistakes.

// Analyze access logs if available
accessAnalysis, accessErr := analyzeAccessLogs(runOutputDir, verbose)
if accessErr != nil {
Expand Down
Loading