diff --git a/pkg/cli/audit_agentic_analysis_test.go b/pkg/cli/audit_agentic_analysis_test.go index fe0fe8248ab..89a760f6c0b 100644 --- a/pkg/cli/audit_agentic_analysis_test.go +++ b/pkg/cli/audit_agentic_analysis_test.go @@ -324,3 +324,55 @@ func TestBuildAuditDataToolUsageMatchesBuildToolUsageInfo(t *testing.T) { require.Equal(t, expected, auditData.ToolUsage, "buildAuditData tool usage should match buildToolUsageInfo output") } + +// TestDeriveRunAgenticAnalysisFingerprintConsistency pins how the fingerprint +// produced by deriveRunAgenticAnalysis depends on Run.Turns: it calls the function directly, once with Run.Turns +// populated from log metrics and once with Run.Turns left at zero. This guards against the bug where logs_orchestrator.go +// computed the fingerprint before updating result.Run.Turns from extracted metrics, +// causing different fingerprint values between the logs and audit tools for the same run. +func TestDeriveRunAgenticAnalysisFingerprintConsistency(t *testing.T) { + const metricsTurns = 12 + + logMetrics := LogMetrics{ + Turns: metricsTurns, + ToolCalls: []workflow.ToolCallInfo{ + {Name: "bash", CallCount: 5}, + {Name: "github_issue_read", CallCount: 3}, + }, + } + + // Simulate the corrected behavior (post-fix): Run.Turns is set from log metrics + // before deriveRunAgenticAnalysis is called, matching what audit.go does. 
+ processedRunFixed := ProcessedRun{ + Run: WorkflowRun{ + Turns: metricsTurns, // set from metrics.Turns + Duration: 20 * time.Minute, + }, + } + _, _, _, _, fpFixed, _ := deriveRunAgenticAnalysis(processedRunFixed, logMetrics) + + require.NotNil(t, fpFixed, "fingerprint should not be nil") + assert.Equal(t, "exploratory", fpFixed.ExecutionStyle, "12 turns should produce exploratory execution style") + assert.Equal(t, "heavy", fpFixed.ResourceProfile, "20 min duration should produce heavy resource profile") + assert.Greater(t, fpFixed.AgenticFraction, 0.0, "agentic fraction should be positive when turns > 0") + + // Simulate the broken behavior (pre-fix): Run.Turns is zero because the orchestrator + // had not yet updated it from extracted metrics when computing the fingerprint. + processedRunStale := ProcessedRun{ + Run: WorkflowRun{ + Turns: 0, // stale — NOT updated from metrics.Turns + Duration: 20 * time.Minute, + }, + } + _, _, _, _, fpStale, _ := deriveRunAgenticAnalysis(processedRunStale, logMetrics) + + require.NotNil(t, fpStale, "fingerprint should not be nil even with zero turns") + assert.Equal(t, "directed", fpStale.ExecutionStyle, "zero turns should produce directed execution style") + assert.InDelta(t, 0.0, fpStale.AgenticFraction, 0.001, "agentic fraction should be zero when Run.Turns is zero") + + // Confirm the two results differ — this is exactly the inconsistency the fix resolves. + assert.NotEqual(t, fpFixed.ExecutionStyle, fpStale.ExecutionStyle, + "fingerprints should differ when Run.Turns is stale vs. correctly set from metrics") + assert.NotEqual(t, fpFixed.AgenticFraction, fpStale.AgenticFraction, + "agentic fraction should differ when Run.Turns is stale vs. 
correctly set from metrics") +} diff --git a/pkg/cli/logs_orchestrator.go b/pkg/cli/logs_orchestrator.go index 1ec5151e7d2..7ba60908c92 100644 --- a/pkg/cli/logs_orchestrator.go +++ b/pkg/cli/logs_orchestrator.go @@ -683,6 +683,21 @@ func downloadRunArtifactsConcurrent(ctx context.Context, runs []WorkflowRun, out } result.Metrics = metrics + // Copy the extracted metrics and the run output directory onto the run so fingerprint computation uses the same data + // as the audit tool, which also derives these fields from extracted log metrics. + result.Run.TokenUsage = metrics.TokenUsage + result.Run.EstimatedCost = metrics.EstimatedCost + result.Run.Turns = metrics.Turns + result.Run.LogsPath = runOutputDir + + // Calculate duration and billable minutes (duration rounded up to whole minutes) from GitHub API timestamps; skipped when either timestamp is zero. + // This mirrors the identical computation in audit.go so that + // processedRun.Run.Duration is consistent across both tools. + if !result.Run.StartedAt.IsZero() && !result.Run.UpdatedAt.IsZero() { + result.Run.Duration = result.Run.UpdatedAt.Sub(result.Run.StartedAt) + result.Run.ActionMinutes = math.Ceil(result.Run.Duration.Minutes()) + } + // Analyze access logs if available accessAnalysis, accessErr := analyzeAccessLogs(runOutputDir, verbose) if accessErr != nil {