From 0998ff53e612867a350abcfa19086bbc407296df Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sun, 29 Mar 2026 10:37:49 +0000
Subject: [PATCH 1/2] Initial plan

From a9c97dd774725153c89e6ae56998acee7818f74f Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sun, 29 Mar 2026 10:53:34 +0000
Subject: [PATCH 2/2] fix: resolve behavior_fingerprint inconsistency between logs and audit tools

In the download goroutine in logs_orchestrator.go, extracted log metrics
(Turns, TokenUsage, EstimatedCost) were stored in result.Metrics but not
applied to result.Run before the fingerprint was computed. This meant
processedRun.Run.Turns was 0 (stale from GitHub API) while audit.go
correctly set run.Turns = metrics.Turns before computing the fingerprint.

Fix: update result.Run fields from extracted metrics immediately after
extraction, matching the pattern used in audit.go. Also compute Duration
and ActionMinutes from timestamps at this point so all fields are
consistent when deriveRunAgenticAnalysis is called.

Adds TestDeriveRunAgenticAnalysisFingerprintConsistency to document and
guard against this regression.

Agent-Logs-Url: https://github.com/github/gh-aw/sessions/db696457-7e7c-4acd-b137-a1839c34f10a Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> --- pkg/cli/audit_agentic_analysis_test.go | 52 ++++++++++++++++++++++++++ pkg/cli/logs_orchestrator.go | 15 ++++++++ 2 files changed, 67 insertions(+) diff --git a/pkg/cli/audit_agentic_analysis_test.go b/pkg/cli/audit_agentic_analysis_test.go index fe0fe8248ab..89a760f6c0b 100644 --- a/pkg/cli/audit_agentic_analysis_test.go +++ b/pkg/cli/audit_agentic_analysis_test.go @@ -324,3 +324,55 @@ func TestBuildAuditDataToolUsageMatchesBuildToolUsageInfo(t *testing.T) { require.Equal(t, expected, auditData.ToolUsage, "buildAuditData tool usage should match buildToolUsageInfo output") } + +// TestDeriveRunAgenticAnalysisFingerprintConsistency verifies that the fingerprint +// produced by deriveRunAgenticAnalysis is consistent when Run.Turns is correctly +// populated from log metrics. This guards against the bug where logs_orchestrator.go +// computed the fingerprint before updating result.Run.Turns from extracted metrics, +// causing different fingerprint values between the logs and audit tools for the same run. +func TestDeriveRunAgenticAnalysisFingerprintConsistency(t *testing.T) { + const metricsTurns = 12 + + logMetrics := LogMetrics{ + Turns: metricsTurns, + ToolCalls: []workflow.ToolCallInfo{ + {Name: "bash", CallCount: 5}, + {Name: "github_issue_read", CallCount: 3}, + }, + } + + // Simulate the corrected behavior (post-fix): Run.Turns is set from log metrics + // before deriveRunAgenticAnalysis is called, matching what audit.go does. 
+ processedRunFixed := ProcessedRun{ + Run: WorkflowRun{ + Turns: metricsTurns, // set from metrics.Turns + Duration: 20 * time.Minute, + }, + } + _, _, _, _, fpFixed, _ := deriveRunAgenticAnalysis(processedRunFixed, logMetrics) + + require.NotNil(t, fpFixed, "fingerprint should not be nil") + assert.Equal(t, "exploratory", fpFixed.ExecutionStyle, "12 turns should produce exploratory execution style") + assert.Equal(t, "heavy", fpFixed.ResourceProfile, "20 min duration should produce heavy resource profile") + assert.Greater(t, fpFixed.AgenticFraction, 0.0, "agentic fraction should be positive when turns > 0") + + // Simulate the broken behavior (pre-fix): Run.Turns is zero because the orchestrator + // had not yet updated it from extracted metrics when computing the fingerprint. + processedRunStale := ProcessedRun{ + Run: WorkflowRun{ + Turns: 0, // stale — NOT updated from metrics.Turns + Duration: 20 * time.Minute, + }, + } + _, _, _, _, fpStale, _ := deriveRunAgenticAnalysis(processedRunStale, logMetrics) + + require.NotNil(t, fpStale, "fingerprint should not be nil even with zero turns") + assert.Equal(t, "directed", fpStale.ExecutionStyle, "zero turns should produce directed execution style") + assert.InDelta(t, 0.0, fpStale.AgenticFraction, 0.001, "agentic fraction should be zero when Run.Turns is zero") + + // Confirm the two results differ — this is exactly the inconsistency the fix resolves. + assert.NotEqual(t, fpFixed.ExecutionStyle, fpStale.ExecutionStyle, + "fingerprints should differ when Run.Turns is stale vs. correctly set from metrics") + assert.NotEqual(t, fpFixed.AgenticFraction, fpStale.AgenticFraction, + "agentic fraction should differ when Run.Turns is stale vs. 
correctly set from metrics") +} diff --git a/pkg/cli/logs_orchestrator.go b/pkg/cli/logs_orchestrator.go index 1ec5151e7d2..7ba60908c92 100644 --- a/pkg/cli/logs_orchestrator.go +++ b/pkg/cli/logs_orchestrator.go @@ -683,6 +683,21 @@ func downloadRunArtifactsConcurrent(ctx context.Context, runs []WorkflowRun, out } result.Metrics = metrics + // Update run with metrics so fingerprint computation uses the same data + // as the audit tool, which also derives these fields from extracted log metrics. + result.Run.TokenUsage = metrics.TokenUsage + result.Run.EstimatedCost = metrics.EstimatedCost + result.Run.Turns = metrics.Turns + result.Run.LogsPath = runOutputDir + + // Calculate duration and billable minutes from GitHub API timestamps. + // This mirrors the identical computation in audit.go so that + // processedRun.Run.Duration is consistent across both tools. + if !result.Run.StartedAt.IsZero() && !result.Run.UpdatedAt.IsZero() { + result.Run.Duration = result.Run.UpdatedAt.Sub(result.Run.StartedAt) + result.Run.ActionMinutes = math.Ceil(result.Run.Duration.Minutes()) + } + // Analyze access logs if available accessAnalysis, accessErr := analyzeAccessLogs(runOutputDir, verbose) if accessErr != nil {