diff --git a/pkg/cli/context_cancellation_test.go b/pkg/cli/context_cancellation_test.go index 22e697b5727..a2af027255c 100644 --- a/pkg/cli/context_cancellation_test.go +++ b/pkg/cli/context_cancellation_test.go @@ -71,7 +71,7 @@ func TestDownloadWorkflowLogsWithCancellation(t *testing.T) { cancel() // Try to download logs with a cancelled context - err := DownloadWorkflowLogs(ctx, "", 10, "", "", "/tmp/test-logs", "", "", 0, 0, "", false, false, false, false, false, false, false, 0, "", "", false, false, "", nil) + err := DownloadWorkflowLogs(ctx, "", 10, "", "", "/tmp/test-logs", "", "", 0, 0, "", false, false, false, false, false, false, false, 0, "", "", false, false, "", nil, nil) // Should return context.Canceled error assert.ErrorIs(t, err, context.Canceled, "Should return context.Canceled error when context is cancelled") @@ -111,7 +111,7 @@ func TestDownloadWorkflowLogsTimeoutRespected(t *testing.T) { start := time.Now() // Use a workflow name that doesn't exist to avoid actual network calls - _ = DownloadWorkflowLogs(ctx, "nonexistent-workflow-12345", 100, "", "", "/tmp/test-logs", "", "", 0, 0, "", false, false, false, false, false, false, false, 1, "", "", false, false, "", nil) + _ = DownloadWorkflowLogs(ctx, "nonexistent-workflow-12345", 100, "", "", "/tmp/test-logs", "", "", 0, 0, "", false, false, false, false, false, false, false, 1, "", "", false, false, "", nil, nil) elapsed := time.Since(start) // Should complete within reasonable time (give 5 seconds buffer for test overhead) diff --git a/pkg/cli/logs_ci_scenario_test.go b/pkg/cli/logs_ci_scenario_test.go index 205b1048bdf..c80b6427c14 100644 --- a/pkg/cli/logs_ci_scenario_test.go +++ b/pkg/cli/logs_ci_scenario_test.go @@ -55,6 +55,7 @@ func TestLogsJSONOutputWithNoRuns(t *testing.T) { false, // train "", // format nil, // artifactSets + nil, // excludeWorkflows ) // Restore stdout and read output diff --git a/pkg/cli/logs_command.go b/pkg/cli/logs_command.go index 6431075dc6c..12489f17aff 100644 --- a/pkg/cli/logs_command.go +++ b/pkg/cli/logs_command.go @@ -78,6 +78,10 @@ Examples: ` + string(constants.CLIExtensionPrefix) + ` logs --filtered-integrity # Filter logs containing items that were filtered by gateway integrity checks ` + string(constants.CLIExtensionPrefix) + ` logs --no-staged # Exclude staged workflow runs from results + # Excluding specific workflows + ` + string(constants.CLIExtensionPrefix) + ` logs --exclude ci-tests # Exclude a specific workflow from results + ` + string(constants.CLIExtensionPrefix) + ` logs --exclude ci-tests,nightly-build # Exclude multiple workflows + # Run ID range filtering ` + string(constants.CLIExtensionPrefix) + ` logs --after-run-id 1000 # Filter runs after run ID 1000 ` + string(constants.CLIExtensionPrefix) + ` logs --before-run-id 2000 # Filter runs before run ID 2000 @@ -155,6 +159,7 @@ Examples: train, _ := cmd.Flags().GetBool("train") format, _ := cmd.Flags().GetString("format") artifacts, _ := cmd.Flags().GetStringSlice("artifacts") + excludeWorkflows, _ := cmd.Flags().GetStringSlice("exclude") // Resolve relative dates to absolute dates for GitHub CLI now := time.Now() @@ -189,7 +194,33 @@ Examples: logsCommandLog.Printf("Executing logs download: workflow=%s, count=%d, engine=%s, train=%v", workflowName, count, engine, train) - return DownloadWorkflowLogs(cmd.Context(), workflowName, count, startDate, endDate, outputDir, engine, ref, beforeRunID, afterRunID, repoOverride, verbose, toolGraph, noStaged, firewallOnly, noFirewall, parse, jsonOutput, timeout, summaryFile, safeOutputType, filteredIntegrity, train, format, artifacts) + return DownloadWorkflowLogsWithOptions(cmd.Context(), DownloadWorkflowLogsOptions{ + WorkflowName: workflowName, + Count: count, + StartDate: startDate, + EndDate: endDate, + OutputDir: outputDir, + Engine: engine, + Ref: ref, + BeforeRunID: beforeRunID, + AfterRunID: afterRunID, + RepoOverride: repoOverride, + Verbose: verbose, + ToolGraph: toolGraph, + NoStaged: noStaged, + FirewallOnly: firewallOnly, + NoFirewall: noFirewall, + Parse: parse, + JSONOutput: jsonOutput, + Timeout: timeout, + SummaryFile: summaryFile, + SafeOutputType: safeOutputType, + FilteredIntegrity: filteredIntegrity, + Train: train, + Format: format, + ArtifactSets: artifacts, + ExcludeWorkflows: excludeWorkflows, + }) }, } @@ -217,6 +248,7 @@ Examples: logsCmd.Flags().String("format", "", "Output format for cross-run audit report: pretty, markdown (generates security audit report instead of default metrics table)") logsCmd.Flags().Int("last", 0, "Alias for --count: number of recent runs to download") logsCmd.Flags().StringSlice("artifacts", nil, "Artifact sets to download (default: all). Valid sets: "+strings.Join(ValidArtifactSetNames(), ", ")) + logsCmd.Flags().StringSlice("exclude", nil, "Workflow names or IDs to exclude from results (comma-separated or repeated flag). Excluded runs are skipped before artifact download, saving API resources.") logsCmd.MarkFlagsMutuallyExclusive("firewall", "no-firewall") // Register completions for logs command diff --git a/pkg/cli/logs_command_test.go b/pkg/cli/logs_command_test.go index 935bd4adc22..0591b09da1c 100644 --- a/pkg/cli/logs_command_test.go +++ b/pkg/cli/logs_command_test.go @@ -264,3 +264,131 @@ func TestLogsCommandHelpText(t *testing.T) { assert.Contains(t, safeOutputFlag.Usage, "noop", "safe-output flag help should mention noop") assert.Contains(t, safeOutputFlag.Usage, "report-incomplete", "safe-output flag help should mention report-incomplete") } + +func TestLogsCommandExcludeFlag(t *testing.T) { + cmd := NewLogsCommand() + flags := cmd.Flags() + + // Verify the --exclude flag exists + excludeFlag := flags.Lookup("exclude") + require.NotNil(t, excludeFlag, "Should have 'exclude' flag") + assert.Equal(t, "stringSlice", excludeFlag.Value.Type(), "exclude should be stringSlice type") + assert.Equal(t, "[]", excludeFlag.DefValue, "exclude should default to empty slice") + assert.Contains(t, excludeFlag.Usage, "exclude", "exclude flag usage should describe its purpose") +} + +func TestIsWorkflowExcluded(t *testing.T) { + tests := []struct { + name string + workflowName string + excludes []string + expected bool + }{ + { + name: "exact match", + workflowName: "Weekly Research", + excludes: []string{"Weekly Research"}, + expected: true, + }, + { + name: "case-insensitive match", + workflowName: "Weekly Research", + excludes: []string{"weekly research"}, + expected: true, + }, + { + name: "slug match - id matches display name", + workflowName: "Weekly Research", + excludes: []string{"weekly-research"}, + expected: true, + }, + { + name: "slug match - multi-word id", + workflowName: "CI Failure Doctor", + excludes: []string{"ci-failure-doctor"}, + expected: true, + }, + { + name: "no match", + workflowName: "Weekly Research", + excludes: []string{"Nightly Build"}, + expected: false, + }, + { + name: "empty excludes", + workflowName: "Weekly Research", + excludes: nil, + expected: false, + }, + { + name: "multiple excludes, one matches", + workflowName: "CI Tests", + excludes: []string{"Weekly Research", "CI Tests", "Nightly Build"}, + expected: true, + }, + { + name: "multiple excludes, none match", + workflowName: "My Workflow", + excludes: []string{"Weekly Research", "CI Tests"}, + expected: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := isWorkflowExcluded(tt.workflowName, tt.excludes) + assert.Equal(t, tt.expected, result, "isWorkflowExcluded(%q, %v) should be %v", tt.workflowName, tt.excludes, tt.expected) + }) + } +} + +func TestResolveExcludeWorkflows(t *testing.T) { + tests := []struct { + name string + excludes []string + expected []string + }{ + { + name: "nil input returns nil", + excludes: nil, + expected: nil, + }, + { + name: "empty slice returns nil", + excludes: []string{}, + expected: nil, + }, + { + name: "blank entries are dropped", + excludes: []string{" ", ""}, + expected: []string{}, + }, + { + name: "unknown workflow kept as raw value", + excludes: []string{"no-such-workflow"}, + expected: []string{"no-such-workflow"}, + }, + { + name: "whitespace is trimmed from raw values", + excludes: []string{" my-workflow "}, + expected: []string{"my-workflow"}, + }, + { + name: "multiple unknown workflows all kept", + excludes: []string{"alpha", "beta"}, + expected: []string{"alpha", "beta"}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := resolveExcludeWorkflows(tt.excludes, false) + if tt.expected == nil { + assert.Nil(t, result, "resolveExcludeWorkflows(%v) should return nil", tt.excludes) + } else { + require.NotNil(t, result, "resolveExcludeWorkflows(%v) should not return nil", tt.excludes) + assert.Equal(t, tt.expected, result, "resolveExcludeWorkflows(%v) should return %v", tt.excludes, tt.expected) + } + }) + } +} diff --git a/pkg/cli/logs_download_test.go b/pkg/cli/logs_download_test.go index 28b50f85587..c54a1b577a9 100644 --- a/pkg/cli/logs_download_test.go +++ b/pkg/cli/logs_download_test.go @@ -21,7 +21,7 @@ func TestDownloadWorkflowLogs(t *testing.T) { // Test the DownloadWorkflowLogs function // This should either fail with auth error (if not authenticated) // or succeed with no results (if authenticated but no workflows match) - err := DownloadWorkflowLogs(context.Background(), "", 1, "", "", "./test-logs", "", "", 0, 0, "", false, false, false, false, false, false, false, 0, "summary.json", "", false, false, "", nil) + err := DownloadWorkflowLogs(context.Background(), "", 1, "", "", "./test-logs", "", "", 0, 0, "", false, false, false, false, false, false, false, 0, "summary.json", "", false, false, "", nil, nil) // If GitHub CLI is authenticated, the function may succeed but find no results // If not authenticated, it should return an auth error @@ -393,7 +393,7 @@ func TestDownloadWorkflowLogsWithEngineFilter(t *testing.T) { if !tt.expectError { // For valid engines, test that the function can be called without panic // It may still fail with auth errors, which is expected - err := DownloadWorkflowLogs(context.Background(), "", 1, "", "", "./test-logs", tt.engine, "", 0, 0, "", false, false, false, false, false, false, false, 0, "summary.json", "", false, false, "", nil) + err := DownloadWorkflowLogs(context.Background(), "", 1, "", "", "./test-logs", tt.engine, "", 0, 0, "", false, false, false, false, false, false, false, 0, "summary.json", "", false, false, "", nil, nil) // Clean up any created directories os.RemoveAll("./test-logs") diff --git a/pkg/cli/logs_json_stderr_order_test.go b/pkg/cli/logs_json_stderr_order_test.go index 3443ad4cb16..06474a0c5b5 100644 --- a/pkg/cli/logs_json_stderr_order_test.go +++ b/pkg/cli/logs_json_stderr_order_test.go @@ -62,6 +62,7 @@ func TestLogsJSONOutputBeforeStderr(t *testing.T) { false, // train "", // format nil, // artifactSets + nil, // excludeWorkflows ) // Close writers first @@ -187,6 +188,7 @@ func TestLogsJSONAndStderrRedirected(t *testing.T) { false, // train "", // format nil, // artifactSets + nil, // excludeWorkflows ) // Close the writer diff --git a/pkg/cli/logs_orchestrator.go b/pkg/cli/logs_orchestrator.go index bff1d6d514c..cbb202c21ed 100644 --- a/pkg/cli/logs_orchestrator.go +++ b/pkg/cli/logs_orchestrator.go @@ -34,9 +34,98 @@ func getMaxConcurrentDownloads() int { return envutil.GetIntFromEnv("GH_AW_MAX_CONCURRENT_DOWNLOADS", MaxConcurrentDownloads, 1, 100, logsOrchestratorLog) } -// DownloadWorkflowLogs downloads and analyzes workflow logs with metrics -func DownloadWorkflowLogs(ctx context.Context, workflowName string, count int, startDate, endDate, outputDir, engine, ref string, beforeRunID, afterRunID int64, repoOverride string, verbose bool, toolGraph bool, noStaged bool, firewallOnly bool, noFirewall bool, parse bool, jsonOutput bool, timeout int, summaryFile string, safeOutputType string, filteredIntegrity bool, train bool, format string, artifactSets []string) error { - logsOrchestratorLog.Printf("Starting workflow log download: workflow=%s, count=%d, startDate=%s, endDate=%s, outputDir=%s, summaryFile=%s, safeOutputType=%s, filteredIntegrity=%v, train=%v, format=%s, artifactSets=%v", workflowName, count, startDate, endDate, outputDir, summaryFile, safeOutputType, filteredIntegrity, train, format, artifactSets) +// DownloadWorkflowLogsOptions groups all configuration for DownloadWorkflowLogs. +// Using a struct avoids a long positional parameter list and makes future additions +// non-breaking at call sites. +type DownloadWorkflowLogsOptions struct { + WorkflowName string + Count int + StartDate string + EndDate string + OutputDir string + Engine string + Ref string + BeforeRunID int64 + AfterRunID int64 + RepoOverride string + Verbose bool + ToolGraph bool + NoStaged bool + FirewallOnly bool + NoFirewall bool + Parse bool + JSONOutput bool + Timeout int + SummaryFile string + SafeOutputType string + FilteredIntegrity bool + Train bool + Format string + ArtifactSets []string + ExcludeWorkflows []string +} + +// DownloadWorkflowLogs downloads and analyzes workflow logs with metrics. +// It is a thin wrapper around DownloadWorkflowLogsWithOptions that maps the +// positional arguments into DownloadWorkflowLogsOptions. +func DownloadWorkflowLogs(ctx context.Context, workflowName string, count int, startDate, endDate, outputDir, engine, ref string, beforeRunID, afterRunID int64, repoOverride string, verbose bool, toolGraph bool, noStaged bool, firewallOnly bool, noFirewall bool, parse bool, jsonOutput bool, timeout int, summaryFile string, safeOutputType string, filteredIntegrity bool, train bool, format string, artifactSets []string, excludeWorkflows []string) error { + return DownloadWorkflowLogsWithOptions(ctx, DownloadWorkflowLogsOptions{ + WorkflowName: workflowName, + Count: count, + StartDate: startDate, + EndDate: endDate, + OutputDir: outputDir, + Engine: engine, + Ref: ref, + BeforeRunID: beforeRunID, + AfterRunID: afterRunID, + RepoOverride: repoOverride, + Verbose: verbose, + ToolGraph: toolGraph, + NoStaged: noStaged, + FirewallOnly: firewallOnly, + NoFirewall: noFirewall, + Parse: parse, + JSONOutput: jsonOutput, + Timeout: timeout, + SummaryFile: summaryFile, + SafeOutputType: safeOutputType, + FilteredIntegrity: filteredIntegrity, + Train: train, + Format: format, + ArtifactSets: artifactSets, + ExcludeWorkflows: excludeWorkflows, + }) +} + +// DownloadWorkflowLogsWithOptions downloads and analyzes workflow logs with metrics. +func DownloadWorkflowLogsWithOptions(ctx context.Context, opts DownloadWorkflowLogsOptions) error { + workflowName := opts.WorkflowName + count := opts.Count + startDate := opts.StartDate + endDate := opts.EndDate + outputDir := opts.OutputDir + engine := opts.Engine + ref := opts.Ref + beforeRunID := opts.BeforeRunID + afterRunID := opts.AfterRunID + repoOverride := opts.RepoOverride + verbose := opts.Verbose + toolGraph := opts.ToolGraph + noStaged := opts.NoStaged + firewallOnly := opts.FirewallOnly + noFirewall := opts.NoFirewall + parse := opts.Parse + jsonOutput := opts.JSONOutput + timeout := opts.Timeout + summaryFile := opts.SummaryFile + safeOutputType := opts.SafeOutputType + filteredIntegrity := opts.FilteredIntegrity + train := opts.Train + format := opts.Format + artifactSets := opts.ArtifactSets + excludeWorkflows := opts.ExcludeWorkflows + logsOrchestratorLog.Printf("Starting workflow log download: workflow=%s, count=%d, startDate=%s, endDate=%s, outputDir=%s, summaryFile=%s, safeOutputType=%s, filteredIntegrity=%v, train=%v, format=%s, artifactSets=%v, excludeWorkflows=%v", workflowName, count, startDate, endDate, outputDir, summaryFile, safeOutputType, filteredIntegrity, train, format, artifactSets, excludeWorkflows) // Validate and resolve artifact sets into a concrete filter (list of artifact base names). if err := ValidateArtifactSets(artifactSets); err != nil { @@ -50,6 +139,18 @@ func DownloadWorkflowLogs(ctx context.Context, workflowName string, count int, s } } + // Resolve excluded workflow names to display names (for matching against WorkflowRun.WorkflowName). + // Each entry in excludeWorkflows may be a workflow ID (e.g., "weekly-research") or a display name + // (e.g., "Weekly Research"). We try to resolve each to its canonical display name; if resolution + // fails (e.g., no .lock.yml files present), we fall back to case-insensitive matching of the raw value. + resolvedExcludes := resolveExcludeWorkflows(excludeWorkflows, verbose) + if len(resolvedExcludes) > 0 { + logsOrchestratorLog.Printf("Exclude filter active: %v", resolvedExcludes) + if verbose { + fmt.Fprintln(os.Stderr, console.FormatInfoMessage("Exclude filter: skipping workflows: "+strings.Join(resolvedExcludes, ", "))) + } + } + // Ensure .github/aw/logs/.gitignore exists on every invocation if err := ensureLogsGitignore(); err != nil { // Log but don't fail - this is not critical for downloading logs @@ -193,6 +294,23 @@ func DownloadWorkflowLogs(ctx context.Context, workflowName string, count int, s // forcing us to scan the entire batch. batchProcessed := 0 runsRemaining := runs + + // Apply exclude filter before chunking to avoid downloading artifacts for excluded workflows. + if len(resolvedExcludes) > 0 { + filteredRuns := make([]WorkflowRun, 0, len(runsRemaining)) + for _, run := range runsRemaining { + if isWorkflowExcluded(run.WorkflowName, resolvedExcludes) { + logsOrchestratorLog.Printf("Skipping run %d: workflow '%s' is in exclude list", run.DatabaseID, run.WorkflowName) + if verbose { + fmt.Fprintln(os.Stderr, console.FormatInfoMessage(fmt.Sprintf("Skipping run %d: workflow '%s' is excluded by --exclude", run.DatabaseID, run.WorkflowName))) + } + continue + } + filteredRuns = append(filteredRuns, run) + } + runsRemaining = filteredRuns + } + for len(runsRemaining) > 0 && len(processedRuns) < count { remainingNeeded := count - len(processedRuns) if remainingNeeded <= 0 { diff --git a/pkg/cli/logs_utils.go b/pkg/cli/logs_utils.go index 269c9746ad6..a6286a6d23e 100644 --- a/pkg/cli/logs_utils.go +++ b/pkg/cli/logs_utils.go @@ -15,6 +15,7 @@ import ( "github.com/github/gh-aw/pkg/console" "github.com/github/gh-aw/pkg/logger" + "github.com/github/gh-aw/pkg/workflow" ) var logsUtilsLog = logger.New("cli:logs_utils") @@ -83,3 +84,68 @@ func getAgenticWorkflowNames(verbose bool) ([]string, error) { return workflowNames, nil } + +// resolveExcludeWorkflows resolves a list of workflow names (IDs or display names) to their +// canonical display names for matching against WorkflowRun.WorkflowName. +// It loads all local workflows once and resolves all excludes against an in-memory map, +// avoiding repeated filesystem scans. +// If a name cannot be resolved (e.g., no .lock.yml files present or the workflow is from another +// repo), the raw value is kept so that isWorkflowExcluded can still attempt a slug-based match. +func resolveExcludeWorkflows(excludes []string, verbose bool) []string { + if len(excludes) == 0 { + return nil + } + + // Load all local workflows once to build a fast lookup map. + // Keys are both the display name (case-insensitive) and workflow ID (case-insensitive), + // and the value is the canonical display name. + displayByKey := make(map[string]string) + allWorkflows, err := workflow.GetAllWorkflows() + if err != nil { + logsUtilsLog.Printf("Could not load workflow list for exclude resolution: %v", err) + } else { + for _, wf := range allWorkflows { + displayByKey[strings.ToLower(wf.DisplayName)] = wf.DisplayName + displayByKey[strings.ToLower(wf.WorkflowID)] = wf.DisplayName + } + } + + resolved := make([]string, 0, len(excludes)) + for _, name := range excludes { + name = strings.TrimSpace(name) + if name == "" { + continue + } + // Attempt to resolve via the in-memory map (display name or workflow ID lookup). + if displayName, ok := displayByKey[strings.ToLower(name)]; ok { + logsUtilsLog.Printf("Resolved exclude workflow '%s' -> '%s'", name, displayName) + resolved = append(resolved, displayName) + } else { + // Cannot resolve - keep the raw value so isWorkflowExcluded can try slug matching. + logsUtilsLog.Printf("Could not resolve exclude workflow '%s' (using raw value for matching)", name) + if verbose { + fmt.Fprintln(os.Stderr, console.FormatInfoMessage(fmt.Sprintf("Exclude: using '%s' for matching (could not resolve to a known workflow)", name))) + } + resolved = append(resolved, name) + } + } + return resolved +} + +// isWorkflowExcluded reports whether a workflow display name matches any entry in the exclude list. +// Matching is case-insensitive to be resilient to capitalization differences. +// As a fallback it also compares a slugified form of the display name (spaces→hyphens) against +// each exclude entry, so that e.g. --exclude weekly-research matches a run named "Weekly Research" +// when local lock files are absent and resolution could not convert the ID to a display name. +func isWorkflowExcluded(workflowName string, excludes []string) bool { + lowerName := strings.ToLower(workflowName) + // Slug form: lowercase display name with spaces replaced by hyphens (mirrors common workflow ID convention). + slugName := strings.ReplaceAll(lowerName, " ", "-") + for _, ex := range excludes { + lowerEx := strings.ToLower(ex) + if lowerEx == lowerName || lowerEx == slugName { + return true + } + } + return false +} diff --git a/pkg/cli/mcp_tools_privileged.go b/pkg/cli/mcp_tools_privileged.go index f15314f8d94..70dbf0b718c 100644 --- a/pkg/cli/mcp_tools_privileged.go +++ b/pkg/cli/mcp_tools_privileged.go @@ -44,6 +44,7 @@ func registerLogsTool(server *mcp.Server, execCmd execCmdFunc, actor string, val Timeout int `json:"timeout,omitempty" jsonschema:"Maximum time in minutes to spend downloading logs (default: 1 for MCP server)"` MaxTokens int `json:"max_tokens,omitempty" jsonschema:"Deprecated: accepted for backward compatibility but ignored. Output is always written to a file."` Artifacts []string `json:"artifacts,omitempty" jsonschema:"Artifact sets to download (default: all). Valid sets: all, activation, agent, detection, firewall, github-api, mcp"` + ExcludeWorkflows []string `json:"exclude_workflows,omitempty" jsonschema:"Workflow names or IDs to exclude from results. Excluded runs are skipped before artifact download, saving API resources."` } // Generate schema with elicitation defaults @@ -168,6 +169,9 @@ from where the previous request stopped due to timeout.`, if len(args.Artifacts) > 0 { cmdArgs = append(cmdArgs, "--artifacts", strings.Join(args.Artifacts, ",")) } + if len(args.ExcludeWorkflows) > 0 { + cmdArgs = append(cmdArgs, "--exclude", strings.Join(args.ExcludeWorkflows, ",")) + } cmdArgs = appendRepoFlagFromEnv(cmdArgs)