From 544b42b6b1750a9eb6782f91c4e3bfbb98f6cb3c Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 29 Apr 2026 18:22:55 +0000 Subject: [PATCH 1/3] feat: add --stdin flag to logs and audit commands Agent-Logs-Url: https://github.com/github/gh-aw/sessions/da607e9a-8783-4979-8e69-79acf091003f Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> --- pkg/cli/audit.go | 33 +++- pkg/cli/logs_command.go | 48 ++++++ pkg/cli/logs_orchestrator.go | 315 ++++++++++++++++++++++++++++++++++- pkg/cli/stdin.go | 26 +++ pkg/cli/stdin_test.go | 74 ++++++++ 5 files changed, 488 insertions(+), 8 deletions(-) create mode 100644 pkg/cli/stdin.go create mode 100644 pkg/cli/stdin_test.go diff --git a/pkg/cli/audit.go b/pkg/cli/audit.go index b3fefa6f9d5..662fe03b336 100644 --- a/pkg/cli/audit.go +++ b/pkg/cli/audit.go @@ -63,7 +63,7 @@ Examples: ` + string(constants.CLIExtensionPrefix) + ` audit 1234567890 1234567891 # Diff two runs (base vs comparison) ` + string(constants.CLIExtensionPrefix) + ` audit 1234567890 1234567891 1234567892 # Diff base against multiple runs ` + string(constants.CLIExtensionPrefix) + ` audit 1234567890 1234567891 --format markdown # Markdown diff output for PR comments`, - Args: cobra.MinimumNArgs(1), + Args: cobra.ArbitraryArgs, RunE: func(cmd *cobra.Command, args []string) error { outputDir, _ := cmd.Flags().GetString("output") verbose, _ := cmd.Flags().GetBool("verbose") @@ -71,6 +71,36 @@ Examples: parse, _ := cmd.Flags().GetBool("parse") repoFlag, _ := cmd.Flags().GetString("repo") artifacts, _ := cmd.Flags().GetStringSlice("artifacts") + stdin, _ := cmd.Flags().GetBool("stdin") + + // When --stdin is provided, read run IDs/URLs from stdin instead of positional args. + if stdin { + if len(args) > 0 { + return errors.New(console.FormatErrorWithSuggestions( + "positional arguments are not allowed with --stdin", + []string{"Remove the run ID arguments, or omit --stdin to use positional arguments"}, + )) + } + stdinURLs, err := readRunIDsFromStdin(os.Stdin) + if err != nil { + return fmt.Errorf("failed to read run IDs from stdin: %w", err) + } + if len(stdinURLs) == 0 { + fmt.Fprintln(os.Stderr, console.FormatWarningMessage("No run IDs or URLs provided on stdin")) + return nil + } + args = stdinURLs + } + + if len(args) == 0 { + return errors.New(console.FormatErrorWithSuggestions( + "at least one run ID or URL is required", + []string{ + "Provide a run ID or URL as a positional argument", + "Use --stdin to read run IDs from stdin (one per line)", + }, + )) + } if len(args) == 1 { // Single run: existing audit behavior @@ -122,6 +152,7 @@ Examples: cmd.Flags().Bool("parse", false, "Run JavaScript parsers on agent logs and firewall logs, writing Markdown to log.md and firewall.md") cmd.Flags().String("format", "pretty", "Diff output format for multi-run mode: pretty, markdown") cmd.Flags().StringSlice("artifacts", nil, "Artifact sets to download (default: all). Valid sets: "+strings.Join(ValidArtifactSetNames(), ", ")) + cmd.Flags().Bool("stdin", false, "Read workflow run IDs or URLs from stdin (one per line) instead of positional arguments") // Register completions for audit command RegisterDirFlagCompletion(cmd, "output") diff --git a/pkg/cli/logs_command.go b/pkg/cli/logs_command.go index 6edbf8cbd94..a1feb0a20b6 100644 --- a/pkg/cli/logs_command.go +++ b/pkg/cli/logs_command.go @@ -12,6 +12,7 @@ package cli import ( "errors" "fmt" + "os" "strings" "time" @@ -106,6 +107,52 @@ Examples: RunE: func(cmd *cobra.Command, args []string) error { logsCommandLog.Printf("Starting logs command: args=%d", len(args)) + stdin, _ := cmd.Flags().GetBool("stdin") + + // When --stdin is provided, read run IDs/URLs from stdin and bypass GitHub API discovery. + if stdin { + if len(args) > 0 { + return errors.New(console.FormatErrorWithSuggestions( + "positional arguments are not allowed with --stdin", + []string{"Remove the workflow name argument, or omit --stdin to use the normal discovery mode"}, + )) + } + logsCommandLog.Printf("Reading run IDs from stdin") + runURLs, err := readRunIDsFromStdin(os.Stdin) + if err != nil { + return fmt.Errorf("failed to read run IDs from stdin: %w", err) + } + + outputDir, _ := cmd.Flags().GetString("output") + engine, _ := cmd.Flags().GetString("engine") + repoOverride, _ := cmd.Flags().GetString("repo") + verbose, _ := cmd.Flags().GetBool("verbose") + toolGraph, _ := cmd.Flags().GetBool("tool-graph") + noStaged, _ := cmd.Flags().GetBool("no-staged") + firewallOnly, _ := cmd.Flags().GetBool("firewall") + noFirewall, _ := cmd.Flags().GetBool("no-firewall") + parse, _ := cmd.Flags().GetBool("parse") + jsonOutput, _ := cmd.Flags().GetBool("json") + timeout, _ := cmd.Flags().GetInt("timeout") + summaryFile, _ := cmd.Flags().GetString("summary-file") + safeOutputType, _ := cmd.Flags().GetString("safe-output") + filteredIntegrity, _ := cmd.Flags().GetBool("filtered-integrity") + train, _ := cmd.Flags().GetBool("train") + format, _ := cmd.Flags().GetString("format") + artifacts, _ := cmd.Flags().GetStringSlice("artifacts") + + if engine != "" { + logsCommandLog.Printf("Validating engine parameter: %s", engine) + registry := workflow.GetGlobalEngineRegistry() + if !registry.IsValidEngine(engine) { + supportedEngines := registry.GetSupportedEngines() + return fmt.Errorf("invalid engine value '%s'. Must be one of: %s", engine, strings.Join(supportedEngines, ", ")) + } + } + + return DownloadWorkflowLogsFromStdin(cmd.Context(), runURLs, outputDir, engine, repoOverride, verbose, toolGraph, noStaged, firewallOnly, noFirewall, parse, jsonOutput, timeout, summaryFile, safeOutputType, filteredIntegrity, train, format, artifacts) + } + var workflowName string if len(args) > 0 && args[0] != "" { logsCommandLog.Printf("Resolving workflow name from argument: %s", args[0]) @@ -225,6 +272,7 @@ Examples: logsCmd.Flags().Int("last", 0, "Alias for --count: number of recent runs to download") logsCmd.Flags().StringSlice("artifacts", nil, "Artifact sets to download (default: all). Valid sets: "+strings.Join(ValidArtifactSetNames(), ", ")) logsCmd.Flags().String("after", "", "Remove locally cached run folders created before this date (cache cleanup). Use deltas like -1w or -1mo, or an absolute date YYYY-MM-DD. For example, --after -1w removes folders older than 1 week.") + logsCmd.Flags().Bool("stdin", false, "Read workflow run IDs or URLs from stdin (one per line) instead of discovering runs via the GitHub API") logsCmd.MarkFlagsMutuallyExclusive("firewall", "no-firewall") // Register completions for logs command diff --git a/pkg/cli/logs_orchestrator.go b/pkg/cli/logs_orchestrator.go index 4c7f23eb8f5..cdb30b8a4a5 100644 --- a/pkg/cli/logs_orchestrator.go +++ b/pkg/cli/logs_orchestrator.go @@ -23,6 +23,7 @@ import ( "github.com/github/gh-aw/pkg/constants" "github.com/github/gh-aw/pkg/envutil" "github.com/github/gh-aw/pkg/logger" + "github.com/github/gh-aw/pkg/parser" "github.com/github/gh-aw/pkg/workflow" ) @@ -522,13 +523,6 @@ func DownloadWorkflowLogs(ctx context.Context, workflowName string, count int, s processedRuns = processedRuns[:count] } - // Update MissingToolCount, MissingDataCount, and NoopCount in runs - for i := range processedRuns { - processedRuns[i].Run.MissingToolCount = len(processedRuns[i].MissingTools) - processedRuns[i].Run.MissingDataCount = len(processedRuns[i].MissingData) - processedRuns[i].Run.NoopCount = len(processedRuns[i].Noops) - } - // Build continuation data if timeout was reached and there are processed runs var continuation *ContinuationData if timeoutReached && len(processedRuns) > 0 { @@ -549,6 +543,20 @@ func DownloadWorkflowLogs(ctx context.Context, workflowName string, count int, s } } + return renderLogsOutput(processedRuns, outputDir, summaryFile, format, jsonOutput, toolGraph, train, continuation, verbose) +} + +// renderLogsOutput finalizes processedRuns and renders them in the appropriate output +// format: JSON, console metrics table, or cross-run audit report (pretty/markdown). +// continuation is optional and only set when a timeout was reached during a paginated download. +func renderLogsOutput(processedRuns []ProcessedRun, outputDir, summaryFile, format string, jsonOutput, toolGraph, train bool, continuation *ContinuationData, verbose bool) error { + // Update MissingToolCount, MissingDataCount, and NoopCount in runs + for i := range processedRuns { + processedRuns[i].Run.MissingToolCount = len(processedRuns[i].MissingTools) + processedRuns[i].Run.MissingDataCount = len(processedRuns[i].MissingData) + processedRuns[i].Run.NoopCount = len(processedRuns[i].Noops) + } + // Build structured logs data logsOrchestratorLog.Printf("Building logs data from %d processed runs (continuation=%t)", len(processedRuns), continuation != nil) logsData := buildLogsData(processedRuns, outputDir, continuation) @@ -620,3 +628,296 @@ func DownloadWorkflowLogs(ctx context.Context, workflowName string, count int, s return nil } + +// DownloadWorkflowLogsFromStdin fetches and processes workflow run logs for runs +// provided as IDs or URLs, bypassing the GitHub API run-discovery step. +// This is used when the --stdin flag is passed to the logs command. +func DownloadWorkflowLogsFromStdin(ctx context.Context, runURLs []string, outputDir, engine, repoOverride string, verbose, toolGraph, noStaged, firewallOnly, noFirewall bool, parse, jsonOutput bool, timeout int, summaryFile, safeOutputType string, filteredIntegrity, train bool, format string, artifactSets []string) error { + logsOrchestratorLog.Printf("Starting stdin log download: runs=%d, outputDir=%s", len(runURLs), outputDir) + + if err := ValidateArtifactSets(artifactSets); err != nil { + return err + } + artifactFilter := ResolveArtifactFilter(artifactSets) + if len(artifactFilter) > 0 { + logsOrchestratorLog.Printf("Artifact filter active: %v", artifactFilter) + if verbose { + fmt.Fprintln(os.Stderr, console.FormatInfoMessage("Artifact filter: downloading only "+strings.Join(artifactFilter, ", "))) + } + } + + if err := ensureLogsGitignore(); err != nil { + logsOrchestratorLog.Printf("Failed to ensure logs .gitignore: %v", err) + if verbose { + fmt.Fprintln(os.Stderr, console.FormatWarningMessage(fmt.Sprintf("Failed to ensure .github/aw/logs/.gitignore: %v", err))) + } + } + + select { + case <-ctx.Done(): + fmt.Fprintln(os.Stderr, console.FormatWarningMessage("Operation cancelled")) + return ctx.Err() + default: + } + + if len(runURLs) == 0 { + fmt.Fprintln(os.Stderr, console.FormatWarningMessage("No run IDs or URLs provided on stdin")) + return nil + } + + // Parse owner/repo from --repo override if provided + var ownerOverride, repoNameOverride string + if repoOverride != "" { + parts := strings.SplitN(repoOverride, "/", 2) + if len(parts) != 2 || parts[0] == "" || parts[1] == "" { + return fmt.Errorf("invalid repository format '%s': expected 'owner/repo'", repoOverride) + } + ownerOverride = parts[0] + repoNameOverride = parts[1] + } + + // Start timeout timer if specified + var startTime time.Time + if timeout > 0 { + startTime = time.Now() + if verbose { + fmt.Fprintln(os.Stderr, console.FormatInfoMessage(fmt.Sprintf("Timeout set to %d minutes", timeout))) + } + } + + if verbose { + fmt.Fprintln(os.Stderr, console.FormatInfoMessage(fmt.Sprintf("Fetching metadata for %d runs from stdin...", len(runURLs)))) + } + + // Build WorkflowRun objects by fetching metadata for each provided URL + var runs []WorkflowRun + for _, rawURL := range runURLs { + select { + case <-ctx.Done(): + fmt.Fprintln(os.Stderr, console.FormatWarningMessage("Operation cancelled")) + return ctx.Err() + default: + } + + if timeout > 0 && time.Since(startTime).Seconds() >= float64(timeout)*60 { + fmt.Fprintln(os.Stderr, console.FormatWarningMessage("Timeout reached before all run metadata could be fetched")) + break + } + + components, err := parser.ParseRunURLExtended(rawURL) + if err != nil { + fmt.Fprintln(os.Stderr, console.FormatWarningMessage(fmt.Sprintf("Skipping invalid run %q: %v", rawURL, err))) + continue + } + + // Prefer owner/repo embedded in the URL; fall back to --repo override + owner := components.Owner + repo := components.Repo + host := components.Host + if owner == "" { + owner = ownerOverride + repo = repoNameOverride + } + + run, err := fetchWorkflowRunMetadata(ctx, components.Number, owner, repo, host, verbose) + if err != nil { + fmt.Fprintln(os.Stderr, console.FormatWarningMessage(fmt.Sprintf("Skipping run %d: failed to fetch metadata: %v", components.Number, err))) + continue + } + runs = append(runs, run) + } + + if len(runs) == 0 { + if jsonOutput { + logsData := buildLogsData([]ProcessedRun{}, outputDir, nil) + if err := renderLogsJSON(logsData); err != nil { + return fmt.Errorf("failed to render JSON output: %w", err) + } + } + fmt.Fprintln(os.Stderr, console.FormatWarningMessage("No valid runs could be loaded from stdin")) + return nil + } + + // Download artifacts for all runs concurrently + downloadResults := downloadRunArtifactsConcurrent(ctx, runs, outputDir, verbose, len(runs), repoOverride, artifactFilter) + + // Process download results applying the same filters as DownloadWorkflowLogs + var processedRuns []ProcessedRun + for _, result := range downloadResults { + if result.Skipped { + if verbose && result.Error != nil { + fmt.Fprintln(os.Stderr, console.FormatWarningMessage(fmt.Sprintf("Skipping run %d: %v", result.Run.DatabaseID, result.Error))) + } + continue + } + + if result.Error != nil { + fmt.Fprintln(os.Stderr, console.FormatWarningMessage(fmt.Sprintf("Failed to download artifacts for run %d: %v", result.Run.DatabaseID, result.Error))) + continue + } + + awInfoPath := filepath.Join(result.LogsPath, "aw_info.json") + var awInfo *AwInfo + var awInfoErr error + if engine != "" || noStaged || firewallOnly || noFirewall { + awInfo, awInfoErr = parseAwInfo(awInfoPath, verbose) + } + + if engine != "" { + detectedEngine := extractEngineFromAwInfo(awInfoPath, verbose) + var engineMatches bool + if detectedEngine != nil { + registry := workflow.GetGlobalEngineRegistry() + for _, supportedEngine := range constants.AgenticEngines { + if testEngine, err := registry.GetEngine(supportedEngine); err == nil && testEngine == detectedEngine { + engineMatches = (supportedEngine == engine) + break + } + } + } + if !engineMatches { + logsOrchestratorLog.Printf("Skipping run %d: engine filter=%s, no match detected", result.Run.DatabaseID, engine) + if verbose { + fmt.Fprintln(os.Stderr, console.FormatInfoMessage(fmt.Sprintf("Skipping run %d: engine does not match filter '%s'", result.Run.DatabaseID, engine))) + } + continue + } + } + + if noStaged { + var isStaged bool + if awInfoErr == nil && awInfo != nil { + isStaged = awInfo.Staged + } + if isStaged { + logsOrchestratorLog.Printf("Skipping run %d: staged workflow filtered by --no-staged", result.Run.DatabaseID) + if verbose { + fmt.Fprintln(os.Stderr, console.FormatInfoMessage(fmt.Sprintf("Skipping run %d: workflow is staged (filtered by --no-staged)", result.Run.DatabaseID))) + } + continue + } + } + + if firewallOnly || noFirewall { + var hasFirewall bool + if awInfoErr == nil && awInfo != nil { + hasFirewall = awInfo.Steps.Firewall != "" + } + if firewallOnly && !hasFirewall { + logsOrchestratorLog.Printf("Skipping run %d: no firewall detected, filtered by --firewall", result.Run.DatabaseID) + if verbose { + fmt.Fprintln(os.Stderr, console.FormatInfoMessage(fmt.Sprintf("Skipping run %d: workflow does not use firewall (filtered by --firewall)", result.Run.DatabaseID))) + } + continue + } + if noFirewall && hasFirewall { + logsOrchestratorLog.Printf("Skipping run %d: firewall detected, filtered by --no-firewall", result.Run.DatabaseID) + if verbose { + fmt.Fprintln(os.Stderr, console.FormatInfoMessage(fmt.Sprintf("Skipping run %d: workflow uses firewall (filtered by --no-firewall)", result.Run.DatabaseID))) + } + continue + } + } + + if safeOutputType != "" { + hasSafeOutputType, checkErr := runContainsSafeOutputType(result.LogsPath, safeOutputType, verbose) + if checkErr != nil && verbose { + fmt.Fprintln(os.Stderr, console.FormatWarningMessage(fmt.Sprintf("Failed to check safe output type for run %d: %v", result.Run.DatabaseID, checkErr))) + } + if !hasSafeOutputType { + if verbose { + fmt.Fprintln(os.Stderr, console.FormatInfoMessage(fmt.Sprintf("Skipping run %d: no '%s' safe output messages found", result.Run.DatabaseID, safeOutputType))) + } + continue + } + } + + if filteredIntegrity { + hasFiltered, checkErr := runHasDifcFilteredItems(result.LogsPath, verbose) + if checkErr != nil { + fmt.Fprintln(os.Stderr, console.FormatWarningMessage(fmt.Sprintf("Failed to check DIFC filtered items for run %d: %v", result.Run.DatabaseID, checkErr))) + continue + } + if !hasFiltered { + logsOrchestratorLog.Printf("Skipping run %d: no DIFC filtered items found", result.Run.DatabaseID) + if verbose { + fmt.Fprintln(os.Stderr, console.FormatInfoMessage(fmt.Sprintf("Skipping run %d: no DIFC integrity-filtered items found in gateway logs", result.Run.DatabaseID))) + } + continue + } + } + + run := result.Run + run.TokenUsage = result.Metrics.TokenUsage + run.EstimatedCost = result.Metrics.EstimatedCost + run.Turns = result.Metrics.Turns + run.AvgTimeBetweenTurns = result.Metrics.AvgTimeBetweenTurns + run.ErrorCount = 0 + run.WarningCount = 0 + run.LogsPath = result.LogsPath + + if result.TokenUsage != nil && result.TokenUsage.TotalEffectiveTokens > 0 { + run.EffectiveTokens = result.TokenUsage.TotalEffectiveTokens + } + if failedJobCount, err := fetchJobStatuses(run.DatabaseID, verbose); err == nil { + run.ErrorCount += failedJobCount + } + if !run.StartedAt.IsZero() && !run.UpdatedAt.IsZero() { + run.Duration = run.UpdatedAt.Sub(run.StartedAt) + run.ActionMinutes = math.Ceil(run.Duration.Minutes()) + } + + processedRun := ProcessedRun{ + Run: run, + AwContext: result.AwContext, + TaskDomain: result.TaskDomain, + BehaviorFingerprint: result.BehaviorFingerprint, + AgenticAssessments: result.AgenticAssessments, + AccessAnalysis: result.AccessAnalysis, + FirewallAnalysis: result.FirewallAnalysis, + RedactedDomainsAnalysis: result.RedactedDomainsAnalysis, + MissingTools: result.MissingTools, + MissingData: result.MissingData, + Noops: result.Noops, + MCPFailures: result.MCPFailures, + MCPToolUsage: result.MCPToolUsage, + TokenUsage: result.TokenUsage, + GitHubRateLimitUsage: result.GitHubRateLimitUsage, + JobDetails: result.JobDetails, + } + processedRuns = append(processedRuns, processedRun) + + if parse { + detectedEngine := extractEngineFromAwInfo(awInfoPath, verbose) + if err := parseAgentLog(result.LogsPath, detectedEngine, verbose); err != nil { + fmt.Fprintln(os.Stderr, console.FormatWarningMessage(fmt.Sprintf("Failed to parse log for run %d: %v", run.DatabaseID, err))) + } else { + logMdPath := filepath.Join(result.LogsPath, "log.md") + if _, err := os.Stat(logMdPath); err == nil { + fmt.Fprintln(os.Stderr, console.FormatSuccessMessage(fmt.Sprintf("✓ Parsed log for run %d → %s", run.DatabaseID, logMdPath))) + } + } + if err := parseFirewallLogs(result.LogsPath, verbose); err != nil { + fmt.Fprintln(os.Stderr, console.FormatWarningMessage(fmt.Sprintf("Failed to parse firewall logs for run %d: %v", run.DatabaseID, err))) + } else { + firewallMdPath := filepath.Join(result.LogsPath, "firewall.md") + if _, err := os.Stat(firewallMdPath); err == nil { + fmt.Fprintln(os.Stderr, console.FormatSuccessMessage(fmt.Sprintf("✓ Parsed firewall logs for run %d → %s", run.DatabaseID, firewallMdPath))) + } + } + } + } + + if len(processedRuns) == 0 { + if jsonOutput { + logsData := buildLogsData([]ProcessedRun{}, outputDir, nil) + if err := renderLogsJSON(logsData); err != nil { + return fmt.Errorf("failed to render JSON output: %w", err) + } + } + fmt.Fprintln(os.Stderr, console.FormatWarningMessage("No workflow runs with artifacts found matching the specified criteria")) + return nil + } + + return renderLogsOutput(processedRuns, outputDir, summaryFile, format, jsonOutput, toolGraph, train, nil, verbose) +} diff --git a/pkg/cli/stdin.go b/pkg/cli/stdin.go new file mode 100644 index 00000000000..ce465007fc0 --- /dev/null +++ b/pkg/cli/stdin.go @@ -0,0 +1,26 @@ +package cli + +import ( + "bufio" + "fmt" + "io" + "strings" +) + +// readRunIDsFromStdin reads workflow run IDs or URLs from r, one per line. +// Blank lines and lines starting with '#' are ignored. +func readRunIDsFromStdin(r io.Reader) ([]string, error) { + var runIDs []string + scanner := bufio.NewScanner(r) + for scanner.Scan() { + line := strings.TrimSpace(scanner.Text()) + if line == "" || strings.HasPrefix(line, "#") { + continue + } + runIDs = append(runIDs, line) + } + if err := scanner.Err(); err != nil { + return nil, fmt.Errorf("failed to read from stdin: %w", err) + } + return runIDs, nil +} diff --git a/pkg/cli/stdin_test.go b/pkg/cli/stdin_test.go new file mode 100644 index 00000000000..01ec0a37edf --- /dev/null +++ b/pkg/cli/stdin_test.go @@ -0,0 +1,74 @@ +//go:build !integration + +package cli + +import ( + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestReadRunIDsFromStdin(t *testing.T) { + tests := []struct { + name string + input string + expected []string + }{ + { + name: "single run ID", + input: "1234567890\n", + expected: []string{"1234567890"}, + }, + { + name: "multiple run IDs", + input: "1234567890\n9876543210\n1111111111\n", + expected: []string{"1234567890", "9876543210", "1111111111"}, + }, + { + name: "run URLs", + input: "https://github.com/owner/repo/actions/runs/1234567890\nhttps://github.com/owner/repo/actions/runs/9876543210\n", + expected: []string{"https://github.com/owner/repo/actions/runs/1234567890", "https://github.com/owner/repo/actions/runs/9876543210"}, + }, + { + name: "blank lines are ignored", + input: "\n1234567890\n\n9876543210\n\n", + expected: []string{"1234567890", "9876543210"}, + }, + { + name: "comment lines are ignored", + input: "# This is a comment\n1234567890\n# Another comment\n9876543210\n", + expected: []string{"1234567890", "9876543210"}, + }, + { + name: "lines are trimmed", + input: " 1234567890 \n 9876543210 \n", + expected: []string{"1234567890", "9876543210"}, + }, + { + name: "empty input", + input: "", + expected: nil, + }, + { + name: "only blank lines and comments", + input: "\n# comment\n\n# another\n", + expected: nil, + }, + { + name: "no trailing newline", + input: "1234567890", + expected: []string{"1234567890"}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + r := strings.NewReader(tt.input) + got, err := readRunIDsFromStdin(r) + require.NoError(t, err, "readRunIDsFromStdin should not return an error") + assert.Equal(t, tt.expected, got, "URLs should match expected values") + }) + } +} From 6df93236ea88c7ae10ca98852080ed6f1272d0b6 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 29 Apr 2026 20:32:04 +0000 Subject: [PATCH 2/3] docs(adr): add draft ADR-29170 for stdin input mode on logs and audit commands Co-Authored-By: Claude Sonnet 4.6 --- ...-input-mode-for-logs-and-audit-commands.md | 76 +++++++++++++++++++ 1 file changed, 76 insertions(+) create mode 100644 docs/adr/29170-stdin-input-mode-for-logs-and-audit-commands.md diff --git a/docs/adr/29170-stdin-input-mode-for-logs-and-audit-commands.md b/docs/adr/29170-stdin-input-mode-for-logs-and-audit-commands.md new file mode 100644 index 00000000000..4cf88b4724d --- /dev/null +++ b/docs/adr/29170-stdin-input-mode-for-logs-and-audit-commands.md @@ -0,0 +1,76 @@ +# ADR-29170: Stdin Input Mode for Logs and Audit Commands + +**Date**: 2026-04-29 +**Status**: Draft +**Deciders**: pelikhan, Copilot + +--- + +## Part 1 — Narrative (Human-Friendly) + +### Context + +The `gh aw logs` and `gh aw audit` commands discover workflow runs by querying the GitHub API based on filters (workflow name, count, date range). This API-based discovery is unsuitable when a user already knows the exact run IDs they want to process — for example, when scripting batch analyses, piping output from another tool, or replaying a saved list of run IDs. There was no way to bypass discovery and supply run IDs directly without using positional arguments, which do not integrate naturally with shell pipelines. + +### Decision + +We will add a `--stdin` flag to both `gh aw logs` and `gh aw audit` that reads workflow run IDs or URLs from standard input (one per line), bypassing the GitHub API run-discovery step entirely. This approach follows Unix pipeline conventions and allows users to compose `gh aw logs` and `gh aw audit` with other shell tools. The `--stdin` flag is mutually exclusive with positional arguments on both commands. + +### Alternatives Considered + +#### Alternative 1: Positional Arguments Only (Status Quo) + +Users can already supply one or more run IDs as positional arguments (e.g., `gh aw audit 1234 5678`). This works for a small, known set of runs typed interactively but does not support piping from other commands or reading from files without shell substitution (`$(cat ids.txt)`). Shell substitution has argument-count limits and breaks easily with large lists. + +#### Alternative 2: File-Path Flag (`--file path/to/ids.txt`) + +A `--file` flag could accept a path to a text file containing run IDs. This is more explicit and reproducible (the file path can be version-controlled), but it is less composable in shell pipelines and requires writing intermediate files. Stdin is more idiomatic for Unix-style tools and is already the conventional mechanism for streaming data into CLI commands. + +### Consequences + +#### Positive +- Enables Unix-style composition: users can pipe output from other `gh` or shell commands directly into `gh aw logs` and `gh aw audit`. +- Bypasses GitHub API run-discovery quota, making batch processing of known run IDs cheaper and faster. +- The stdin parsing helper (`readRunIDsFromStdin`) is a small, fully-tested utility reused by both commands. + +#### Negative +- A parallel orchestration path (`DownloadWorkflowLogsFromStdin`) largely replicates the filtering and rendering logic of `DownloadWorkflowLogs`, increasing the maintenance surface. +- Some time-based and count-based flags (`--after`, `--count`, `--date`, workflow-name filtering) are silently ignored in stdin mode; this could surprise users who supply them alongside `--stdin`. +- Numeric-only run IDs require an explicit `--repo owner/repo` flag in stdin mode, because there is no workflow-name context from which to infer the repository. + +#### Neutral +- The `cobra.MinimumNArgs(1)` constraint on `audit` is replaced with `cobra.ArbitraryArgs` plus manual validation; the effective behavior is unchanged for positional-args usage. +- Blank lines and `#`-prefixed comment lines in stdin input are silently skipped, which is consistent with common Unix text-file conventions. + +--- + +## Part 2 — Normative Specification (RFC 2119) + +> The key words **MUST**, **MUST NOT**, **REQUIRED**, **SHALL**, **SHALL NOT**, **SHOULD**, **SHOULD NOT**, **RECOMMENDED**, **MAY**, and **OPTIONAL** in this section are to be interpreted as described in [RFC 2119](https://www.rfc-editor.org/rfc/rfc2119). + +### Stdin Flag Behaviour + +1. Both `gh aw logs` and `gh aw audit` **MUST** accept a `--stdin` boolean flag that, when set, reads workflow run IDs or URLs from standard input instead of discovering runs via the GitHub API. +2. Each line read from stdin **MUST** be trimmed of leading and trailing whitespace before processing. +3. Blank lines and lines whose first non-whitespace character is `#` **MUST** be silently ignored. +4. The `--stdin` flag and positional run-ID arguments **MUST NOT** be used together; implementations **MUST** return an error if both are supplied simultaneously. +5. If stdin produces zero valid entries after filtering, the command **SHOULD** emit a warning to stderr and exit successfully (status 0) rather than treating empty input as an error. + +### Input Format + +1. Stdin **MUST** accept both numeric run IDs (e.g., `1234567890`) and full GitHub Actions run URLs (e.g., `https://github.com/owner/repo/actions/runs/1234567890`). +2. When a numeric-only run ID is supplied and no owner/repo is encoded in the input, implementations **MUST** require the `--repo owner/repo` flag and **MUST** return an error if it is absent. +3. Implementations **SHOULD** accept GHES run URLs in addition to github.com URLs, consistent with existing positional-argument handling. + +### Flag Interactions + +1. Content-filtering flags (`--engine`, `--firewall`, `--no-firewall`, `--safe-output`, `--filtered-integrity`, `--no-staged`) **MUST** apply to runs supplied via stdin in the same way they apply to runs discovered via the GitHub API. +2. Discovery-scoping flags that are meaningless without API discovery (`--count`, `--date`, `--after`, workflow-name positional argument) **SHOULD NOT** silently take effect in stdin mode; implementations **SHOULD** document that these flags are ignored when `--stdin` is set. + +### Conformance + +An implementation is considered conformant with this ADR if it satisfies all **MUST** and **MUST NOT** requirements above. Failure to meet any **MUST** or **MUST NOT** requirement constitutes non-conformance. + +--- + +*This is a DRAFT ADR generated by the [Design Decision Gate](https://github.com/github/gh-aw/actions/runs/25131761595) workflow. The PR author must review, complete, and finalize this document before the PR can merge.* From aa68af624d56f48e02f23f4acad36ee0c333427c Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 29 Apr 2026 21:02:20 +0000 Subject: [PATCH 3/3] fix: address reviewer feedback on --stdin flag implementation Agent-Logs-Url: https://github.com/github/gh-aw/sessions/5176ce98-d935-4b8c-a868-018e6e7df922 Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> --- pkg/cli/audit_test.go | 31 +++++++++++++++++++++++++++++++ pkg/cli/logs_command_test.go | 22 ++++++++++++++++++++++ pkg/cli/logs_orchestrator.go | 34 ++++++++++++++++++++++++++-------- pkg/cli/logs_run_processor.go | 29 +++++++++++++++++++++-------- 4 files changed, 100 insertions(+), 16 deletions(-) diff --git a/pkg/cli/audit_test.go b/pkg/cli/audit_test.go index d430c737325..70449d9e437 100644 --- a/pkg/cli/audit_test.go +++ b/pkg/cli/audit_test.go @@ -1064,3 +1064,34 @@ func TestRunAuditMulti_Validation(t *testing.T) { }) } } + +func TestAuditCommandStdinFlag(t *testing.T) { + cmd := NewAuditCommand() + flags := cmd.Flags() + + // --stdin flag must be registered + stdinFlag := flags.Lookup("stdin") + require.NotNil(t, stdinFlag, "Should have 'stdin' flag") + assert.Equal(t, "bool", stdinFlag.Value.Type(), "--stdin should be a boolean flag") + assert.Equal(t, "false", stdinFlag.DefValue, "--stdin should default to false") +} + +func TestAuditCommandStdinRejectsPositionalArgs(t *testing.T) { + cmd := NewAuditCommand() + cmd.SetArgs([]string{"1234567890", "--stdin"}) + cmd.SetOut(nil) + cmd.SetErr(nil) + err := cmd.Execute() + require.Error(t, err, "audit --stdin with a positional arg should return an error") + assert.Contains(t, err.Error(), "positional arguments are not allowed with --stdin", "error message should explain the conflict") +} + +func TestAuditCommandRequiresArgsOrStdin(t *testing.T) { + cmd := NewAuditCommand() + cmd.SetArgs([]string{}) + cmd.SetOut(nil) + cmd.SetErr(nil) + err := cmd.Execute() + require.Error(t, err, "audit with no args and no --stdin should return an error") + assert.Contains(t, err.Error(), "at least one run ID or URL is required", "error message should prompt for required input") +} diff --git a/pkg/cli/logs_command_test.go b/pkg/cli/logs_command_test.go index a1158166673..90f27c9a6c6 100644 --- a/pkg/cli/logs_command_test.go +++ b/pkg/cli/logs_command_test.go @@ -268,3 +268,25 @@ func TestLogsCommandHelpText(t *testing.T) { assert.Contains(t, safeOutputFlag.Usage, "noop", "safe-output flag help should mention noop") assert.Contains(t, safeOutputFlag.Usage, "report-incomplete", "safe-output flag help should mention report-incomplete") } + +func TestLogsCommandStdinFlag(t *testing.T) { + cmd := NewLogsCommand() + flags := cmd.Flags() + + // --stdin flag must be registered + stdinFlag := flags.Lookup("stdin") + require.NotNil(t, stdinFlag, "Should have 'stdin' flag") + assert.Equal(t, "bool", stdinFlag.Value.Type(), "--stdin should be a boolean flag") + assert.Equal(t, "false", stdinFlag.DefValue, "--stdin should default to false") +} + +func TestLogsCommandStdinRejectsPositionalArgs(t *testing.T) { + cmd := NewLogsCommand() + cmd.SetArgs([]string{"my-workflow", "--stdin"}) + // Suppress output so test output stays clean + cmd.SetOut(nil) + cmd.SetErr(nil) + err := cmd.Execute() + require.Error(t, err, "logs --stdin with a positional arg should return an error") + assert.Contains(t, err.Error(), "positional arguments are not allowed with --stdin", "error message should explain the conflict") +} diff --git a/pkg/cli/logs_orchestrator.go b/pkg/cli/logs_orchestrator.go index cdb30b8a4a5..58ea9ec5a96 100644 --- a/pkg/cli/logs_orchestrator.go +++ b/pkg/cli/logs_orchestrator.go @@ -665,15 +665,25 @@ func DownloadWorkflowLogsFromStdin(ctx context.Context, runURLs []string, output return nil } - // Parse owner/repo from --repo override if provided - var ownerOverride, repoNameOverride string + // Parse owner/repo (and optional GHES host) from --repo override if provided. + // Accepted formats: "owner/repo" or "HOST/owner/repo". + var hostOverride, ownerOverride, repoNameOverride string if repoOverride != "" { - parts := strings.SplitN(repoOverride, "/", 2) - if len(parts) != 2 || parts[0] == "" || parts[1] == "" { - return fmt.Errorf("invalid repository format '%s': expected 'owner/repo'", repoOverride) + parts := strings.SplitN(repoOverride, "/", 3) + switch len(parts) { + case 3: // HOST/owner/repo + if parts[0] == "" || parts[1] == "" || parts[2] == "" { + return fmt.Errorf("invalid repository format '%s': expected '[HOST/]owner/repo'", repoOverride) + } + hostOverride, ownerOverride, repoNameOverride = parts[0], parts[1], parts[2] + case 2: // owner/repo + if parts[0] == "" || parts[1] == "" { + return fmt.Errorf("invalid repository format '%s': expected '[HOST/]owner/repo'", repoOverride) + } + ownerOverride, repoNameOverride = parts[0], parts[1] + default: + return fmt.Errorf("invalid repository format '%s': expected '[HOST/]owner/repo'", repoOverride) } - ownerOverride = parts[0] - repoNameOverride = parts[1] } // Start timeout timer if specified @@ -710,13 +720,21 @@ func DownloadWorkflowLogsFromStdin(ctx context.Context, runURLs []string, output continue } - // Prefer owner/repo embedded in the URL; fall back to --repo override + // Prefer owner/repo embedded in the URL; fall back to --repo override. + // If neither source provides owner, the run cannot be fetched — return an + // actionable error rather than silently continuing with a broken API call. owner := components.Owner repo := components.Repo host := components.Host if owner == "" { owner = ownerOverride repo = repoNameOverride + if host == "" { + host = hostOverride + } + } + if owner == "" { + return fmt.Errorf("run %q does not include repository information; pass --repo owner/repo or provide full run URLs", rawURL) } run, err := fetchWorkflowRunMetadata(ctx, components.Number, owner, repo, host, verbose) diff --git a/pkg/cli/logs_run_processor.go b/pkg/cli/logs_run_processor.go index 945d5e63923..82dff7e455f 100644 --- a/pkg/cli/logs_run_processor.go +++ b/pkg/cli/logs_run_processor.go @@ -22,6 +22,7 @@ import ( "github.com/github/gh-aw/pkg/console" "github.com/github/gh-aw/pkg/constants" + "github.com/github/gh-aw/pkg/parser" "github.com/github/gh-aw/pkg/stringutil" "github.com/sourcegraph/conc/pool" ) @@ -64,13 +65,16 @@ func downloadRunArtifactsConcurrent(ctx context.Context, runs []WorkflowRun, out // Get configured max concurrent downloads (default or from environment variable) maxConcurrent := getMaxConcurrentDownloads() - // Parse repoOverride into owner/repo once for cross-repo artifact download - var dlOwner, dlRepo string + // Parse repoOverride into host/owner/repo once for cross-repo artifact download. + // Accepted formats: "owner/repo" or "HOST/owner/repo". + var dlHost, dlOwner, dlRepo string if repoOverride != "" { - parts := strings.SplitN(repoOverride, "/", 2) - if len(parts) == 2 { - dlOwner = parts[0] - dlRepo = parts[1] + parts := strings.SplitN(repoOverride, "/", 3) + switch len(parts) { + case 3: // HOST/owner/repo + dlHost, dlOwner, dlRepo = parts[0], parts[1], parts[2] + case 2: // owner/repo + dlOwner, dlRepo = parts[0], parts[1] } } @@ -136,8 +140,17 @@ func downloadRunArtifactsConcurrent(ctx context.Context, runs []WorkflowRun, out return result, nil } - // No cached summary or version mismatch - download and process - err := downloadRunArtifacts(ctx, run.DatabaseID, runOutputDir, verbose, dlOwner, dlRepo, "", artifactFilter) + // No cached summary or version mismatch - download and process. + // Use the global owner/repo/host override from --repo when available. + // When the global override is absent (stdin mode with per-run URLs), derive + // the context from run.URL so each run downloads from the correct repository. + runOwner, runRepo, runHost := dlOwner, dlRepo, dlHost + if runOwner == "" && run.URL != "" { + if c, parseErr := parser.ParseRunURLExtended(run.URL); parseErr == nil && c.Owner != "" { + runOwner, runRepo, runHost = c.Owner, c.Repo, c.Host + } + } + err := downloadRunArtifacts(ctx, run.DatabaseID, runOutputDir, verbose, runOwner, runRepo, runHost, artifactFilter) result := DownloadResult{ Run: run,