diff --git a/.changeset/patch-parse-firewall-logs-logs-audit.md b/.changeset/patch-parse-firewall-logs-logs-audit.md new file mode 100644 index 00000000000..65593f733db --- /dev/null +++ b/.changeset/patch-parse-firewall-logs-logs-audit.md @@ -0,0 +1,5 @@ +--- +"gh-aw": patch +--- + +Add --parse support for firewall logs in logs and audit commands diff --git a/.github/workflows/research.lock.yml b/.github/workflows/research.lock.yml index 02a146845d3..5221c844191 100644 --- a/.github/workflows/research.lock.yml +++ b/.github/workflows/research.lock.yml @@ -2340,23 +2340,71 @@ jobs: } + const timestamp = fields[0]; + + if (!/^\d+(\.\d+)?$/.test(timestamp)) { + + return null; + + } + + const clientIpPort = fields[1]; + + if (clientIpPort !== "-" && !/^[\d.]+:\d+$/.test(clientIpPort)) { + + return null; + + } + + const domain = fields[2]; + + if (domain !== "-" && !/^[a-zA-Z0-9]([a-zA-Z0-9-]*[a-zA-Z0-9])?(\.[a-zA-Z0-9]([a-zA-Z0-9-]*[a-zA-Z0-9])?)*:\d+$/.test(domain)) { + + return null; + + } + + const destIpPort = fields[3]; + + if (destIpPort !== "-" && !/^[\d.]+:\d+$/.test(destIpPort)) { + + return null; + + } + + const status = fields[6]; + + if (status !== "-" && !/^\d+$/.test(status)) { + + return null; + + } + + const decision = fields[7]; + + if (decision !== "-" && !decision.includes(":")) { + + return null; + + } + return { - timestamp: fields[0], + timestamp: timestamp, - clientIpPort: fields[1], + clientIpPort: clientIpPort, - domain: fields[2], + domain: domain, - destIpPort: fields[3], + destIpPort: destIpPort, proto: fields[4], method: fields[5], - status: fields[6], + status: status, - decision: fields[7], + decision: decision, url: fields[8], @@ -2398,19 +2446,25 @@ jobs: let summary = "# 🔥 Firewall Blocked Requests\n\n"; - if (deniedRequests > 0) { + const validDeniedDomains = deniedDomains.filter(domain => domain !== "-"); + + const validDeniedRequests = validDeniedDomains.reduce((sum, domain) => sum + (requestsByDomain.get(domain)?.denied || 0), 0); 
+ + if (validDeniedRequests > 0) { + + summary += `**${validDeniedRequests}** request${validDeniedRequests !== 1 ? "s" : ""} blocked across **${validDeniedDomains.length}** unique domain${validDeniedDomains.length !== 1 ? "s" : ""}`; - summary += `**${deniedRequests}** request${deniedRequests !== 1 ? "s" : ""} blocked across **${deniedDomains.length}** unique domain${deniedDomains.length !== 1 ? "s" : ""}`; + summary += ` (${totalRequests > 0 ? Math.round((validDeniedRequests / totalRequests) * 100) : 0}% of total traffic)\n\n`; - summary += ` (${totalRequests > 0 ? Math.round((deniedRequests / totalRequests) * 100) : 0}% of total traffic)\n\n`; + summary += "<details>
\n"; - summary += "## 🚫 Blocked Domains\n\n"; + summary += "<summary>🚫 Blocked Domains (click to expand)</summary>\n\n"; summary += "| Domain | Blocked Requests |\n"; summary += "|--------|------------------|\n"; - for (const domain of deniedDomains) { + for (const domain of validDeniedDomains) { const stats = requestsByDomain.get(domain); @@ -2418,7 +2472,7 @@ jobs: } - summary += "\n"; + summary += "\n</details>
\n\n"; } else { diff --git a/docs/src/content/docs/reference/frontmatter-full.md b/docs/src/content/docs/reference/frontmatter-full.md index 63fee1beaa3..baa6431e861 100644 --- a/docs/src/content/docs/reference/frontmatter-full.md +++ b/docs/src/content/docs/reference/frontmatter-full.md @@ -683,15 +683,6 @@ env: "example-value" # Feature flags to enable experimental or optional features in the workflow. Each # feature is specified as a key with a boolean value. # (optional) -# -# Available features: -# firewall: Enable AWF (Agent Workflow Firewall) for network egress control -# with domain allowlisting. Currently only supported for the Copilot -# engine. AWF is sourced from https://github.com/githubnext/gh-aw-firewall -# -# Example: -# features: -# firewall: true features: {} diff --git a/pkg/cli/audit.go b/pkg/cli/audit.go index 2b30a77379e..c11cdd46d39 100644 --- a/pkg/cli/audit.go +++ b/pkg/cli/audit.go @@ -51,7 +51,7 @@ Examples: ` + constants.CLIExtensionPrefix + ` audit https://github.example.com/owner/repo/actions/runs/1234567890 # Audit from GitHub Enterprise ` + constants.CLIExtensionPrefix + ` audit 1234567890 -o ./audit-reports # Custom output directory ` + constants.CLIExtensionPrefix + ` audit 1234567890 -v # Verbose output - ` + constants.CLIExtensionPrefix + ` audit 1234567890 --parse # Parse agent logs and generate log.md`, + ` + constants.CLIExtensionPrefix + ` audit 1234567890 --parse # Parse agent logs and firewall logs, generating log.md and firewall.md`, Args: cobra.ExactArgs(1), Run: func(cmd *cobra.Command, args []string) { runIDOrURL := args[0] @@ -78,7 +78,7 @@ Examples: // Add flags to audit command auditCmd.Flags().StringP("output", "o", "./logs", "Output directory for downloaded logs and artifacts") auditCmd.Flags().Bool("json", false, "Output audit report as JSON instead of formatted console tables") - auditCmd.Flags().Bool("parse", false, "Run JavaScript parser on agent logs and write markdown to log.md") + 
auditCmd.Flags().Bool("parse", false, "Run JavaScript parsers on agent logs and firewall logs, writing markdown to log.md and firewall.md") return auditCmd } @@ -333,6 +333,19 @@ func AuditWorkflowRun(runInfo RunURLInfo, outputDir string, verbose bool, parse } else if verbose { fmt.Fprintln(os.Stderr, console.FormatInfoMessage("No engine detected (aw_info.json missing or invalid); skipping agent log rendering")) } + + // Also parse firewall logs if they exist + if err := parseFirewallLogs(runOutputDir, verbose); err != nil { + if verbose { + fmt.Fprintln(os.Stderr, console.FormatWarningMessage(fmt.Sprintf("Failed to parse firewall logs for run %d: %v", runInfo.RunID, err))) + } + } else { + // Show success message if firewall.md was created + firewallMdPath := filepath.Join(runOutputDir, "firewall.md") + if _, err := os.Stat(firewallMdPath); err == nil { + fmt.Fprintln(os.Stderr, console.FormatSuccessMessage(fmt.Sprintf("✓ Parsed firewall logs for run %d → %s", runInfo.RunID, firewallMdPath))) + } + } } // Save run summary for caching future audit runs diff --git a/pkg/cli/logs.go b/pkg/cli/logs.go index 28797563f99..c95eb0f724f 100644 --- a/pkg/cli/logs.go +++ b/pkg/cli/logs.go @@ -418,7 +418,7 @@ Examples: logsCmd.Flags().Int64("after-run-id", 0, "Filter runs with database ID after this value (exclusive)") logsCmd.Flags().Bool("tool-graph", false, "Generate Mermaid tool sequence graph from agent logs") logsCmd.Flags().Bool("no-staged", false, "Filter out staged workflow runs (exclude runs with staged: true in aw_info.json)") - logsCmd.Flags().Bool("parse", false, "Run JavaScript parser on agent logs and write markdown to log.md") + logsCmd.Flags().Bool("parse", false, "Run JavaScript parsers on agent logs and firewall logs, writing markdown to log.md and firewall.md") logsCmd.Flags().Bool("json", false, "Output logs data as JSON instead of formatted console tables") logsCmd.Flags().Int("timeout", 0, "Maximum time in seconds to spend downloading logs (0 = no 
timeout)") @@ -647,6 +647,17 @@ func DownloadWorkflowLogs(workflowName string, count int, startDate, endDate, ou fmt.Fprintln(os.Stderr, console.FormatSuccessMessage(fmt.Sprintf("✓ Parsed log for run %d → %s", run.DatabaseID, logMdPath))) } } + + // Also parse firewall logs if they exist + if err := parseFirewallLogs(result.LogsPath, verbose); err != nil { + fmt.Fprintln(os.Stderr, console.FormatWarningMessage(fmt.Sprintf("Failed to parse firewall logs for run %d: %v", run.DatabaseID, err))) + } else { + // Show success message if firewall.md was created + firewallMdPath := filepath.Join(result.LogsPath, "firewall.md") + if _, err := os.Stat(firewallMdPath); err == nil { + fmt.Fprintln(os.Stderr, console.FormatSuccessMessage(fmt.Sprintf("✓ Parsed firewall logs for run %d → %s", run.DatabaseID, firewallMdPath))) + } + } } } @@ -2555,3 +2566,205 @@ require = function(name) { return nil } + +// parseFirewallLogs runs the JavaScript firewall log parser and writes markdown to firewall.md +func parseFirewallLogs(runDir string, verbose bool) error { + // Get the firewall log parser script + jsScript := workflow.GetLogParserScript("parse_firewall_logs") + if jsScript == "" { + if verbose { + fmt.Fprintln(os.Stderr, console.FormatWarningMessage("Failed to get firewall log parser script")) + } + return nil + } + + // Check if squid logs directory exists in the run directory + // The logs could be in workflow-logs subdirectory or directly in the run directory + squidLogsDir := filepath.Join(runDir, "squid-logs") + + // Also check for squid logs in workflow-logs directory + workflowLogsSquidDir := filepath.Join(runDir, "workflow-logs", "squid-logs") + + // Determine which directory to use + var logsDir string + if dirExists(squidLogsDir) { + logsDir = squidLogsDir + } else if dirExists(workflowLogsSquidDir) { + logsDir = workflowLogsSquidDir + } else { + // No firewall logs found - this is not an error, just skip parsing + if verbose { + fmt.Fprintln(os.Stderr, 
console.FormatInfoMessage(fmt.Sprintf("No firewall logs found in %s, skipping firewall log parsing", filepath.Base(runDir)))) + } + return nil + } + + if verbose { + fmt.Fprintln(os.Stderr, console.FormatInfoMessage(fmt.Sprintf("Found firewall logs in %s", logsDir))) + } + + // Create a temporary directory for running the parser + tempDir, err := os.MkdirTemp("", "firewall_log_parser") + if err != nil { + return fmt.Errorf("failed to create temp dir: %w", err) + } + defer os.RemoveAll(tempDir) + + // Create a Node.js script that mimics the GitHub Actions environment + // The firewall parser expects logs in /tmp/gh-aw/squid-logs-{workflow}/ + // We'll set GITHUB_WORKFLOW to a value that makes the parser look in our temp directory + nodeScript := fmt.Sprintf(` +const fs = require('fs'); +const path = require('path'); + +// Mock @actions/core for the parser +const core = { + summary: { + addRaw: function(content) { + this._content = content; + return this; + }, + write: function() { + console.log(this._content); + }, + _content: '' + }, + setFailed: function(message) { + console.error('FAILED:', message); + process.exit(1); + }, + info: function(message) { + // Silent in CLI mode + } +}; + +// Set up environment +// We'll use a custom workflow name that points to our temp directory +process.env.GITHUB_WORKFLOW = 'temp-workflow'; + +// Override require to provide our mock +const originalRequire = require; +require = function(name) { + if (name === '@actions/core') { + return core; + } + return originalRequire.apply(this, arguments); +}; + +// Monkey-patch the main function to use our logs directory +const originalMain = function() { + const fs = require("fs"); + const path = require("path"); + + try { + // Use our custom logs directory instead of /tmp/gh-aw/squid-logs-* + const squidLogsDir = '%s'; + + if (!fs.existsSync(squidLogsDir)) { + core.info('No firewall logs directory found at: ' + squidLogsDir); + return; + } + + // Find all .log files + const files = 
fs.readdirSync(squidLogsDir).filter(file => file.endsWith(".log")); + + if (files.length === 0) { + core.info('No firewall log files found in: ' + squidLogsDir); + return; + } + + core.info('Found ' + files.length + ' firewall log file(s)'); + + // Parse all log files and aggregate results + let totalRequests = 0; + let allowedRequests = 0; + let deniedRequests = 0; + const allowedDomains = new Set(); + const deniedDomains = new Set(); + const requestsByDomain = new Map(); + + for (const file of files) { + const filePath = path.join(squidLogsDir, file); + core.info('Parsing firewall log: ' + file); + + const content = fs.readFileSync(filePath, "utf8"); + const lines = content.split("\n").filter(line => line.trim()); + + for (const line of lines) { + const entry = parseFirewallLogLine(line); + if (!entry) { + continue; + } + + totalRequests++; + + // Determine if request was allowed or denied + const isAllowed = isRequestAllowed(entry.decision, entry.status); + + if (isAllowed) { + allowedRequests++; + allowedDomains.add(entry.domain); + } else { + deniedRequests++; + deniedDomains.add(entry.domain); + } + + // Track request count per domain + if (!requestsByDomain.has(entry.domain)) { + requestsByDomain.set(entry.domain, { allowed: 0, denied: 0 }); + } + const domainStats = requestsByDomain.get(entry.domain); + if (isAllowed) { + domainStats.allowed++; + } else { + domainStats.denied++; + } + } + } + + // Generate step summary + const summary = generateFirewallSummary({ + totalRequests, + allowedRequests, + deniedRequests, + allowedDomains: Array.from(allowedDomains).sort(), + deniedDomains: Array.from(deniedDomains).sort(), + requestsByDomain, + }); + + core.summary.addRaw(summary).write(); + core.info("Firewall log summary generated successfully"); + } catch (error) { + core.setFailed(error instanceof Error ? 
error : String(error)); + } +}; + +// Execute the parser script to get helper functions +%s + +// Replace main() call with our custom version +originalMain(); +`, logsDir, jsScript) + + // Write the Node.js script + nodeFile := filepath.Join(tempDir, "parser.js") + if err := os.WriteFile(nodeFile, []byte(nodeScript), 0644); err != nil { + return fmt.Errorf("failed to write node script: %w", err) + } + + // Execute the Node.js script + cmd := exec.Command("node", "parser.js") + cmd.Dir = tempDir + output, err := cmd.CombinedOutput() + if err != nil { + return fmt.Errorf("failed to execute firewall parser script: %w\nOutput: %s", err, string(output)) + } + + // Write the output to firewall.md in the run directory + firewallMdPath := filepath.Join(runDir, "firewall.md") + if err := os.WriteFile(firewallMdPath, []byte(strings.TrimSpace(string(output))), 0644); err != nil { + return fmt.Errorf("failed to write firewall.md: %w", err) + } + + return nil +} diff --git a/pkg/cli/logs_firewall_parse_test.go b/pkg/cli/logs_firewall_parse_test.go new file mode 100644 index 00000000000..5d36e24b1e7 --- /dev/null +++ b/pkg/cli/logs_firewall_parse_test.go @@ -0,0 +1,151 @@ +package cli + +import ( + "os" + "path/filepath" + "strings" + "testing" +) + +func TestParseFirewallLogs(t *testing.T) { + // Create a temporary directory for the test + tempDir := t.TempDir() + + // Create a mock squid-logs directory + squidLogsDir := filepath.Join(tempDir, "squid-logs") + if err := os.MkdirAll(squidLogsDir, 0755); err != nil { + t.Fatalf("Failed to create squid-logs directory: %v", err) + } + + // Create a mock firewall log file with valid log entries + logPath := filepath.Join(squidLogsDir, "access.log") + mockLogContent := `1234567890.123 10.0.0.1:12345 example.com:443 192.168.1.1:443 TCP CONNECT 200 TCP_TUNNEL:HIER_DIRECT https://example.com/ "Mozilla/5.0" +1234567891.456 10.0.0.2:23456 blocked.com:443 192.168.1.2:443 TCP CONNECT 403 TCP_DENIED:HIER_NONE https://blocked.com/ 
"Mozilla/5.0" +1234567892.789 10.0.0.3:34567 allowed.com:443 192.168.1.3:443 TCP CONNECT 200 TCP_TUNNEL:HIER_DIRECT https://allowed.com/ "Mozilla/5.0"` + if err := os.WriteFile(logPath, []byte(mockLogContent), 0644); err != nil { + t.Fatalf("Failed to create mock firewall log: %v", err) + } + + // Run the parser + err := parseFirewallLogs(tempDir, true) + if err != nil { + t.Fatalf("parseFirewallLogs failed: %v", err) + } + + // Check that firewall.md was created + firewallMdPath := filepath.Join(tempDir, "firewall.md") + if _, err := os.Stat(firewallMdPath); os.IsNotExist(err) { + t.Fatalf("firewall.md was not created") + } + + // Read the content and verify it's not empty + content, err := os.ReadFile(firewallMdPath) + if err != nil { + t.Fatalf("Failed to read firewall.md: %v", err) + } + + if len(content) == 0 { + t.Fatalf("firewall.md is empty") + } + + // The content should contain markdown formatting + contentStr := string(content) + if !strings.Contains(contentStr, "# 🔥 Firewall Blocked Requests") { + t.Errorf("firewall.md doesn't contain expected header") + } + + // Should mention blocked domains + if !strings.Contains(contentStr, "blocked.com") { + t.Errorf("firewall.md doesn't mention blocked.com") + } + + t.Logf("Generated firewall.md:\n%s", contentStr) +} + +func TestParseFirewallLogsInWorkflowLogsSubdir(t *testing.T) { + // Create a temporary directory for the test + tempDir := t.TempDir() + + // Create squid-logs in workflow-logs subdirectory (alternative location) + workflowLogsDir := filepath.Join(tempDir, "workflow-logs") + squidLogsDir := filepath.Join(workflowLogsDir, "squid-logs") + if err := os.MkdirAll(squidLogsDir, 0755); err != nil { + t.Fatalf("Failed to create workflow-logs/squid-logs directory: %v", err) + } + + // Create a mock firewall log file + logPath := filepath.Join(squidLogsDir, "access.log") + mockLogContent := `1234567890.123 10.0.0.1:12345 api.github.com:443 192.168.1.1:443 TCP CONNECT 200 TCP_TUNNEL:HIER_DIRECT 
https://api.github.com/ "gh-cli/1.0"` + if err := os.WriteFile(logPath, []byte(mockLogContent), 0644); err != nil { + t.Fatalf("Failed to create mock firewall log: %v", err) + } + + // Run the parser + err := parseFirewallLogs(tempDir, true) + if err != nil { + t.Fatalf("parseFirewallLogs failed: %v", err) + } + + // Check that firewall.md was created + firewallMdPath := filepath.Join(tempDir, "firewall.md") + if _, err := os.Stat(firewallMdPath); os.IsNotExist(err) { + t.Fatalf("firewall.md was not created") + } + + // Read the content + content, err := os.ReadFile(firewallMdPath) + if err != nil { + t.Fatalf("Failed to read firewall.md: %v", err) + } + + contentStr := string(content) + t.Logf("Generated firewall.md:\n%s", contentStr) +} + +func TestParseFirewallLogsNoLogs(t *testing.T) { + // Create a temporary directory without any firewall logs + tempDir := t.TempDir() + + // Run the parser - should not fail, just skip + err := parseFirewallLogs(tempDir, true) + if err != nil { + t.Fatalf("parseFirewallLogs should not fail when no logs present: %v", err) + } + + // Check that firewall.md was NOT created + firewallMdPath := filepath.Join(tempDir, "firewall.md") + if _, err := os.Stat(firewallMdPath); !os.IsNotExist(err) { + t.Errorf("firewall.md should not be created when no logs are present") + } +} + +func TestParseFirewallLogsEmptyDirectory(t *testing.T) { + // Create a temporary directory for the test + tempDir := t.TempDir() + + // Create an empty squid-logs directory + squidLogsDir := filepath.Join(tempDir, "squid-logs") + if err := os.MkdirAll(squidLogsDir, 0755); err != nil { + t.Fatalf("Failed to create squid-logs directory: %v", err) + } + + // Run the parser - should handle empty directory gracefully + err := parseFirewallLogs(tempDir, false) + if err != nil { + t.Fatalf("parseFirewallLogs should handle empty directory: %v", err) + } + + // Check that firewall.md was created (with message about no logs) + firewallMdPath := filepath.Join(tempDir, 
"firewall.md") + if _, err := os.Stat(firewallMdPath); os.IsNotExist(err) { + // It's okay if it wasn't created - the parser might skip empty directories + t.Logf("firewall.md was not created for empty directory (expected)") + } else { + // If it was created, it should mention no logs + content, err := os.ReadFile(firewallMdPath) + if err == nil { + contentStr := string(content) + t.Logf("Generated firewall.md for empty directory:\n%s", contentStr) + } + } +}