From 46a3878e8455716d81243d5c1cad25a07006e35a Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 21 Mar 2026 06:07:47 +0000 Subject: [PATCH 1/3] Initial plan From 4ca98de5edfe066e7c90235f38fe4c9ab0e7e9f3 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 21 Mar 2026 06:11:16 +0000 Subject: [PATCH 2/3] chore: initial plan for pre-compiling heredoc regexp patterns Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> Agent-Logs-Url: https://github.com/github/gh-aw/sessions/17eb5e04-8cc0-460c-b362-b294e4f0d287 --- .github/agents/agentic-workflows.agent.md | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/agents/agentic-workflows.agent.md b/.github/agents/agentic-workflows.agent.md index 8454ad9e67b..c0f21877e1b 100644 --- a/.github/agents/agentic-workflows.agent.md +++ b/.github/agents/agentic-workflows.agent.md @@ -174,5 +174,4 @@ gh aw compile --validate - Workflows must be compiled to `.lock.yml` files before running in GitHub Actions - **Bash tools are enabled by default** - Don't restrict bash commands unnecessarily since workflows are sandboxed by the AWF - Follow security best practices: minimal permissions, explicit network access, no template injection -- **Network configuration**: Use ecosystem identifiers (`node`, `python`, `go`, etc.) or explicit FQDNs in `network.allowed`. Bare shorthands like `npm` or `pypi` are **not** valid. See https://github.com/github/gh-aw/blob/main/.github/aw/network.md for the full list of valid ecosystem identifiers and domain patterns. - **Single-file output**: When creating a workflow, produce exactly **one** workflow `.md` file. Do not create separate documentation files (architecture docs, runbooks, usage guides, etc.). If documentation is needed, add a brief `## Usage` section inside the workflow file itself. From 97cc5fa836d62ddb372595dfe85de15506bcf0ad Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 21 Mar 2026 06:16:47 +0000 Subject: [PATCH 3/3] perf: pre-compile heredoc regexp patterns at package level Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> Agent-Logs-Url: https://github.com/github/gh-aw/sessions/17eb5e04-8cc0-460c-b362-b294e4f0d287 --- .github/agents/agentic-workflows.agent.md | 1 + pkg/workflow/template_injection_validation.go | 47 +++++++++++-------- 2 files changed, 29 insertions(+), 19 deletions(-) diff --git a/.github/agents/agentic-workflows.agent.md b/.github/agents/agentic-workflows.agent.md index c0f21877e1b..8454ad9e67b 100644 --- a/.github/agents/agentic-workflows.agent.md +++ b/.github/agents/agentic-workflows.agent.md @@ -174,4 +174,5 @@ gh aw compile --validate - Workflows must be compiled to `.lock.yml` files before running in GitHub Actions - **Bash tools are enabled by default** - Don't restrict bash commands unnecessarily since workflows are sandboxed by the AWF - Follow security best practices: minimal permissions, explicit network access, no template injection +- **Network configuration**: Use ecosystem identifiers (`node`, `python`, `go`, etc.) or explicit FQDNs in `network.allowed`. Bare shorthands like `npm` or `pypi` are **not** valid. See https://github.com/github/gh-aw/blob/main/.github/aw/network.md for the full list of valid ecosystem identifiers and domain patterns. - **Single-file output**: When creating a workflow, produce exactly **one** workflow `.md` file. Do not create separate documentation files (architecture docs, runbooks, usage guides, etc.). If documentation is needed, add a brief `## Usage` section inside the workflow file itself. diff --git a/pkg/workflow/template_injection_validation.go b/pkg/workflow/template_injection_validation.go index 40e35276210..0f006b8458f 100644 --- a/pkg/workflow/template_injection_validation.go +++ b/pkg/workflow/template_injection_validation.go @@ -154,32 +154,41 @@ func extractRunBlocks(data any) []string { return runBlocks } -// removeHeredocContent removes heredoc sections from shell commands -// Heredocs (e.g., cat > file << 'EOF' ... EOF) are safe for template expressions -// because the content is written to files, not executed in the shell -func removeHeredocContent(content string) string { - // Match common heredoc patterns with known delimiter suffixes - // Since Go regex doesn't support backreferences, we match common heredoc delimiter suffixes explicitly - // Matches both exact delimiters (EOF) and prefixed delimiters (GH_AW_SAFE_OUTPUTS_CONFIG_EOF) - commonDelimiterSuffixes := []string{"EOF", "EOL", "END", "HEREDOC", "JSON", "YAML", "SQL"} +// heredocPattern holds pre-compiled regexp patterns for a single heredoc delimiter suffix. +type heredocPattern struct { + quoted *regexp.Regexp + unquoted *regexp.Regexp +} - result := content - for _, suffix := range commonDelimiterSuffixes { +// heredocPatterns are compiled once at program start for performance. +// Each entry covers one of the common delimiter suffixes used by heredocs in shell scripts. +// Since Go regex doesn't support backreferences, we match common heredoc delimiter suffixes explicitly. +// Matches both exact delimiters (EOF) and prefixed delimiters (GH_AW_SAFE_OUTPUTS_CONFIG_EOF). +var heredocPatterns = func() []heredocPattern { + suffixes := []string{"EOF", "EOL", "END", "HEREDOC", "JSON", "YAML", "SQL"} + patterns := make([]heredocPattern, len(suffixes)) + for i, suffix := range suffixes { // Pattern for quoted delimiter ending with suffix: << 'PREFIX_SUFFIX' or << "PREFIX_SUFFIX" - // The pattern matches any prefix followed by the suffix (e.g., GH_AW_CONFIG_EOF) // \w* matches zero or more word characters (allowing both exact match and prefixes) // (?ms) enables multiline and dotall modes, .*? is non-greedy // \s*\w*%s\s*$ allows for leading/trailing whitespace on the closing delimiter - quotedPattern := fmt.Sprintf(`(?ms)<<\s*['"]\w*%s['"].*?\n\s*\w*%s\s*$`, suffix, suffix) - quotedRegex := regexp.MustCompile(quotedPattern) - result = quotedRegex.ReplaceAllString(result, "# heredoc removed") - - // Pattern for unquoted delimiter ending with suffix: << PREFIX_SUFFIX - unquotedPattern := fmt.Sprintf(`(?ms)<<\s*\w*%s.*?\n\s*\w*%s\s*$`, suffix, suffix) - unquotedRegex := regexp.MustCompile(unquotedPattern) - result = unquotedRegex.ReplaceAllString(result, "# heredoc removed") + patterns[i] = heredocPattern{ + quoted: regexp.MustCompile(fmt.Sprintf(`(?ms)<<\s*['"]\w*%s['"].*?\n\s*\w*%s\s*$`, suffix, suffix)), + unquoted: regexp.MustCompile(fmt.Sprintf(`(?ms)<<\s*\w*%s.*?\n\s*\w*%s\s*$`, suffix, suffix)), + } } + return patterns +}() +// removeHeredocContent removes heredoc sections from shell commands. +// Heredocs (e.g., cat > file << 'EOF' ... EOF) are safe for template expressions +// because the content is written to files, not executed in the shell. +func removeHeredocContent(content string) string { + result := content + for _, p := range heredocPatterns { + result = p.quoted.ReplaceAllString(result, "# heredoc removed") + result = p.unquoted.ReplaceAllString(result, "# heredoc removed") + } return result }