From 358c2b2f9cd9bba380a16db04f6ae193b9e952f6 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 21 Apr 2026 17:57:44 +0000 Subject: [PATCH 1/4] Initial plan From 93b59a55aadae0ba46d908c9d5e0f0f263e209d6 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 21 Apr 2026 18:14:20 +0000 Subject: [PATCH 2/4] fix: pass allowed domains to compute text sanitize step Agent-Logs-Url: https://github.com/github/gh-aw/sessions/d87f1617-a210-4ab3-b929-466d293873cb Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> --- .../compiler_activation_job_builder.go | 13 +++- pkg/workflow/compute_text_lazy_test.go | 69 +++++++++++++++++++ 2 files changed, 81 insertions(+), 1 deletion(-) diff --git a/pkg/workflow/compiler_activation_job_builder.go b/pkg/workflow/compiler_activation_job_builder.go index 91c33c1dd3c..17f6b12a6c9 100644 --- a/pkg/workflow/compiler_activation_job_builder.go +++ b/pkg/workflow/compiler_activation_job_builder.go @@ -235,10 +235,21 @@ func (c *Compiler) addActivationRepositoryAndOutputSteps(ctx *activationJobBuild ctx.steps = append(ctx.steps, " - name: Compute current body text\n") ctx.steps = append(ctx.steps, " id: sanitized\n") ctx.steps = append(ctx.steps, fmt.Sprintf(" uses: %s\n", getCachedActionPin("actions/github-script", data))) - if len(data.Bots) > 0 { + var domainsStr string + if data.SafeOutputs != nil && len(data.SafeOutputs.AllowedDomains) > 0 { + domainsStr = c.computeExpandedAllowedDomainsForSanitization(data) + } else { + domainsStr = c.computeAllowedDomainsForSanitization(data) + } + if len(data.Bots) > 0 || domainsStr != "" { ctx.steps = append(ctx.steps, " env:\n") + } + if len(data.Bots) > 0 { ctx.steps = append(ctx.steps, formatYAMLEnv(" ", "GH_AW_ALLOWED_BOTS", strings.Join(data.Bots, ","))) } + if domainsStr != "" { + ctx.steps = append(ctx.steps, formatYAMLEnv(" ", "GH_AW_ALLOWED_DOMAINS", domainsStr)) + } ctx.steps = append(ctx.steps, " with:\n") ctx.steps = append(ctx.steps, " script: |\n") ctx.steps = append(ctx.steps, generateGitHubScriptWithRequire("compute_text.cjs")) diff --git a/pkg/workflow/compute_text_lazy_test.go b/pkg/workflow/compute_text_lazy_test.go index 76715ce10a1..5d5e5860e10 100644 --- a/pkg/workflow/compute_text_lazy_test.go +++ b/pkg/workflow/compute_text_lazy_test.go @@ -414,6 +414,75 @@ func TestHasContentContext(t *testing.T) { } } +func TestComputeTextStepIncludesAllowedDomainsEnv(t *testing.T) { + tempDir, err := os.MkdirTemp("", "compute-text-allowed-domains-test") + if err != nil { + t.Fatalf("Failed to create temp dir: %v", err) + } + defer os.RemoveAll(tempDir) + + workflowContent := `--- +on: + issues: + types: [opened] + bots: ["dependabot[bot]"] +engine: copilot +strict: false +network: + allowed: + - cnn.com +safe-outputs: + create-issue: + allowed-domains: + - bbc.com +--- + +# Test Workflow + +Use the incoming text: "${{ steps.sanitized.outputs.text }}" +` + + workflowPath := filepath.Join(tempDir, "compute-text-allowed-domains.md") + if err := os.WriteFile(workflowPath, []byte(workflowContent), 0644); err != nil { + t.Fatalf("Failed to write workflow file: %v", err) + } + + compiler := NewCompiler() + if err := compiler.CompileWorkflow(workflowPath); err != nil { + t.Fatalf("Failed to compile workflow: %v", err) + } + + lockPath := stringutil.MarkdownToLockFile(workflowPath) + lockContent, err := os.ReadFile(lockPath) + if err != nil { + t.Fatalf("Failed to read compiled workflow: %v", err) + } + + lockStr := string(lockContent) + sanitizedIdx := strings.Index(lockStr, "id: sanitized") + if sanitizedIdx == -1 { + t.Fatal("Expected compiled workflow to contain sanitized step") + } + + afterSanitized := lockStr[sanitizedIdx:] + sanitizedBlock, _, found := strings.Cut(afterSanitized, "\n with:\n") + if !found { + t.Fatal("Expected sanitized step to contain a with section") + } + if !strings.Contains(sanitizedBlock, "GH_AW_ALLOWED_BOTS: \"dependabot[bot]\"") { + t.Errorf("Expected sanitized step env to contain GH_AW_ALLOWED_BOTS, got:\n%s", sanitizedBlock) + } + if !strings.Contains(sanitizedBlock, "GH_AW_ALLOWED_DOMAINS:") { + t.Errorf("Expected sanitized step env to contain GH_AW_ALLOWED_DOMAINS, got:\n%s", sanitizedBlock) + } + if !strings.Contains(sanitizedBlock, "cnn.com") { + t.Errorf("Expected sanitized step GH_AW_ALLOWED_DOMAINS to include network.allowed domain cnn.com, got:\n%s", sanitizedBlock) + } + if !strings.Contains(sanitizedBlock, "bbc.com") { + t.Errorf("Expected sanitized step GH_AW_ALLOWED_DOMAINS to include safe-outputs.allowed-domains domain bbc.com, got:\n%s", sanitizedBlock) + } +} + func TestComputeTextContextBasedInsertion(t *testing.T) { // Create a temporary directory for the test tempDir, err := os.MkdirTemp("", "compute-text-context-test") From 161e22312343d38f8c3bd80f59498435ee532b54 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 21 Apr 2026 18:25:19 +0000 Subject: [PATCH 3/4] test: harden compute_text allowed-domains env coverage Agent-Logs-Url: https://github.com/github/gh-aw/sessions/d87f1617-a210-4ab3-b929-466d293873cb Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> --- .../compiler_activation_job_builder.go | 12 +-- pkg/workflow/compute_text_lazy_test.go | 73 +++++++++++++++---- 2 files changed, 67 insertions(+), 18 deletions(-) diff --git a/pkg/workflow/compiler_activation_job_builder.go b/pkg/workflow/compiler_activation_job_builder.go index 17f6b12a6c9..f00e4a3015a 100644 --- a/pkg/workflow/compiler_activation_job_builder.go +++ b/pkg/workflow/compiler_activation_job_builder.go @@ -241,14 +241,16 @@ func (c *Compiler) addActivationRepositoryAndOutputSteps(ctx *activationJobBuild } else { domainsStr = c.computeAllowedDomainsForSanitization(data) } - if len(data.Bots) > 0 || domainsStr != "" { - ctx.steps = append(ctx.steps, " env:\n") - } + var envLines []string if len(data.Bots) > 0 { - ctx.steps = append(ctx.steps, formatYAMLEnv(" ", "GH_AW_ALLOWED_BOTS", strings.Join(data.Bots, ","))) + envLines = append(envLines, formatYAMLEnv(" ", "GH_AW_ALLOWED_BOTS", strings.Join(data.Bots, ","))) } if domainsStr != "" { - ctx.steps = append(ctx.steps, formatYAMLEnv(" ", "GH_AW_ALLOWED_DOMAINS", domainsStr)) + envLines = append(envLines, formatYAMLEnv(" ", "GH_AW_ALLOWED_DOMAINS", domainsStr)) + } + if len(envLines) > 0 { + ctx.steps = append(ctx.steps, " env:\n") + ctx.steps = append(ctx.steps, envLines...) } ctx.steps = append(ctx.steps, " with:\n") ctx.steps = append(ctx.steps, " script: |\n") diff --git a/pkg/workflow/compute_text_lazy_test.go b/pkg/workflow/compute_text_lazy_test.go index 5d5e5860e10..4ccffa89d39 100644 --- a/pkg/workflow/compute_text_lazy_test.go +++ b/pkg/workflow/compute_text_lazy_test.go @@ -459,27 +459,74 @@ Use the incoming text: "${{ steps.sanitized.outputs.text }}" } lockStr := string(lockContent) - sanitizedIdx := strings.Index(lockStr, "id: sanitized") - if sanitizedIdx == -1 { + lines := strings.Split(lockStr, "\n") + sanitizedIndex := -1 + for i, line := range lines { + if strings.TrimSpace(line) == "id: sanitized" { + sanitizedIndex = i + break + } + } + if sanitizedIndex == -1 { t.Fatal("Expected compiled workflow to contain sanitized step") } - afterSanitized := lockStr[sanitizedIdx:] - sanitizedBlock, _, found := strings.Cut(afterSanitized, "\n with:\n") - if !found { + withIndex := -1 + for i := sanitizedIndex + 1; i < len(lines); i++ { + if strings.TrimSpace(lines[i]) == "with:" { + withIndex = i + break + } + } + if withIndex == -1 { t.Fatal("Expected sanitized step to contain a with section") } - if !strings.Contains(sanitizedBlock, "GH_AW_ALLOWED_BOTS: \"dependabot[bot]\"") { - t.Errorf("Expected sanitized step env to contain GH_AW_ALLOWED_BOTS, got:\n%s", sanitizedBlock) + + sanitizedLines := lines[sanitizedIndex+1 : withIndex] + envIndex := -1 + envIndent := 0 + for i, line := range sanitizedLines { + if strings.TrimSpace(line) == "env:" { + envIndex = i + envIndent = len(line) - len(strings.TrimLeft(line, " ")) + break + } + } + if envIndex == -1 { + t.Fatalf("Expected sanitized step to contain an env block, got:\n%s", strings.Join(sanitizedLines, "\n")) + } + + envLines := make([]string, 0, 2) + for i := envIndex + 1; i < len(sanitizedLines); i++ { + line := sanitizedLines[i] + if strings.TrimSpace(line) == "" { + continue + } + indent := len(line) - len(strings.TrimLeft(line, " ")) + if indent <= envIndent { + break + } + envLines = append(envLines, strings.TrimSpace(line)) + } + if len(envLines) == 0 { + t.Fatalf("Expected sanitized step env block to contain variables, got:\n%s", strings.Join(sanitizedLines, "\n")) + } + envBlock := strings.Join(envLines, "\n") + + if !strings.Contains(envBlock, "GH_AW_ALLOWED_BOTS:") { + t.Errorf("Expected sanitized step env to contain GH_AW_ALLOWED_BOTS, got:\n%s", strings.Join(sanitizedLines, "\n")) + } + if !strings.Contains(envBlock, "dependabot[bot]") { + t.Errorf("Expected sanitized step GH_AW_ALLOWED_BOTS value to include dependabot[bot], got:\n%s", strings.Join(sanitizedLines, "\n")) } - if !strings.Contains(sanitizedBlock, "GH_AW_ALLOWED_DOMAINS:") { - t.Errorf("Expected sanitized step env to contain GH_AW_ALLOWED_DOMAINS, got:\n%s", sanitizedBlock) + if !strings.Contains(envBlock, "GH_AW_ALLOWED_DOMAINS:") { + t.Errorf("Expected sanitized step env to contain GH_AW_ALLOWED_DOMAINS, got:\n%s", strings.Join(sanitizedLines, "\n")) } - if !strings.Contains(sanitizedBlock, "cnn.com") { - t.Errorf("Expected sanitized step GH_AW_ALLOWED_DOMAINS to include network.allowed domain cnn.com, got:\n%s", sanitizedBlock) + if !strings.Contains(envBlock, "cnn.com") { + t.Errorf("Expected sanitized step GH_AW_ALLOWED_DOMAINS to include network.allowed domain cnn.com, got:\n%s", strings.Join(sanitizedLines, "\n")) } - if !strings.Contains(sanitizedBlock, "bbc.com") { - t.Errorf("Expected sanitized step GH_AW_ALLOWED_DOMAINS to include safe-outputs.allowed-domains domain bbc.com, got:\n%s", sanitizedBlock) + if !strings.Contains(envBlock, "bbc.com") { + t.Errorf("Expected sanitized step GH_AW_ALLOWED_DOMAINS to include safe-outputs.allowed-domains domain bbc.com, got:\n%s", strings.Join(sanitizedLines, "\n")) } } From 1a2e10c76f66e381d2d8a2381f477fbff03ddca1 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 21 Apr 2026 18:33:56 +0000 Subject: [PATCH 4/4] docs(adr): add draft ADR-27639 for unifying activation sanitize allowed-domains Co-Authored-By: Claude Sonnet 4.6 --- ...ify-activation-sanitize-allowed-domains.md | 69 +++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 docs/adr/27639-unify-activation-sanitize-allowed-domains.md diff --git a/docs/adr/27639-unify-activation-sanitize-allowed-domains.md b/docs/adr/27639-unify-activation-sanitize-allowed-domains.md new file mode 100644 index 00000000000..23c2f5e3551 --- /dev/null +++ b/docs/adr/27639-unify-activation-sanitize-allowed-domains.md @@ -0,0 +1,69 @@ +# ADR-27639: Unify Allowed-Domains Configuration for Activation Input Sanitization + +**Date**: 2026-04-21 +**Status**: Draft +**Deciders**: pelikhan + +--- + +## Part 1 — Narrative (Human-Friendly) + +### Context + +The workflow compiler generates a GitHub Actions job that processes incoming text (issues, PR bodies, comments) before passing it to the agent. This job includes a `sanitized` step (`id: sanitized`) that redacts URLs not in an allowed-domains list, guarding against prompt-injection via untrusted URLs in user-submitted content. However, this step was hard-wired to use only the default domain allow-list, ignoring any domains the workflow author had configured via `network.allowed` and `safe-outputs.allowed-domains`. Output sanitization already consumed those user-configured domains correctly. The asymmetry meant that URLs explicitly permitted by the workflow author were being silently redacted from the agent's input, causing unexpected agent behavior. + +### Decision + +We will pass the same computed allowed-domains value to the `sanitized` (input) step that is already used for output sanitization. When `safe-outputs.allowed-domains` is configured, we use `computeExpandedAllowedDomainsForSanitization`; otherwise we use `computeAllowedDomainsForSanitization`. This brings input-side sanitization into parity with output-side sanitization, honoring the workflow author's intent consistently across both directions. + +### Alternatives Considered + +#### Alternative 1: Remove Input Sanitization from the Activation Step + +Drop URL sanitization from the `sanitized` step entirely and rely solely on output sanitization. This eliminates the inconsistency by having only one sanitization pass. It was rejected because input sanitization provides a meaningful defense-in-depth layer: by stripping untrusted URLs before the agent processes the text, it reduces the attack surface for prompt-injection even if the agent generates output that later passes through output sanitization. + +#### Alternative 2: Introduce a Separate `input-sanitization.allowed-domains` Config Key + +Add a new, explicitly scoped configuration field for input-side sanitization rather than reusing the existing output-side allow-list. This was rejected because it would require workflow authors to duplicate their domain allowances in two places to achieve consistent behavior, increasing cognitive overhead and the risk of accidental divergence with no clear benefit. + +### Consequences + +#### Positive +- Workflow authors' `network.allowed` and `safe-outputs.allowed-domains` settings now apply symmetrically to both input and output, eliminating silent URL redaction from the agent's incoming context. +- No new configuration surface is required; the fix reuses existing compiler helpers already tested for output sanitization. + +#### Negative +- Domains listed in `safe-outputs.allowed-domains` now implicitly affect input sanitization, even though the field's name suggests an output-only concern. This may surprise workflow authors who intended `safe-outputs.allowed-domains` to scope only what the agent is allowed to reference in its outputs. +- The env-var assembly block in `addActivationRepositoryAndOutputSteps` is slightly more complex, accumulating lines conditionally rather than emitting them inline. + +#### Neutral +- The `GH_AW_ALLOWED_BOTS` env var emission is unchanged in behavior; it was refactored into the same slice-accumulation pattern as part of this change for consistency. +- New regression test `TestComputeTextStepIncludesAllowedDomainsEnv` verifies that both `GH_AW_ALLOWED_BOTS` and `GH_AW_ALLOWED_DOMAINS` appear in the compiled `sanitized` step with the expected domain values. + +--- + +## Part 2 — Normative Specification (RFC 2119) + +> The key words **MUST**, **MUST NOT**, **REQUIRED**, **SHALL**, **SHALL NOT**, **SHOULD**, **SHOULD NOT**, **RECOMMENDED**, **MAY**, and **OPTIONAL** in this section are to be interpreted as described in [RFC 2119](https://www.rfc-editor.org/rfc/rfc2119). + +### Activation Input Sanitization — Allowed-Domains Wiring + +1. The compiled `sanitized` step **MUST** receive the same allowed-domains value that is used for output sanitization in the same compiled workflow. +2. When `safe-outputs.allowed-domains` is non-empty, the compiler **MUST** compute the allowed-domains value using `computeExpandedAllowedDomainsForSanitization`. +3. When `safe-outputs.allowed-domains` is absent or empty, the compiler **MUST** compute the allowed-domains value using `computeAllowedDomainsForSanitization`. +4. If the computed allowed-domains string is non-empty, the compiler **MUST** emit a `GH_AW_ALLOWED_DOMAINS` environment variable in the `sanitized` step's `env` block. +5. The compiler **MUST NOT** emit `GH_AW_ALLOWED_DOMAINS` when the computed allowed-domains string is empty. +6. The compiler **MUST NOT** use a hardcoded or default-only domain list for the `sanitized` step when user-configured domains are present. + +### Activation Input Sanitization — Bot Allowlist + +1. If the workflow's `bots` list is non-empty, the compiler **MUST** emit `GH_AW_ALLOWED_BOTS` in the `sanitized` step's `env` block as a comma-separated string. +2. The `GH_AW_ALLOWED_BOTS` env var **MUST NOT** be emitted when the `bots` list is empty. + +### Conformance + +An implementation is considered conformant with this ADR if it satisfies all **MUST** and **MUST NOT** requirements above. Failure to meet any **MUST** or **MUST NOT** requirement constitutes non-conformance. + +--- + +*This is a DRAFT ADR generated by the [Design Decision Gate](https://github.com/github/gh-aw/actions/runs/24739465880) workflow. The PR author must review, complete, and finalize this document before the PR can merge.*