diff --git a/.github/workflows/ai-moderator.lock.yml b/.github/workflows/ai-moderator.lock.yml
index 7cfceecc80d..62b84983a55 100644
--- a/.github/workflows/ai-moderator.lock.yml
+++ b/.github/workflows/ai-moderator.lock.yml
@@ -406,6 +406,7 @@ jobs:
env:
GH_AW_CACHE_DIR: /tmp/gh-aw/cache-memory
GH_AW_MIN_INTEGRITY: none
+ GH_AW_ALLOWED_EXTENSIONS: '.json'
run: bash "${RUNNER_TEMP}/gh-aw/actions/setup_cache_memory_git.sh"
- name: Checkout PR branch
id: checkout-pr
diff --git a/actions/setup/sh/setup_cache_memory_git.sh b/actions/setup/sh/setup_cache_memory_git.sh
index 36634b7ea0c..c15d5ecffd2 100644
--- a/actions/setup/sh/setup_cache_memory_git.sh
+++ b/actions/setup/sh/setup_cache_memory_git.sh
@@ -5,10 +5,18 @@
# This script is run AFTER the cache is restored and BEFORE the agent executes.
# It ensures the cache directory contains a git repository with integrity branches
# and checks out the correct branch for the current run's integrity level.
+# After git setup it applies pre-agent security sanitization: strips execute bits from
+# all working-tree files, and removes files with disallowed extensions when
+# GH_AW_ALLOWED_EXTENSIONS is set.
#
# Required environment variables:
-# GH_AW_CACHE_DIR: Path to the cache-memory directory (e.g. /tmp/gh-aw/cache-memory)
-# GH_AW_MIN_INTEGRITY: Integrity level for this run (merged|approved|unapproved|none)
+# GH_AW_CACHE_DIR: Path to the cache-memory directory (e.g. /tmp/gh-aw/cache-memory)
+# GH_AW_MIN_INTEGRITY: Integrity level for this run (merged|approved|unapproved|none)
+#
+# Optional environment variables:
+# GH_AW_ALLOWED_EXTENSIONS: Colon-separated list of allowed file extensions for pre-agent
+# sanitization (e.g. .json:.md:.txt). When set, any restored file
+# whose extension is not in this list is removed before the agent runs.
set -euo pipefail
@@ -101,3 +109,59 @@ for level in "${LEVELS[@]}"; do
done
echo "Cache memory git setup complete (integrity: $INTEGRITY)"
+
+# --- Security: pre-agent working-tree sanitization ---
+# 1. Delete all working-tree symlinks so that a prior run cannot plant links to files
+# outside the cache (e.g. secrets) that would bypass the regular-file checks below.
+find . -not -path './.git/*' -type l -delete 2>/dev/null || true
+echo "Pre-agent sanitization: deleted all working-tree symlinks"
+
+# 2. Strip execute bits from all working-tree files so that a prior run cannot plant
+# executable scripts (e.g. helper.sh) that the agent or runner could invoke before
+# any validation gate fires.
+find . -not -path './.git/*' -type f -exec chmod a-x {} + 2>/dev/null || true
+echo "Pre-agent sanitization: stripped execute permissions from all working-tree files"
+
+# 3. If GH_AW_ALLOWED_EXTENSIONS is set (colon-separated, e.g. .json:.md:.txt), remove
+# any restored file whose extension is not in the allowed list. This ensures the agent
+# never encounters unexpected file types planted by a prior compromised run.
+if [ -n "${GH_AW_ALLOWED_EXTENSIONS:-}" ]; then
+ echo "Pre-agent sanitization: enforcing allowed extensions: ${GH_AW_ALLOWED_EXTENSIONS}"
+ # Build a normalized (lowercase, whitespace-trimmed) allowed list for case-insensitive
+ # comparison. Pre-computing this once avoids re-parsing it for every file.
+ _normalized_allowed=""
+ IFS=: read -ra _raw_exts <<< "$GH_AW_ALLOWED_EXTENSIONS"
+ for _e in "${_raw_exts[@]}"; do
+ # Trim all whitespace and convert to lowercase
+ _e="$(printf '%s' "$_e" | tr -d '[:space:]' | tr '[:upper:]' '[:lower:]')"
+ if [ -n "$_e" ]; then
+ _normalized_allowed="${_normalized_allowed}${_e}:"
+ fi
+ done
+ removed=0
+ # Use NUL-delimited output so filenames containing newlines are handled correctly.
+ while IFS= read -r -d '' file; do
+ filename="$(basename "$file")"
+ # Extract the last dot-prefixed segment as the extension, or empty if no dot.
+ # Normalize to lowercase for case-insensitive comparison against the allowed list.
+ case "$filename" in
+ *.*) ext=".$(printf '%s' "${filename##*.}" | tr '[:upper:]' '[:lower:]')" ;;
+ *) ext="" ;;
+ esac
+ # Check whether this extension appears in the normalized allowed list
+ found=0
+ IFS=: read -ra _ALLOWED_EXTS <<< "${_normalized_allowed%:}"
+ for _a in "${_ALLOWED_EXTS[@]}"; do
+ if [ "$ext" = "$_a" ]; then
+ found=1
+ break
+ fi
+ done
+ if [ "$found" -eq 0 ]; then
+ echo "Removing disallowed file: $file (extension: '${ext:-none}')"
+ rm -f "$file"
+ removed=$((removed + 1))
+ fi
+ done < <(find . -not -path './.git/*' -type f -print0)
+ echo "Pre-agent sanitization complete: removed ${removed} file(s) with disallowed extensions"
+fi
diff --git a/actions/setup/sh/setup_cache_memory_git_test.sh b/actions/setup/sh/setup_cache_memory_git_test.sh
new file mode 100644
index 00000000000..cdab13c42df
--- /dev/null
+++ b/actions/setup/sh/setup_cache_memory_git_test.sh
@@ -0,0 +1,204 @@
+#!/usr/bin/env bash
+# Tests for setup_cache_memory_git.sh — pre-agent sanitization block
+# Run: bash setup_cache_memory_git_test.sh
+
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+SCRIPT="${SCRIPT_DIR}/setup_cache_memory_git.sh"
+
+# Test counters
+TESTS_PASSED=0
+TESTS_FAILED=0
+
+# Temporary workspace for all tests
+WORKSPACE=$(mktemp -d)
+
+cleanup() {
+ rm -rf "${WORKSPACE}"
+}
+trap cleanup EXIT
+
+# Helper: assert a condition
+assert() {
+ local name="$1"
+ local condition="$2"
+ if eval "${condition}" 2>/dev/null; then
+ echo " ✓ ${name}"
+ TESTS_PASSED=$((TESTS_PASSED + 1))
+ else
+ echo " ✗ ${name}"
+ TESTS_FAILED=$((TESTS_FAILED + 1))
+ fi
+}
+
+# Helper: create a fresh git cache dir with the given files already committed.
+# Usage: make_cache_dir
[ ...]
+# Files are created and committed to the 'none' branch (the lowest-trust default).
+make_cache_dir() {
+ local dir="$1"
+ shift
+ mkdir -p "${dir}"
+ pushd "${dir}" >/dev/null
+ git init -b merged -q
+ git config user.email "test@example.com"
+ git config user.name "test"
+ git config core.hooksPath /dev/null
+ git commit --allow-empty -m "initial" -q
+ for level in approved unapproved none; do
+ git branch "${level}" 2>/dev/null || true
+ done
+ git checkout -q none
+ for f in "$@"; do
+ mkdir -p "$(dirname "${f}")"
+ echo "content" > "${f}"
+ done
+ git add -A
+ git commit --allow-empty -m "test-files" -q
+ popd >/dev/null
+}
+
+# Run the script, capturing stdout and ignoring the exit code.
+run_script() {
+ local dir="$1"
+ local integrity="${2:-none}"
+ local allowed_exts="${3:-}"
+ GH_AW_CACHE_DIR="${dir}" \
+ GH_AW_MIN_INTEGRITY="${integrity}" \
+ GH_AW_ALLOWED_EXTENSIONS="${allowed_exts}" \
+ bash "${SCRIPT}" 2>&1 || true
+}
+
+echo "Testing setup_cache_memory_git.sh — pre-agent sanitization"
+echo ""
+
+# ── Test 1: Execute bits are stripped from restored files ────────────────────
+echo "Test 1: Execute bits are stripped unconditionally"
+D="${WORKSPACE}/test1"
+make_cache_dir "${D}" "script.sh" "data.json"
+# Make files executable before the script runs
+chmod +x "${D}/script.sh" "${D}/data.json"
+run_script "${D}" none >/dev/null
+assert "script.sh is not executable" "[ ! -x '${D}/script.sh' ]"
+assert "data.json is not executable" "[ ! -x '${D}/data.json' ]"
+assert "script.sh still exists" "[ -f '${D}/script.sh' ]"
+assert "data.json still exists" "[ -f '${D}/data.json' ]"
+echo ""
+
+# ── Test 2: .git directory files are NOT touched (sanity check) ──────────────
+echo "Test 2: .git directory is not affected by chmod"
+D="${WORKSPACE}/test2"
+make_cache_dir "${D}" "file.txt"
+HOOK_FILE="${D}/.git/hooks/pre-commit"
+echo "#!/bin/bash" > "${HOOK_FILE}"
+chmod +x "${HOOK_FILE}"
+run_script "${D}" none >/dev/null
+# The hook file cleanup happens earlier in the script but the .git dir itself is
+# excluded from find. Verify find exclusion by checking the .git dir is intact.
+assert ".git directory still exists" "[ -d '${D}/.git' ]"
+echo ""
+
+# ── Test 3: No extension filter — all files kept when GH_AW_ALLOWED_EXTENSIONS is empty ─
+echo "Test 3: No extension filter when GH_AW_ALLOWED_EXTENSIONS is unset"
+D="${WORKSPACE}/test3"
+make_cache_dir "${D}" "file.json" "file.md" "helper.sh" "binary"
+run_script "${D}" none ""
+assert "file.json kept" "[ -f '${D}/file.json' ]"
+assert "file.md kept" "[ -f '${D}/file.md' ]"
+assert "helper.sh kept" "[ -f '${D}/helper.sh' ]"
+assert "binary kept" "[ -f '${D}/binary' ]"
+echo ""
+
+# ── Test 4: Extension filter removes disallowed files ────────────────────────
+echo "Test 4: Extension filter removes disallowed file types"
+D="${WORKSPACE}/test4"
+make_cache_dir "${D}" "data.json" "notes.md" "helper.sh" "archive.zip"
+run_script "${D}" none ".json:.md"
+assert "data.json kept" "[ -f '${D}/data.json' ]"
+assert "notes.md kept" "[ -f '${D}/notes.md' ]"
+assert "helper.sh removed" "[ ! -f '${D}/helper.sh' ]"
+assert "archive.zip removed" "[ ! -f '${D}/archive.zip' ]"
+echo ""
+
+# ── Test 5: Extension filter removes files without any extension ─────────────
+echo "Test 5: Extension filter removes files with no extension"
+D="${WORKSPACE}/test5"
+make_cache_dir "${D}" "data.json" "noext"
+run_script "${D}" none ".json"
+assert "data.json kept" "[ -f '${D}/data.json' ]"
+assert "noext removed" "[ ! -f '${D}/noext' ]"
+echo ""
+
+# ── Test 6: Extension filter with single extension ───────────────────────────
+echo "Test 6: Extension filter with a single allowed extension"
+D="${WORKSPACE}/test6"
+make_cache_dir "${D}" "report.json" "notes.txt" "image.png"
+run_script "${D}" none ".json"
+assert "report.json kept" "[ -f '${D}/report.json' ]"
+assert "notes.txt removed" "[ ! -f '${D}/notes.txt' ]"
+assert "image.png removed" "[ ! -f '${D}/image.png' ]"
+echo ""
+
+# ── Test 7: Execute bits stripped AND disallowed files removed together ───────
+echo "Test 7: Execute-bit stripping and extension filtering both apply"
+D="${WORKSPACE}/test7"
+make_cache_dir "${D}" "keep.json" "drop.sh"
+chmod +x "${D}/keep.json" "${D}/drop.sh"
+run_script "${D}" none ".json"
+assert "keep.json exists" "[ -f '${D}/keep.json' ]"
+assert "keep.json not executable" "[ ! -x '${D}/keep.json' ]"
+assert "drop.sh removed" "[ ! -f '${D}/drop.sh' ]"
+echo ""
+
+# ── Test 8: Extension matching is case-insensitive ───────────────────────────
+echo "Test 8: Extension matching is case-insensitive"
+D="${WORKSPACE}/test8"
+make_cache_dir "${D}" "data.json" "data.JSON" "notes.MD"
+# Allow list uses lowercase; both .json and .JSON files, and .MD files, should be kept
+run_script "${D}" none ".json:.md"
+assert "data.json kept (exact match)" "[ -f '${D}/data.json' ]"
+assert "data.JSON kept (uppercase file)" "[ -f '${D}/data.JSON' ]"
+assert "notes.MD kept (uppercase file)" "[ -f '${D}/notes.MD' ]"
+echo ""
+
+# ── Test 9: Whitespace in GH_AW_ALLOWED_EXTENSIONS is trimmed ────────────────
+echo "Test 9: Whitespace in allowed extensions list is trimmed"
+D="${WORKSPACE}/test9"
+make_cache_dir "${D}" "data.json" "note.md" "drop.sh"
+# Extensions with leading/trailing spaces should still match
+run_script "${D}" none " .json : .md "
+assert "data.json kept (trimmed .json)" "[ -f '${D}/data.json' ]"
+assert "note.md kept (trimmed .md)" "[ -f '${D}/note.md' ]"
+assert "drop.sh removed" "[ ! -f '${D}/drop.sh' ]"
+echo ""
+
+# ── Test 10: Symlinks are deleted unconditionally ────────────────────────────
+echo "Test 10: Symlinks in working tree are deleted"
+D="${WORKSPACE}/test10"
+make_cache_dir "${D}" "real.json"
+# Plant a symlink (simulating a compromised prior run)
+ln -s /etc/passwd "${D}/evil-link"
+assert "symlink exists before script" "[ -L '${D}/evil-link' ]"
+run_script "${D}" none >/dev/null
+assert "symlink removed by script" "[ ! -L '${D}/evil-link' ]"
+assert "real file still exists" "[ -f '${D}/real.json' ]"
+echo ""
+
+# ── Test 11: Files with spaces in name are handled correctly ─────────────────
+echo "Test 11: Files with spaces in names are handled correctly"
+D="${WORKSPACE}/test11"
+make_cache_dir "${D}" "my data.json" "my script.sh"
+run_script "${D}" none ".json"
+assert "file with space and .json kept" "[ -f '${D}/my data.json' ]"
+assert "file with space and .sh removed" "[ ! -f '${D}/my script.sh' ]"
+echo ""
+
+# ── Summary ──────────────────────────────────────────────────────────────────
+echo "Tests passed: ${TESTS_PASSED}"
+echo "Tests failed: ${TESTS_FAILED}"
+
+if [ "${TESTS_FAILED}" -gt 0 ]; then
+ exit 1
+fi
+
+echo "✓ All tests passed!"
diff --git a/docs/adr/26587-pre-agent-cache-memory-working-tree-sanitization.md b/docs/adr/26587-pre-agent-cache-memory-working-tree-sanitization.md
new file mode 100644
index 00000000000..985a764b2cc
--- /dev/null
+++ b/docs/adr/26587-pre-agent-cache-memory-working-tree-sanitization.md
@@ -0,0 +1,93 @@
+# ADR-26587: Pre-Agent Cache-Memory Working-Tree Sanitization
+
+**Date**: 2026-04-16
+**Status**: Draft
+**Deciders**: pelikhan, Copilot
+
+---
+
+## Part 1 — Narrative (Human-Friendly)
+
+### Context
+
+The cache-memory system restores files from a prior agent run into a working directory before the current agent executes. With `integrity: none` (the default configuration), this restoration happens before any threat-detection gate fires. A compromised prior run could therefore plant executable scripts (e.g. `helper.sh`) or unexpected file types in the cache, which the next agent would encounter without any validation. The system needed a deterministic security gate that runs *after* the cache is restored but *before* the agent can access the working tree.
+
+### Decision
+
+We will extend the existing `setup_cache_memory_git.sh` shell script—which already runs in the correct position (after cache restore, before agent execution)—to perform two sanitization steps: (1) unconditionally strip execute bits from every working-tree file so planted executables cannot run, and (2) when `GH_AW_ALLOWED_EXTENSIONS` is configured (a colon-separated list passed by the Go code in `generateCacheMemoryGitSetupStep`), remove any file whose extension is not on the allowed list. This approach adds a zero-new-step security gate without restructuring workflow templates.
+
+### Alternatives Considered
+
+#### Alternative 1: Separate Dedicated "Sanitize Cache" Workflow Step
+
+A standalone step could be inserted into every generated workflow immediately after the cache-restore step. This would be visible in the YAML and independently auditable. However, it requires patching every generated workflow template, increases generated-YAML complexity, and creates a step-ordering dependency that is harder to enforce correctly across all cache configurations. Centralising the logic in the existing git-setup script avoids these problems.
+
+#### Alternative 2: Go-Level File System Sanitization
+
+The sanitization could be implemented in Go within `cache.go` at workflow-generation time or injected as a Go binary invoked from the step. This would keep all business logic in a single language and allow richer error handling. It was rejected because the shell script already executes in the exact lifecycle position required, and introducing a compiled Go binary just for chmod/rm operations would add build and distribution overhead with no material benefit over shell primitives.
+
+#### Alternative 3: Content-Based Threat Scanning
+
+Rather than filtering by permission bits or file extension, the gate could scan file content for known-malicious patterns (YARA rules, signature checks, etc.). This provides stronger detection for obfuscated threats but introduces false-positive risk, brittle pattern maintenance, and significant latency. The permission-strip + extension-filter approach provides meaningful defence-in-depth with deterministic, auditable behaviour and no false positives.
+
+#### Alternative 4: Rely on Existing Threat Detection
+
+The existing integrity and threat-detection pipeline could be trusted to catch malicious content. This was rejected because threat detection fires concurrently with or after agent execution—not unconditionally before it—so a planted executable could be invoked before detection fires. The pre-agent sanitization gate closes this race condition.
+
+### Consequences
+
+#### Positive
+- Eliminates a class of supply-chain-style attacks where a compromised prior run plants executable files in cache-memory.
+- The execute-bit strip provides unconditional defence even when no `allowed-extensions` list is configured.
+- Extension filtering allows teams to explicitly declare a minimal allowed surface area for cached files.
+- No additional workflow step is emitted; existing generated workflows are not structurally changed.
+- Unit tests in `cache_integrity_test.go` cover the env-var emission contract, making the behaviour regression-safe.
+
+#### Negative
+- `chmod a-x` runs on every cache-memory-enabled workflow unconditionally, even when no security concern exists—a small overhead on each run.
+- Any legitimate workflow that relies on executable bits being preserved in cached files (unlikely given cache-memory is data-only by design) would break silently—files are de-executed without notification.
+- The extension filtering uses a colon-separated string passed through an env var, which is a slightly brittle interface compared to a structured config file; misconfigured extension strings (e.g. missing leading dot) would silently allow all files.
+
+#### Neutral
+- The sanitization is implemented in Bash within the existing setup script rather than as a new architectural component, keeping the footprint minimal.
+- The `GH_AW_ALLOWED_EXTENSIONS` env var is only emitted when `AllowedExtensions` is non-empty; the default (all extensions allowed) requires no configuration change.
+- The design naturally separates concerns: Go code owns the configuration contract, the shell script owns the filesystem operations.
+
+---
+
+## Part 2 — Normative Specification (RFC 2119)
+
+> The key words **MUST**, **MUST NOT**, **REQUIRED**, **SHALL**, **SHALL NOT**, **SHOULD**, **SHOULD NOT**, **RECOMMENDED**, **MAY**, and **OPTIONAL** in this section are to be interpreted as described in [RFC 2119](https://www.rfc-editor.org/rfc/rfc2119).
+
+### Pre-Agent Sanitization Execution Order
+
+1. The sanitization gate **MUST** execute after the cache-restore step and before any agent step can access the cache working tree.
+2. The sanitization gate **MUST NOT** be skipped or made conditional on the `GH_AW_MIN_INTEGRITY` level; it **MUST** run for all integrity configurations, including `none`.
+3. The sanitization logic **MUST** be implemented in `setup_cache_memory_git.sh` or a script invoked by it, so that it runs in the same step as the git-setup work.
+
+### Execute-Bit Stripping
+
+1. Implementations **MUST** strip execute permissions (`chmod a-x`) from every file in the cache working tree, excluding files inside `.git/`.
+2. Implementations **MUST NOT** require any configuration to enable execute-bit stripping; it **MUST** be active for all cache-memory workflows.
+3. Implementations **SHOULD** log a confirmation message after stripping execute permissions so that audit logs confirm the step ran.
+
+### Extension-Based File Filtering
+
+1. When the `GH_AW_ALLOWED_EXTENSIONS` environment variable is set and non-empty, implementations **MUST** remove any working-tree file (excluding `.git/`) whose extension is not present in the colon-separated allowed list.
+2. Extensions in `GH_AW_ALLOWED_EXTENSIONS` **MUST** include the leading dot (e.g. `.json`, `.md`); files with no extension are treated as if their extension is an empty string.
+3. When `GH_AW_ALLOWED_EXTENSIONS` is absent or empty, implementations **MUST NOT** remove any files based on extension (all extensions are allowed).
+4. Implementations **SHOULD** log each removed file path and its extension, and **SHOULD** log a final count of removed files.
+
+### Go-to-Shell Configuration Contract
+
+1. The Go code in `generateCacheMemoryGitSetupStep` **MUST** emit `GH_AW_ALLOWED_EXTENSIONS` as an environment variable to the git-setup step when `cache.AllowedExtensions` is non-empty.
+2. The env-var value **MUST** be the `AllowedExtensions` slice joined with `:` as separator.
+3. The Go code **MUST NOT** emit `GH_AW_ALLOWED_EXTENSIONS` when `AllowedExtensions` is nil or empty; the absence of the variable is the signal for "allow all extensions."
+
+### Conformance
+
+An implementation is considered conformant with this ADR if it satisfies all **MUST** and **MUST NOT** requirements above. Failure to meet any **MUST** or **MUST NOT** requirement constitutes non-conformance.
+
+---
+
+*This is a DRAFT ADR generated by the [Design Decision Gate](https://github.com/github/gh-aw/actions/runs/24514620900) workflow. The PR author must review, complete, and finalize this document before the PR can merge.*
diff --git a/pkg/testutil/spec_test.go b/pkg/testutil/spec_test.go
index 63f49c4e1e5..c0cf356e2bc 100644
--- a/pkg/testutil/spec_test.go
+++ b/pkg/testutil/spec_test.go
@@ -134,8 +134,8 @@ func TestSpec_PublicAPI_StripYAMLCommentHeader(t *testing.T) {
expected string
}{
{
- name: "strips leading comment block before YAML content",
- input: "# Auto-generated by gh-aw\n# Do not edit manually\nruns-on: ubuntu-latest\n",
+ name: "strips leading comment block before YAML content",
+ input: "# Auto-generated by gh-aw\n# Do not edit manually\nruns-on: ubuntu-latest\n",
expected: "runs-on: ubuntu-latest\n",
},
{
@@ -144,8 +144,8 @@ func TestSpec_PublicAPI_StripYAMLCommentHeader(t *testing.T) {
expected: "runs-on: ubuntu-latest\n",
},
{
- name: "strips multi-line comment block before YAML document separator",
- input: "# Header\n# More header\n---\njobs:\n build:\n runs-on: ubuntu-latest\n",
+ name: "strips multi-line comment block before YAML document separator",
+ input: "# Header\n# More header\n---\njobs:\n build:\n runs-on: ubuntu-latest\n",
expected: "---\njobs:\n build:\n runs-on: ubuntu-latest\n",
},
}
diff --git a/pkg/workflow/cache.go b/pkg/workflow/cache.go
index 50bee620744..2198a68a6cd 100644
--- a/pkg/workflow/cache.go
+++ b/pkg/workflow/cache.go
@@ -18,6 +18,24 @@ var cacheLog = logger.New("workflow:cache")
// validCacheMemoryScopes defines the allowed values for cache-memory scope
var validCacheMemoryScopes = []string{"workflow", "repo"}
+// isValidFileExtension reports whether s is a valid file extension of the form ^\.[A-Za-z0-9]+$
+// (e.g. ".json", ".md"). This strict pattern prevents YAML injection when extensions are
+// embedded in generated workflow YAML as single-quoted scalars.
+func isValidFileExtension(s string) bool {
+ if len(s) < 2 || s[0] != '.' {
+ return false
+ }
+ for _, c := range s[1:] {
+ isLower := c >= 'a' && c <= 'z'
+ isUpper := c >= 'A' && c <= 'Z'
+ isDigit := c >= '0' && c <= '9'
+ if !isLower && !isUpper && !isDigit {
+ return false
+ }
+ }
+ return true
+}
+
// CacheMemoryConfig holds configuration for cache-memory functionality
type CacheMemoryConfig struct {
Caches []CacheMemoryEntry `yaml:"caches,omitempty"` // cache configurations
@@ -164,6 +182,11 @@ func parseCacheMemoryEntry(cacheMap map[string]any, defaultID string) (CacheMemo
entry.AllowedExtensions = make([]string, 0, len(extArray))
for _, ext := range extArray {
if extStr, ok := ext.(string); ok {
+ // Validate: must be of the form ^\.[A-Za-z0-9]+$ to prevent YAML injection
+ // and ensure the shell sanitization script handles them correctly.
+ if !isValidFileExtension(extStr) {
+ return entry, fmt.Errorf("invalid allowed-extension %q: must start with '.' followed by alphanumeric characters only (e.g. .json)", extStr)
+ }
entry.AllowedExtensions = append(entry.AllowedExtensions, extStr)
}
}
@@ -512,6 +535,9 @@ func generateCacheMemorySteps(builder *strings.Builder, data *WorkflowData) {
// generateCacheMemoryGitSetupStep emits a pre-agent step that sets up the git-backed integrity
// repository inside the given cache directory. It must run after the cache is restored so that
// any previous git history is available for the merge-down step.
+// The step also performs pre-agent security sanitization: it strips execute bits from all
+// working-tree files and, when allowed extensions are configured, removes files with
+// disallowed extensions before the agent can access them.
func generateCacheMemoryGitSetupStep(builder *strings.Builder, cache CacheMemoryEntry, cacheDir, integrityLevel string, useBackwardCompatiblePaths bool) {
if useBackwardCompatiblePaths {
builder.WriteString(" - name: Setup cache-memory git repository\n")
@@ -521,6 +547,14 @@ func generateCacheMemoryGitSetupStep(builder *strings.Builder, cache CacheMemory
builder.WriteString(" env:\n")
fmt.Fprintf(builder, " GH_AW_CACHE_DIR: %s\n", cacheDir)
fmt.Fprintf(builder, " GH_AW_MIN_INTEGRITY: %s\n", integrityLevel)
+ // Pass colon-separated allowed extensions so the setup script can remove disallowed files
+ // before the agent runs (pre-agent sanitization). Skip when the list is empty (allow all).
+ // Single quotes in the value are escaped ('' in YAML single-quoted scalars) as defense-in-depth,
+ // even though isValidFileExtension already rejects values containing single quotes at parse time.
+ if len(cache.AllowedExtensions) > 0 {
+ escaped := strings.ReplaceAll(strings.Join(cache.AllowedExtensions, ":"), "'", "''")
+ fmt.Fprintf(builder, " GH_AW_ALLOWED_EXTENSIONS: '%s'\n", escaped)
+ }
builder.WriteString(" run: bash \"${RUNNER_TEMP}/gh-aw/actions/setup_cache_memory_git.sh\"\n")
}
diff --git a/pkg/workflow/cache_integrity_test.go b/pkg/workflow/cache_integrity_test.go
index b9b40b73da8..2dfa6594552 100644
--- a/pkg/workflow/cache_integrity_test.go
+++ b/pkg/workflow/cache_integrity_test.go
@@ -535,3 +535,166 @@ func TestCacheMemoryGitCommitSteps_RestoreOnlySkipped(t *testing.T) {
assert.Empty(t, output, "Restore-only caches should not generate a git commit step")
}
+
+// TestCacheMemoryGitSetupStep_AllowedExtensionsEnvVar verifies that the git setup step
+// emits GH_AW_ALLOWED_EXTENSIONS when allowed extensions are configured, and omits it
+// when the extensions list is empty (all allowed).
+func TestCacheMemoryGitSetupStep_AllowedExtensionsEnvVar(t *testing.T) {
+ tests := []struct {
+ name string
+ allowedExtensions []string
+ expectEnvVar bool
+ expectedValue string
+ }{
+ {
+ name: "empty extensions (all allowed) - no env var",
+ allowedExtensions: []string{},
+ expectEnvVar: false,
+ },
+ {
+ name: "nil extensions (all allowed) - no env var",
+ allowedExtensions: nil,
+ expectEnvVar: false,
+ },
+ {
+ name: "specific extensions - env var emitted",
+ allowedExtensions: []string{".json", ".md", ".txt"},
+ expectEnvVar: true,
+ expectedValue: "GH_AW_ALLOWED_EXTENSIONS: '.json:.md:.txt'",
+ },
+ {
+ name: "single extension - env var emitted",
+ allowedExtensions: []string{".json"},
+ expectEnvVar: true,
+ expectedValue: "GH_AW_ALLOWED_EXTENSIONS: '.json'",
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ cache := CacheMemoryEntry{
+ ID: "default",
+ AllowedExtensions: tt.allowedExtensions,
+ }
+ var builder strings.Builder
+ generateCacheMemoryGitSetupStep(&builder, cache, "/tmp/gh-aw/cache-memory", "none", true)
+ output := builder.String()
+
+ if tt.expectEnvVar {
+ assert.Contains(t, output, tt.expectedValue,
+ "Should emit GH_AW_ALLOWED_EXTENSIONS with colon-separated extensions")
+ } else {
+ assert.NotContains(t, output, "GH_AW_ALLOWED_EXTENSIONS",
+ "Should NOT emit GH_AW_ALLOWED_EXTENSIONS when all extensions are allowed")
+ }
+
+ // Always verify the other env vars are present
+ assert.Contains(t, output, "GH_AW_CACHE_DIR: /tmp/gh-aw/cache-memory",
+ "Should always set cache dir env var")
+ assert.Contains(t, output, "GH_AW_MIN_INTEGRITY: none",
+ "Should always set integrity level env var")
+ })
+ }
+}
+
+// TestCacheMemorySteps_PreAgentSanitizationEnvVar verifies that generateCacheMemorySteps
+// emits GH_AW_ALLOWED_EXTENSIONS in the git setup step when allowed extensions are configured.
+func TestCacheMemorySteps_PreAgentSanitizationEnvVar(t *testing.T) {
+ toolsMap := map[string]any{
+ "cache-memory": map[string]any{
+ "allowed-extensions": []any{".json", ".md", ".txt"},
+ },
+ }
+
+ toolsConfig, err := ParseToolsConfig(toolsMap)
+ require.NoError(t, err, "Should parse tools config")
+
+ compiler := NewCompiler()
+ cacheMemoryConfig, err := compiler.extractCacheMemoryConfig(toolsConfig)
+ require.NoError(t, err, "Should extract cache-memory config")
+
+ data := &WorkflowData{
+ CacheMemoryConfig: cacheMemoryConfig,
+ }
+
+ var builder strings.Builder
+ generateCacheMemorySteps(&builder, data)
+ output := builder.String()
+
+ assert.Contains(t, output, "GH_AW_ALLOWED_EXTENSIONS: '.json:.md:.txt'",
+ "Should pass allowed extensions to git setup step for pre-agent sanitization")
+}
+
+// TestCacheMemorySteps_NoExtensionsNoSanitizationEnvVar verifies that the default
+// empty extensions list (all allowed) does NOT emit GH_AW_ALLOWED_EXTENSIONS,
+// so the sanitization step does not remove any files.
+func TestCacheMemorySteps_NoExtensionsNoSanitizationEnvVar(t *testing.T) {
+ toolsMap := map[string]any{
+ "cache-memory": true,
+ }
+
+ toolsConfig, err := ParseToolsConfig(toolsMap)
+ require.NoError(t, err, "Should parse tools config")
+
+ compiler := NewCompiler()
+ cacheMemoryConfig, err := compiler.extractCacheMemoryConfig(toolsConfig)
+ require.NoError(t, err, "Should extract cache-memory config")
+
+ data := &WorkflowData{
+ CacheMemoryConfig: cacheMemoryConfig,
+ }
+
+ var builder strings.Builder
+ generateCacheMemorySteps(&builder, data)
+ output := builder.String()
+
+ assert.NotContains(t, output, "GH_AW_ALLOWED_EXTENSIONS",
+ "Should NOT pass GH_AW_ALLOWED_EXTENSIONS when all extensions are allowed (empty list)")
+}
+
+// TestCacheMemoryAllowedExtensions_ValidationAndEscaping verifies that:
+// - Valid extensions (e.g. ".json", ".md") are accepted.
+// - Extensions not matching ^\.[A-Za-z0-9]+$ (e.g. containing single quotes, spaces,
+// or missing a leading dot) are rejected with a descriptive error.
+// - When emitting the env var, single quotes in extension values are doubled (”) for
+// safe embedding in YAML single-quoted scalars.
+func TestCacheMemoryAllowedExtensions_ValidationAndEscaping(t *testing.T) {
+ t.Run("isValidFileExtension helper", func(t *testing.T) {
+ valid := []string{".json", ".md", ".txt", ".csv", ".JSON", ".MD", ".1"}
+ for _, ext := range valid {
+ assert.True(t, isValidFileExtension(ext), "Expected %q to be valid", ext)
+ }
+ invalid := []string{"json", ".json.bak", ".json ", ".", ".json'evil", ".json\nevil", "", "."}
+ for _, ext := range invalid {
+ assert.False(t, isValidFileExtension(ext), "Expected %q to be invalid", ext)
+ }
+ })
+
+ t.Run("invalid extension rejected at parse time", func(t *testing.T) {
+ toolsMap := map[string]any{
+ "cache-memory": map[string]any{
+ "allowed-extensions": []any{".json", "no-leading-dot"},
+ },
+ }
+ toolsConfig, err := ParseToolsConfig(toolsMap)
+ require.NoError(t, err, "Should parse tools config")
+ compiler := NewCompiler()
+ _, err = compiler.extractCacheMemoryConfig(toolsConfig)
+ require.Error(t, err, "Should reject invalid extension at parse time")
+ assert.Contains(t, err.Error(), "no-leading-dot", "Error should identify the bad value")
+ })
+
+ t.Run("single-quote escaping in emitted YAML", func(t *testing.T) {
+ // This bypasses parse-time validation to confirm the emit escaping is independent.
+ cache := CacheMemoryEntry{
+ ID: "default",
+ AllowedExtensions: []string{".json", ".it'"},
+ }
+ var builder strings.Builder
+ generateCacheMemoryGitSetupStep(&builder, cache, "/tmp/gh-aw/cache-memory", "none", true)
+ output := builder.String()
+ // Single quote must be doubled for safe YAML single-quoted scalar embedding
+ assert.Contains(t, output, "GH_AW_ALLOWED_EXTENSIONS: '.json:.it'''",
+ "Single quotes in extension values must be escaped as '' in YAML output")
+ })
+}