Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/ai-moderator.lock.yml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

68 changes: 66 additions & 2 deletions actions/setup/sh/setup_cache_memory_git.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,18 @@
# This script is run AFTER the cache is restored and BEFORE the agent executes.
# It ensures the cache directory contains a git repository with integrity branches
# and checks out the correct branch for the current run's integrity level.
# After git setup it applies pre-agent security sanitization: deletes all working-tree
# symlinks, strips execute bits from all working-tree files, and removes files with
# disallowed extensions when GH_AW_ALLOWED_EXTENSIONS is set.
#
# Required environment variables:
# GH_AW_CACHE_DIR: Path to the cache-memory directory (e.g. /tmp/gh-aw/cache-memory)
# GH_AW_MIN_INTEGRITY: Integrity level for this run (merged|approved|unapproved|none)
#
# Optional environment variables:
# GH_AW_ALLOWED_EXTENSIONS: Colon-separated list of allowed file extensions for pre-agent
# sanitization (e.g. .json:.md:.txt). When set, any restored file
# whose extension is not in this list is removed before the agent runs.

set -euo pipefail

Expand Down Expand Up @@ -101,3 +109,59 @@ for level in "${LEVELS[@]}"; do
done

echo "Cache memory git setup complete (integrity: $INTEGRITY)"

# --- Security: pre-agent working-tree sanitization ---
# Everything restored from the cache may have been written by a prior (possibly
# compromised) run, so it is neutralized here before the agent executes.

#######################################
# Sanitize the working tree rooted at the current directory. Paths under ./.git/
# are excluded from every step.
#   1. Delete all symlinks, so a prior run cannot plant links to files outside the
#      cache (e.g. secrets) that would bypass the regular-file checks below.
#   2. Strip execute bits from all regular files, so a prior run cannot plant
#      executable scripts (e.g. helper.sh) that the agent or runner could invoke
#      before any validation gate fires.
#   3. If GH_AW_ALLOWED_EXTENSIONS is set (colon-separated, e.g. .json:.md:.txt),
#      remove every regular file whose extension is not in the list. Matching is
#      case-insensitive and whitespace inside list entries is ignored.
# Globals:   GH_AW_ALLOWED_EXTENSIONS (read; optional)
# Arguments: none
# Outputs:   progress messages on stdout, warnings on stderr
#######################################
sanitize_working_tree() {
  local raw_entry normalized file filename ext allowed_ext found removed
  local -a raw_exts=() allowed_exts=()

  # Steps 1 and 2 stay best-effort (a find failure must not abort the run), but the
  # success message is only printed when find actually succeeded.
  if find . -not -path './.git/*' -type l -delete 2>/dev/null; then
    echo "Pre-agent sanitization: deleted all working-tree symlinks"
  else
    echo "Warning: pre-agent sanitization could not delete every working-tree symlink" >&2
  fi

  if find . -not -path './.git/*' -type f -exec chmod a-x {} + 2>/dev/null; then
    echo "Pre-agent sanitization: stripped execute permissions from all working-tree files"
  else
    echo "Warning: pre-agent sanitization could not strip execute permissions from every working-tree file" >&2
  fi

  # Step 3 is opt-in: without an allow-list there is nothing more to do.
  if [ -z "${GH_AW_ALLOWED_EXTENSIONS:-}" ]; then
    return 0
  fi
  echo "Pre-agent sanitization: enforcing allowed extensions: ${GH_AW_ALLOWED_EXTENSIONS}"

  # Build the normalized (lowercase, whitespace-free) allow-list exactly once, as an
  # array, so the per-file loop below never has to re-parse it.
  IFS=: read -ra raw_exts <<< "${GH_AW_ALLOWED_EXTENSIONS}"
  # The ${arr[@]+...} form keeps an empty array from tripping `set -u` on bash < 4.4.
  for raw_entry in ${raw_exts[@]+"${raw_exts[@]}"}; do
    normalized="$(printf '%s' "${raw_entry}" | tr -d '[:space:]' | tr '[:upper:]' '[:lower:]')"
    if [ -n "${normalized}" ]; then
      allowed_exts+=("${normalized}")
    fi
  done
  if [ "${#allowed_exts[@]}" -eq 0 ]; then
    # Fail closed: an allow-list with no usable entry permits nothing.
    echo "Warning: GH_AW_ALLOWED_EXTENSIONS contains no usable extension; every restored file will be removed" >&2
  fi

  removed=0
  # Use NUL-delimited output so filenames containing newlines are handled correctly.
  while IFS= read -r -d '' file; do
    # Strip the directory part with parameter expansion instead of forking basename.
    filename="${file##*/}"
    # The extension is the last dot-prefixed segment, lowercased; empty if no dot.
    case "${filename}" in
      *.*) ext=".$(printf '%s' "${filename##*.}" | tr '[:upper:]' '[:lower:]')" ;;
      *) ext="" ;;
    esac
    found=0
    for allowed_ext in ${allowed_exts[@]+"${allowed_exts[@]}"}; do
      if [ "${ext}" = "${allowed_ext}" ]; then
        found=1
        break
      fi
    done
    if [ "${found}" -eq 0 ]; then
      echo "Removing disallowed file: ${file} (extension: '${ext:-none}')"
      rm -f -- "${file}"
      removed=$((removed + 1))
    fi
  done < <(find . -not -path './.git/*' -type f -print0)
  echo "Pre-agent sanitization complete: removed ${removed} file(s) with disallowed extensions"
}

sanitize_working_tree
204 changes: 204 additions & 0 deletions actions/setup/sh/setup_cache_memory_git_test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,204 @@
#!/usr/bin/env bash
# Tests for setup_cache_memory_git.sh — pre-agent sanitization block
# Run: bash setup_cache_memory_git_test.sh

# Strict mode: abort on errors, on unset variables, and on failures inside pipelines.
set -o errexit -o nounset -o pipefail

# Pass/fail tallies; assert() increments exactly one of them per call.
TESTS_PASSED=0 TESTS_FAILED=0

# Scratch area shared by every test case; each case uses its own subdirectory.
WORKSPACE="$(mktemp -d)"

# The script under test sits next to this file, so resolve it from this file's own
# location rather than from the caller's working directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
SCRIPT="${SCRIPT_DIR}/setup_cache_memory_git.sh"

# Delete the scratch area; registered below so it runs when the shell exits.
cleanup() { rm -rf "${WORKSPACE}"; }
trap cleanup EXIT

# Helper: evaluate a shell condition and record the outcome.
# Usage: assert <description> <condition>
# <condition> is a string run through eval with its stderr discarded. A passing
# condition prints a check mark and bumps TESTS_PASSED; a failing one prints a cross
# and bumps TESTS_FAILED.
assert() {
  local label="$1" expr="$2"
  if ! eval "${expr}" 2>/dev/null; then
    echo " ✗ ${label}"
    TESTS_FAILED=$((TESTS_FAILED + 1))
    return
  fi
  echo " ✓ ${label}"
  TESTS_PASSED=$((TESTS_PASSED + 1))
}

# Helper: create a fresh git cache dir with the given files already committed.
# Usage: make_cache_dir <dir> [<file> ...]
# The repository is initialized on 'merged' with an empty initial commit, the
# branches 'approved', 'unapproved' and 'none' are created from it, and the files
# (each containing the line "content") are created and committed on the 'none'
# branch (the lowest-trust default). The caller's working directory is restored
# before returning.
make_cache_dir() {
  local dir="$1"
  shift
  # Keep the loop variables local so they cannot clobber globals of the same name.
  local level f
  mkdir -p "${dir}"
  pushd "${dir}" >/dev/null
  git init -b merged -q
  git config user.email "test@example.com"
  git config user.name "test"
  git config core.hooksPath /dev/null
  # Never try to sign the fixture commits, whatever the user's global config says.
  git config commit.gpgsign false
  git commit --allow-empty -m "initial" -q
  for level in approved unapproved none; do
    git branch "${level}" 2>/dev/null || true
  done
  git checkout -q none
  for f in "$@"; do
    mkdir -p -- "$(dirname -- "${f}")"
    echo "content" > "${f}"
  done
  git add -A
  # --allow-empty keeps the helper usable when no files were requested.
  git commit --allow-empty -m "test-files" -q
  popd >/dev/null
}

# Run the script under test against a cache directory.
# Usage: run_script <dir> [<integrity>] [<allowed-extensions>]
# <integrity> defaults to "none" and <allowed-extensions> to the empty string.
# The script's stderr is merged into stdout, and its exit code is deliberately
# ignored so that a failing run never aborts the test suite.
run_script() {
  local cache_dir="$1"
  local min_integrity="${2:-none}"
  local extensions="${3:-}"
  env \
    GH_AW_CACHE_DIR="${cache_dir}" \
    GH_AW_MIN_INTEGRITY="${min_integrity}" \
    GH_AW_ALLOWED_EXTENSIONS="${extensions}" \
    bash "${SCRIPT}" 2>&1 || true
}

echo "Testing setup_cache_memory_git.sh — pre-agent sanitization"
echo

# ── Test 1: restored files lose their execute bits but are otherwise kept ────
echo "Test 1: Execute bits are stripped unconditionally"
exec_case="${WORKSPACE}/test1"
make_cache_dir "${exec_case}" "script.sh" "data.json"
# Mark both files executable before the script under test runs.
chmod +x "${exec_case}/script.sh" "${exec_case}/data.json"
run_script "${exec_case}" none >/dev/null
# First confirm the execute bits are gone, then that the files themselves survived.
for restored in script.sh data.json; do
  assert "${restored} is not executable" "[ ! -x '${exec_case}/${restored}' ]"
done
for restored in script.sh data.json; do
  assert "${restored} still exists" "[ -f '${exec_case}/${restored}' ]"
done
echo

# ── Test 2: .git directory files are NOT touched (sanity check) ──────────────
echo "Test 2: .git directory is not affected by chmod"
D="${WORKSPACE}/test2"
make_cache_dir "${D}" "file.txt"
# git only creates hooks/ when its templates are installed, so make sure it exists.
mkdir -p "${D}/.git/hooks"
HOOK_FILE="${D}/.git/hooks/pre-commit"
echo "#!/bin/bash" > "${HOOK_FILE}"
chmod +x "${HOOK_FILE}"
# The hook file cleanup happens earlier in the script, so the hook itself cannot be
# used to observe the find exclusion. Plant a second executable file inside .git but
# outside hooks/: the sanitization find commands skip './.git/*', so its execute bit
# must survive. NOTE(review): assumes no earlier part of the script rewrites
# arbitrary files under .git — confirm against the full script.
GIT_MARKER="${D}/.git/gh-aw-test-marker"
echo "marker" > "${GIT_MARKER}"
chmod +x "${GIT_MARKER}"
run_script "${D}" none >/dev/null
assert ".git directory still exists" "[ -d '${D}/.git' ]"
assert "executable file under .git keeps its execute bit" "[ -x '${GIT_MARKER}' ]"
echo ""

# ── Test 3: an empty GH_AW_ALLOWED_EXTENSIONS disables the extension filter ──
echo "Test 3: No extension filter when GH_AW_ALLOWED_EXTENSIONS is unset"
case_dir="${WORKSPACE}/test3"
make_cache_dir "${case_dir}" "file.json" "file.md" "helper.sh" "binary"
run_script "${case_dir}" none ""
assert "file.json kept" "[ -f '${case_dir}/file.json' ]"
assert "file.md kept" "[ -f '${case_dir}/file.md' ]"
assert "helper.sh kept" "[ -f '${case_dir}/helper.sh' ]"
assert "binary kept" "[ -f '${case_dir}/binary' ]"
echo

# ── Test 4: files whose extension is not in the list are deleted ─────────────
echo "Test 4: Extension filter removes disallowed file types"
case_dir="${WORKSPACE}/test4"
make_cache_dir "${case_dir}" "data.json" "notes.md" "helper.sh" "archive.zip"
run_script "${case_dir}" none ".json:.md"
assert "data.json kept" "[ -f '${case_dir}/data.json' ]"
assert "notes.md kept" "[ -f '${case_dir}/notes.md' ]"
assert "helper.sh removed" "[ ! -f '${case_dir}/helper.sh' ]"
assert "archive.zip removed" "[ ! -f '${case_dir}/archive.zip' ]"
echo

# ── Test 5: a file with no extension never matches the list ──────────────────
echo "Test 5: Extension filter removes files with no extension"
case_dir="${WORKSPACE}/test5"
make_cache_dir "${case_dir}" "data.json" "noext"
run_script "${case_dir}" none ".json"
assert "data.json kept" "[ -f '${case_dir}/data.json' ]"
assert "noext removed" "[ ! -f '${case_dir}/noext' ]"
echo

# ── Test 6: a list holding a single extension works ──────────────────────────
echo "Test 6: Extension filter with a single allowed extension"
case_dir="${WORKSPACE}/test6"
make_cache_dir "${case_dir}" "report.json" "notes.txt" "image.png"
run_script "${case_dir}" none ".json"
assert "report.json kept" "[ -f '${case_dir}/report.json' ]"
assert "notes.txt removed" "[ ! -f '${case_dir}/notes.txt' ]"
assert "image.png removed" "[ ! -f '${case_dir}/image.png' ]"
echo

# ── Test 7: chmod stripping and extension filtering combine in one run ───────
echo "Test 7: Execute-bit stripping and extension filtering both apply"
case_dir="${WORKSPACE}/test7"
make_cache_dir "${case_dir}" "keep.json" "drop.sh"
chmod +x "${case_dir}/keep.json" "${case_dir}/drop.sh"
run_script "${case_dir}" none ".json"
assert "keep.json exists" "[ -f '${case_dir}/keep.json' ]"
assert "keep.json not executable" "[ ! -x '${case_dir}/keep.json' ]"
assert "drop.sh removed" "[ ! -f '${case_dir}/drop.sh' ]"
echo

# ── Test 8: file extensions are compared without regard to case ──────────────
echo "Test 8: Extension matching is case-insensitive"
edge_dir="${WORKSPACE}/test8"
make_cache_dir "${edge_dir}" "data.json" "data.JSON" "notes.MD"
# The allow-list is lowercase; the uppercase .JSON and .MD files must still be kept.
run_script "${edge_dir}" none ".json:.md"
assert "data.json kept (exact match)" "[ -f '${edge_dir}/data.json' ]"
assert "data.JSON kept (uppercase file)" "[ -f '${edge_dir}/data.JSON' ]"
assert "notes.MD kept (uppercase file)" "[ -f '${edge_dir}/notes.MD' ]"
echo

# ── Test 9: spaces around list entries are ignored ───────────────────────────
echo "Test 9: Whitespace in allowed extensions list is trimmed"
edge_dir="${WORKSPACE}/test9"
make_cache_dir "${edge_dir}" "data.json" "note.md" "drop.sh"
# Entries padded with leading/trailing spaces must still match.
run_script "${edge_dir}" none " .json : .md "
assert "data.json kept (trimmed .json)" "[ -f '${edge_dir}/data.json' ]"
assert "note.md kept (trimmed .md)" "[ -f '${edge_dir}/note.md' ]"
assert "drop.sh removed" "[ ! -f '${edge_dir}/drop.sh' ]"
echo

# ── Test 10: symlinks are removed even without an extension filter ───────────
echo "Test 10: Symlinks in working tree are deleted"
edge_dir="${WORKSPACE}/test10"
make_cache_dir "${edge_dir}" "real.json"
# Simulate a compromised prior run that planted a link to a file outside the cache.
ln -s /etc/passwd "${edge_dir}/evil-link"
assert "symlink exists before script" "[ -L '${edge_dir}/evil-link' ]"
run_script "${edge_dir}" none >/dev/null
assert "symlink removed by script" "[ ! -L '${edge_dir}/evil-link' ]"
assert "real file still exists" "[ -f '${edge_dir}/real.json' ]"
echo

# ── Test 11: names containing spaces are filtered like any other ─────────────
echo "Test 11: Files with spaces in names are handled correctly"
edge_dir="${WORKSPACE}/test11"
make_cache_dir "${edge_dir}" "my data.json" "my script.sh"
run_script "${edge_dir}" none ".json"
assert "file with space and .json kept" "[ -f '${edge_dir}/my data.json' ]"
assert "file with space and .sh removed" "[ ! -f '${edge_dir}/my script.sh' ]"
echo

# ── Summary ──────────────────────────────────────────────────────────────────
# Print both tallies, then exit non-zero if any assertion failed.
printf '%s\n' "Tests passed: ${TESTS_PASSED}" "Tests failed: ${TESTS_FAILED}"

if [ "${TESTS_FAILED}" -gt 0 ]; then
  exit 1
else
  echo "✓ All tests passed!"
fi
Loading