From 2904dae19bb04546f1522351c9d0a87424cb4c03 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 23 Apr 2026 21:17:46 +0000
Subject: [PATCH 1/2] Initial plan


From cf2b4bb6c4cf0edc1cf647e11b5cb1c638175def Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 23 Apr 2026 21:23:44 +0000
Subject: [PATCH 2/2] Add end-to-end install integration test harness

Agent-Logs-Url: https://github.com/githubnext/autoloop/sessions/df881075-c8de-46ce-8314-6945f02f4f05

Co-authored-by: mrjf <180956+mrjf@users.noreply.github.com>
---
 .../workflows/install-integration-test.yml    |  54 +++
 tests/install-integration/README.md           |  67 ++++
 tests/install-integration/prompt.md           |  17 +
 tests/install-integration/run.sh              | 352 ++++++++++++++++++
 tests/install-integration/teardown.sh         |  99 +++++
 tests/install-integration/verify-phase1.sh    |  82 ++++
 tests/install-integration/verify-phase2.sh    | 120 ++++++
 7 files changed, 791 insertions(+)
 create mode 100644 .github/workflows/install-integration-test.yml
 create mode 100644 tests/install-integration/README.md
 create mode 100644 tests/install-integration/prompt.md
 create mode 100755 tests/install-integration/run.sh
 create mode 100755 tests/install-integration/teardown.sh
 create mode 100755 tests/install-integration/verify-phase1.sh
 create mode 100755 tests/install-integration/verify-phase2.sh

diff --git a/.github/workflows/install-integration-test.yml b/.github/workflows/install-integration-test.yml
new file mode 100644
index 0000000..7b2190f
--- /dev/null
+++ b/.github/workflows/install-integration-test.yml
@@ -0,0 +1,54 @@
+name: Install Integration Test
+
+# End-to-end test of install.md against a long-lived target repo.
+# Manual-dispatch only -- this exercises real LLM calls and force-pushes a
+# remote branch, so it must not run on PRs or schedules.
+
+on:
+  workflow_dispatch:
+    inputs:
+      keep_state_on_failure:
+        description: "Leave test repo in failure state for inspection"
+        type: boolean
+        default: false
+      install_test_repo:
+        description: "Target repo for the install (owner/repo)"
+        type: string
+        default: "mrjf/autoloop-test"
+
+jobs:
+  install-integration:
+    runs-on: ubuntu-latest
+    timeout-minutes: 30
+    permissions:
+      contents: read
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+
+      - name: Install gh aw extension
+        env:
+          GH_TOKEN: ${{ secrets.INSTALL_TEST_TOKEN }}
+        run: gh extension install github/gh-aw
+
+      - name: Install Copilot CLI
+        env:
+          GH_TOKEN: ${{ secrets.INSTALL_TEST_TOKEN }}
+        # The Copilot CLI is distributed as an npm package. If the install
+        # path changes upstream, update this single step.
+        run: npm install -g @github/copilot
+
+      - name: Verify gh auth and register gh as git credential helper
+        env:
+          GH_TOKEN: ${{ secrets.INSTALL_TEST_TOKEN }}
+        run: gh auth status && gh auth setup-git  # run.sh clones/pushes over HTTPS
+
+      - name: Run integration test
+        env:
+          GH_TOKEN: ${{ secrets.INSTALL_TEST_TOKEN }}
+          INSTALL_TEST_REPO: ${{ inputs.install_test_repo }}
+          KEEP_STATE_ON_FAILURE: ${{ inputs.keep_state_on_failure && '1' || '0' }}
+        run: ./tests/install-integration/run.sh
diff --git a/tests/install-integration/README.md b/tests/install-integration/README.md
new file mode 100644
index 0000000..fa81662
--- /dev/null
+++ b/tests/install-integration/README.md
@@ -0,0 +1,67 @@
+# install-integration
+
+End-to-end integration test for [`install.md`](../../install.md). Runs the
+install flow as a real coding agent (Copilot CLI) against a long-lived
+target repo (`mrjf/autoloop-test` by default), then exercises Phase 2 by
+running one iteration each of three programs across the program-source ×
+strategy matrix.
+
+This test is **manual-dispatch only**. It is not part of CI.
+
+## Local mode
+
+```bash
+# from the autoloop repo root:
+./tests/install-integration/run.sh
+```
+
+Requirements:
+
+- `gh` CLI authenticated as a user with write access to the target repo.
+- `copilot` CLI on PATH.
+- `python3` and `git` on PATH.
+
+Optional env / flags:
+
+- `INSTALL_TEST_REPO=<owner>/<repo>` -- override the target (default
+  `mrjf/autoloop-test`).
+- `--keep` (or `KEEP_STATE_ON_FAILURE=1`) -- skip teardown on failure so
+  the failure state can be inspected. Run `teardown.sh <repo> <base-sha>`
+  manually afterwards.
+
+## Actions mode
+
+Trigger the **Install Integration Test** workflow from the Actions tab. It
+runs the same script on a GitHub-hosted runner. Requires the
+`INSTALL_TEST_TOKEN` repo secret -- a PAT with `repo` scope on the target
+repo (the default `GITHUB_TOKEN` only has access to the workflow's own repo).
+
+## What it tests
+
+See [the issue that introduced this harness](https://github.com/githubnext/autoloop/issues)
+for the full motivation. In short:
+
+- **Phase 1** (file presence + lock idempotency) -- catches regressions in
+  `install.md` and in `gh aw compile`.
+- **Phase 2** (3 programs × 1 iteration each) -- catches regressions in
+  the scheduler, in strategy discovery, and in the iteration loop. The
+  three programs cover:
+
+  | # | Source     | Strategy        |
+  |---|------------|-----------------|
+  | 1 | file-based | OpenEvolve      |
+  | 2 | issue-based| Test-Driven     |
+  | 3 | file-based | plain (default) |
+
+- **Phase 3** (teardown) -- resets the target repo to the captured base
+  SHA, closes test issues/PRs, and deletes test branches.
+
+## Files
+
+| File                  | Purpose                                          |
+|-----------------------|--------------------------------------------------|
+| `run.sh`              | Driver. Orchestrates phases 1-3.                 |
+| `prompt.md`           | Prompt fed to Copilot CLI (edit without touching the driver). |
+| `verify-phase1.sh`    | File-presence + lock-idempotency assertions.     |
+| `verify-phase2.sh`    | Per-program assertions (one call per program).   |
+| `teardown.sh`         | Idempotent cleanup. Safe to re-run.              |
diff --git a/tests/install-integration/prompt.md b/tests/install-integration/prompt.md
new file mode 100644
index 0000000..0a2017e
--- /dev/null
+++ b/tests/install-integration/prompt.md
@@ -0,0 +1,17 @@
+You are installing autoloop into a freshly-reset GitHub repository.
+
+Your working directory is the root of that repository, cloned locally. The
+repository is empty except for the base fixtures in `src/` and `tests/`.
+
+Follow the install instructions at the URL below, EXACTLY AS WRITTEN. Execute
+each step using shell commands. Do not skip steps. Do not improvise. Do not
+optimize or "improve" the instructions.
+
+Stop after Step 5 (the install PR is opened). Do NOT proceed to Step 6
+("Create Your First Program") -- the test harness handles program creation
+itself in a deterministic way.
+
+When you finish: print a single line `INSTALL_PR=<url>`, where `<url>` is the
+bare URL of the PR you opened in Step 5 (no quotes or backticks). Then stop.
+
+Install instructions: https://github.com/githubnext/autoloop/blob/main/install.md
diff --git a/tests/install-integration/run.sh b/tests/install-integration/run.sh
new file mode 100755
index 0000000..f991085
--- /dev/null
+++ b/tests/install-integration/run.sh
@@ -0,0 +1,352 @@
+#!/usr/bin/env bash
+# run.sh - end-to-end install integration test driver.
+#
+# Runs against $INSTALL_TEST_REPO (default: mrjf/autoloop-test). See the
+# README in this directory for prerequisites and what the test verifies.
+#
+# Flags / env:
+#   --keep                    leave the test repo in failure state for inspection
+#   KEEP_STATE_ON_FAILURE=1   same as --keep (used by the Actions wrapper)
+#   INSTALL_TEST_REPO=...     target repo (default: mrjf/autoloop-test)
+#   AUTOLOOP_REF=main         git ref of install.md to follow (default: main)
+#
+# Exits non-zero on any failed assertion. Cleanup runs in `trap EXIT` so it
+# happens even on abort.
+set -euo pipefail
+
+# --------------------------------------------------------------------------
+# Args / env
+# --------------------------------------------------------------------------
+KEEP="${KEEP_STATE_ON_FAILURE:-0}"
+for arg in "$@"; do
+  case "$arg" in
+    --keep) KEEP=1 ;;
+    -h|--help)
+      sed -n '2,20p' "$0"
+      exit 0
+      ;;
+  esac
+done
+
+: "${INSTALL_TEST_REPO:=mrjf/autoloop-test}"  # target repo (env-overridable)
+: "${AUTOLOOP_REF:=main}"                     # git ref of install.md
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"  # dir of this script
+REPO_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"                # two levels up
+
+log() { printf '%s\n' "[run.sh] $*"; }
+hr()  { log "----------------------------------------"; }
+
+PASS=0        # set to 1 only after every phase has succeeded
+EXIT_CODE=1   # final exit status, decided by the cleanup trap
+BASE_SHA=""   # target-repo main SHA captured before the test starts
+WORKDIR=""    # temp dir holding the local clone and the copilot log
+
+cleanup() {  # EXIT trap: report the verdict, tear down, remove the temp dir
+  local rc=$?
+  hr
+  if [ "$PASS" = "1" ]; then
+    log "test PASSED"
+    EXIT_CODE=0
+  else
+    log "test FAILED (exit=$rc)"
+    EXIT_CODE=1
+  fi
+  # Teardown is skipped only for a failed run with KEEP=1.
+  if [ "$KEEP" = "1" ] && [ "$PASS" != "1" ]; then
+    log "KEEP=1 set: skipping teardown so failure state can be inspected"
+    log "remember to run teardown.sh manually:"
+    log "  $SCRIPT_DIR/teardown.sh $INSTALL_TEST_REPO ${BASE_SHA:-<base-sha>}"
+  else
+    if [ -n "$BASE_SHA" ]; then
+      log "running teardown..."
+      "$SCRIPT_DIR/teardown.sh" "$INSTALL_TEST_REPO" "$BASE_SHA" || true
+    fi
+  fi
+  # BASE_SHA / WORKDIR stay empty when the run aborted before they were set.
+  if [ -n "$WORKDIR" ] && [ -d "$WORKDIR" ]; then
+    rm -rf "$WORKDIR"
+  fi
+  # Exit with the verdict computed above, not with the trapped status.
+  exit "$EXIT_CODE"
+}
+trap cleanup EXIT
+
+# --------------------------------------------------------------------------
+# Pre-flight
+# --------------------------------------------------------------------------
+hr
+log "pre-flight"
+command -v gh       >/dev/null || { log "gh CLI not on PATH"; exit 1; }
+command -v copilot  >/dev/null || { log "copilot CLI not on PATH"; exit 1; }
+command -v python3  >/dev/null || { log "python3 not on PATH"; exit 1; }
+command -v git      >/dev/null || { log "git not on PATH"; exit 1; }
+gh auth status >/dev/null 2>&1 || { log "gh is not authenticated"; exit 1; }
+log "tools ok; target repo: $INSTALL_TEST_REPO"
+
+# Detect whether the autoloop source has sync-branches.md so phase-1
+# verification can require its lock file (issue #52 may remove it).
+if [ -f "$REPO_ROOT/workflows/sync-branches.md" ]; then
+  export EXPECT_SYNC_BRANCHES=1
+  log "sync-branches.md present in source repo: phase-1 will require its lock file"
+else
+  export EXPECT_SYNC_BRANCHES=0
+fi
+
+# --------------------------------------------------------------------------
+# Capture base-state SHA and reset target repo to it.
+# --------------------------------------------------------------------------
+hr
+log "capturing base-state SHA on $INSTALL_TEST_REPO@main"
+BASE_SHA="$(gh api "repos/${INSTALL_TEST_REPO}/branches/main" --jq '.commit.sha')"
+log "base SHA: $BASE_SHA"
+
+log "pre-test reset (discards any debris from prior failed runs)"
+"$SCRIPT_DIR/teardown.sh" "$INSTALL_TEST_REPO" "$BASE_SHA"
+
+# --------------------------------------------------------------------------
+# Clone target locally and feed install.md to copilot.
+# --------------------------------------------------------------------------
+WORKDIR="$(mktemp -d -t autoloop-install-int-XXXXXX)"
+CHECKOUT="$WORKDIR/repo"
+log "cloning $INSTALL_TEST_REPO -> $CHECKOUT"
+git clone --quiet "https://github.com/${INSTALL_TEST_REPO}.git" "$CHECKOUT"
+
+hr
+log "running copilot CLI against install.md (this can take several minutes)"
+COPILOT_LOG="$WORKDIR/copilot.log"
+PROMPT="$(cat "$SCRIPT_DIR/prompt.md")"
+(
+  cd "$CHECKOUT"
+  # `--allow-all-tools` lets the agent run the shell commands install.md
+  # tells it to run; without that the test can't actually exercise the flow.
+  copilot --allow-all-tools -p "$PROMPT" 2>&1 | tee "$COPILOT_LOG"
+)
+
+# Extract install PR URL from the agent's output. Tolerate quoting/extra ws.
+INSTALL_PR="$(grep -Eo 'INSTALL_PR=https://github\.com/[^ ]+' "$COPILOT_LOG" \
+  | tail -1 | cut -d= -f2- || true)"
+if [ -z "$INSTALL_PR" ]; then
+  log "could not find INSTALL_PR=... in copilot output"
+  exit 1
+fi
+export INSTALL_PR
+log "install PR: $INSTALL_PR"
+
+# --------------------------------------------------------------------------
+# Phase 1 verification (against the install branch checkout).
+# --------------------------------------------------------------------------
+hr
+log "PHASE 1: verifying install artifacts"
+# Make sure the local checkout is on the install branch the agent used.
+HEAD_REF="$(gh pr view "$INSTALL_PR" --repo "$INSTALL_TEST_REPO" --json headRefName --jq '.headRefName')"
+(
+  cd "$CHECKOUT"
+  git fetch --quiet origin "$HEAD_REF"
+  git checkout --quiet -B "$HEAD_REF" "origin/$HEAD_REF"
+)
+"$SCRIPT_DIR/verify-phase1.sh" "$CHECKOUT"
+
+# --------------------------------------------------------------------------
+# Merge the install PR and wait for it to land.
+# --------------------------------------------------------------------------
+hr
+log "merging install PR via squash"
+gh pr merge "$INSTALL_PR" --repo "$INSTALL_TEST_REPO" --squash --admin --delete-branch
+# Brief wait so subsequent gh api calls see the new main SHA.
+sleep 5
+
+# --------------------------------------------------------------------------
+# Phase 2: create three programs (file/openevolve, issue/test-driven,
+# file/plain) and run one iteration of each, sequentially.
+# --------------------------------------------------------------------------
+hr
+log "PHASE 2: program × strategy matrix"
+
+# Refresh local checkout to merged main.
+(
+  cd "$CHECKOUT"
+  git fetch --quiet origin main
+  git checkout --quiet -B main origin/main
+)
+
+# Helper: commit a file-based program to main and push.
+push_program() {
+  local name="$1"
+  local body_file="$2"
+  (
+    cd "$CHECKOUT"
+    mkdir -p ".autoloop/programs/${name}"
+    cp "$body_file" ".autoloop/programs/${name}/program.md"
+    git add ".autoloop/programs/${name}/program.md"
+    git -c user.email=integration-test@autoloop -c user.name="autoloop integration test" \
+      commit -m "test: add program ${name}" --quiet
+    git push --quiet origin main
+  )
+}
+
+# Helper: trigger one iteration for a program and echo the run id. Callers
+# capture stdout via $(...), so all logging in here must go to stderr.
+run_iteration() {
+  local name="$1" before
+  log "dispatching autoloop.lock.yml for program=$name" >&2
+  # Timestamp taken just before dispatch; used below to find the new run
+  # (">=" because this timestamp only has one-second resolution).
+  before="$(date -u +%Y-%m-%dT%H:%M:%SZ)"
+  gh workflow run autoloop.lock.yml --repo "$INSTALL_TEST_REPO" \
+    -f program="$name" >/dev/null \
+    || { log "workflow dispatch failed for $name" >&2; return 1; }
+  # Poll for the run.
+  local run_id="" attempts=0
+  while [ -z "$run_id" ] && [ "$attempts" -lt 30 ]; do
+    sleep 5
+    run_id="$(gh run list --repo "$INSTALL_TEST_REPO" --workflow autoloop.lock.yml \
+      --created ">=${before}" --limit 1 --json databaseId --jq '.[0].databaseId' 2>/dev/null || true)"
+    attempts=$((attempts + 1))
+  done
+  [ -n "$run_id" ] || { log "could not find dispatched run for $name" >&2; return 1; }
+  log "run id: $run_id; waiting for completion..." >&2
+  gh run watch "$run_id" --repo "$INSTALL_TEST_REPO" --exit-status >/dev/null 2>&1 \
+    || log "run $run_id finished with non-zero status (will be checked by verify-phase2.sh)" >&2
+  echo "$run_id"
+}
+
+# ---- Program 1: file-based + OpenEvolve ----------------------------------
+PROG1=rastrigin-openevolve
+PROG1_FILE="$WORKDIR/${PROG1}.md"
+cat > "$PROG1_FILE" <<'EOF'
+---
+schedule: every 6h
+---
+
+# Rastrigin (OpenEvolve)
+
+## Goal
+
+Improve `src/minimize.py` to find lower minima of the Rastrigin function more
+reliably. Lower metric is better.
+
+## Target
+
+Only modify these files:
+- `src/minimize.py` -- the minimizer
+
+Do NOT modify:
+- `src/evaluate.py`
+- `tests/test_minimize.py`
+
+## Evaluation
+
+```bash
+python3 src/evaluate.py
+```
+
+The metric is `metric`. **Lower is better.**
+
+## Evolution Strategy
+
+See `strategy/openevolve.md`. Maintain a small population across these islands:
+- `grid-search` (baseline)
+- `scipy-minimize`
+- `gradient-descent`
+EOF
+
+# ---- Program 3: file-based + plain prose (no strategy) -------------------
+# Defined ahead of Program 2 so both file-based programs are pushed back to back.
+PROG3=rastrigin-plain
+PROG3_FILE="$WORKDIR/${PROG3}.md"
+cat > "$PROG3_FILE" <<'EOF'
+---
+schedule: every 6h
+---
+
+# Rastrigin (Plain)
+
+## Goal
+
+Improve `src/minimize.py` to find lower minima of the Rastrigin function. Lower
+metric is better.
+
+## Target
+
+Only modify these files:
+- `src/minimize.py`
+
+Do NOT modify:
+- `src/evaluate.py`
+- `tests/test_minimize.py`
+
+## Evaluation
+
+```bash
+python3 src/evaluate.py
+```
+
+The metric is `metric`. **Lower is better.**
+EOF
+
+push_program "$PROG1" "$PROG1_FILE"
+push_program "$PROG3" "$PROG3_FILE"
+
+# ---- Program 2: issue-based + Test-Driven --------------------------------
+PROG2=rastrigin-tdd
+log "creating issue-based program $PROG2"
+PROG2_BODY=$(cat <<'EOF'
+<!-- AUTOLOOP:ISSUE-PROGRAM -->
+
+---
+schedule: every 6h
+---
+
+# rastrigin-tdd
+
+## Goal
+
+Cover the public API of `src/minimize.py` with tighter correctness tests.
+Higher passing test count is better.
+
+## Target
+
+Only modify these files:
+- `tests/test_minimize.py`
+
+Do NOT modify:
+- `src/minimize.py`
+- `src/evaluate.py`
+
+## Evaluation
+
+```bash
+python3 -m pytest tests/ -q | tail -1
+```
+
+The metric is `passing_tests`. **Higher is better.**
+
+## Evolution Strategy
+
+See `strategy/test-driven.md`. Maintain a test harness with at least one
+candidate per iteration.
+EOF
+)
+gh issue create --repo "$INSTALL_TEST_REPO" \
+  --title "[Autoloop: ${PROG2}]" \
+  --label "autoloop-program" \
+  --body "$PROG2_BODY" >/dev/null
+log "issue created for $PROG2"
+
+# ---- Run the three programs sequentially ---------------------------------
+RUN1="$(run_iteration "$PROG1")"
+"$SCRIPT_DIR/verify-phase2.sh" "$INSTALL_TEST_REPO" "$PROG1" "$RUN1" "openevolve"
+
+RUN2="$(run_iteration "$PROG2")"
+"$SCRIPT_DIR/verify-phase2.sh" "$INSTALL_TEST_REPO" "$PROG2" "$RUN2" "test-driven"
+
+RUN3="$(run_iteration "$PROG3")"
+"$SCRIPT_DIR/verify-phase2.sh" "$INSTALL_TEST_REPO" "$PROG3" "$RUN3" "plain"
+
+# --------------------------------------------------------------------------
+# All assertions passed -- mark for the cleanup trap.
+# --------------------------------------------------------------------------
+hr
+log "all phases passed"
+PASS=1
diff --git a/tests/install-integration/teardown.sh b/tests/install-integration/teardown.sh
new file mode 100755
index 0000000..88ac2af
--- /dev/null
+++ b/tests/install-integration/teardown.sh
@@ -0,0 +1,99 @@
+#!/usr/bin/env bash
+# teardown.sh - reset the integration-test target repo to a known-good base
+# state. Idempotent: safe to re-run.
+#
+# Usage:
+#   teardown.sh <owner/repo> <base-sha>
+#
+# What it does (all against the *remote* repo via gh / git push --force):
+#   1. Force-reset main to <base-sha>.
+#   2. Close all issues labeled `autoloop-program` (also the
+#      `[Autoloop: ...]` status issues).
+#   3. Close all open PRs whose head branch starts with `autoloop/` or
+#      whose head branch is `install-autoloop`.
+#   4. Delete all remote branches matching `autoloop/*`,
+#      `install-autoloop`, and `memory/autoloop`.
+# Every step is best-effort: a failure is reported via warn and skipped.
+# Requires: gh authenticated with write access to <owner/repo>; git on PATH.
+set -euo pipefail
+
+REPO="${1:?usage: teardown.sh <owner/repo> <base-sha>}"
+BASE_SHA="${2:?usage: teardown.sh <owner/repo> <base-sha>}"
+
+log() { echo "TEARDOWN: $*"; }
+warn() { echo "TEARDOWN WARN: $*" >&2; }
+
+# 1. Force-reset main to base-sha. Use a temp clone to avoid touching the
+#    caller's working dir.
+TMP="$(mktemp -d -t autoloop-teardown-XXXXXX)"
+trap 'rm -rf "$TMP"' EXIT
+
+log "cloning $REPO to reset main -> $BASE_SHA"
+git clone --quiet "https://github.com/${REPO}.git" "$TMP/repo"
+(
+  cd "$TMP/repo" || exit 1
+  # `set -e` is not honoured inside "( ... ) ||", so failures are chained by hand.
+  CURRENT="$(git rev-parse origin/main)" || exit 1
+  if [ "$CURRENT" != "$BASE_SHA" ]; then
+    log "main is at $CURRENT; resetting to $BASE_SHA"
+    git checkout --quiet -B main "$BASE_SHA" \
+      && git push --force --quiet origin main
+  else
+    log "main already at $BASE_SHA; no reset needed"
+  fi
+) || warn "main reset failed (continuing)"
+
+# 2. Close `autoloop-program`-labelled issues ("closed" is logged on success only).
+log "closing autoloop-program issues"
+mapfile -t ISSUES < <(gh issue list --repo "$REPO" --label autoloop-program \
+  --state open --json number --jq '.[].number' 2>/dev/null || true)
+for n in "${ISSUES[@]:-}"; do
+  [ -z "$n" ] && continue
+  gh issue close "$n" --repo "$REPO" --reason "not planned" \
+    --comment "Closed by install-integration test teardown." \
+    >/dev/null 2>&1 \
+    && log "closed issue #$n" || warn "could not close issue #$n"
+done
+
+# Also catch `[Autoloop: ...]`-titled issues that lost the label or were
+# auto-created without it (defensive).
+mapfile -t TITLED < <(gh issue list --repo "$REPO" --state open --search '[Autoloop:' \
+  --json number,title --jq '.[] | select(.title | startswith("[Autoloop:")) | .number' 2>/dev/null || true)
+for n in "${TITLED[@]:-}"; do
+  [ -z "$n" ] && continue
+  gh issue close "$n" --repo "$REPO" --reason "not planned" \
+    --comment "Closed by install-integration test teardown." \
+    >/dev/null 2>&1 \
+    && log "closed titled issue #$n" || warn "could not close titled issue #$n"
+done
+
+# 3. Close open PRs from autoloop/* and install-autoloop branches.
+log "closing test PRs"
+mapfile -t PRS < <(gh pr list --repo "$REPO" --state open \
+  --json number,headRefName \
+  --jq '.[] | select(.headRefName | startswith("autoloop/") or . == "install-autoloop") | .number' \
+  2>/dev/null || true)
+for n in "${PRS[@]:-}"; do
+  [ -z "$n" ] && continue
+  gh pr close "$n" --repo "$REPO" --delete-branch \
+    --comment "Closed by install-integration test teardown." \
+    >/dev/null 2>&1 \
+    && log "closed PR #$n" || warn "could not close PR #$n"
+done
+
+# 4. Delete any remaining branches we created (gh pr close --delete-branch
+#    handles most, but autoloop/* branches without a PR also exist).
+log "deleting test branches"
+mapfile -t BRANCHES < <(gh api "repos/${REPO}/branches" --paginate \
+  --jq '.[].name' 2>/dev/null || true)
+for b in "${BRANCHES[@]:-}"; do
+  case "$b" in
+    autoloop/*|install-autoloop|memory/autoloop)
+      gh api -X DELETE "repos/${REPO}/git/refs/heads/${b}" >/dev/null 2>&1 \
+        && log "deleted branch $b" \
+        || warn "could not delete branch $b"
+      ;;
+  esac
+done
+
+log "done"
diff --git a/tests/install-integration/verify-phase1.sh b/tests/install-integration/verify-phase1.sh
new file mode 100755
index 0000000..9acba83
--- /dev/null
+++ b/tests/install-integration/verify-phase1.sh
@@ -0,0 +1,82 @@
+#!/usr/bin/env bash
+# verify-phase1.sh - check that the install produced the expected files and
+# that re-running `gh aw compile autoloop` leaves the lock file unchanged.
+#
+# Usage: verify-phase1.sh <path-to-checkout-of-install-branch>
+#
+# The checkout must be on the install branch (post-`gh aw init`,
+# post-copy-files, post-`gh aw compile autoloop`). All paths are relative
+# to that checkout. Optional env: EXPECT_SYNC_BRANCHES, INSTALL_PR.
+set -euo pipefail
+
+CHECKOUT="${1:?usage: verify-phase1.sh <checkout-dir>}"
+cd "$CHECKOUT"
+
+fail() { printf 'PHASE1 FAIL: %s\n' "$*" >&2; exit 1; }
+ok()   { printf 'PHASE1 ok:   %s\n' "$*"; }
+
+require_file() {
+  if [ ! -f "$1" ]; then fail "missing file: $1"; fi
+  ok "file exists: $1"
+}
+
+require_dir() {
+  if [ ! -d "$1" ]; then fail "missing directory: $1"; fi
+  ok "dir exists:  $1"
+}
+
+# --- artifact of `gh aw init` ---------------------------------------------
+require_file ".gitattributes"
+
+# --- workflow sources copied from the autoloop repo -----------------------
+require_file ".github/workflows/autoloop.md"
+require_dir  ".github/workflows/shared"
+
+# Issue #52: once sync-branches is removed, only autoloop.md should exist.
+# Until then its source and lock file must be present too. The driver
+# decides which applies and signals it through EXPECT_SYNC_BRANCHES=1.
+if [ "${EXPECT_SYNC_BRANCHES:-}" = "1" ]; then
+  require_file ".github/workflows/sync-branches.md"
+  require_file ".github/workflows/sync-branches.lock.yml"
+fi
+
+# --- compiled lock file ---------------------------------------------------
+require_file ".github/workflows/autoloop.lock.yml"
+
+# --- issue template -------------------------------------------------------
+require_file ".github/ISSUE_TEMPLATE/autoloop-program.md"
+
+# --- programs directory present (may be empty) ----------------------------
+require_dir ".autoloop/programs"
+
+# --- lock idempotency: a second compile must leave the lock file as-is ----
+LOCK=".github/workflows/autoloop.lock.yml"
+sha256() { shasum -a 256 "$1" | cut -d' ' -f1; }
+SHA_BEFORE="$(sha256 "$LOCK")"
+ok "lock sha256 before: $SHA_BEFORE"
+
+# Compile again. A compiler failure aborts the script (set -e); a changed
+# digest is reported below as a phase-1 failure, together with the diff of
+# the lock file.
+gh aw compile autoloop >/dev/null
+SHA_AFTER="$(sha256 "$LOCK")"
+ok "lock sha256 after:  $SHA_AFTER"
+
+if [[ "$SHA_BEFORE" != "$SHA_AFTER" ]]; then
+  echo "--- diff ---" >&2
+  git --no-pager diff -- "$LOCK" >&2 || true
+  fail "gh aw compile is non-idempotent: lock file changed on second run"
+fi
+ok "lock file is idempotent"
+
+# --- install PR URL shape -------------------------------------------------
+# When the driver exports INSTALL_PR, check that it is a well-formed
+# github.com pull-request URL (shape only; the PR itself is not fetched).
+pr_url_re='^https://github\.com/[^/]+/[^/]+/pull/[0-9]+$'
+if [ -n "${INSTALL_PR:-}" ]; then
+  [[ "$INSTALL_PR" =~ $pr_url_re ]] \
+    || fail "INSTALL_PR is not a well-formed PR URL: $INSTALL_PR"
+  ok "install PR URL looks valid: $INSTALL_PR"
+fi
+
+echo "PHASE1 PASS"
diff --git a/tests/install-integration/verify-phase2.sh b/tests/install-integration/verify-phase2.sh
new file mode 100755
index 0000000..071609c
--- /dev/null
+++ b/tests/install-integration/verify-phase2.sh
@@ -0,0 +1,120 @@
+#!/usr/bin/env bash
+# verify-phase2.sh - per-program assertions for one Phase-2 program run.
+#
+# Usage:
+#   verify-phase2.sh <owner/repo> <program-name> <run-id> <strategy>
+#
+# <strategy> is one of: openevolve | test-driven | plain
+#
+# Asserts:
+#   1. Workflow run completed with conclusion `success` (the `agent` job exited
+#      success, regardless of accept/reject of the iteration).
+#   2. A program issue exists for <program-name>.
+#   3. The status comment (<!-- AUTOLOOP:STATUS -->) is present on the issue.
+#   4. State file `<program-name>.md` exists on the `memory/autoloop` branch.
+#   5. Branch `autoloop/<program-name>` exists OR the iteration was rejected.
+#   6. Strategy-specific subsection in the state file:
+#        openevolve  -> contains `## 🧬 Population`
+#        test-driven -> contains `## ✅ Test Harness`
+#        plain       -> contains `## 📊 Iteration History`
+#                        and does NOT contain Population or Test Harness
+#                        (negative assertion).
+set -euo pipefail
+
+REPO="${1:?usage: verify-phase2.sh <owner/repo> <program-name> <run-id> <strategy>}"
+PROGRAM="${2:?missing <program-name>}"
+RUN_ID="${3:?missing <run-id>}"
+STRATEGY="${4:?missing <strategy>}"
+
+fail() { echo "PHASE2 [$PROGRAM] FAIL: $*" >&2; exit 1; }
+ok()   { echo "PHASE2 [$PROGRAM] ok:   $*"; }
+
+# 1. Workflow run conclusion.
+CONCLUSION="$(gh run view "$RUN_ID" --repo "$REPO" --json conclusion --jq '.conclusion' 2>/dev/null || echo "")"
+if [ "$CONCLUSION" != "success" ]; then
+  fail "workflow run $RUN_ID conclusion=$CONCLUSION (want: success)"
+fi
+ok "workflow run $RUN_ID conclusion=success"
+
+# 2. Program issue exists. For issue-based programs the issue was created by
+#    the test driver before the run; for file-based programs the iteration
+#    auto-creates one. Either way, search by title.
+ISSUE_NUMBER="$(gh issue list --repo "$REPO" --state all --search "[Autoloop: $PROGRAM]" \
+  --json number,title \
+  --jq ".[] | select(.title == \"[Autoloop: $PROGRAM]\") | .number" \
+  2>/dev/null | head -1)"
+if [ -z "$ISSUE_NUMBER" ]; then
+  fail "no program issue titled [Autoloop: $PROGRAM] found"
+fi
+ok "program issue: #$ISSUE_NUMBER"
+
+# 3. Status comment present.
+STATUS_HIT="$(gh issue view "$ISSUE_NUMBER" --repo "$REPO" --comments \
+  --json comments --jq '.comments[].body' 2>/dev/null \
+  | grep -c '<!-- AUTOLOOP:STATUS -->' || true)"
+if [ "${STATUS_HIT:-0}" -lt 1 ]; then
+  fail "no <!-- AUTOLOOP:STATUS --> comment on issue #$ISSUE_NUMBER"
+fi
+ok "status comment present on #$ISSUE_NUMBER"
+
+# 4. State file on memory/autoloop branch.
+STATE_FILE="${PROGRAM}.md"
+STATE_BODY="$(gh api "repos/${REPO}/contents/${STATE_FILE}?ref=memory/autoloop" \
+  --jq '.content' 2>/dev/null | base64 --decode 2>/dev/null || true)"
+if [ -z "$STATE_BODY" ]; then
+  fail "state file ${STATE_FILE} missing on memory/autoloop branch"
+fi
+ok "state file present: memory/autoloop:${STATE_FILE}"
+
+if ! grep -q 'Machine State' <<<"$STATE_BODY"; then
+  fail "state file has no 'Machine State' table"
+fi
+ok "state file has Machine State table"
+
+# 5. autoloop/<program-name> branch exists OR iteration was rejected.
+BRANCH_OK=0
+if gh api "repos/${REPO}/branches/autoloop/${PROGRAM}" >/dev/null 2>&1; then
+  BRANCH_OK=1
+  ok "branch autoloop/${PROGRAM} exists"
+else
+  # Acceptable only if the iteration was rejected. Search every comment on the
+  # issue; no `grep -q`, so gh never gets SIGPIPE (a failure under pipefail).
+  if gh issue view "$ISSUE_NUMBER" --repo "$REPO" --comments \
+       --json comments --jq '.comments[].body' 2>/dev/null \
+       | grep -iE 'reject' >/dev/null; then
+    ok "no autoloop branch, but iteration was rejected (acceptable)"
+    BRANCH_OK=1
+  fi
+fi
+[ "$BRANCH_OK" = "1" ] || fail "no autoloop/${PROGRAM} branch and no rejection marker"
+
+# 6. Strategy-specific subsection (here-strings: no pipe, so no SIGPIPE/pipefail).
+case "$STRATEGY" in
+  openevolve)
+    grep -q '## 🧬 Population' <<<"$STATE_BODY" \
+      || fail "openevolve: state file missing '## 🧬 Population'"
+    ok "state file has '## 🧬 Population'"
+    ;;
+  test-driven)
+    grep -q '## ✅ Test Harness' <<<"$STATE_BODY" \
+      || fail "test-driven: state file missing '## ✅ Test Harness'"
+    ok "state file has '## ✅ Test Harness'"
+    ;;
+  plain)
+    grep -q '## 📊 Iteration History' <<<"$STATE_BODY" \
+      || fail "plain: state file missing '## 📊 Iteration History'"
+    ok "state file has '## 📊 Iteration History'"
+    if grep -q '## 🧬 Population' <<<"$STATE_BODY"; then
+      fail "plain: state file unexpectedly contains '## 🧬 Population' (strategy bleed)"
+    fi
+    if grep -q '## ✅ Test Harness' <<<"$STATE_BODY"; then
+      fail "plain: state file unexpectedly contains '## ✅ Test Harness' (strategy bleed)"
+    fi
+    ok "no strategy-section bleed into plain program"
+    ;;
+  *)
+    fail "unknown strategy: $STRATEGY"
+    ;;
+esac
+
+echo "PHASE2 [$PROGRAM] PASS"