From 035423c0f4fc119dc2754aa5c8ee7e28463deffa Mon Sep 17 00:00:00 2001 From: z23cc Date: Tue, 7 Apr 2026 17:17:45 +0800 Subject: [PATCH 1/7] feat(ci): add shellcheck, cargo-audit, JSON validation, and setup-hooks script [fn-140.2] Add CI pipeline hardening: - shellcheck step for scripts/*.sh, scripts/hooks/*.sh, flowctl/install.sh - cargo-audit step for dependency vulnerability scanning - JSON validation for hooks/hooks.json and .claude-plugin/plugin.json - .shellcheckrc with shell=bash default - scripts/setup-hooks.sh for idempotent pre-commit hook symlink setup - Expanded CI path triggers to include scripts/ and hooks/ directories Co-Authored-By: Claude Opus 4.6 (1M context) --- .github/workflows/ci.yml | 21 +++++++++++++++++++++ .shellcheckrc | 1 + scripts/setup-hooks.sh | 20 ++++++++++++++++++++ 3 files changed, 42 insertions(+) create mode 100644 .shellcheckrc create mode 100755 scripts/setup-hooks.sh diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a0becaa3..e9083a54 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -5,11 +5,15 @@ on: branches: [main] paths: - "flowctl/**" + - "scripts/**" + - "hooks/**" - ".github/workflows/ci.yml" pull_request: branches: [main] paths: - "flowctl/**" + - "scripts/**" + - "hooks/**" - ".github/workflows/ci.yml" env: @@ -25,6 +29,20 @@ jobs: steps: - uses: actions/checkout@v4 + - name: Install shellcheck + working-directory: . + run: sudo apt-get install -y shellcheck + + - name: Lint shell scripts + working-directory: . + run: shellcheck scripts/*.sh scripts/hooks/*.sh flowctl/install.sh + + - name: Validate JSON configs + working-directory: . 
+ run: | + python3 -c "import json; json.load(open('hooks/hooks.json'))" + python3 -c "import json; json.load(open('.claude-plugin/plugin.json'))" + - name: Install Rust toolchain uses: dtolnay/rust-toolchain@stable with: @@ -53,6 +71,9 @@ jobs: - name: Clippy run: cargo clippy --all-targets -- -D warnings + - name: Audit dependencies + run: cargo install cargo-audit && cargo audit + coverage: name: Test Coverage runs-on: ubuntu-latest diff --git a/.shellcheckrc b/.shellcheckrc new file mode 100644 index 00000000..9822e6c4 --- /dev/null +++ b/.shellcheckrc @@ -0,0 +1 @@ +shell=bash diff --git a/scripts/setup-hooks.sh b/scripts/setup-hooks.sh new file mode 100755 index 00000000..1d6d5c5e --- /dev/null +++ b/scripts/setup-hooks.sh @@ -0,0 +1,20 @@ +#!/usr/bin/env bash +# setup-hooks.sh — Symlink pre-commit hook into .git/hooks/ +# +# Idempotent: safe to run multiple times. + +set -euo pipefail + +ROOT="$(cd "$(dirname "$0")/.." && pwd)" + +HOOK_SRC="$ROOT/scripts/pre-commit.sh" +HOOK_DST="$ROOT/.git/hooks/pre-commit" + +if [ -L "$HOOK_DST" ] && [ "$(readlink "$HOOK_DST")" = "$HOOK_SRC" ]; then + echo "pre-commit hook already installed." 
+ exit 0 +fi + +mkdir -p "$ROOT/.git/hooks" +ln -sf "$HOOK_SRC" "$HOOK_DST" +echo "pre-commit hook installed: $HOOK_DST -> $HOOK_SRC" From ad32ef1e3571f79ca3567a435793f66e7a7f77bf Mon Sep 17 00:00:00 2001 From: z23cc Date: Tue, 7 Apr 2026 17:18:04 +0800 Subject: [PATCH 2/7] fix(docs): align documentation with current codebase state [fn-140.1] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - CLAUDE.md: update crate count to 4 (add flowctl-service), fix skill count to 8+22 - README.md: bump version badge 0.1.27 → 0.1.31, fix broken CHANGELOG.md links → releases page - docs/skills.md: add 6 missing extension skills, update count 16 → 22 - flowctl/README.md: add flowctl-service crate to architecture section - Delete dead flowctl/tests/integration/compare_outputs.sh (Python flowctl removed in 577e9c7) - Rename parity_test.rs → integration_test.rs (no longer parity tests) Co-Authored-By: Claude Opus 4.6 (1M context) --- CLAUDE.md | 4 +- README.md | 4 +- docs/skills.md | 8 +- flowctl/README.md | 5 +- .../{parity_test.rs => integration_test.rs} | 0 flowctl/tests/integration/compare_outputs.sh | 565 ------------------ 6 files changed, 14 insertions(+), 572 deletions(-) rename flowctl/crates/flowctl-cli/tests/{parity_test.rs => integration_test.rs} (100%) delete mode 100755 flowctl/tests/integration/compare_outputs.sh diff --git a/CLAUDE.md b/CLAUDE.md index 9123de5a..90c716d2 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -13,12 +13,12 @@ commands/flow-code/*.md → Slash command definitions (user-invocable entry poi skills/*/SKILL.md → Skill implementations (loaded by Skill tool, never Read directly) agents/*.md → Subagent definitions (research scouts, worker, plan-sync, etc.) 
bin/flowctl → Rust binary (built from flowctl/ workspace) -flowctl/ → Rust Cargo workspace (3 crates: core, db, cli) +flowctl/ → Rust Cargo workspace (4 crates: core, db, service, cli) hooks/hooks.json → Ralph workflow guards (active when FLOW_RALPH=1) docs/ → Architecture docs, CI examples ``` -**Skills**: 8 core + 16 extensions. See `docs/skills.md` for the full classification. Core workflow: plan → plan-review → work → impl-review → epic-review. +**Skills**: 8 core + 22 extensions. See `docs/skills.md` for the full classification. Core workflow: plan → plan-review → work → impl-review → epic-review. **Key invariant**: The `bin/flowctl` Rust binary is the single source of truth for `.flow/` state. Always invoke as: ```bash diff --git a/README.md b/README.md index b79691ef..e3cd16ea 100644 --- a/README.md +++ b/README.md @@ -7,9 +7,9 @@ [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](../../LICENSE) [![Claude Code](https://img.shields.io/badge/Claude_Code-Plugin-blueviolet)](https://claude.ai/code) -[![Version](https://img.shields.io/badge/Version-0.1.27-green)](../../CHANGELOG.md) +[![Version](https://img.shields.io/badge/Version-0.1.31-green)](https://github.com/z23cc/flow-code/releases) -[![Status](https://img.shields.io/badge/Status-Active_Development-brightgreen)](../../CHANGELOG.md) +[![Status](https://img.shields.io/badge/Status-Active_Development-brightgreen)](https://github.com/z23cc/flow-code/releases) **A production-grade harness for Claude Code. Full-auto development from idea to PR.** diff --git a/docs/skills.md b/docs/skills.md index 8e1a411c..d980d769 100644 --- a/docs/skills.md +++ b/docs/skills.md @@ -17,7 +17,7 @@ These skills form the primary plan-execute-review workflow. 
They ship with the p | `flow-code-setup` | `/flow-code:setup` | Install flowctl CLI and configure project | | `flow-code-map` | `/flow-code:map` | Generate codebase architecture maps | -## Extension Skills (16) +## Extension Skills (22) Optional capabilities that extend the core workflow. Install as needed. @@ -29,6 +29,9 @@ Optional capabilities that extend the core workflow. Install as needed. | `flow-code-auto-improve` | `/flow-code:auto-improve` | Autonomous code quality improvement loops | | `flow-code-django` | `/flow-code:django` | Django-specific patterns, security, and testing | | `flow-code-deps` | `/flow-code:deps` | Dependency graph visualization and execution order | +| `flow-code-api-design` | `/flow-code:api-design` | API design and module boundary review | +| `flow-code-brainstorm` | `/flow-code:brainstorm` | Explore and pressure-test ideas before planning | +| `flow-code-performance` | `/flow-code:performance` | Performance investigation, optimization, and benchmarks | ### Workflow Extensions @@ -50,6 +53,9 @@ Optional capabilities that extend the core workflow. Install as needed. 
| `flow-code-rp-explorer` | `/flow-code:rp-explorer` | RepoPrompt-powered codebase exploration | | `flow-code-skill-create` | `/flow-code:skill-create` | Create new flow-code skills | | `flow-code-prompt-eng` | Internal | Prompt engineering guidance for review agents | +| `flow-code-cicd` | `/flow-code:cicd` | CI/CD pipeline setup, quality gates, and deployment automation | +| `flow-code-context-eng` | `/flow-code:context-eng` | Context window management and optimization | +| `flow-code-deprecation` | `/flow-code:deprecation` | Feature, API, and module deprecation workflows | | `browser` | `/browser` | Browser automation via agent-browser CLI | ## Recommended Usage Order diff --git a/flowctl/README.md b/flowctl/README.md index 0d311cde..a2ffb340 100644 --- a/flowctl/README.md +++ b/flowctl/README.md @@ -54,15 +54,16 @@ flowctl tasks -e ep-1 ## Architecture -flowctl is split into three crates: +flowctl is split into four crates: ``` flowctl-core Core types, ID parsing, state machine, DAG, JSON I/O flowctl-db libSQL storage layer (async, native vector search) +flowctl-service Business logic service layer — unifies CLI, daemon, and MCP execution paths flowctl-cli CLI entry point (clap) — the `flowctl` binary ``` -**Data flow**: CLI parses commands via `clap`, calls into `flowctl-db` for storage, which uses `flowctl-core` types. The DAG module computes task dependencies and execution order. +**Data flow**: CLI parses commands via `clap`, calls into `flowctl-service` for business logic, which uses `flowctl-db` for storage and `flowctl-core` types. The DAG module computes task dependencies and execution order. 
## Release profile diff --git a/flowctl/crates/flowctl-cli/tests/parity_test.rs b/flowctl/crates/flowctl-cli/tests/integration_test.rs similarity index 100% rename from flowctl/crates/flowctl-cli/tests/parity_test.rs rename to flowctl/crates/flowctl-cli/tests/integration_test.rs diff --git a/flowctl/tests/integration/compare_outputs.sh b/flowctl/tests/integration/compare_outputs.sh deleted file mode 100755 index 11c1e948..00000000 --- a/flowctl/tests/integration/compare_outputs.sh +++ /dev/null @@ -1,565 +0,0 @@ -#!/usr/bin/env bash -# compare_outputs.sh — Integration tests comparing Rust and Python flowctl output. -# -# Runs both Python ($FLOWCTL) and Rust (cargo-built binary) against identical -# input and compares JSON output structure, key presence, exit codes. -# -# Usage: -# bash flowctl/tests/integration/compare_outputs.sh [--verbose] -# -# Environment: -# FLOWCTL Path to Python flowctl.py (auto-detected if unset) -# RUST_BINARY Path to Rust flowctl binary (auto-detected if unset) - -set -euo pipefail - -SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" -REPO_ROOT="$(cd "$SCRIPT_DIR/../../.." && pwd)" - -VERBOSE="${1:-}" -PASS=0 -FAIL=0 -SKIP=0 - -# ── Locate binaries ────────────────────────────────────────────────── -FLOWCTL="${FLOWCTL:-$REPO_ROOT/bin/flowctl}" -if [[ ! -f "$FLOWCTL" ]]; then - echo "FATAL: Python flowctl not found at $FLOWCTL" - exit 1 -fi - -RUST_BINARY="${RUST_BINARY:-$REPO_ROOT/flowctl/target/debug/flowctl}" -if [[ ! -f "$RUST_BINARY" ]]; then - echo "Building Rust binary..." - cargo build --manifest-path "$REPO_ROOT/flowctl/Cargo.toml" 2>/dev/null -fi -if [[ ! 
-f "$RUST_BINARY" ]]; then - echo "FATAL: Rust binary not found at $RUST_BINARY" - exit 1 -fi - -# ── Helpers ─────────────────────────────────────────────────────────── -TMPDIR_BASE="$(mktemp -d)" -trap 'rm -rf "$TMPDIR_BASE"' EXIT - -log() { echo " $*"; } -log_verbose() { [[ "$VERBOSE" == "--verbose" ]] && echo " $*" || true; } - -# Run Python flowctl (--json goes AFTER subcommand) -run_python() { - local dir="$1"; shift - local cmd="$1"; shift - # Python: flowctl.py [sub-subcommand...] --json [args...] - # We need to insert --json after the command/subcommand tokens - (cd "$dir" && python3 "$FLOWCTL" $cmd --json "$@" 2>&1) -} -run_python_exit() { - local dir="$1"; shift - local cmd="$1"; shift - (cd "$dir" && python3 "$FLOWCTL" $cmd --json "$@" 2>&1; echo "EXIT:$?") | tail -1 | sed 's/EXIT://' -} - -# Run Rust flowctl (--json goes BEFORE subcommand) -run_rust() { - local dir="$1"; shift - (cd "$dir" && "$RUST_BINARY" --json "$@" 2>&1) -} -run_rust_exit() { - local dir="$1"; shift - (cd "$dir" && "$RUST_BINARY" --json "$@" 2>&1; echo "EXIT:$?") | tail -1 | sed 's/EXIT://' -} - -# Compare JSON output: normalize timestamps, ignore key ordering, ignore -# auto-generated IDs (they differ because each binary uses its own .flow/). -# Returns 0 if structurally equivalent, 1 otherwise. 
-compare_json() { - local py_json="$1" - local rs_json="$2" - local label="$3" - - # Normalize: sort keys, strip timestamps/dates, strip IDs, strip paths - local py_norm rs_norm - py_norm=$(echo "$py_json" | python3 -c " -import sys, json, re -try: - d = json.load(sys.stdin) -except: - print('PARSE_ERROR') - sys.exit(0) -print(json.dumps(d, sort_keys=True)) -" 2>/dev/null || echo "PARSE_ERROR") - - rs_norm=$(echo "$rs_json" | python3 -c " -import sys, json, re -try: - d = json.load(sys.stdin) -except: - print('PARSE_ERROR') - sys.exit(0) -print(json.dumps(d, sort_keys=True)) -" 2>/dev/null || echo "PARSE_ERROR") - - if [[ "$py_norm" == "PARSE_ERROR" ]] || [[ "$rs_norm" == "PARSE_ERROR" ]]; then - log_verbose "JSON parse error for $label" - log_verbose " Python: $py_json" - log_verbose " Rust: $rs_json" - return 1 - fi - - # Compare top-level keys - local py_keys rs_keys - py_keys=$(echo "$py_json" | python3 -c " -import sys, json -d = json.load(sys.stdin) -if isinstance(d, dict): - print(' '.join(sorted(d.keys()))) -else: - print('NOT_DICT') -" 2>/dev/null) - rs_keys=$(echo "$rs_json" | python3 -c " -import sys, json -d = json.load(sys.stdin) -if isinstance(d, dict): - print(' '.join(sorted(d.keys()))) -else: - print('NOT_DICT') -" 2>/dev/null) - - if [[ "$py_keys" != "$rs_keys" ]]; then - log_verbose "Key mismatch for $label" - log_verbose " Python keys: $py_keys" - log_verbose " Rust keys: $rs_keys" - return 1 - fi - - return 0 -} - -# Compare exit codes -compare_exit() { - local py_exit="$1" - local rs_exit="$2" - local label="$3" - - if [[ "$py_exit" != "$rs_exit" ]]; then - log_verbose "Exit code mismatch for $label: Python=$py_exit Rust=$rs_exit" - return 1 - fi - return 0 -} - -# Test runner -test_case() { - local name="$1" - local result="$2" # "pass" or "fail" - - if [[ "$result" == "pass" ]]; then - PASS=$((PASS + 1)) - log "PASS $name" - else - FAIL=$((FAIL + 1)) - log "FAIL $name" - fi -} - -skip_case() { - local name="$1" - local reason="$2" - 
SKIP=$((SKIP + 1)) - log "SKIP $name ($reason)" -} - -# ── Setup fresh .flow/ dirs ────────────────────────────────────────── -setup_empty_dirs() { - local py_dir="$TMPDIR_BASE/py_$$_$RANDOM" - local rs_dir="$TMPDIR_BASE/rs_$$_$RANDOM" - mkdir -p "$py_dir" "$rs_dir" - echo "$py_dir $rs_dir" -} - -setup_initialized_dirs() { - local dirs - dirs=$(setup_empty_dirs) - local py_dir rs_dir - py_dir=$(echo "$dirs" | cut -d' ' -f1) - rs_dir=$(echo "$dirs" | cut -d' ' -f2) - - run_python "$py_dir" "init" >/dev/null 2>&1 - run_rust "$rs_dir" "init" >/dev/null 2>&1 - - echo "$py_dir $rs_dir" -} - -setup_with_epic() { - local dirs - dirs=$(setup_initialized_dirs) - local py_dir rs_dir - py_dir=$(echo "$dirs" | cut -d' ' -f1) - rs_dir=$(echo "$dirs" | cut -d' ' -f2) - - run_python "$py_dir" "epic create" --title "Test Epic" >/dev/null 2>&1 - run_rust "$rs_dir" "epic" "create" --title "Test Epic" >/dev/null 2>&1 - - echo "$py_dir $rs_dir" -} - -setup_with_task() { - local dirs - dirs=$(setup_with_epic) - local py_dir rs_dir py_epic rs_epic - py_dir=$(echo "$dirs" | cut -d' ' -f1) - rs_dir=$(echo "$dirs" | cut -d' ' -f2) - - # Get epic IDs (they may differ) - py_epic=$(run_python "$py_dir" "epics" | python3 -c "import sys,json; print(json.load(sys.stdin)['epics'][0]['id'])" 2>/dev/null) - rs_epic=$(run_rust "$rs_dir" "epics" | python3 -c "import sys,json; print(json.load(sys.stdin)['epics'][0]['id'])" 2>/dev/null) - - run_python "$py_dir" "task create" --epic "$py_epic" --title "Task One" >/dev/null 2>&1 - run_rust "$rs_dir" "task" "create" --epic "$rs_epic" --title "Task One" >/dev/null 2>&1 - - echo "$py_dir $rs_dir $py_epic $rs_epic" -} - -# ══════════════════════════════════════════════════════════════════════ -echo "=== flowctl Integration Tests: Rust vs Python ===" -echo " Python: $FLOWCTL" -echo " Rust: $RUST_BINARY" -echo "" - -# ── Test 1: init ────────────────────────────────────────────────────── -echo "--- init ---" -dirs=$(setup_empty_dirs) -py_dir=$(echo "$dirs" | 
cut -d' ' -f1) -rs_dir=$(echo "$dirs" | cut -d' ' -f2) - -py_out=$(run_python "$py_dir" "init") -rs_out=$(run_rust "$rs_dir" "init") -py_exit=$?; rs_exit=$? - -if compare_json "$py_out" "$rs_out" "init"; then - test_case "init: JSON keys match" "pass" -else - test_case "init: JSON keys match" "fail" -fi - -# Check success field -py_success=$(echo "$py_out" | python3 -c "import sys,json; print(json.load(sys.stdin).get('success',''))" 2>/dev/null) -rs_success=$(echo "$rs_out" | python3 -c "import sys,json; print(json.load(sys.stdin).get('success',''))" 2>/dev/null) -if [[ "$py_success" == "True" ]] && [[ "$rs_success" == "True" ]]; then - test_case "init: both report success=true" "pass" -else - test_case "init: both report success=true" "fail" -fi - -# ── Test 2: init idempotent (re-init) ──────────────────────────────── -py_out2=$(run_python "$py_dir" "init") -rs_out2=$(run_rust "$rs_dir" "init") -py_success2=$(echo "$py_out2" | python3 -c "import sys,json; print(json.load(sys.stdin).get('success',''))" 2>/dev/null) -rs_success2=$(echo "$rs_out2" | python3 -c "import sys,json; print(json.load(sys.stdin).get('success',''))" 2>/dev/null) -if [[ "$py_success2" == "True" ]] && [[ "$rs_success2" == "True" ]]; then - test_case "init: idempotent re-init succeeds" "pass" -else - test_case "init: idempotent re-init succeeds" "fail" -fi - -# ── Test 3: status (empty .flow/) ──────────────────────────────────── -echo "--- status ---" -dirs=$(setup_initialized_dirs) -py_dir=$(echo "$dirs" | cut -d' ' -f1) -rs_dir=$(echo "$dirs" | cut -d' ' -f2) - -py_out=$(run_python "$py_dir" "status") -rs_out=$(run_rust "$rs_dir" "status") - -if compare_json "$py_out" "$rs_out" "status"; then - test_case "status: JSON keys match" "pass" -else - test_case "status: JSON keys match" "fail" -fi - -# Verify zero counts -py_todo=$(echo "$py_out" | python3 -c "import sys,json; print(json.load(sys.stdin)['tasks']['todo'])" 2>/dev/null) -rs_todo=$(echo "$rs_out" | python3 -c "import sys,json; 
print(json.load(sys.stdin)['tasks']['todo'])" 2>/dev/null) -if [[ "$py_todo" == "0" ]] && [[ "$rs_todo" == "0" ]]; then - test_case "status: empty .flow/ shows zero tasks" "pass" -else - test_case "status: empty .flow/ shows zero tasks" "fail" -fi - -# ── Test 4: epics (empty) ──────────────────────────────────────────── -echo "--- epics ---" -py_out=$(run_python "$py_dir" "epics") -rs_out=$(run_rust "$rs_dir" "epics") - -if compare_json "$py_out" "$rs_out" "epics-empty"; then - test_case "epics: empty list JSON keys match" "pass" -else - test_case "epics: empty list JSON keys match" "fail" -fi - -py_count=$(echo "$py_out" | python3 -c "import sys,json; print(json.load(sys.stdin).get('count',json.load(open('/dev/null')) if False else len(json.load(sys.stdin).get('epics',[]))))" 2>/dev/null || echo "?") -# simpler: -py_count=$(echo "$py_out" | python3 -c "import sys,json; d=json.load(sys.stdin); print(d.get('count', len(d.get('epics',[]))))" 2>/dev/null) -rs_count=$(echo "$rs_out" | python3 -c "import sys,json; d=json.load(sys.stdin); print(d.get('count', len(d.get('epics',[]))))" 2>/dev/null) -if [[ "$py_count" == "0" ]] && [[ "$rs_count" == "0" ]]; then - test_case "epics: both show count=0" "pass" -else - test_case "epics: both show count=0" "fail" -fi - -# ── Test 5: epic create ────────────────────────────────────────────── -echo "--- epic create ---" -py_out=$(run_python "$py_dir" "epic create" --title "Integration Test Epic") -rs_out=$(run_rust "$rs_dir" "epic" "create" --title "Integration Test Epic") - -if compare_json "$py_out" "$rs_out" "epic-create"; then - test_case "epic create: JSON keys match" "pass" -else - test_case "epic create: JSON keys match" "fail" -fi - -py_success=$(echo "$py_out" | python3 -c "import sys,json; print(json.load(sys.stdin).get('success',''))" 2>/dev/null) -rs_success=$(echo "$rs_out" | python3 -c "import sys,json; print(json.load(sys.stdin).get('success',''))" 2>/dev/null) -if [[ "$py_success" == "True" ]] && [[ "$rs_success" 
== "True" ]]; then - test_case "epic create: both succeed" "pass" -else - test_case "epic create: both succeed" "fail" -fi - -# Get epic IDs for subsequent tests -py_epic=$(echo "$py_out" | python3 -c "import sys,json; print(json.load(sys.stdin)['id'])" 2>/dev/null) -rs_epic=$(echo "$rs_out" | python3 -c "import sys,json; print(json.load(sys.stdin)['id'])" 2>/dev/null) - -# ── Test 6: show (epic) ────────────────────────────────────────────── -echo "--- show ---" -py_out=$(run_python "$py_dir" "show" "$py_epic") -rs_out=$(run_rust "$rs_dir" "show" "$rs_epic") - -# show may have extra keys in Python that Rust hasn't implemented yet -# Check Rust keys are a subset of Python keys -py_keys=$(echo "$py_out" | python3 -c "import sys,json; d=json.load(sys.stdin); print(' '.join(sorted(d.keys())) if isinstance(d,dict) else '')" 2>/dev/null) -rs_keys=$(echo "$rs_out" | python3 -c "import sys,json; d=json.load(sys.stdin); print(' '.join(sorted(d.keys())) if isinstance(d,dict) else '')" 2>/dev/null) -extra=$(python3 -c " -py=set('$py_keys'.split()) -rs=set('$rs_keys'.split()) -extra=rs-py -print(' '.join(sorted(extra)) if extra else '') -") -if [[ -z "$extra" ]]; then - test_case "show epic: Rust keys subset of Python" "pass" - missing=$(python3 -c " -py=set('$py_keys'.split()) -rs=set('$rs_keys'.split()) -m=py-rs -if m: print(' (Rust missing: ' + ', '.join(sorted(m)) + ')') -") - [[ -n "$missing" ]] && log "$missing" -else - test_case "show epic: Rust keys subset of Python" "fail" - log_verbose " Extra Rust keys: $extra" -fi - -# ── Test 7: task create ────────────────────────────────────────────── -echo "--- task create ---" -py_out=$(run_python "$py_dir" "task create" --epic "$py_epic" --title "Test Task Alpha") -rs_out=$(run_rust "$rs_dir" "task" "create" --epic "$rs_epic" --title "Test Task Alpha") - -if compare_json "$py_out" "$rs_out" "task-create"; then - test_case "task create: JSON keys match" "pass" -else - test_case "task create: JSON keys match" "fail" -fi - 
-py_task=$(echo "$py_out" | python3 -c "import sys,json; print(json.load(sys.stdin)['id'])" 2>/dev/null) -rs_task=$(echo "$rs_out" | python3 -c "import sys,json; print(json.load(sys.stdin)['id'])" 2>/dev/null) - -# ── Test 8: tasks list ─────────────────────────────────────────────── -echo "--- tasks ---" -py_out=$(run_python "$py_dir" "tasks" --epic "$py_epic") -rs_out=$(run_rust "$rs_dir" "tasks" --epic "$rs_epic") - -if compare_json "$py_out" "$rs_out" "tasks"; then - test_case "tasks: JSON keys match" "pass" -else - test_case "tasks: JSON keys match" "fail" -fi - -# ── Test 9: start ──────────────────────────────────────────────────── -echo "--- start ---" -py_out=$(run_python "$py_dir" "start" "$py_task") -rs_out=$(run_rust "$rs_dir" "start" "$rs_task") - -py_success=$(echo "$py_out" | python3 -c "import sys,json; print(json.load(sys.stdin).get('success',''))" 2>/dev/null) -rs_success=$(echo "$rs_out" | python3 -c "import sys,json; print(json.load(sys.stdin).get('success',''))" 2>/dev/null) -if [[ "$py_success" == "True" ]] && [[ "$rs_success" == "True" ]]; then - test_case "start: both succeed" "pass" -else - test_case "start: both succeed" "fail" -fi - -if compare_json "$py_out" "$rs_out" "start"; then - test_case "start: JSON keys match" "pass" -else - test_case "start: JSON keys match" "fail" -fi - -# ── Test 10: done ──────────────────────────────────────────────────── -echo "--- done ---" -py_out=$(run_python "$py_dir" "done" "$py_task" --summary "Completed" --force) -rs_out=$(run_rust "$rs_dir" "done" "$rs_task" --summary "Completed" --force) - -py_success=$(echo "$py_out" | python3 -c "import sys,json; print(json.load(sys.stdin).get('success',''))" 2>/dev/null) -rs_success=$(echo "$rs_out" | python3 -c "import sys,json; print(json.load(sys.stdin).get('success',''))" 2>/dev/null) -if [[ "$py_success" == "True" ]] && [[ "$rs_success" == "True" ]]; then - test_case "done: both succeed" "pass" -else - test_case "done: both succeed" "fail" -fi - -if 
compare_json "$py_out" "$rs_out" "done"; then - test_case "done: JSON keys match" "pass" -else - test_case "done: JSON keys match" "fail" -fi - -# ── Test 11: status after work ─────────────────────────────────────── -echo "--- status after work ---" -py_out=$(run_python "$py_dir" "status") -rs_out=$(run_rust "$rs_dir" "status") - -py_done=$(echo "$py_out" | python3 -c "import sys,json; print(json.load(sys.stdin)['tasks']['done'])" 2>/dev/null) -rs_done=$(echo "$rs_out" | python3 -c "import sys,json; print(json.load(sys.stdin)['tasks']['done'])" 2>/dev/null) -if [[ "$py_done" == "1" ]] && [[ "$rs_done" == "1" ]]; then - test_case "status: both show 1 done task" "pass" -else - test_case "status: both show 1 done task" "fail" -fi - -# ══════════════════════════════════════════════════════════════════════ -# Edge Cases -# ══════════════════════════════════════════════════════════════════════ -echo "" -echo "--- Edge Cases ---" - -# ── Edge 1: status without .flow/ ───────────────────────────────────── -edge_dir_py="$TMPDIR_BASE/edge_py_$$" -edge_dir_rs="$TMPDIR_BASE/edge_rs_$$" -mkdir -p "$edge_dir_py" "$edge_dir_rs" - -py_out=$(run_python "$edge_dir_py" "status" 2>&1 || true) -py_exit=$? -rs_out=$(run_rust "$edge_dir_rs" "status" 2>&1 || true) -rs_exit=$? 
- -# Both should indicate no .flow/ or fail gracefully -py_exists=$(echo "$py_out" | python3 -c "import sys,json; print(json.load(sys.stdin).get('flow_exists',''))" 2>/dev/null || echo "error") -rs_exists=$(echo "$rs_out" | python3 -c "import sys,json; print(json.load(sys.stdin).get('flow_exists',''))" 2>/dev/null || echo "error") -if [[ "$py_exists" == "False" ]] && [[ "$rs_exists" == "False" ]]; then - test_case "edge: status without .flow/ returns flow_exists=false" "pass" -elif [[ "$py_exists" == "error" ]] && [[ "$rs_exists" == "error" ]]; then - # Both error out - also acceptable - test_case "edge: status without .flow/ both error (consistent)" "pass" -else - test_case "edge: status without .flow/ consistent behavior" "fail" - log_verbose " Python flow_exists=$py_exists Rust flow_exists=$rs_exists" -fi - -# ── Edge 2: show with invalid ID ───────────────────────────────────── -dirs=$(setup_initialized_dirs) -py_dir=$(echo "$dirs" | cut -d' ' -f1) -rs_dir=$(echo "$dirs" | cut -d' ' -f2) - -py_out=$(run_python "$py_dir" "show" "nonexistent-id-999" 2>&1; echo "EXIT:$?") -py_exit=$(echo "$py_out" | grep "EXIT:" | sed 's/EXIT://') -rs_out=$(run_rust "$rs_dir" "show" "nonexistent-id-999" 2>&1; echo "EXIT:$?") -rs_exit=$(echo "$rs_out" | grep "EXIT:" | sed 's/EXIT://') - -# Both should return non-zero or error JSON -if [[ "$py_exit" != "0" ]] && [[ "$rs_exit" != "0" ]]; then - test_case "edge: show invalid ID - both return non-zero exit" "pass" -else - # Check if they return error in JSON - py_success=$(echo "$py_out" | head -1 | python3 -c "import sys,json; print(json.load(sys.stdin).get('success',''))" 2>/dev/null || echo "?") - rs_success=$(echo "$rs_out" | head -1 | python3 -c "import sys,json; print(json.load(sys.stdin).get('success',''))" 2>/dev/null || echo "?") - if [[ "$py_success" == "False" ]] && [[ "$rs_success" == "False" ]]; then - test_case "edge: show invalid ID - both return success=false" "pass" - else - test_case "edge: show invalid ID - 
consistent error behavior" "fail" - log_verbose " Python exit=$py_exit success=$py_success" - log_verbose " Rust exit=$rs_exit success=$rs_success" - fi -fi - -# ── Edge 3: start with invalid ID ──────────────────────────────────── -py_out=$(run_python "$py_dir" "start" "bogus-task-id" 2>&1; echo "EXIT:$?") -py_exit=$(echo "$py_out" | grep "EXIT:" | sed 's/EXIT://') -rs_out=$(run_rust "$rs_dir" "start" "bogus-task-id" 2>&1; echo "EXIT:$?") -rs_exit=$(echo "$rs_out" | grep "EXIT:" | sed 's/EXIT://') - -if [[ "$py_exit" != "0" ]] && [[ "$rs_exit" != "0" ]]; then - test_case "edge: start invalid ID - both return non-zero" "pass" -elif [[ "$py_exit" == "$rs_exit" ]]; then - test_case "edge: start invalid ID - same exit code ($py_exit)" "pass" -else - test_case "edge: start invalid ID - consistent error" "fail" - log_verbose " Python exit=$py_exit Rust exit=$rs_exit" -fi - -# ── Edge 4: done without required args ──────────────────────────────── -py_exit=0 -(cd "$py_dir" && python3 "$FLOWCTL" done --json >/dev/null 2>&1) || py_exit=$? -rs_exit=0 -(cd "$rs_dir" && "$RUST_BINARY" --json done >/dev/null 2>&1) || rs_exit=$? - -if [[ "$py_exit" != "0" ]] && [[ "$rs_exit" != "0" ]]; then - test_case "edge: done without task ID - both error" "pass" -else - test_case "edge: done without task ID - consistent error" "fail" - log_verbose " Python exit=$py_exit Rust exit=$rs_exit" -fi - -# ── Edge 5: epic create without title ───────────────────────────────── -py_exit=0 -(cd "$py_dir" && python3 "$FLOWCTL" epic create --json >/dev/null 2>&1) || py_exit=$? -rs_exit=0 -(cd "$rs_dir" && "$RUST_BINARY" --json epic create >/dev/null 2>&1) || rs_exit=$? 
- -if [[ "$py_exit" != "0" ]] && [[ "$rs_exit" != "0" ]]; then - test_case "edge: epic create without title - both error" "pass" -else - test_case "edge: epic create without title - consistent error" "fail" - log_verbose " Python exit=$py_exit Rust exit=$rs_exit" -fi - -# ── Edge 6: task create without epic ────────────────────────────────── -py_exit=0 -(cd "$py_dir" && python3 "$FLOWCTL" task create --json --title "Orphan" >/dev/null 2>&1) || py_exit=$? -rs_exit=0 -(cd "$rs_dir" && "$RUST_BINARY" --json task create --title "Orphan" >/dev/null 2>&1) || rs_exit=$? - -if [[ "$py_exit" != "0" ]] && [[ "$rs_exit" != "0" ]]; then - test_case "edge: task create without epic - both error" "pass" -else - test_case "edge: task create without epic - consistent error" "fail" - log_verbose " Python exit=$py_exit Rust exit=$rs_exit" -fi - -# ══════════════════════════════════════════════════════════════════════ -# Summary -# ══════════════════════════════════════════════════════════════════════ -echo "" -echo "=== Results ===" -echo " PASS: $PASS" -echo " FAIL: $FAIL" -echo " SKIP: $SKIP" -TOTAL=$((PASS + FAIL)) -echo " TOTAL: $TOTAL" -echo "" - -if [[ $FAIL -gt 0 ]]; then - echo "FAILED ($FAIL of $TOTAL tests failed)" - exit 1 -else - echo "ALL TESTS PASSED" - exit 0 -fi From 6963d77eec590ab7dad57ead8155ee868ab3ea49 Mon Sep 17 00:00:00 2001 From: z23cc Date: Tue, 7 Apr 2026 17:40:40 +0800 Subject: [PATCH 3/7] refactor(tests): split monolithic smoke_test.sh into focused test files [fn-140.4] Split the ~2000-line smoke_test.sh into 12 focused test files under scripts/tests/, each independently runnable with its own isolated temp directory. All test assertions preserved exactly. 
Files created: - common.sh: shared setup (python detection, flowctl binary, colors, counters) - run_all.sh: sequential test runner with per-file pass/fail tracking - test_init.sh: idempotent init, config set/get, planSync config - test_scheduling.sh: next plan/work/none, priority, artifact file resilience - test_lifecycle.sh: plan_review_status, branch, set-title, block/validate, duration tracking, workspace_changes - test_gaps.sh: gap add/resolve/check, idempotency, priority filtering - test_memory.sh: memory init/add/list, verify, staleness, retro suggestion - test_files.sh: file ownership map, conflict detection - test_review.sh: parse_receipt_path, review-backend compare, archival, parse-findings - test_domain.sh: domain tagging, epic archive/clean - test_worker.sh: context hints, build_review_prompt, worker-prompt, worker-phase lifecycle - test_misc.sh: schema validate, codex commands, depends_on_epics, stdin, set-spec, checkpoint, sync command files - test_restart.sh: restart command, status --interrupted, auto-execute - test_codex_e2e.sh: codex plan-review/impl-review e2e smoke_test.sh is now a thin wrapper that delegates to run_all.sh. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- scripts/smoke_test.sh | 1996 +----------------------------- scripts/tests/common.sh | 65 + scripts/tests/run_all.sh | 38 + scripts/tests/test_codex_e2e.sh | 108 ++ scripts/tests/test_domain.sh | 152 +++ scripts/tests/test_files.sh | 48 + scripts/tests/test_gaps.sh | 121 ++ scripts/tests/test_init.sh | 97 ++ scripts/tests/test_lifecycle.sh | 277 +++++ scripts/tests/test_memory.sh | 103 ++ scripts/tests/test_misc.sh | 241 ++++ scripts/tests/test_restart.sh | 254 ++++ scripts/tests/test_review.sh | 271 ++++ scripts/tests/test_scheduling.sh | 122 ++ scripts/tests/test_worker.sh | 233 ++++ 15 files changed, 2132 insertions(+), 1994 deletions(-) create mode 100755 scripts/tests/common.sh create mode 100755 scripts/tests/run_all.sh create mode 100755 scripts/tests/test_codex_e2e.sh create mode 100755 scripts/tests/test_domain.sh create mode 100755 scripts/tests/test_files.sh create mode 100755 scripts/tests/test_gaps.sh create mode 100755 scripts/tests/test_init.sh create mode 100755 scripts/tests/test_lifecycle.sh create mode 100755 scripts/tests/test_memory.sh create mode 100755 scripts/tests/test_misc.sh create mode 100755 scripts/tests/test_restart.sh create mode 100755 scripts/tests/test_review.sh create mode 100755 scripts/tests/test_scheduling.sh create mode 100755 scripts/tests/test_worker.sh diff --git a/scripts/smoke_test.sh b/scripts/smoke_test.sh index 69ee6292..3854e24e 100755 --- a/scripts/smoke_test.sh +++ b/scripts/smoke_test.sh @@ -1,1998 +1,6 @@ #!/usr/bin/env bash set -euo pipefail +# Thin wrapper — delegates to split test files in scripts/tests/ SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" -PLUGIN_ROOT="$(cd "$SCRIPT_DIR/.." 
&& pwd)" - -# Python detection: prefer python3, fallback to python (Windows support, GH-35) -pick_python() { - if [[ -n "${PYTHON_BIN:-}" ]]; then - command -v "$PYTHON_BIN" >/dev/null 2>&1 && { echo "$PYTHON_BIN"; return; } - fi - if command -v python3 >/dev/null 2>&1; then echo "python3"; return; fi - if command -v python >/dev/null 2>&1; then echo "python"; return; fi - echo "" -} - -PYTHON_BIN="$(pick_python)" -[[ -n "$PYTHON_BIN" ]] || { echo "ERROR: python not found (need python3 or python in PATH)" >&2; exit 1; } - -# Safety: never run tests from the main plugin repo -if [[ -f "$PWD/.claude-plugin/marketplace.json" ]] || [[ -f "$PWD/plugins/flow-code/.claude-plugin/plugin.json" ]]; then - echo "ERROR: refusing to run from main plugin repo. Run from any other directory." >&2 - exit 1 -fi - -TEST_DIR="/tmp/flowctl-smoke-$$" -PASS=0 -FAIL=0 - -GREEN='\033[0;32m' -RED='\033[0;31m' -YELLOW='\033[1;33m' -NC='\033[0m' - -cleanup() { - rm -rf "$TEST_DIR" -} -trap cleanup EXIT - -echo -e "${YELLOW}=== flowctl smoke tests ===${NC}" - -mkdir -p "$TEST_DIR/repo" -cd "$TEST_DIR/repo" -git init -q - -# Locate flowctl binary (Rust) -if [[ -x "$PLUGIN_ROOT/bin/flowctl" ]]; then - FLOWCTL="$PLUGIN_ROOT/bin/flowctl" -elif command -v flowctl >/dev/null 2>&1; then - FLOWCTL="$(command -v flowctl)" -else - echo "ERROR: flowctl binary not found. 
Build with: cd flowctl && cargo build --release && cp target/release/flowctl ../bin/" >&2 - exit 1 -fi - -$FLOWCTL init --json >/dev/null -printf '{"commits":[],"tests":[],"prs":[]}' > "$TEST_DIR/evidence.json" -printf "ok\n" > "$TEST_DIR/summary.md" - -echo -e "${YELLOW}--- idempotent init ---${NC}" - -# Test 1: Re-run init (no changes) -init_result="$($FLOWCTL init --json)" -init_actions="$(echo "$init_result" | "$PYTHON_BIN" -c 'import json,sys; print(len(json.load(sys.stdin).get("actions", [])))')" -if [[ "$init_actions" == "0" ]]; then - echo -e "${GREEN}✓${NC} init idempotent (no changes on re-run)" - PASS=$((PASS + 1)) -else - echo -e "${RED}✗${NC} init idempotent: expected 0 actions, got $init_actions" - FAIL=$((FAIL + 1)) -fi - -# Test 2: Config upgrade (old config without planSync) -echo '{"memory":{"enabled":true}}' > .flow/config.json -init_upgrade="$($FLOWCTL init --json)" -upgrade_msg="$(echo "$init_upgrade" | "$PYTHON_BIN" -c 'import json,sys; print(json.load(sys.stdin).get("message", ""))')" -if [[ "$upgrade_msg" == *"upgraded config.json"* ]]; then - echo -e "${GREEN}✓${NC} init upgrades config (adds missing keys)" - PASS=$((PASS + 1)) -else - echo -e "${RED}✗${NC} init upgrade: expected 'upgraded config.json' in message, got: $upgrade_msg" - FAIL=$((FAIL + 1)) -fi - -# Test 3: Verify existing values preserved after upgrade -memory_val="$($FLOWCTL config get memory.enabled --json | "$PYTHON_BIN" -c 'import json,sys; print(json.load(sys.stdin).get("value"))')" -if [[ "$memory_val" == "True" ]]; then - echo -e "${GREEN}✓${NC} init preserves existing config values" - PASS=$((PASS + 1)) -else - echo -e "${RED}✗${NC} init preserve: expected memory.enabled=True, got $memory_val" - FAIL=$((FAIL + 1)) -fi - -# Test 4: Verify new defaults added (memory + planSync now default to True) -plansync_val="$($FLOWCTL config get planSync.enabled --json | "$PYTHON_BIN" -c 'import json,sys; print(json.load(sys.stdin).get("value"))')" -if [[ "$plansync_val" == "True" 
]]; then - echo -e "${GREEN}✓${NC} init adds new default keys" - PASS=$((PASS + 1)) -else - echo -e "${RED}✗${NC} init defaults: expected planSync.enabled=True, got $plansync_val" - FAIL=$((FAIL + 1)) -fi - -# Reset config for remaining tests -$FLOWCTL config set memory.enabled false --json >/dev/null - -echo -e "${YELLOW}--- next: plan/work/none + priority ---${NC}" -# Capture epic ID from create output (fn-N-xxx format) -EPIC1_JSON="$($FLOWCTL epic create --title "Epic One" --json)" -EPIC1="$(echo "$EPIC1_JSON" | "$PYTHON_BIN" -c 'import json,sys; print(json.load(sys.stdin)["id"])')" -$FLOWCTL task create --epic "$EPIC1" --title "Low pri" --priority 5 --json >/dev/null -$FLOWCTL task create --epic "$EPIC1" --title "High pri" --priority 1 --json >/dev/null - -plan_json="$($FLOWCTL next --require-plan-review --json)" -"$PYTHON_BIN" - "$plan_json" "$EPIC1" <<'PY' -import json, sys -data = json.loads(sys.argv[1]) -expected_epic = sys.argv[2] -assert data["status"] == "plan" -assert data["epic"] == expected_epic, f"Expected {expected_epic}, got {data['epic']}" -PY -echo -e "${GREEN}✓${NC} next plan" -PASS=$((PASS + 1)) - -$FLOWCTL epic review "$EPIC1" ship --json >/dev/null -work_json="$($FLOWCTL next --json)" -"$PYTHON_BIN" - "$work_json" "$EPIC1" <<'PY' -import json, sys -data = json.loads(sys.argv[1]) -expected_epic = sys.argv[2] -assert data["status"] == "work" -assert data["task"] == f"{expected_epic}.2", f"Expected {expected_epic}.2, got {data['task']}" -PY -echo -e "${GREEN}✓${NC} next work priority" -PASS=$((PASS + 1)) - -$FLOWCTL start "${EPIC1}.2" --json >/dev/null -$FLOWCTL done "${EPIC1}.2" --summary-file "$TEST_DIR/summary.md" --evidence-json "$TEST_DIR/evidence.json" --json >/dev/null -$FLOWCTL start "${EPIC1}.1" --json >/dev/null -$FLOWCTL done "${EPIC1}.1" --summary-file "$TEST_DIR/summary.md" --evidence-json "$TEST_DIR/evidence.json" --json >/dev/null -none_json="$($FLOWCTL next --json)" -"$PYTHON_BIN" - <<'PY' "$none_json" -import json, sys -data = 
json.loads(sys.argv[1]) -assert data["status"] == "none" -PY -echo -e "${GREEN}✓${NC} next none" -PASS=$((PASS + 1)) - -echo -e "${YELLOW}--- artifact files in tasks dir (GH-21) ---${NC}" -# Create artifact files that match glob but aren't valid task files -# This simulates Claude writing evidence/summary files to .flow/tasks/ -cat > ".flow/tasks/${EPIC1}.1-evidence.json" << 'EOF' -{"commits":["abc123"],"tests":["npm test"],"prs":[]} -EOF -cat > ".flow/tasks/${EPIC1}.1-summary.json" << 'EOF' -{"summary":"Task completed successfully"} -EOF -# Test that next still works with artifact files present -set +e -next_result="$($FLOWCTL next --json 2>&1)" -next_rc=$? -set -e -if [[ "$next_rc" -eq 0 ]]; then - echo -e "${GREEN}✓${NC} next ignores artifact files" - PASS=$((PASS + 1)) -else - echo -e "${RED}✗${NC} next crashes on artifact files: $next_result" - FAIL=$((FAIL + 1)) -fi -# Test that list still works -set +e -list_result="$($FLOWCTL list --json 2>&1)" -list_rc=$? -set -e -if [[ "$list_rc" -eq 0 ]]; then - echo -e "${GREEN}✓${NC} list ignores artifact files" - PASS=$((PASS + 1)) -else - echo -e "${RED}✗${NC} list crashes on artifact files: $list_result" - FAIL=$((FAIL + 1)) -fi -# Test that ready still works -set +e -ready_result="$($FLOWCTL ready --epic "$EPIC1" --json 2>&1)" -ready_rc=$? -set -e -if [[ "$ready_rc" -eq 0 ]]; then - echo -e "${GREEN}✓${NC} ready ignores artifact files" - PASS=$((PASS + 1)) -else - echo -e "${RED}✗${NC} ready crashes on artifact files: $ready_result" - FAIL=$((FAIL + 1)) -fi -# Test that show (with tasks) still works -set +e -show_result="$($FLOWCTL show "$EPIC1" --json 2>&1)" -show_rc=$? -set -e -if [[ "$show_rc" -eq 0 ]]; then - echo -e "${GREEN}✓${NC} show ignores artifact files" - PASS=$((PASS + 1)) -else - echo -e "${RED}✗${NC} show crashes on artifact files: $show_result" - FAIL=$((FAIL + 1)) -fi -# Test that validate still works -set +e -validate_result="$($FLOWCTL validate --epic "$EPIC1" --json 2>&1)" -validate_rc=$? 
-set -e -if [[ "$validate_rc" -eq 0 ]]; then - echo -e "${GREEN}✓${NC} validate ignores artifact files" - PASS=$((PASS + 1)) -else - echo -e "${RED}✗${NC} validate crashes on artifact files: $validate_result" - FAIL=$((FAIL + 1)) -fi -# Cleanup artifact files -rm -f ".flow/tasks/${EPIC1}.1-evidence.json" ".flow/tasks/${EPIC1}.1-summary.json" - -echo -e "${YELLOW}--- plan_review_status default ---${NC}" -"$PYTHON_BIN" - "$EPIC1" <<'PY' -import json, sys -from pathlib import Path -epic_id = sys.argv[1] -path = Path(f".flow/epics/{epic_id}.json") -data = json.loads(path.read_text()) -data.pop("plan_review_status", None) -data.pop("plan_reviewed_at", None) -data.pop("branch_name", None) -path.write_text(json.dumps(data, indent=2, sort_keys=True) + "\n") -PY -show_json="$($FLOWCTL show "$EPIC1" --json)" -"$PYTHON_BIN" - <<'PY' "$show_json" -import json, sys -data = json.loads(sys.argv[1]) -assert data.get("plan_review_status") is None or data.get("plan_review_status") == "unknown" -assert data.get("plan_reviewed_at") is None -assert data.get("branch_name") is None -PY -echo -e "${GREEN}✓${NC} plan_review_status defaulted" -PASS=$((PASS + 1)) - -echo -e "${YELLOW}--- branch_name set ---${NC}" -$FLOWCTL epic branch "$EPIC1" "${EPIC1}-epic" --json >/dev/null -show_json="$($FLOWCTL show "$EPIC1" --json)" -if "$PYTHON_BIN" - "$show_json" "$EPIC1" <<'PY' 2>/dev/null -import json, sys -data = json.loads(sys.argv[1]) -expected_branch = f"{sys.argv[2]}-epic" -assert data.get("branch_name") == expected_branch, f"Expected {expected_branch}, got {data.get('branch_name')}" -PY -then - echo -e "${GREEN}✓${NC} branch_name set" - PASS=$((PASS + 1)) -else - echo -e "${RED}✗${NC} branch_name set: show does not return branch_name (DB-only field)" - FAIL=$((FAIL + 1)) -fi - -echo -e "${YELLOW}--- epic set-title ---${NC}" -# Create epic with tasks for rename test -RENAME_EPIC_JSON="$($FLOWCTL epic create --title "Old Title" --json)" -RENAME_EPIC="$(echo "$RENAME_EPIC_JSON" | "$PYTHON_BIN" 
-c 'import json,sys; print(json.load(sys.stdin)["id"])')" -$FLOWCTL task create --epic "$RENAME_EPIC" --title "First task" --json >/dev/null -$FLOWCTL task create --epic "$RENAME_EPIC" --title "Second task" --json >/dev/null -# Add task dependency within epic -$FLOWCTL dep add "${RENAME_EPIC}.2" "${RENAME_EPIC}.1" --json >/dev/null - -# Rename epic -rename_result="$($FLOWCTL epic title "$RENAME_EPIC" --title "New Shiny Title" --json)" -NEW_EPIC="$(echo "$rename_result" | "$PYTHON_BIN" -c 'import json,sys; print(json.load(sys.stdin)["new_id"])')" - -# Test 1: Verify old files are gone -if [[ ! -f ".flow/epics/${RENAME_EPIC}.json" ]] && [[ ! -f ".flow/specs/${RENAME_EPIC}.md" ]]; then - echo -e "${GREEN}✓${NC} set-title removes old files" - PASS=$((PASS + 1)) -else - echo -e "${RED}✗${NC} set-title old files still exist" - FAIL=$((FAIL + 1)) -fi - -# Test 2: Verify new files exist -if [[ -f ".flow/epics/${NEW_EPIC}.json" ]] && [[ -f ".flow/specs/${NEW_EPIC}.md" ]]; then - echo -e "${GREEN}✓${NC} set-title creates new files" - PASS=$((PASS + 1)) -else - echo -e "${RED}✗${NC} set-title new files missing" - FAIL=$((FAIL + 1)) -fi - -# Test 3: Verify epic JSON content updated -"$PYTHON_BIN" - "$NEW_EPIC" <<'PY' -import json, sys -from pathlib import Path -new_id = sys.argv[1] -epic_data = json.loads(Path(f".flow/epics/{new_id}.json").read_text()) -assert epic_data["id"] == new_id, f"Epic ID not updated: {epic_data['id']}" -assert epic_data["title"] == "New Shiny Title", f"Title not updated: {epic_data['title']}" -assert new_id in epic_data["spec_path"], f"spec_path not updated: {epic_data['spec_path']}" -PY -echo -e "${GREEN}✓${NC} set-title updates epic JSON" -PASS=$((PASS + 1)) - -# Test 4: Verify task files renamed -if [[ -f ".flow/tasks/${NEW_EPIC}.1.json" ]] && [[ -f ".flow/tasks/${NEW_EPIC}.2.json" ]]; then - echo -e "${GREEN}✓${NC} set-title renames task files" - PASS=$((PASS + 1)) -else - echo -e "${RED}✗${NC} set-title task files not renamed" - FAIL=$((FAIL + 
1)) -fi - -# Test 5: Verify task JSON content updated (including depends_on) -"$PYTHON_BIN" - "$NEW_EPIC" <<'PY' -import json, sys -from pathlib import Path -new_id = sys.argv[1] -task1_data = json.loads(Path(f".flow/tasks/{new_id}.1.json").read_text()) -task2_data = json.loads(Path(f".flow/tasks/{new_id}.2.json").read_text()) -assert task1_data["id"] == f"{new_id}.1", f"Task 1 ID not updated: {task1_data['id']}" -assert task1_data["epic"] == new_id, f"Task 1 epic not updated: {task1_data['epic']}" -assert task2_data["id"] == f"{new_id}.2", f"Task 2 ID not updated: {task2_data['id']}" -# Verify depends_on was updated -deps = task2_data.get("depends_on", []) -assert f"{new_id}.1" in deps, f"depends_on not updated: {deps}" -PY -echo -e "${GREEN}✓${NC} set-title updates task JSON and deps" -PASS=$((PASS + 1)) - -# Test 6: Verify show works with new ID -show_json="$($FLOWCTL show "$NEW_EPIC" --json)" -"$PYTHON_BIN" - "$show_json" "$NEW_EPIC" <<'PY' -import json, sys -data = json.loads(sys.argv[1]) -expected_id = sys.argv[2] -assert data["id"] == expected_id, f"Show returns wrong ID: {data['id']}" -assert data["title"] == "New Shiny Title" -PY -echo -e "${GREEN}✓${NC} set-title show works with new ID" -PASS=$((PASS + 1)) - -# Test 7: depends_on_epics update in other epics -DEP_EPIC_JSON="$($FLOWCTL epic create --title "Depends on renamed" --json)" -DEP_EPIC="$(echo "$DEP_EPIC_JSON" | "$PYTHON_BIN" -c 'import json,sys; print(json.load(sys.stdin)["id"])')" -$FLOWCTL epic add-dep "$DEP_EPIC" "$NEW_EPIC" --json >/dev/null -# Rename the dependency -rename2_result="$($FLOWCTL epic title "$NEW_EPIC" --title "Final Title" --json)" -FINAL_EPIC="$(echo "$rename2_result" | "$PYTHON_BIN" -c 'import json,sys; print(json.load(sys.stdin)["new_id"])')" -# Verify DEP_EPIC's depends_on_epics was updated -"$PYTHON_BIN" - "$DEP_EPIC" "$FINAL_EPIC" <<'PY' -import json, sys -from pathlib import Path -dep_epic = sys.argv[1] -final_epic = sys.argv[2] -dep_data = 
json.loads(Path(f".flow/epics/{dep_epic}.json").read_text()) -deps = dep_data.get("depends_on_epics", []) -assert final_epic in deps, f"depends_on_epics not updated: {deps}, expected {final_epic}" -PY -echo -e "${GREEN}✓${NC} set-title updates depends_on_epics in other epics" -PASS=$((PASS + 1)) - -echo -e "${YELLOW}--- block + validate + epic close ---${NC}" -EPIC2_JSON="$($FLOWCTL epic create --title "Epic Two" --json)" -EPIC2="$(echo "$EPIC2_JSON" | "$PYTHON_BIN" -c 'import json,sys; print(json.load(sys.stdin)["id"])')" -$FLOWCTL task create --epic "$EPIC2" --title "Block me" --json >/dev/null -$FLOWCTL task create --epic "$EPIC2" --title "Other" --json >/dev/null -printf "Blocked by test\n" > "$TEST_DIR/reason.md" -$FLOWCTL block "${EPIC2}.1" --reason-file "$TEST_DIR/reason.md" --json >/dev/null -$FLOWCTL validate --epic "$EPIC2" --json >/dev/null -echo -e "${GREEN}✓${NC} validate allows blocked" -PASS=$((PASS + 1)) - -set +e -$FLOWCTL epic close "$EPIC2" --json >/dev/null -rc=$? -set -e -if [[ "$rc" -ne 0 ]]; then - echo -e "${GREEN}✓${NC} epic close fails when blocked" - PASS=$((PASS + 1)) -else - echo -e "${RED}✗${NC} epic close fails when blocked" - FAIL=$((FAIL + 1)) -fi - -$FLOWCTL start "${EPIC2}.1" --force --json >/dev/null -$FLOWCTL done "${EPIC2}.1" --summary-file "$TEST_DIR/summary.md" --evidence-json "$TEST_DIR/evidence.json" --json >/dev/null -$FLOWCTL start "${EPIC2}.2" --json >/dev/null -$FLOWCTL done "${EPIC2}.2" --summary-file "$TEST_DIR/summary.md" --evidence-json "$TEST_DIR/evidence.json" --json >/dev/null -$FLOWCTL epic close "$EPIC2" --json >/dev/null -echo -e "${GREEN}✓${NC} epic close succeeds when done" -PASS=$((PASS + 1)) - -echo -e "${YELLOW}--- config set/get ---${NC}" -$FLOWCTL config set memory.enabled true --json >/dev/null -config_json="$($FLOWCTL config get memory.enabled --json)" -"$PYTHON_BIN" - <<'PY' "$config_json" -import json, sys -data = json.loads(sys.argv[1]) -assert data["value"] == True, f"Expected True, got 
{data['value']}" -PY -echo -e "${GREEN}✓${NC} config set/get" -PASS=$((PASS + 1)) - -$FLOWCTL config set memory.enabled false --json >/dev/null -config_json="$($FLOWCTL config get memory.enabled --json)" -"$PYTHON_BIN" - <<'PY' "$config_json" -import json, sys -data = json.loads(sys.argv[1]) -assert data["value"] == False, f"Expected False, got {data['value']}" -PY -echo -e "${GREEN}✓${NC} config toggle" -PASS=$((PASS + 1)) - -echo -e "${YELLOW}--- planSync config ---${NC}" -$FLOWCTL config set planSync.enabled true --json >/dev/null -config_json="$($FLOWCTL config get planSync.enabled --json)" -"$PYTHON_BIN" - <<'PY' "$config_json" -import json, sys -data = json.loads(sys.argv[1]) -assert data["value"] is True, f"Expected True, got {data['value']}" -PY -echo -e "${GREEN}✓${NC} planSync config set/get" -PASS=$((PASS + 1)) - -$FLOWCTL config set planSync.enabled false --json >/dev/null -config_json="$($FLOWCTL config get planSync.enabled --json)" -"$PYTHON_BIN" - <<'PY' "$config_json" -import json, sys -data = json.loads(sys.argv[1]) -assert data["value"] is False, f"Expected False, got {data['value']}" -PY -echo -e "${GREEN}✓${NC} planSync config toggle" -PASS=$((PASS + 1)) - -echo -e "${YELLOW}--- gap commands ---${NC}" - -# Use EPIC1 which was created earlier in the test -# Test 1: gap add -gap_add_result="$($FLOWCTL gap add --epic "$EPIC1" --capability "Missing auth check" --priority required --source flow-gap-analyst --json)" -gap_created="$(echo "$gap_add_result" | "$PYTHON_BIN" -c 'import json,sys; print(json.load(sys.stdin).get("created", False))')" -if [[ "$gap_created" == "True" ]]; then - echo -e "${GREEN}✓${NC} gap add creates new gap" - PASS=$((PASS + 1)) -else - echo -e "${RED}✗${NC} gap add failed to create gap" - FAIL=$((FAIL + 1)) -fi - -# Test 2: gap add idempotent -gap_dup_result="$($FLOWCTL gap add --epic "$EPIC1" --capability "Missing auth check" --priority required --json)" -gap_dup_created="$(echo "$gap_dup_result" | "$PYTHON_BIN" -c 'import 
json,sys; print(json.load(sys.stdin).get("created", False))')" -if [[ "$gap_dup_created" == "False" ]]; then - echo -e "${GREEN}✓${NC} gap add idempotent (duplicate returns created=false)" - PASS=$((PASS + 1)) -else - echo -e "${RED}✗${NC} gap add not idempotent" - FAIL=$((FAIL + 1)) -fi - -# Test 3: gap add nice-to-have -$FLOWCTL gap add --epic "$EPIC1" --capability "Optional caching" --priority nice-to-have --json >/dev/null - -# Test 4: gap list -gap_list_count="$($FLOWCTL gap list --epic "$EPIC1" --json | "$PYTHON_BIN" -c 'import json,sys; print(json.load(sys.stdin).get("count", 0))')" -if [[ "$gap_list_count" == "2" ]]; then - echo -e "${GREEN}✓${NC} gap list returns correct count" - PASS=$((PASS + 1)) -else - echo -e "${RED}✗${NC} gap list count wrong (expected 2, got $gap_list_count)" - FAIL=$((FAIL + 1)) -fi - -# Test 5: gap list with status filter -gap_open_count="$($FLOWCTL gap list --epic "$EPIC1" --status open --json | "$PYTHON_BIN" -c 'import json,sys; print(json.load(sys.stdin).get("count", 0))')" -if [[ "$gap_open_count" == "2" ]]; then - echo -e "${GREEN}✓${NC} gap list --status open filter works" - PASS=$((PASS + 1)) -else - echo -e "${RED}✗${NC} gap list --status filter wrong (expected 2, got $gap_open_count)" - FAIL=$((FAIL + 1)) -fi - -# Test 6: gap check fails with open required gap -if ! 
$FLOWCTL gap check --epic "$EPIC1" --json >/dev/null 2>&1; then - echo -e "${GREEN}✓${NC} gap check fails with open blocking gaps (exit 1)" - PASS=$((PASS + 1)) -else - echo -e "${RED}✗${NC} gap check should fail with open blocking gaps" - FAIL=$((FAIL + 1)) -fi - -# Test 7: gap check JSON has gate=fail -gap_check_gate="$($FLOWCTL gap check --epic "$EPIC1" --json 2>/dev/null || true)" -gap_gate_val="$(echo "$gap_check_gate" | "$PYTHON_BIN" -c 'import json,sys; print(json.load(sys.stdin).get("gate", ""))')" -if [[ "$gap_gate_val" == "fail" ]]; then - echo -e "${GREEN}✓${NC} gap check gate=fail in JSON output" - PASS=$((PASS + 1)) -else - echo -e "${RED}✗${NC} gap check gate expected 'fail', got '$gap_gate_val'" - FAIL=$((FAIL + 1)) -fi - -# Test 8: gap resolve -gap_resolve_result="$($FLOWCTL gap resolve --epic "$EPIC1" --capability "Missing auth check" --evidence "Added in auth.py:42" --json)" -gap_changed="$(echo "$gap_resolve_result" | "$PYTHON_BIN" -c 'import json,sys; print(json.load(sys.stdin).get("changed", False))')" -if [[ "$gap_changed" == "True" ]]; then - echo -e "${GREEN}✓${NC} gap resolve marks gap as resolved" - PASS=$((PASS + 1)) -else - echo -e "${RED}✗${NC} gap resolve failed" - FAIL=$((FAIL + 1)) -fi - -# Test 9: gap resolve idempotent -gap_resolve_dup="$($FLOWCTL gap resolve --epic "$EPIC1" --capability "Missing auth check" --evidence "duplicate" --json)" -gap_dup_changed="$(echo "$gap_resolve_dup" | "$PYTHON_BIN" -c 'import json,sys; print(json.load(sys.stdin).get("changed", False))')" -if [[ "$gap_dup_changed" == "False" ]]; then - echo -e "${GREEN}✓${NC} gap resolve idempotent (already resolved)" - PASS=$((PASS + 1)) -else - echo -e "${RED}✗${NC} gap resolve not idempotent" - FAIL=$((FAIL + 1)) -fi - -# Test 10: gap check passes (only nice-to-have left) -if $FLOWCTL gap check --epic "$EPIC1" --json >/dev/null 2>&1; then - echo -e "${GREEN}✓${NC} gap check passes (nice-to-have does not block)" - PASS=$((PASS + 1)) -else - echo -e "${RED}✗${NC} 
gap check should pass with only nice-to-have gaps" - FAIL=$((FAIL + 1)) -fi - -# Test 11: gap check gate=pass in JSON -gap_pass_gate="$($FLOWCTL gap check --epic "$EPIC1" --json)" -gap_pass_val="$(echo "$gap_pass_gate" | "$PYTHON_BIN" -c 'import json,sys; print(json.load(sys.stdin).get("gate", ""))')" -if [[ "$gap_pass_val" == "pass" ]]; then - echo -e "${GREEN}✓${NC} gap check gate=pass in JSON output" - PASS=$((PASS + 1)) -else - echo -e "${RED}✗${NC} gap check gate expected 'pass', got '$gap_pass_val'" - FAIL=$((FAIL + 1)) -fi - -echo -e "${YELLOW}--- memory commands ---${NC}" -$FLOWCTL config set memory.enabled true --json >/dev/null -$FLOWCTL memory init --json >/dev/null -if [[ -d ".flow/memory/entries" ]]; then - echo -e "${GREEN}✓${NC} memory init creates entries dir" - PASS=$((PASS + 1)) -else - echo -e "${RED}✗${NC} memory init creates entries dir" - FAIL=$((FAIL + 1)) -fi - -add_result="$($FLOWCTL memory add pitfall "Test pitfall entry" --json)" -add_ok="$(echo "$add_result" | "$PYTHON_BIN" -c 'import json,sys; d=json.load(sys.stdin); print(d.get("success",False) and d.get("type")=="pitfall")')" -if [[ "$add_ok" == "True" ]]; then - echo -e "${GREEN}✓${NC} memory add pitfall" - PASS=$((PASS + 1)) -else - echo -e "${RED}✗${NC} memory add pitfall" - FAIL=$((FAIL + 1)) -fi - -$FLOWCTL memory add convention "Test convention" --json >/dev/null -$FLOWCTL memory add decision "Test decision" --json >/dev/null -list_json="$($FLOWCTL memory list --json)" -"$PYTHON_BIN" - <<'PY' "$list_json" -import json, sys -data = json.loads(sys.argv[1]) -assert data["success"] == True -counts = data["counts"] -assert counts.get("pitfall", 0) >= 1 -assert counts.get("convention", 0) >= 1 -assert counts.get("decision", 0) >= 1 -assert data["total"] >= 3 -PY -echo -e "${GREEN}✓${NC} memory list" -PASS=$((PASS + 1)) - -echo -e "${YELLOW}--- schema v1 validate ---${NC}" -"$PYTHON_BIN" - <<'PY' -import json -from pathlib import Path -path = Path(".flow/meta.json") -data = 
json.loads(path.read_text()) -data["schema_version"] = 1 -path.write_text(json.dumps(data, indent=2, sort_keys=True) + "\n") -PY -$FLOWCTL validate --all --json >/dev/null -echo -e "${GREEN}✓${NC} schema v1 validate" -PASS=$((PASS + 1)) - -echo -e "${YELLOW}--- codex commands ---${NC}" -# Test codex check (may or may not have codex installed) -codex_check_json="$($FLOWCTL codex check --json 2>/dev/null || echo '{"success":true}')" -"$PYTHON_BIN" - <<'PY' "$codex_check_json" -import json, sys -data = json.loads(sys.argv[1]) -assert data["success"] == True, f"codex check failed: {data}" -# available can be true or false depending on codex install -PY -echo -e "${GREEN}✓${NC} codex check" -PASS=$((PASS + 1)) - -# Test codex impl-review help (no codex required for argparse check) -set +e -$FLOWCTL codex impl-review --help >/dev/null 2>&1 -rc=$? -set -e -if [[ "$rc" -eq 0 ]]; then - echo -e "${GREEN}✓${NC} codex impl-review --help" - PASS=$((PASS + 1)) -else - echo -e "${RED}✗${NC} codex impl-review --help" - FAIL=$((FAIL + 1)) -fi - -# Test codex plan-review help -set +e -$FLOWCTL codex plan-review --help >/dev/null 2>&1 -rc=$? 
-set -e -if [[ "$rc" -eq 0 ]]; then - echo -e "${GREEN}✓${NC} codex plan-review --help" - PASS=$((PASS + 1)) -else - echo -e "${RED}✗${NC} codex plan-review --help" - FAIL=$((FAIL + 1)) -fi - -echo -e "${YELLOW}--- context hints ---${NC}" -# Create files in same commit, then modify one to test context hints -mkdir -p "$TEST_DIR/repo/src" -# First commit: both auth.py and handler.py together -cat > "$TEST_DIR/repo/src/auth.py" << 'EOF' -def validate_token(token: str) -> bool: - """Validate JWT token.""" - return len(token) > 10 - -class User: - def __init__(self, name: str): - self.name = name -EOF -cat > "$TEST_DIR/repo/src/handler.py" << 'EOF' -from auth import validate_token, User - -def handle_request(token: str): - if validate_token(token): - return User("test") - return None -EOF -git -C "$TEST_DIR/repo" add src/ -git -C "$TEST_DIR/repo" commit -m "Add auth and handler" >/dev/null - -# Second commit: only modify auth.py (handler.py stays unchanged) -cat > "$TEST_DIR/repo/src/auth.py" << 'EOF' -def validate_token(token: str) -> bool: - """Validate JWT token with expiry check.""" - if len(token) < 10: - return False - return True - -class User: - def __init__(self, name: str, email: str = ""): - self.name = name - self.email = email -EOF -git -C "$TEST_DIR/repo" add src/auth.py -git -C "$TEST_DIR/repo" commit -m "Update auth with expiry" >/dev/null - -# Test context hints: should find handler.py referencing validate_token/User -cd "$TEST_DIR/repo" -hints_output="$(PYTHONPATH="$SCRIPT_DIR" "$PYTHON_BIN" -c " -from flowctl import gather_context_hints -hints = gather_context_hints('HEAD~1') -print(hints) -" 2>&1)" - -# Verify hints mention handler.py referencing validate_token or User -if echo "$hints_output" | grep -q "handler.py"; then - echo -e "${GREEN}✓${NC} context hints finds references" - PASS=$((PASS + 1)) -else - echo -e "${RED}✗${NC} context hints finds references (got: $hints_output)" - FAIL=$((FAIL + 1)) -fi - -echo -e "${YELLOW}--- build_review_prompt 
---${NC}" -# Go back to plugin root for Python tests -cd "$TEST_DIR/repo" -# Test that build_review_prompt generates proper structure -"$PYTHON_BIN" - "$SCRIPT_DIR" <<'PY' -import sys -sys.path.insert(0, sys.argv[1]) -from flowctl import build_review_prompt - -# Test impl prompt has all 7 criteria -impl_prompt = build_review_prompt("impl", "Test spec", "Test hints", "Test diff") -assert "" in impl_prompt -assert "Correctness" in impl_prompt -assert "Simplicity" in impl_prompt -assert "DRY" in impl_prompt -assert "Architecture" in impl_prompt -assert "Edge Cases" in impl_prompt -assert "Tests" in impl_prompt -assert "Security" in impl_prompt -assert "SHIP" in impl_prompt -assert "File:Line" in impl_prompt # Structured output format - -# Test plan prompt has all 7 criteria -plan_prompt = build_review_prompt("plan", "Test spec", "Test hints") -assert "Completeness" in plan_prompt -assert "Feasibility" in plan_prompt -assert "Clarity" in plan_prompt -assert "Architecture" in plan_prompt -assert "Risks" in plan_prompt -assert "Scope" in plan_prompt -assert "Testability" in plan_prompt -assert "SHIP" in plan_prompt - -# Test context hints and diff are included -assert "" in impl_prompt -assert "Test hints" in impl_prompt -assert "" in impl_prompt -assert "Test diff" in impl_prompt -assert "" in impl_prompt -assert "Test spec" in impl_prompt -PY -echo -e "${GREEN}✓${NC} build_review_prompt has full criteria" -PASS=$((PASS + 1)) - -echo -e "${YELLOW}--- parse_receipt_path ---${NC}" -# Test receipt path parsing for Ralph gating (both legacy and new fn-N-xxx formats) -"$PYTHON_BIN" - "$SCRIPT_DIR/hooks" <<'PY' -import sys -hooks_dir = sys.argv[1] -sys.path.insert(0, hooks_dir) -from importlib.util import spec_from_file_location, module_from_spec -spec = spec_from_file_location("ralph_guard", f"{hooks_dir}/ralph-guard.py") -guard = module_from_spec(spec) -spec.loader.exec_module(guard) - -# Test plan receipt parsing (legacy format) -rtype, rid = 
guard.parse_receipt_path("/tmp/receipts/plan-fn-1.json") -assert rtype == "plan_review", f"Expected plan_review, got {rtype}" -assert rid == "fn-1", f"Expected fn-1, got {rid}" - -# Test impl receipt parsing (legacy format) -rtype, rid = guard.parse_receipt_path("/tmp/receipts/impl-fn-1.3.json") -assert rtype == "impl_review", f"Expected impl_review, got {rtype}" -assert rid == "fn-1.3", f"Expected fn-1.3, got {rid}" - -# Test plan receipt parsing (new fn-N-xxx format) -rtype, rid = guard.parse_receipt_path("/tmp/receipts/plan-fn-5-x7k.json") -assert rtype == "plan_review", f"Expected plan_review, got {rtype}" -assert rid == "fn-5-x7k", f"Expected fn-5-x7k, got {rid}" - -# Test impl receipt parsing (new fn-N-xxx format) -rtype, rid = guard.parse_receipt_path("/tmp/receipts/impl-fn-5-x7k.3.json") -assert rtype == "impl_review", f"Expected impl_review, got {rtype}" -assert rid == "fn-5-x7k.3", f"Expected fn-5-x7k.3, got {rid}" - -# Test completion receipt parsing (legacy format) -rtype, rid = guard.parse_receipt_path("/tmp/receipts/completion-fn-2.json") -assert rtype == "completion_review", f"Expected completion_review, got {rtype}" -assert rid == "fn-2", f"Expected fn-2, got {rid}" - -# Test completion receipt parsing (new fn-N-xxx format) -rtype, rid = guard.parse_receipt_path("/tmp/receipts/completion-fn-7-abc.json") -assert rtype == "completion_review", f"Expected completion_review, got {rtype}" -assert rid == "fn-7-abc", f"Expected fn-7-abc, got {rid}" - -# Test fallback -rtype, rid = guard.parse_receipt_path("/tmp/unknown.json") -assert rtype == "impl_review" -assert rid == "UNKNOWN" -PY -echo -e "${GREEN}✓${NC} parse_receipt_path works" -PASS=$((PASS + 1)) - -echo -e "${YELLOW}--- codex e2e (requires codex CLI) ---${NC}" -# Check if codex is available (handles its own auth) -codex_available="$($FLOWCTL codex check --json 2>/dev/null | "$PYTHON_BIN" -c "import sys,json; print(json.load(sys.stdin).get('available', False))" 2>/dev/null || echo "False")" -if [[ 
"$codex_available" == "True" ]]; then - # Create a simple epic + task for testing - EPIC3_JSON="$($FLOWCTL epic create --title "Codex test epic" --json)" - EPIC3="$(echo "$EPIC3_JSON" | "$PYTHON_BIN" -c 'import json,sys; print(json.load(sys.stdin)["id"])')" - $FLOWCTL task create --epic "$EPIC3" --title "Test task" --json >/dev/null - - # Write a simple spec - cat > ".flow/specs/${EPIC3}.md" << 'EOF' -# Codex Test Epic - -Simple test epic for smoke testing codex reviews. - -## Scope -- Test that codex can review a plan -- Test that codex can review an implementation -EOF - - cat > ".flow/tasks/${EPIC3}.1.md" << 'EOF' -# Test Task - -Add a simple hello world function. - -## Acceptance -- Function returns "hello world" -EOF - - # Test plan-review e2e - # Create a simple code file inside the repo for the plan to reference - mkdir -p src - echo 'def hello(): return "hello world"' > src/hello.py - set +e - plan_result="$($FLOWCTL codex plan-review "$EPIC3" --files "src/hello.py" --base main --receipt "$TEST_DIR/plan-receipt.json" --json 2>&1)" - plan_rc=$? 
- set -e - - if [[ "$plan_rc" -eq 0 ]]; then - # Verify receipt was written with correct schema - if [[ -f "$TEST_DIR/plan-receipt.json" ]]; then - "$PYTHON_BIN" - "$TEST_DIR/plan-receipt.json" "$EPIC3" <<'PY' -import sys, json -from pathlib import Path -data = json.loads(Path(sys.argv[1]).read_text()) -expected_id = sys.argv[2] -assert data.get("type") == "plan_review", f"Expected type=plan_review, got {data.get('type')}" -assert data.get("id") == expected_id, f"Expected id={expected_id}, got {data.get('id')}" -assert data.get("mode") == "codex", f"Expected mode=codex, got {data.get('mode')}" -assert "verdict" in data, "Missing verdict in receipt" -assert "session_id" in data, "Missing session_id in receipt" -PY - echo -e "${GREEN}✓${NC} codex plan-review e2e" - PASS=$((PASS + 1)) - else - echo -e "${RED}✗${NC} codex plan-review e2e (no receipt)" - FAIL=$((FAIL + 1)) - fi - else - echo -e "${RED}✗${NC} codex plan-review e2e (exit $plan_rc)" - FAIL=$((FAIL + 1)) - fi - - # Test impl-review e2e (create a simple change first) - cat > "$TEST_DIR/repo/src/hello.py" << 'EOF' -def hello(): - return "hello world" -EOF - git -C "$TEST_DIR/repo" add src/hello.py - git -C "$TEST_DIR/repo" commit -m "Add hello function" >/dev/null - - set +e - impl_result="$($FLOWCTL codex impl-review "${EPIC3}.1" --base HEAD~1 --receipt "$TEST_DIR/impl-receipt.json" --json 2>&1)" - impl_rc=$? 
- set -e - - if [[ "$impl_rc" -eq 0 ]]; then - # Verify receipt was written with correct schema - if [[ -f "$TEST_DIR/impl-receipt.json" ]]; then - "$PYTHON_BIN" - "$TEST_DIR/impl-receipt.json" "$EPIC3" <<'PY' -import sys, json -from pathlib import Path -data = json.loads(Path(sys.argv[1]).read_text()) -expected_id = f"{sys.argv[2]}.1" -assert data.get("type") == "impl_review", f"Expected type=impl_review, got {data.get('type')}" -assert data.get("id") == expected_id, f"Expected id={expected_id}, got {data.get('id')}" -assert data.get("mode") == "codex", f"Expected mode=codex, got {data.get('mode')}" -assert "verdict" in data, "Missing verdict in receipt" -assert "session_id" in data, "Missing session_id in receipt" -PY - echo -e "${GREEN}✓${NC} codex impl-review e2e" - PASS=$((PASS + 1)) - else - echo -e "${RED}✗${NC} codex impl-review e2e (no receipt)" - FAIL=$((FAIL + 1)) - fi - else - echo -e "${RED}✗${NC} codex impl-review e2e (exit $impl_rc)" - FAIL=$((FAIL + 1)) - fi -else - echo -e "${YELLOW}⊘${NC} codex e2e skipped (codex not available)" -fi - -echo -e "${YELLOW}--- depends_on_epics gate ---${NC}" -cd "$TEST_DIR/repo" # Back to test repo -# Create epics and capture their IDs -DEP_BASE_JSON="$($FLOWCTL epic create --title "Dep base" --json)" -DEP_BASE_ID="$(echo "$DEP_BASE_JSON" | "$PYTHON_BIN" -c 'import json,sys; print(json.load(sys.stdin)["id"])')" -$FLOWCTL task create --epic "$DEP_BASE_ID" --title "Base task" --json >/dev/null -DEP_CHILD_JSON="$($FLOWCTL epic create --title "Dep child" --json)" -DEP_CHILD_ID="$(echo "$DEP_CHILD_JSON" | "$PYTHON_BIN" -c 'import json,sys; print(json.load(sys.stdin)["id"])')" -"$PYTHON_BIN" - "$DEP_CHILD_ID" "$DEP_BASE_ID" <<'PY' -import json, sys -from pathlib import Path -child_id, base_id = sys.argv[1], sys.argv[2] -path = Path(f".flow/epics/{child_id}.json") -data = json.loads(path.read_text()) -data["depends_on_epics"] = [base_id] -path.write_text(json.dumps(data, indent=2, sort_keys=True) + "\n") -PY -printf 
'{"epics":["%s"]}\n' "$DEP_CHILD_ID" > "$TEST_DIR/epics.json" -blocked_json="$($FLOWCTL next --epics-file "$TEST_DIR/epics.json" --json)" -"$PYTHON_BIN" - "$DEP_CHILD_ID" "$blocked_json" <<'PY' -import json, sys -child_id = sys.argv[1] -data = json.loads(sys.argv[2]) -assert data["status"] == "none" -assert data["reason"] == "blocked_by_epic_deps" -assert child_id in data.get("blocked_epics", {}) -PY -echo -e "${GREEN}✓${NC} depends_on_epics blocks" -PASS=$((PASS + 1)) - -echo -e "${YELLOW}--- stdin support ---${NC}" -cd "$TEST_DIR/repo" -STDIN_EPIC_JSON="$($FLOWCTL epic create --title "Stdin test" --json)" -STDIN_EPIC="$(echo "$STDIN_EPIC_JSON" | "$PYTHON_BIN" -c 'import json,sys; print(json.load(sys.stdin)["id"])')" -# Test epic set-plan with stdin -$FLOWCTL epic plan "$STDIN_EPIC" --file - --json <<'EOF' -# Stdin Test Plan - -## Overview -Testing stdin support for set-plan. - -## Acceptance -- Works via stdin -EOF -# Verify content was written -spec_content="$($FLOWCTL cat "$STDIN_EPIC")" -echo "$spec_content" | grep -q "Testing stdin support" || { echo "stdin set-plan failed"; FAIL=$((FAIL + 1)); } -echo -e "${GREEN}✓${NC} stdin epic set-plan" -PASS=$((PASS + 1)) - -echo -e "${YELLOW}--- task set-spec combined ---${NC}" -$FLOWCTL task create --epic "$STDIN_EPIC" --title "Set-spec test" --json >/dev/null -SETSPEC_TASK="${STDIN_EPIC}.1" -# Write temp files for combined set-spec -echo 'This is the description.' 
> "$TEST_DIR/desc.md" -echo '- [ ] Check 1 -- [ ] Check 2' > "$TEST_DIR/acc.md" -$FLOWCTL task set-spec "$SETSPEC_TASK" --description "$TEST_DIR/desc.md" --acceptance "$TEST_DIR/acc.md" --json >/dev/null -# Verify both sections were written -task_spec="$($FLOWCTL cat "$SETSPEC_TASK")" -echo "$task_spec" | grep -q "This is the description" || { echo "set-spec description failed"; FAIL=$((FAIL + 1)); } -echo "$task_spec" | grep -q "Check 1" || { echo "set-spec acceptance failed"; FAIL=$((FAIL + 1)); } -echo -e "${GREEN}✓${NC} task set-spec combined" -PASS=$((PASS + 1)) - -echo -e "${YELLOW}--- task set-spec --file (full replacement) ---${NC}" -$FLOWCTL task create --epic "$STDIN_EPIC" --title "Full replacement test" --json >/dev/null -FULLREPLACE_TASK="${STDIN_EPIC}.2" -# Write complete spec file -cat > "$TEST_DIR/full_spec.md" << 'FULLSPEC' -# Task: Full replacement test - -## Description - -This is a completely new spec that replaces everything. - -## Acceptance - -- [ ] Verify full replacement works -- [ ] Original content is gone -FULLSPEC -$FLOWCTL task set-spec "$FULLREPLACE_TASK" --file "$TEST_DIR/full_spec.md" --json >/dev/null -# Verify full replacement -full_spec="$($FLOWCTL cat "$FULLREPLACE_TASK")" -echo "$full_spec" | grep -q "completely new spec that replaces everything" || { echo "set-spec --file content failed"; FAIL=$((FAIL + 1)); } -echo "$full_spec" | grep -q "Verify full replacement works" || { echo "set-spec --file acceptance failed"; FAIL=$((FAIL + 1)); } -echo -e "${GREEN}✓${NC} task set-spec --file" -PASS=$((PASS + 1)) - -echo -e "${YELLOW}--- task set-spec --file stdin ---${NC}" -$FLOWCTL task create --epic "$STDIN_EPIC" --title "Stdin replacement test" --json >/dev/null -STDIN_REPLACE_TASK="${STDIN_EPIC}.3" -# Full replacement via stdin -$FLOWCTL task set-spec "$STDIN_REPLACE_TASK" --file - --json <<'EOF' -# Task: Stdin replacement test - -## Description - -This spec was written via stdin. 
- -## Acceptance - -- [ ] Stdin replacement works -EOF -# Verify stdin replacement -stdin_spec="$($FLOWCTL cat "$STDIN_REPLACE_TASK")" -echo "$stdin_spec" | grep -q "spec was written via stdin" || { echo "set-spec --file stdin failed"; FAIL=$((FAIL + 1)); } -echo -e "${GREEN}✓${NC} task set-spec --file stdin" -PASS=$((PASS + 1)) - -echo -e "${YELLOW}--- checkpoint save/restore ---${NC}" -# Save checkpoint -$FLOWCTL checkpoint save --epic "$STDIN_EPIC" --json >/dev/null -# Verify checkpoint file exists -[[ -f ".flow/.checkpoint-${STDIN_EPIC}.json" ]] || { echo "checkpoint file not created"; FAIL=$((FAIL + 1)); } -# Modify epic spec -$FLOWCTL epic plan "$STDIN_EPIC" --file - --json <<'EOF' -# Modified content -EOF -# Restore from checkpoint -$FLOWCTL checkpoint restore --epic "$STDIN_EPIC" --json >/dev/null -# Verify original content restored -restored_spec="$($FLOWCTL cat "$STDIN_EPIC")" -echo "$restored_spec" | grep -q "Testing stdin support" || { echo "checkpoint restore failed"; FAIL=$((FAIL + 1)); } -# Delete checkpoint -$FLOWCTL checkpoint delete --epic "$STDIN_EPIC" --json >/dev/null -[[ ! 
-f ".flow/.checkpoint-${STDIN_EPIC}.json" ]] || { echo "checkpoint delete failed"; FAIL=$((FAIL + 1)); } -echo -e "${GREEN}✓${NC} checkpoint save/restore/delete" -PASS=$((PASS + 1)) - -echo -e "${YELLOW}--- sync command files ---${NC}" -# Test 1: Command stub exists -if [[ -f "$PLUGIN_ROOT/commands/flow-code/sync.md" ]]; then - echo -e "${GREEN}✓${NC} sync command stub exists" - PASS=$((PASS + 1)) -else - echo -e "${RED}✗${NC} sync command stub missing" - FAIL=$((FAIL + 1)) -fi - -# Test 2: Skill file exists -if [[ -f "$PLUGIN_ROOT/skills/flow-code-sync/SKILL.md" ]]; then - echo -e "${GREEN}✓${NC} sync skill exists" - PASS=$((PASS + 1)) -else - echo -e "${RED}✗${NC} sync skill missing" - FAIL=$((FAIL + 1)) -fi - -# Test 3: Command invokes skill -if grep -q "flow-code-sync" "$PLUGIN_ROOT/commands/flow-code/sync.md"; then - echo -e "${GREEN}✓${NC} sync command invokes skill" - PASS=$((PASS + 1)) -else - echo -e "${RED}✗${NC} sync command doesn't reference skill" - FAIL=$((FAIL + 1)) -fi - -# Test 4: Skill has correct frontmatter -if grep -q "name: flow-code-sync" "$PLUGIN_ROOT/skills/flow-code-sync/SKILL.md"; then - echo -e "${GREEN}✓${NC} sync skill has correct name" - PASS=$((PASS + 1)) -else - echo -e "${RED}✗${NC} sync skill missing name frontmatter" - FAIL=$((FAIL + 1)) -fi - -# Test 5: Skill mentions plan-sync agent -if grep -q "plan-sync" "$PLUGIN_ROOT/skills/flow-code-sync/SKILL.md"; then - echo -e "${GREEN}✓${NC} sync skill references plan-sync agent" - PASS=$((PASS + 1)) -else - echo -e "${RED}✗${NC} sync skill doesn't reference plan-sync agent" - FAIL=$((FAIL + 1)) -fi - -# Test 6: Skill supports dry-run -if grep -qi "dry.run\|dry-run\|DRY_RUN" "$PLUGIN_ROOT/skills/flow-code-sync/SKILL.md"; then - echo -e "${GREEN}✓${NC} sync skill supports dry-run" - PASS=$((PASS + 1)) -else - echo -e "${RED}✗${NC} sync skill missing dry-run support" - FAIL=$((FAIL + 1)) -fi - -echo -e "\n${YELLOW}--- task duration tracking ---${NC}" - -# Setup: create epic + task, start 
and complete with a small delay -DUR_EPIC_JSON="$($FLOWCTL epic create --title "Duration test" --json)" -DUR_EPIC="$("$PYTHON_BIN" -c "import json,sys; print(json.loads(sys.argv[1])['id'])" "$DUR_EPIC_JSON")" -$FLOWCTL task create --epic "$DUR_EPIC" --title "Timed task" --json > /dev/null -$FLOWCTL start "${DUR_EPIC}.1" --json > /dev/null -sleep 1 -result="$($FLOWCTL done "${DUR_EPIC}.1" --summary "done" --evidence '{"commits":[],"tests":[],"prs":[]}' --json)" - -# Test 1: duration_seconds present in JSON output -"$PYTHON_BIN" - "$result" <<'PY' -import json, sys -data = json.loads(sys.argv[1]) -assert "duration_seconds" in data, f"missing duration_seconds: {data}" -assert data["duration_seconds"] >= 1, f"expected >= 1s, got {data['duration_seconds']}" -PY -if [ $? -eq 0 ]; then - echo -e "${GREEN}✓${NC} duration_seconds in done output (>= 1s)" - PASS=$((PASS + 1)) -else - echo -e "${RED}✗${NC} duration_seconds missing or too small" - FAIL=$((FAIL + 1)) -fi - -# Test 2: duration rendered in spec markdown -SPEC="$($FLOWCTL cat "${DUR_EPIC}.1")" -if echo "$SPEC" | grep -q "Duration:"; then - echo -e "${GREEN}✓${NC} duration rendered in spec evidence" - PASS=$((PASS + 1)) -else - echo -e "${RED}✗${NC} duration not in spec" - FAIL=$((FAIL + 1)) -fi - -echo -e "\n${YELLOW}--- workspace_changes evidence ---${NC}" - -# Setup: create epic + task, start it -WS_EPIC_JSON="$($FLOWCTL epic create --title "Workspace test" --json)" -WS_EPIC="$("$PYTHON_BIN" -c "import json,sys; print(json.loads(sys.argv[1])['id'])" "$WS_EPIC_JSON")" -$FLOWCTL task create --epic "$WS_EPIC" --title "WS task" --json > /dev/null -$FLOWCTL start "${WS_EPIC}.1" --json > /dev/null - -# Test 1: valid workspace_changes renders in spec -WS_EVIDENCE='{"commits":["abc"],"tests":["pytest"],"prs":[],"workspace_changes":{"baseline_rev":"aaa111bbb","final_rev":"ccc222ddd","files_changed":5,"insertions":120,"deletions":30}}' -result="$($FLOWCTL done "${WS_EPIC}.1" --summary "done" --evidence "$WS_EVIDENCE" 
--json)" -"$PYTHON_BIN" - "$result" <<'PY' -import json, sys -data = json.loads(sys.argv[1]) -assert data.get("status") == "done" -assert "warning" not in data, f"unexpected warning: {data}" -PY -if [ $? -eq 0 ]; then - echo -e "${GREEN}✓${NC} valid workspace_changes accepted without warning" - PASS=$((PASS + 1)) -else - echo -e "${RED}✗${NC} valid workspace_changes should not warn" - FAIL=$((FAIL + 1)) -fi - -# Check spec has workspace line -WS_SPEC="$($FLOWCTL cat "${WS_EPIC}.1")" -if echo "$WS_SPEC" | grep -q "5 files changed"; then - echo -e "${GREEN}✓${NC} workspace_changes rendered in spec markdown" - PASS=$((PASS + 1)) -else - echo -e "${RED}✗${NC} workspace_changes not in spec" - FAIL=$((FAIL + 1)) -fi - -# Test 2: malformed workspace_changes triggers warning -$FLOWCTL task reset "${WS_EPIC}.1" --json > /dev/null -$FLOWCTL start "${WS_EPIC}.1" --force --json > /dev/null -BAD_EVIDENCE='{"commits":[],"tests":[],"prs":[],"workspace_changes":{"baseline_rev":"aaa"}}' -result="$($FLOWCTL done "${WS_EPIC}.1" --summary "done" --evidence "$BAD_EVIDENCE" --json)" -"$PYTHON_BIN" - "$result" <<'PY' -import json, sys -data = json.loads(sys.argv[1]) -assert data.get("status") == "done" -assert "warning" in data, f"expected warning for missing keys: {data}" -assert "missing keys" in data["warning"] -PY -if [ $? 
-eq 0 ]; then - echo -e "${GREEN}✓${NC} malformed workspace_changes warns but completes" - PASS=$((PASS + 1)) -else - echo -e "${RED}✗${NC} malformed workspace_changes handling failed" - FAIL=$((FAIL + 1)) -fi - -echo -e "\n${YELLOW}--- files ownership map ---${NC}" - -# Setup: epic + tasks with --files -FO_EPIC_JSON="$($FLOWCTL epic create --title "Files test" --json)" -FO_EPIC="$("$PYTHON_BIN" -c "import json,sys; print(json.loads(sys.argv[1])['id'])" "$FO_EPIC_JSON")" -$FLOWCTL task create --epic "$FO_EPIC" --title "T1" --files "src/auth.ts,src/middleware.ts" --json > /dev/null -$FLOWCTL task create --epic "$FO_EPIC" --title "T2" --files "src/routes.ts" --json > /dev/null -$FLOWCTL task create --epic "$FO_EPIC" --title "T3" --files "src/auth.ts" --json > /dev/null - -# Test 1: files stored in task JSON -result="$($FLOWCTL show "${FO_EPIC}.1" --json)" -"$PYTHON_BIN" - "$result" <<'PY' -import json, sys -d = json.loads(sys.argv[1]) -assert d.get("files") == ["src/auth.ts", "src/middleware.ts"], f"unexpected files: {d.get('files')}" -PY -if [ $? -eq 0 ]; then - echo -e "${GREEN}✓${NC} --files stored in task JSON" - PASS=$((PASS + 1)) -else - echo -e "${RED}✗${NC} --files not stored" - FAIL=$((FAIL + 1)) -fi - -# Test 2: files command detects ownership + conflicts -result="$($FLOWCTL files --epic "$FO_EPIC" --json)" -"$PYTHON_BIN" - "$result" <<'PY' -import json, sys -d = json.loads(sys.argv[1]) -assert d["file_count"] == 3, f"expected 3 files, got {d['file_count']}" -assert d["conflict_count"] == 1, f"expected 1 conflict, got {d['conflict_count']}" -assert "src/auth.ts" in d["conflicts"], f"src/auth.ts should conflict: {d['conflicts']}" -PY -if [ $? 
-eq 0 ]; then - echo -e "${GREEN}✓${NC} files command detects ownership + conflicts" - PASS=$((PASS + 1)) -else - echo -e "${RED}✗${NC} files command failed" - FAIL=$((FAIL + 1)) -fi - -echo -e "\n${YELLOW}--- memory verify + staleness ---${NC}" - -# Setup: enable memory + add entry -$FLOWCTL config set memory.enabled true --json > /dev/null -$FLOWCTL memory init --json > /dev/null -$FLOWCTL memory add pitfall "Test pitfall for verify" --json > /dev/null - -# Test 1: memory verify updates last_verified -result="$($FLOWCTL memory verify 1 --json)" -"$PYTHON_BIN" - "$result" <<'PY' -import json, sys -data = json.loads(sys.argv[1]) -assert data.get("id") == 1 -assert "last_verified" in data -PY -if [ $? -eq 0 ]; then - echo -e "${GREEN}✓${NC} memory verify updates last_verified" - PASS=$((PASS + 1)) -else - echo -e "${RED}✗${NC} memory verify failed" - FAIL=$((FAIL + 1)) -fi - -# Test 2: memory list includes last_verified and stale flag in JSON -result="$($FLOWCTL memory list --json)" -"$PYTHON_BIN" - "$result" <<'PY' -import json, sys -data = json.loads(sys.argv[1]) -entry = data["index"][0] -assert "last_verified" in entry, f"missing last_verified: {entry}" -assert "stale" in entry, f"missing stale flag: {entry}" -assert entry["stale"] == False, f"newly verified should not be stale: {entry}" -PY -if [ $? 
-eq 0 ]; then - echo -e "${GREEN}✓${NC} memory list shows last_verified + stale flag" - PASS=$((PASS + 1)) -else - echo -e "${RED}✗${NC} memory list missing staleness fields" - FAIL=$((FAIL + 1)) -fi - -# Test 3: epic close includes retro_suggested -EPC_EPIC_JSON="$($FLOWCTL epic create --title "Retro prompt test" --json)" -EPC_EPIC="$("$PYTHON_BIN" -c "import json,sys; print(json.loads(sys.argv[1])['id'])" "$EPC_EPIC_JSON")" -$FLOWCTL task create --epic "$EPC_EPIC" --title "Done task" --json > /dev/null -$FLOWCTL start "${EPC_EPIC}.1" --json > /dev/null -$FLOWCTL done "${EPC_EPIC}.1" --summary "ok" --evidence '{"commits":[],"tests":[],"prs":[]}' --json > /dev/null -result="$($FLOWCTL epic close "$EPC_EPIC" --json)" -"$PYTHON_BIN" - "$result" <<'PY' -import json, sys -data = json.loads(sys.argv[1]) -assert data.get("retro_suggested") == True, f"missing retro_suggested: {data}" -PY -if [ $? -eq 0 ]; then - echo -e "${GREEN}✓${NC} epic close suggests retro" - PASS=$((PASS + 1)) -else - echo -e "${RED}✗${NC} epic close missing retro suggestion" - FAIL=$((FAIL + 1)) -fi - -echo -e "\n${YELLOW}--- restart command ---${NC}" - -# Setup: create epic + 3 tasks with deps: .1 -> .2 -> .3 -RST_EPIC_JSON="$($FLOWCTL epic create --title "Restart test" --json)" -RST_EPIC="$("$PYTHON_BIN" -c "import json,sys; print(json.loads(sys.argv[1])['id'])" "$RST_EPIC_JSON")" -$FLOWCTL task create --epic "$RST_EPIC" --title "Task 1" --json > /dev/null -$FLOWCTL task create --epic "$RST_EPIC" --title "Task 2" --deps "${RST_EPIC}.1" --json > /dev/null -$FLOWCTL task create --epic "$RST_EPIC" --title "Task 3" --deps "${RST_EPIC}.2" --json > /dev/null - -# Complete tasks 1, 2, 3 -$FLOWCTL start "${RST_EPIC}.1" --json > /dev/null -$FLOWCTL done "${RST_EPIC}.1" --summary "done" --evidence '{"commits":[],"tests":[],"prs":[]}' --json > /dev/null -$FLOWCTL start "${RST_EPIC}.2" --json > /dev/null -$FLOWCTL done "${RST_EPIC}.2" --summary "done" --evidence '{"commits":[],"tests":[],"prs":[]}' --json > 
/dev/null -$FLOWCTL start "${RST_EPIC}.3" --json > /dev/null -$FLOWCTL done "${RST_EPIC}.3" --summary "done" --evidence '{"commits":[],"tests":[],"prs":[]}' --json > /dev/null - -# Test 1: restart --dry-run shows what would be reset -result="$($FLOWCTL restart "${RST_EPIC}.1" --dry-run --json)" -"$PYTHON_BIN" - "$result" "$RST_EPIC" <<'PY' -import json, sys -data = json.loads(sys.argv[1]) -ep = sys.argv[2] -assert data.get("dry_run") == True, f"expected dry_run=True, got {data}" -assert f"{ep}.1" in data.get("would_reset", []), f"{ep}.1 not in would_reset: {data}" -assert f"{ep}.2" in data.get("would_reset", []), f"{ep}.2 not in would_reset: {data}" -assert f"{ep}.3" in data.get("would_reset", []), f"{ep}.3 not in would_reset: {data}" -PY -if [ $? -eq 0 ]; then - echo -e "${GREEN}✓${NC} restart --dry-run shows target + downstream" - PASS=$((PASS + 1)) -else - echo -e "${RED}✗${NC} restart --dry-run failed" - FAIL=$((FAIL + 1)) -fi - -# Test 2: restart actually resets target + downstream -result="$($FLOWCTL restart "${RST_EPIC}.1" --json)" -"$PYTHON_BIN" - "$result" "$RST_EPIC" <<'PY' -import json, sys -data = json.loads(sys.argv[1]) -ep = sys.argv[2] -assert data.get("success") == True, f"expected success, got {data}" -assert f"{ep}.1" in data.get("reset", []), f"{ep}.1 not in reset: {data}" -assert f"{ep}.2" in data.get("reset", []), f"{ep}.2 not in reset: {data}" -assert f"{ep}.3" in data.get("reset", []), f"{ep}.3 not in reset: {data}" -PY -if [ $? -eq 0 ]; then - echo -e "${GREEN}✓${NC} restart cascades to downstream dependents" - PASS=$((PASS + 1)) -else - echo -e "${RED}✗${NC} restart cascade failed" - FAIL=$((FAIL + 1)) -fi - -# Test 3: verify tasks are back to todo -result="$($FLOWCTL show "${RST_EPIC}.1" --json)" -"$PYTHON_BIN" - "$result" <<'PY' -import json, sys -data = json.loads(sys.argv[1]) -assert data.get("status") == "todo", f"expected todo, got {data.get('status')}" -PY -if [ $? 
-eq 0 ]; then - echo -e "${GREEN}✓${NC} restarted task status is todo" - PASS=$((PASS + 1)) -else - echo -e "${RED}✗${NC} restarted task not todo" - FAIL=$((FAIL + 1)) -fi - -# Test 4: restart already-todo is no-op -result="$($FLOWCTL restart "${RST_EPIC}.1" --json)" -"$PYTHON_BIN" - "$result" <<'PY' -import json, sys -data = json.loads(sys.argv[1]) -assert data.get("success") == True -assert len(data.get("reset", [])) == 0, f"expected empty reset, got {data}" -PY -if [ $? -eq 0 ]; then - echo -e "${GREEN}✓${NC} restart already-todo is idempotent no-op" - PASS=$((PASS + 1)) -else - echo -e "${RED}✗${NC} restart idempotent check failed" - FAIL=$((FAIL + 1)) -fi - -# Test 5: restart rejects in_progress without --force -$FLOWCTL start "${RST_EPIC}.1" --json > /dev/null -set +e -result="$($FLOWCTL restart "${RST_EPIC}.1" --json 2>&1)" -rc=$? -set -e -"$PYTHON_BIN" - "$result" "$rc" <<'PY' -import json, sys -data = json.loads(sys.argv[1]) -rc = int(sys.argv[2]) -assert rc != 0, f"expected non-zero exit, got {rc}" -assert "in progress" in data.get("error", "").lower() or "in_progress" in str(data).lower(), f"expected in_progress error: {data}" -PY -if [ $? -eq 0 ]; then - echo -e "${GREEN}✓${NC} restart blocks on in_progress without --force" - PASS=$((PASS + 1)) -else - echo -e "${RED}✗${NC} restart should block in_progress" - FAIL=$((FAIL + 1)) -fi - -# Test 6: restart --force overrides in_progress -result="$($FLOWCTL restart "${RST_EPIC}.1" --force --json)" -"$PYTHON_BIN" - "$result" "$RST_EPIC" <<'PY' -import json, sys -data = json.loads(sys.argv[1]) -ep = sys.argv[2] -assert data.get("success") == True -assert f"{ep}.1" in data.get("reset", []) -PY -if [ $? 
-eq 0 ]; then - echo -e "${GREEN}✓${NC} restart --force overrides in_progress" - PASS=$((PASS + 1)) -else - echo -e "${RED}✗${NC} restart --force failed" - FAIL=$((FAIL + 1)) -fi - -echo -e "\n${YELLOW}--- review-backend --compare ---${NC}" - -# Create mock receipt files -cat > "$TEST_DIR/receipt-codex.json" << 'EOF' -{"type":"impl_review","id":"fn-1.1","mode":"codex","verdict":"SHIP","timestamp":"2026-03-30T00:00:00Z","review":"Looks good"} -EOF -cat > "$TEST_DIR/receipt-rp.json" << 'EOF' -{"type":"impl_review","id":"fn-1.1","mode":"rp","verdict":"SHIP","timestamp":"2026-03-30T00:00:00Z","review":"LGTM"} -EOF -cat > "$TEST_DIR/receipt-conflict.json" << 'EOF' -{"type":"impl_review","id":"fn-1.1","mode":"rp","verdict":"NEEDS_WORK","timestamp":"2026-03-30T00:00:00Z","review":"Needs fixes"} -EOF - -# Test 1: compare with consensus (both SHIP) -result="$($FLOWCTL review-backend --compare "$TEST_DIR/receipt-codex.json,$TEST_DIR/receipt-rp.json" --json)" -"$PYTHON_BIN" - "$result" <<'PY' -import json, sys -data = json.loads(sys.argv[1]) -assert data.get("consensus") == "SHIP", f"expected SHIP consensus, got {data}" -assert data.get("has_conflict") == False, f"expected no conflict: {data}" -assert data.get("reviews") == 2, f"expected 2 reviews: {data}" -PY -if [ $? -eq 0 ]; then - echo -e "${GREEN}✓${NC} review-backend --compare consensus detected" - PASS=$((PASS + 1)) -else - echo -e "${RED}✗${NC} review-backend --compare consensus failed" - FAIL=$((FAIL + 1)) -fi - -# Test 2: compare with conflict (SHIP vs NEEDS_WORK) -result="$($FLOWCTL review-backend --compare "$TEST_DIR/receipt-codex.json,$TEST_DIR/receipt-conflict.json" --json)" -"$PYTHON_BIN" - "$result" <<'PY' -import json, sys -data = json.loads(sys.argv[1]) -assert data.get("has_conflict") == True, f"expected conflict: {data}" -assert data.get("consensus") is None, f"expected no consensus: {data}" -PY -if [ $? 
-eq 0 ]; then - echo -e "${GREEN}✓${NC} review-backend --compare conflict detected" - PASS=$((PASS + 1)) -else - echo -e "${RED}✗${NC} review-backend --compare conflict failed" - FAIL=$((FAIL + 1)) -fi - -echo -e "\n${YELLOW}--- review receipt archival ---${NC}" - -# Setup: create epic + task -RR_EPIC_JSON="$($FLOWCTL epic create --title "Receipt test" --json)" -RR_EPIC="$("$PYTHON_BIN" -c "import json,sys; print(json.loads(sys.argv[1])['id'])" "$RR_EPIC_JSON")" -$FLOWCTL task create --epic "$RR_EPIC" --title "Task with review" --json > /dev/null -$FLOWCTL start "${RR_EPIC}.1" --json > /dev/null - -# Test 1: done with review_receipt archives to .flow/reviews/ -RR_EVIDENCE="{\"commits\":[\"x1\"],\"tests\":[],\"prs\":[],\"review_receipt\":{\"type\":\"impl_review\",\"id\":\"${RR_EPIC}.1\",\"mode\":\"codex\",\"verdict\":\"SHIP\",\"timestamp\":\"2026-03-30T00:00:00Z\",\"review\":\"LGTM\"}}" -$FLOWCTL done "${RR_EPIC}.1" --summary "done" --evidence "$RR_EVIDENCE" --json > /dev/null -if [ -f ".flow/reviews/impl_review-${RR_EPIC}.1-codex.json" ]; then - echo -e "${GREEN}✓${NC} review receipt archived to .flow/reviews/" - PASS=$((PASS + 1)) -else - echo -e "${RED}✗${NC} review receipt not archived" - FAIL=$((FAIL + 1)) -fi - -# Add a second receipt (simulate rp review) -cat > ".flow/reviews/impl_review-${RR_EPIC}.1-rp.json" << 'EOF' -{"type":"impl_review","id":"PLACEHOLDER","mode":"rp","verdict":"SHIP","timestamp":"2026-03-30T00:01:00Z","review":"Looks good"} -EOF - -# Test 2: review-backend --epic auto-discovers receipts -result="$($FLOWCTL review-backend --epic "$RR_EPIC" --json)" -"$PYTHON_BIN" - "$result" <<'PY' -import json, sys -data = json.loads(sys.argv[1]) -assert data.get("reviews") == 2, f"expected 2 reviews, got {data.get('reviews')}" -assert data.get("consensus") == "SHIP", f"expected SHIP consensus: {data}" -PY -if [ $? 
-eq 0 ]; then - echo -e "${GREEN}✓${NC} review-backend --epic auto-discovers receipts" - PASS=$((PASS + 1)) -else - echo -e "${RED}✗${NC} review-backend --epic failed" - FAIL=$((FAIL + 1)) -fi - -echo -e "\n${YELLOW}--- task domain tagging ---${NC}" - -# Setup: create epic + tasks with domains -DOM_EPIC_JSON="$($FLOWCTL epic create --title "Domain test" --json)" -DOM_EPIC="$("$PYTHON_BIN" -c "import json,sys; print(json.loads(sys.argv[1])['id'])" "$DOM_EPIC_JSON")" -$FLOWCTL task create --epic "$DOM_EPIC" --title "Build API" --domain backend --json > /dev/null -$FLOWCTL task create --epic "$DOM_EPIC" --title "Build UI" --domain frontend --json > /dev/null -$FLOWCTL task create --epic "$DOM_EPIC" --title "No domain" --json > /dev/null - -# Test 1: domain stored in task JSON -result="$($FLOWCTL show "${DOM_EPIC}.1" --json)" -"$PYTHON_BIN" - "$result" <<'PY' -import json, sys -data = json.loads(sys.argv[1]) -assert data.get("domain") == "backend", f"expected backend, got {data.get('domain')}" -PY -if [ $? -eq 0 ]; then - echo -e "${GREEN}✓${NC} task create stores domain" - PASS=$((PASS + 1)) -else - echo -e "${RED}✗${NC} task create domain not stored" - FAIL=$((FAIL + 1)) -fi - -# Test 2: task without domain has null domain -result="$($FLOWCTL show "${DOM_EPIC}.3" --json)" -"$PYTHON_BIN" - "$result" <<'PY' -import json, sys -data = json.loads(sys.argv[1]) -assert data.get("domain") is None, f"expected None, got {data.get('domain')}" -PY -if [ $? -eq 0 ]; then - echo -e "${GREEN}✓${NC} task without domain is null" - PASS=$((PASS + 1)) -else - echo -e "${RED}✗${NC} task without domain should be null" - FAIL=$((FAIL + 1)) -fi - -# Test 3: tasks --domain filters correctly -result="$($FLOWCTL tasks --epic "$DOM_EPIC" --domain backend --json)" -"$PYTHON_BIN" - "$result" <<'PY' -import json, sys -data = json.loads(sys.argv[1]) -assert data.get("count") == 1, f"expected 1, got {data.get('count')}" -assert data["tasks"][0]["domain"] == "backend" -PY -if [ $? 
-eq 0 ]; then - echo -e "${GREEN}✓${NC} tasks --domain filters correctly" - PASS=$((PASS + 1)) -else - echo -e "${RED}✗${NC} tasks --domain filter failed" - FAIL=$((FAIL + 1)) -fi - -# Test 4: tasks without --domain shows all -result="$($FLOWCTL tasks --epic "$DOM_EPIC" --json)" -"$PYTHON_BIN" - "$result" <<'PY' -import json, sys -data = json.loads(sys.argv[1]) -assert data.get("count") == 3, f"expected 3, got {data.get('count')}" -PY -if [ $? -eq 0 ]; then - echo -e "${GREEN}✓${NC} tasks without --domain shows all" - PASS=$((PASS + 1)) -else - echo -e "${RED}✗${NC} tasks without --domain should show all" - FAIL=$((FAIL + 1)) -fi - -echo -e "\n${YELLOW}--- epic archive/clean ---${NC}" - -# Setup: create + close an epic -ARC_EPIC_JSON="$($FLOWCTL epic create --title "Archive me" --json)" -ARC_EPIC="$("$PYTHON_BIN" -c "import json,sys; print(json.loads(sys.argv[1])['id'])" "$ARC_EPIC_JSON")" -$FLOWCTL task create --epic "$ARC_EPIC" --title "Done task" --json > /dev/null -$FLOWCTL start "${ARC_EPIC}.1" --json > /dev/null -$FLOWCTL done "${ARC_EPIC}.1" --summary "ok" --evidence '{"commits":[],"tests":[],"prs":[]}' --json > /dev/null -$FLOWCTL epic close "$ARC_EPIC" --json > /dev/null - -# Test 1: archive moves files -result="$($FLOWCTL epic archive "$ARC_EPIC" --json)" -"$PYTHON_BIN" - "$result" "$ARC_EPIC" <<'PY' -import json, sys -data = json.loads(sys.argv[1]) -ep = sys.argv[2] -assert data.get("success") == True, f"expected success: {data}" -assert data.get("count", 0) >= 3, f"expected >= 3 files moved, got {data.get('count')}" -PY -if [ $? 
-eq 0 ]; then - echo -e "${GREEN}✓${NC} epic archive moves files to .archive/" - PASS=$((PASS + 1)) -else - echo -e "${RED}✗${NC} epic archive failed" - FAIL=$((FAIL + 1)) -fi - -# Test 2: archived epic no longer shows in list -result="$($FLOWCTL epics --json)" -"$PYTHON_BIN" - "$result" "$ARC_EPIC" <<'PY' -import json, sys -data = json.loads(sys.argv[1]) -ep = sys.argv[2] -ids = [e["id"] for e in data.get("epics", [])] -assert ep not in ids, f"{ep} should not be in epics list: {ids}" -PY -if [ $? -eq 0 ]; then - echo -e "${GREEN}✓${NC} archived epic removed from epics list" - PASS=$((PASS + 1)) -else - echo -e "${RED}✗${NC} archived epic still in list" - FAIL=$((FAIL + 1)) -fi - -# Test 3: archive dir has the files -if [ -d ".flow/.archive/$ARC_EPIC" ]; then - echo -e "${GREEN}✓${NC} .flow/.archive/$ARC_EPIC/ directory exists" - PASS=$((PASS + 1)) -else - echo -e "${RED}✗${NC} archive directory missing" - FAIL=$((FAIL + 1)) -fi - -# Test 4: epic clean archives all closed epics -CLEAN_EP1_JSON="$($FLOWCTL epic create --title "Clean1" --json)" -CLEAN_EP1="$("$PYTHON_BIN" -c "import json,sys; print(json.loads(sys.argv[1])['id'])" "$CLEAN_EP1_JSON")" -$FLOWCTL task create --epic "$CLEAN_EP1" --title "T1" --json > /dev/null -$FLOWCTL start "${CLEAN_EP1}.1" --json > /dev/null -$FLOWCTL done "${CLEAN_EP1}.1" --summary "ok" --evidence '{"commits":[],"tests":[],"prs":[]}' --json > /dev/null -$FLOWCTL epic close "$CLEAN_EP1" --json > /dev/null - -result="$($FLOWCTL epic clean --json)" -"$PYTHON_BIN" - "$result" <<'PY' -import json, sys -data = json.loads(sys.argv[1]) -assert data.get("count", 0) >= 1, f"expected >= 1 archived, got {data}" -PY -if [ $? 
-eq 0 ]; then - echo -e "${GREEN}✓${NC} epic clean archives all closed epics" - PASS=$((PASS + 1)) -else - echo -e "${RED}✗${NC} epic clean failed" - FAIL=$((FAIL + 1)) -fi - -echo -e "${YELLOW}--- parse-findings ---${NC}" - -# Test: valid tag -FINDINGS_FILE="$TEST_DIR/findings_valid.txt" -cat > "$FINDINGS_FILE" <<'FINDINGS_EOF' -Some review preamble text. - - -[ - { - "title": "Missing input validation", - "severity": "critical", - "location": "src/auth.py:42", - "recommendation": "Add input sanitization" - }, - { - "title": "Unused import", - "severity": "nitpick", - "location": "src/utils.py:1", - "recommendation": "Remove unused import" - } -] - - -More review text after. -FINDINGS_EOF - -pf_result="$($FLOWCTL parse-findings --file "$FINDINGS_FILE" --json)" -pf_count="$(echo "$pf_result" | "$PYTHON_BIN" -c 'import json,sys; print(json.load(sys.stdin).get("count", 0))')" -if [[ "$pf_count" == "2" ]]; then - echo -e "${GREEN}✓${NC} parse-findings extracts findings from tag" - PASS=$((PASS + 1)) -else - echo -e "${RED}✗${NC} parse-findings count wrong (expected 2, got $pf_count)" - FAIL=$((FAIL + 1)) -fi - -# Test: missing tag → graceful empty -FINDINGS_EMPTY="$TEST_DIR/findings_empty.txt" -echo "No findings here, just plain review text." 
> "$FINDINGS_EMPTY" - -pf_empty="$($FLOWCTL parse-findings --file "$FINDINGS_EMPTY" --json)" -pf_empty_count="$(echo "$pf_empty" | "$PYTHON_BIN" -c 'import json,sys; print(json.load(sys.stdin).get("count", 0))')" -pf_empty_warns="$(echo "$pf_empty" | "$PYTHON_BIN" -c 'import json,sys; w=json.load(sys.stdin).get("warnings",[]); print(len(w))')" -if [[ "$pf_empty_count" == "0" ]] && [[ "$pf_empty_warns" -ge 1 ]]; then - echo -e "${GREEN}✓${NC} parse-findings gracefully handles missing tags" - PASS=$((PASS + 1)) -else - echo -e "${RED}✗${NC} parse-findings missing tag handling wrong (count=$pf_empty_count, warns=$pf_empty_warns)" - FAIL=$((FAIL + 1)) -fi - -# Test: malformed JSON (trailing commas) -FINDINGS_MALFORMED="$TEST_DIR/findings_malformed.txt" -cat > "$FINDINGS_MALFORMED" <<'FINDINGS_EOF' - -[ - { - "title": "Trailing comma issue", - "severity": "major", - "location": "src/app.py:10", - "recommendation": "Fix the trailing comma", - }, -] - -FINDINGS_EOF - -pf_mal="$($FLOWCTL parse-findings --file "$FINDINGS_MALFORMED" --json)" -pf_mal_count="$(echo "$pf_mal" | "$PYTHON_BIN" -c 'import json,sys; print(json.load(sys.stdin).get("count", 0))')" -if [[ "$pf_mal_count" == "1" ]]; then - echo -e "${GREEN}✓${NC} parse-findings handles malformed JSON (trailing commas)" - PASS=$((PASS + 1)) -else - echo -e "${RED}✗${NC} parse-findings malformed JSON handling wrong (expected 1, got $pf_mal_count)" - FAIL=$((FAIL + 1)) -fi - -# Test: --register auto gap add -FINDINGS_REG="$TEST_DIR/findings_register.txt" -cat > "$FINDINGS_REG" <<'FINDINGS_EOF' - -[ - { - "title": "SQL injection vulnerability", - "severity": "critical", - "location": "src/db.py:99", - "recommendation": "Use parameterized queries" - }, - { - "title": "Minor typo in comment", - "severity": "minor", - "location": "src/main.py:5", - "recommendation": "Fix typo" - } -] - -FINDINGS_EOF - -pf_reg="$($FLOWCTL parse-findings --file "$FINDINGS_REG" --epic "$EPIC1" --register --source plan-review --json)" 
-pf_reg_registered="$(echo "$pf_reg" | "$PYTHON_BIN" -c 'import json,sys; print(json.load(sys.stdin).get("registered", 0))')" -if [[ "$pf_reg_registered" == "1" ]]; then - echo -e "${GREEN}✓${NC} parse-findings --register adds critical/major gaps (skips minor)" - PASS=$((PASS + 1)) -else - echo -e "${RED}✗${NC} parse-findings --register wrong count (expected 1, got $pf_reg_registered)" - FAIL=$((FAIL + 1)) -fi - -# Verify the gap was actually created -gap_reg_check="$($FLOWCTL gap list --epic "$EPIC1" --json | "$PYTHON_BIN" -c ' -import json, sys -data = json.load(sys.stdin) -gaps = data.get("gaps", []) -sql_gaps = [g for g in gaps if "SQL injection" in g.get("capability", "")] -print(len(sql_gaps)) -')" -if [[ "$gap_reg_check" == "1" ]]; then - echo -e "${GREEN}✓${NC} parse-findings --register actually created the gap" - PASS=$((PASS + 1)) -else - echo -e "${RED}✗${NC} parse-findings --register gap not found in registry (found $gap_reg_check)" - FAIL=$((FAIL + 1)) -fi - -# ── status --interrupted ── -echo -e "\n${YELLOW}=== status --interrupted ===${NC}" - -# Create a second epic with todo tasks to test interrupted detection -EPIC_INT_JSON="$($FLOWCTL epic create --title "Interrupted test epic" --json)" -EPIC_INT="$(echo "$EPIC_INT_JSON" | "$PYTHON_BIN" -c 'import json,sys; print(json.load(sys.stdin)["id"])')" -$FLOWCTL task create --epic "$EPIC_INT" --title "Interrupted task 1" --json > /dev/null -$FLOWCTL task create --epic "$EPIC_INT" --title "Interrupted task 2" --json > /dev/null - -# Test --interrupted --json detects epic with todo tasks -int_json="$($FLOWCTL status --interrupted --json)" -int_count="$(echo "$int_json" | "$PYTHON_BIN" -c ' -import json, sys -data = json.load(sys.stdin) -epics = data.get("interrupted", []) -matching = [e for e in epics if e["id"] == "'"$EPIC_INT"'"] -print(len(matching)) -')" -if [[ "$int_count" == "1" ]]; then - echo -e "${GREEN}✓${NC} status --interrupted detects epic with todo tasks" - PASS=$((PASS + 1)) -else - echo -e 
"${RED}✗${NC} status --interrupted did not detect epic (found $int_count)" - FAIL=$((FAIL + 1)) -fi - -# Verify suggested command is included -int_suggested="$(echo "$int_json" | "$PYTHON_BIN" -c ' -import json, sys -data = json.load(sys.stdin) -epics = data.get("interrupted", []) -matching = [e for e in epics if e["id"] == "'"$EPIC_INT"'"] -print(matching[0].get("suggested", "") if matching else "") -')" -if [[ "$int_suggested" == "/flow-code:work $EPIC_INT" ]]; then - echo -e "${GREEN}✓${NC} status --interrupted includes suggested resume command" - PASS=$((PASS + 1)) -else - echo -e "${RED}✗${NC} status --interrupted wrong suggested (got: $int_suggested)" - FAIL=$((FAIL + 1)) -fi - -# Verify task counts in interrupted output -int_todo="$(echo "$int_json" | "$PYTHON_BIN" -c ' -import json, sys -data = json.load(sys.stdin) -epics = data.get("interrupted", []) -matching = [e for e in epics if e["id"] == "'"$EPIC_INT"'"] -print(matching[0].get("todo", 0) if matching else 0) -')" -if [[ "$int_todo" == "2" ]]; then - echo -e "${GREEN}✓${NC} status --interrupted reports correct todo count" - PASS=$((PASS + 1)) -else - echo -e "${RED}✗${NC} status --interrupted wrong todo count (expected 2, got $int_todo)" - FAIL=$((FAIL + 1)) -fi - -# ── epic set-auto-execute ── -echo -e "\n${YELLOW}=== epic set-auto-execute ===${NC}" - -# Create an epic with tasks for auto-execute testing -EPIC_AE_JSON="$($FLOWCTL epic create --title "Auto execute test" --json)" -EPIC_AE="$(echo "$EPIC_AE_JSON" | "$PYTHON_BIN" -c 'import json,sys; print(json.load(sys.stdin)["id"])')" -$FLOWCTL task create --epic "$EPIC_AE" --title "AE task 1" --json > /dev/null -$FLOWCTL task create --epic "$EPIC_AE" --title "AE task 2" --json > /dev/null - -# Set pending marker -ae_pending="$($FLOWCTL epic auto-exec "$EPIC_AE" --pending --json)" -ae_pending_val="$(echo "$ae_pending" | "$PYTHON_BIN" -c 'import json,sys; print(json.load(sys.stdin)["auto_execute_pending"])')" -if [[ "$ae_pending_val" == "True" ]]; then - 
echo -e "${GREEN}✓${NC} set-auto-execute --pending sets marker" - PASS=$((PASS + 1)) -else - echo -e "${RED}✗${NC} set-auto-execute --pending: expected True, got $ae_pending_val" - FAIL=$((FAIL + 1)) -fi - -# Verify --interrupted shows it with reason "planned_not_started" -ae_int_json="$($FLOWCTL status --interrupted --json)" -ae_reason="$(echo "$ae_int_json" | "$PYTHON_BIN" -c ' -import json, sys -data = json.load(sys.stdin) -epics = data.get("interrupted", []) -matching = [e for e in epics if e["id"] == "'"$EPIC_AE"'"] -print(matching[0].get("reason", "") if matching else "") -')" -if [[ "$ae_reason" == "planned_not_started" ]]; then - echo -e "${GREEN}✓${NC} --interrupted shows planned_not_started reason for pending epic" - PASS=$((PASS + 1)) -else - echo -e "${RED}✗${NC} --interrupted wrong reason (expected planned_not_started, got: $ae_reason)" - FAIL=$((FAIL + 1)) -fi - -# Clear marker with --done -ae_done="$($FLOWCTL epic auto-exec "$EPIC_AE" --done --json)" -ae_done_val="$(echo "$ae_done" | "$PYTHON_BIN" -c 'import json,sys; print(json.load(sys.stdin)["auto_execute_pending"])')" -if [[ "$ae_done_val" == "False" ]]; then - echo -e "${GREEN}✓${NC} set-auto-execute --done clears marker" - PASS=$((PASS + 1)) -else - echo -e "${RED}✗${NC} set-auto-execute --done: expected False, got $ae_done_val" - FAIL=$((FAIL + 1)) -fi - -# Verify --interrupted now shows "partially_complete" reason (marker cleared) -ae_int2_json="$($FLOWCTL status --interrupted --json)" -ae_reason2="$(echo "$ae_int2_json" | "$PYTHON_BIN" -c ' -import json, sys -data = json.load(sys.stdin) -epics = data.get("interrupted", []) -matching = [e for e in epics if e["id"] == "'"$EPIC_AE"'"] -print(matching[0].get("reason", "") if matching else "") -')" -if [[ "$ae_reason2" == "partially_complete" ]]; then - echo -e "${GREEN}✓${NC} --interrupted shows partially_complete after marker cleared" - PASS=$((PASS + 1)) -else - echo -e "${RED}✗${NC} --interrupted wrong reason after clear (expected 
partially_complete, got: $ae_reason2)" - FAIL=$((FAIL + 1)) -fi - -echo -e "${YELLOW}--- worker-prompt ---${NC}" - -# Copy agents directory so worker-phase can find worker.md -cp -r "$PLUGIN_ROOT/agents" "$TEST_DIR/repo/agents" - -# Test: worker-prompt default output (bootstrap mode — full mode removed) -wp_json="$(CLAUDE_PLUGIN_ROOT="$TEST_DIR/repo" $FLOWCTL worker-prompt --task "${EPIC1}.1" --json)" -wp_mode="$(echo "$wp_json" | "$PYTHON_BIN" -c 'import json,sys; print(json.load(sys.stdin)["mode"])')" -wp_tokens="$(echo "$wp_json" | "$PYTHON_BIN" -c 'import json,sys; print(json.load(sys.stdin)["estimated_tokens"])')" -if [[ "$wp_mode" == "bootstrap" ]] && [[ "$wp_tokens" -gt 0 ]] && [[ "$wp_tokens" -lt 300 ]]; then - echo -e "${GREEN}✓${NC} worker-prompt default: bootstrap mode, ${wp_tokens} tokens (<300)" - PASS=$((PASS + 1)) -else - echo -e "${RED}✗${NC} worker-prompt default: expected mode=bootstrap and <300 tokens, got mode=$wp_mode tokens=$wp_tokens" - FAIL=$((FAIL + 1)) -fi - -echo -e "${YELLOW}--- worker-phase ---${NC}" - -# Create a fresh epic+task for phase testing -EPIC_PH_JSON="$($FLOWCTL epic create --title "Phase test" --json)" -EPIC_PH="$(echo "$EPIC_PH_JSON" | "$PYTHON_BIN" -c 'import json,sys; print(json.load(sys.stdin)["id"])')" -$FLOWCTL task create --epic "$EPIC_PH" --title "Phase task" --json >/dev/null -$FLOWCTL start "${EPIC_PH}.1" --json >/dev/null - -# Test: worker-phase next returns phase 1 initially (worktree+teams default) -wph_next="$($FLOWCTL worker-phase next --task "${EPIC_PH}.1" --json)" -wph_phase="$(echo "$wph_next" | "$PYTHON_BIN" -c 'import json,sys; print(json.load(sys.stdin)["phase"])')" -wph_done="$(echo "$wph_next" | "$PYTHON_BIN" -c 'import json,sys; print(json.load(sys.stdin)["all_done"])')" -if [[ "$wph_phase" == "1" ]] && [[ "$wph_done" == "False" ]]; then - echo -e "${GREEN}✓${NC} worker-phase next: initial phase is 1 (worktree+teams default)" - PASS=$((PASS + 1)) -else - echo -e "${RED}✗${NC} worker-phase next: 
expected phase=1 all_done=False, got phase=$wph_phase all_done=$wph_done" - FAIL=$((FAIL + 1)) -fi - -# Test: worker-phase done phase 1 → next returns phase 2 -wph_next1="$wph_next" -$FLOWCTL worker-phase done --task "${EPIC_PH}.1" --phase 1 --json >/dev/null -wph_next1b="$($FLOWCTL worker-phase next --task "${EPIC_PH}.1" --json)" -wph_phase1b="$(echo "$wph_next1b" | "$PYTHON_BIN" -c 'import json,sys; print(json.load(sys.stdin)["phase"])')" -if [[ "$wph_phase1b" == "2" ]]; then - echo -e "${GREEN}✓${NC} worker-phase done→next: advances to phase 2" - PASS=$((PASS + 1)) -else - echo -e "${RED}✗${NC} worker-phase done→next: expected phase=2, got $wph_phase1b" - FAIL=$((FAIL + 1)) -fi - -# Advance through phase 2 and 5 to test 6 -$FLOWCTL worker-phase done --task "${EPIC_PH}.1" --phase 2 --json >/dev/null -$FLOWCTL worker-phase done --task "${EPIC_PH}.1" --phase 5 --json >/dev/null -wph_next6="$($FLOWCTL worker-phase next --task "${EPIC_PH}.1" --json)" -wph_phase6="$(echo "$wph_next6" | "$PYTHON_BIN" -c 'import json,sys; print(json.load(sys.stdin)["phase"])')" -if [[ "$wph_phase6" == "6" ]]; then - echo -e "${GREEN}✓${NC} worker-phase done→next: advances to phase 6" - PASS=$((PASS + 1)) -else - echo -e "${RED}✗${NC} worker-phase done→next: expected phase=6, got $wph_phase6" - FAIL=$((FAIL + 1)) -fi - -# Test: worker-phase skip detection — try to complete phase 10 before phase 6 -wph_skip_err="$($FLOWCTL worker-phase done --task "${EPIC_PH}.1" --phase 10 --json 2>&1 || true)" -if echo "$wph_skip_err" | "$PYTHON_BIN" -c 'import json,sys; d=json.load(sys.stdin); assert d.get("error") or not d.get("success")' 2>/dev/null; then - echo -e "${GREEN}✓${NC} worker-phase skip detection: rejects out-of-order phase" - PASS=$((PASS + 1)) -else - echo -e "${RED}✗${NC} worker-phase skip detection: expected error for out-of-order, got: $wph_skip_err" - FAIL=$((FAIL + 1)) -fi - -# Test: worker-phase next returns content field (may be empty in streamlined mode) -wph_has_content="$(echo 
"$wph_next1" | "$PYTHON_BIN" -c 'import json,sys; d=json.load(sys.stdin); print("content" in d)')" -if [[ "$wph_has_content" == "True" ]]; then - echo -e "${GREEN}✓${NC} worker-phase next: content field present" - PASS=$((PASS + 1)) -else - echo -e "${RED}✗${NC} worker-phase next: content field missing" - FAIL=$((FAIL + 1)) -fi - -# Test: worker-phase next returns different titles for different phases (phase 1 vs phase 2) -wph_title_p1="$(echo "$wph_next1" | "$PYTHON_BIN" -c 'import json,sys; print(json.load(sys.stdin).get("title",""))')" -wph_title_p2="$(echo "$wph_next1b" | "$PYTHON_BIN" -c 'import json,sys; print(json.load(sys.stdin).get("title",""))')" -if [[ "$wph_title_p1" != "$wph_title_p2" ]] && [[ -n "$wph_title_p2" ]]; then - echo -e "${GREEN}✓${NC} worker-phase next: title changes between phases (1 vs 2)" - PASS=$((PASS + 1)) -else - echo -e "${RED}✗${NC} worker-phase next: expected different title for phase 1 vs 2" - FAIL=$((FAIL + 1)) -fi - -# Test: worker-prompt --bootstrap outputs <300 tokens -wp_boot_json="$(CLAUDE_PLUGIN_ROOT="$TEST_DIR/repo" $FLOWCTL worker-prompt --task "${EPIC1}.1" --bootstrap --json)" -wp_boot_tokens="$(echo "$wp_boot_json" | "$PYTHON_BIN" -c 'import json,sys; print(json.load(sys.stdin)["estimated_tokens"])')" -wp_boot_mode="$(echo "$wp_boot_json" | "$PYTHON_BIN" -c 'import json,sys; print(json.load(sys.stdin)["mode"])')" -if [[ "$wp_boot_mode" == "bootstrap" ]] && [[ "$wp_boot_tokens" -lt 300 ]]; then - echo -e "${GREEN}✓${NC} worker-prompt --bootstrap: mode=bootstrap, ${wp_boot_tokens} tokens (<300)" - PASS=$((PASS + 1)) -else - echo -e "${RED}✗${NC} worker-prompt --bootstrap: expected mode=bootstrap and <300 tokens, got mode=$wp_boot_mode tokens=$wp_boot_tokens" - FAIL=$((FAIL + 1)) -fi - -# Test: complete all remaining default phases → all_done -# Phases 1, 2, 5 already done above; complete remaining: 6, 7, 9, 10, 11, 12 -for phase in 6 7 9 10 11 12; do - $FLOWCTL worker-phase done --task "${EPIC_PH}.1" --phase "$phase" 
--json >/dev/null -done -wph_final="$($FLOWCTL worker-phase next --task "${EPIC_PH}.1" --json)" -wph_all_done="$(echo "$wph_final" | "$PYTHON_BIN" -c 'import json,sys; print(json.load(sys.stdin)["all_done"])')" -if [[ "$wph_all_done" == "True" ]]; then - echo -e "${GREEN}✓${NC} worker-phase lifecycle: all phases complete" - PASS=$((PASS + 1)) -else - echo -e "${RED}✗${NC} worker-phase lifecycle: expected all_done=True, got $wph_all_done" - FAIL=$((FAIL + 1)) -fi - -echo "" -echo -e "${YELLOW}=== Results ===${NC}" -echo -e "Passed: ${GREEN}$PASS${NC}" -echo -e "Failed: ${RED}$FAIL${NC}" - -if [ $FAIL -gt 0 ]; then - exit 1 -fi -echo -e "\n${GREEN}All tests passed!${NC}" +exec bash "$SCRIPT_DIR/tests/run_all.sh" "$@" diff --git a/scripts/tests/common.sh b/scripts/tests/common.sh new file mode 100755 index 00000000..563f3fb9 --- /dev/null +++ b/scripts/tests/common.sh @@ -0,0 +1,65 @@ +#!/usr/bin/env bash +set -euo pipefail + +# Shared setup for all smoke test files. +# Source this file at the top of each test_*.sh. + +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +PLUGIN_ROOT="$(cd "$SCRIPT_DIR/../.." 
&& pwd)" + +# Python detection: prefer python3, fallback to python (Windows support, GH-35) +pick_python() { + if [[ -n "${PYTHON_BIN:-}" ]]; then + command -v "$PYTHON_BIN" >/dev/null 2>&1 && { echo "$PYTHON_BIN"; return; } + fi + if command -v python3 >/dev/null 2>&1; then echo "python3"; return; fi + if command -v python >/dev/null 2>&1; then echo "python"; return; fi + echo "" +} + +PYTHON_BIN="$(pick_python)" +[[ -n "$PYTHON_BIN" ]] || { echo "ERROR: python not found (need python3 or python in PATH)" >&2; exit 1; } + +TEST_DIR="/tmp/flowctl-smoke-$$" +PASS=0 +FAIL=0 + +GREEN='\033[0;32m' +RED='\033[0;31m' +YELLOW='\033[1;33m' +NC='\033[0m' + +cleanup() { + rm -rf "$TEST_DIR" +} +trap cleanup EXIT + +mkdir -p "$TEST_DIR/repo" +cd "$TEST_DIR/repo" +git init -q + +# Locate flowctl binary (Rust) +if [[ -x "$PLUGIN_ROOT/bin/flowctl" ]]; then + FLOWCTL="$PLUGIN_ROOT/bin/flowctl" +elif command -v flowctl >/dev/null 2>&1; then + FLOWCTL="$(command -v flowctl)" +else + echo "ERROR: flowctl binary not found. Build with: cd flowctl && cargo build --release && cp target/release/flowctl ../bin/" >&2 + exit 1 +fi + +$FLOWCTL init --json >/dev/null +printf '{"commits":[],"tests":[],"prs":[]}' > "$TEST_DIR/evidence.json" +printf "ok\n" > "$TEST_DIR/summary.md" + +# Print results summary (call at end of each test file) +print_results() { + echo "" + echo -e "${YELLOW}=== Results ===${NC}" + echo -e "Passed: ${GREEN}$PASS${NC}" + echo -e "Failed: ${RED}$FAIL${NC}" + if [ $FAIL -gt 0 ]; then + exit 1 + fi + echo -e "\n${GREEN}All tests passed!${NC}" +} diff --git a/scripts/tests/run_all.sh b/scripts/tests/run_all.sh new file mode 100755 index 00000000..f1743a71 --- /dev/null +++ b/scripts/tests/run_all.sh @@ -0,0 +1,38 @@ +#!/usr/bin/env bash +set -euo pipefail + +# Run all smoke test files sequentially. +# Each test file is independently runnable and creates its own isolated temp dir. 
+ +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +OVERALL_PASS=0 +OVERALL_FAIL=0 + +GREEN='\033[0;32m' +RED='\033[0;31m' +YELLOW='\033[1;33m' +NC='\033[0m' + +echo -e "${YELLOW}=== flowctl smoke tests (split) ===${NC}\n" + +for test_file in "$SCRIPT_DIR"/test_*.sh; do + test_name="$(basename "$test_file" .sh)" + echo -e "${YELLOW}>>> Running $test_name ...${NC}" + if bash "$test_file"; then + echo -e "${GREEN}<<< $test_name passed${NC}\n" + OVERALL_PASS=$((OVERALL_PASS + 1)) + else + echo -e "${RED}<<< $test_name FAILED${NC}\n" + OVERALL_FAIL=$((OVERALL_FAIL + 1)) + fi +done + +echo -e "\n${YELLOW}=== Overall Results ===${NC}" +echo -e "Test files passed: ${GREEN}$OVERALL_PASS${NC}" +echo -e "Test files failed: ${RED}$OVERALL_FAIL${NC}" + +if [ $OVERALL_FAIL -gt 0 ]; then + echo -e "\n${RED}Some test files failed!${NC}" + exit 1 +fi +echo -e "\n${GREEN}All test files passed!${NC}" diff --git a/scripts/tests/test_codex_e2e.sh b/scripts/tests/test_codex_e2e.sh new file mode 100755 index 00000000..eb8f88e6 --- /dev/null +++ b/scripts/tests/test_codex_e2e.sh @@ -0,0 +1,108 @@ +#!/usr/bin/env bash +# Tests: codex plan-review and impl-review end-to-end (requires codex CLI) +source "$(cd "$(dirname "$0")" && pwd)/common.sh" + +echo -e "${YELLOW}=== codex e2e tests ===${NC}" + +echo -e "${YELLOW}--- codex e2e (requires codex CLI) ---${NC}" +# Check if codex is available (handles its own auth) +codex_available="$($FLOWCTL codex check --json 2>/dev/null | "$PYTHON_BIN" -c "import sys,json; print(json.load(sys.stdin).get('available', False))" 2>/dev/null || echo "False")" +if [[ "$codex_available" == "True" ]]; then + # Create a simple epic + task for testing + EPIC3_JSON="$($FLOWCTL epic create --title "Codex test epic" --json)" + EPIC3="$(echo "$EPIC3_JSON" | "$PYTHON_BIN" -c 'import json,sys; print(json.load(sys.stdin)["id"])')" + $FLOWCTL task create --epic "$EPIC3" --title "Test task" --json >/dev/null + + # Write a simple spec + cat > ".flow/specs/${EPIC3}.md" << 'EOF' +# 
Codex Test Epic + +Simple test epic for smoke testing codex reviews. + +## Scope +- Test that codex can review a plan +- Test that codex can review an implementation +EOF + + cat > ".flow/tasks/${EPIC3}.1.md" << 'EOF' +# Test Task + +Add a simple hello world function. + +## Acceptance +- Function returns "hello world" +EOF + + # Test plan-review e2e + mkdir -p src + echo 'def hello(): return "hello world"' > src/hello.py + set +e + plan_result="$($FLOWCTL codex plan-review "$EPIC3" --files "src/hello.py" --base main --receipt "$TEST_DIR/plan-receipt.json" --json 2>&1)" + plan_rc=$? + set -e + + if [[ "$plan_rc" -eq 0 ]]; then + if [[ -f "$TEST_DIR/plan-receipt.json" ]]; then + "$PYTHON_BIN" - "$TEST_DIR/plan-receipt.json" "$EPIC3" <<'PY' +import sys, json +from pathlib import Path +data = json.loads(Path(sys.argv[1]).read_text()) +expected_id = sys.argv[2] +assert data.get("type") == "plan_review", f"Expected type=plan_review, got {data.get('type')}" +assert data.get("id") == expected_id, f"Expected id={expected_id}, got {data.get('id')}" +assert data.get("mode") == "codex", f"Expected mode=codex, got {data.get('mode')}" +assert "verdict" in data, "Missing verdict in receipt" +assert "session_id" in data, "Missing session_id in receipt" +PY + echo -e "${GREEN}✓${NC} codex plan-review e2e" + PASS=$((PASS + 1)) + else + echo -e "${RED}✗${NC} codex plan-review e2e (no receipt)" + FAIL=$((FAIL + 1)) + fi + else + echo -e "${RED}✗${NC} codex plan-review e2e (exit $plan_rc)" + FAIL=$((FAIL + 1)) + fi + + # Test impl-review e2e + cat > "$TEST_DIR/repo/src/hello.py" << 'EOF' +def hello(): + return "hello world" +EOF + git -C "$TEST_DIR/repo" add src/hello.py + git -C "$TEST_DIR/repo" commit -m "Add hello function" >/dev/null + + set +e + impl_result="$($FLOWCTL codex impl-review "${EPIC3}.1" --base HEAD~1 --receipt "$TEST_DIR/impl-receipt.json" --json 2>&1)" + impl_rc=$? 
+ set -e + + if [[ "$impl_rc" -eq 0 ]]; then + if [[ -f "$TEST_DIR/impl-receipt.json" ]]; then + "$PYTHON_BIN" - "$TEST_DIR/impl-receipt.json" "$EPIC3" <<'PY' +import sys, json +from pathlib import Path +data = json.loads(Path(sys.argv[1]).read_text()) +expected_id = f"{sys.argv[2]}.1" +assert data.get("type") == "impl_review", f"Expected type=impl_review, got {data.get('type')}" +assert data.get("id") == expected_id, f"Expected id={expected_id}, got {data.get('id')}" +assert data.get("mode") == "codex", f"Expected mode=codex, got {data.get('mode')}" +assert "verdict" in data, "Missing verdict in receipt" +assert "session_id" in data, "Missing session_id in receipt" +PY + echo -e "${GREEN}✓${NC} codex impl-review e2e" + PASS=$((PASS + 1)) + else + echo -e "${RED}✗${NC} codex impl-review e2e (no receipt)" + FAIL=$((FAIL + 1)) + fi + else + echo -e "${RED}✗${NC} codex impl-review e2e (exit $impl_rc)" + FAIL=$((FAIL + 1)) + fi +else + echo -e "${YELLOW}⊘${NC} codex e2e skipped (codex not available)" +fi + +print_results diff --git a/scripts/tests/test_domain.sh b/scripts/tests/test_domain.sh new file mode 100755 index 00000000..1d661809 --- /dev/null +++ b/scripts/tests/test_domain.sh @@ -0,0 +1,152 @@ +#!/usr/bin/env bash +# Tests: task domain tagging, epic archive/clean +source "$(cd "$(dirname "$0")" && pwd)/common.sh" + +echo -e "${YELLOW}=== domain + archive tests ===${NC}" + +echo -e "${YELLOW}--- task domain tagging ---${NC}" + +# Setup: create epic + tasks with domains +DOM_EPIC_JSON="$($FLOWCTL epic create --title "Domain test" --json)" +DOM_EPIC="$("$PYTHON_BIN" -c "import json,sys; print(json.loads(sys.argv[1])['id'])" "$DOM_EPIC_JSON")" +$FLOWCTL task create --epic "$DOM_EPIC" --title "Build API" --domain backend --json > /dev/null +$FLOWCTL task create --epic "$DOM_EPIC" --title "Build UI" --domain frontend --json > /dev/null +$FLOWCTL task create --epic "$DOM_EPIC" --title "No domain" --json > /dev/null + +# Test 1: domain stored in task JSON 
+result="$($FLOWCTL show "${DOM_EPIC}.1" --json)" +"$PYTHON_BIN" - "$result" <<'PY' +import json, sys +data = json.loads(sys.argv[1]) +assert data.get("domain") == "backend", f"expected backend, got {data.get('domain')}" +PY +if [ $? -eq 0 ]; then + echo -e "${GREEN}✓${NC} task create stores domain" + PASS=$((PASS + 1)) +else + echo -e "${RED}✗${NC} task create domain not stored" + FAIL=$((FAIL + 1)) +fi + +# Test 2: task without domain has null domain +result="$($FLOWCTL show "${DOM_EPIC}.3" --json)" +"$PYTHON_BIN" - "$result" <<'PY' +import json, sys +data = json.loads(sys.argv[1]) +assert data.get("domain") is None, f"expected None, got {data.get('domain')}" +PY +if [ $? -eq 0 ]; then + echo -e "${GREEN}✓${NC} task without domain is null" + PASS=$((PASS + 1)) +else + echo -e "${RED}✗${NC} task without domain should be null" + FAIL=$((FAIL + 1)) +fi + +# Test 3: tasks --domain filters correctly +result="$($FLOWCTL tasks --epic "$DOM_EPIC" --domain backend --json)" +"$PYTHON_BIN" - "$result" <<'PY' +import json, sys +data = json.loads(sys.argv[1]) +assert data.get("count") == 1, f"expected 1, got {data.get('count')}" +assert data["tasks"][0]["domain"] == "backend" +PY +if [ $? -eq 0 ]; then + echo -e "${GREEN}✓${NC} tasks --domain filters correctly" + PASS=$((PASS + 1)) +else + echo -e "${RED}✗${NC} tasks --domain filter failed" + FAIL=$((FAIL + 1)) +fi + +# Test 4: tasks without --domain shows all +result="$($FLOWCTL tasks --epic "$DOM_EPIC" --json)" +"$PYTHON_BIN" - "$result" <<'PY' +import json, sys +data = json.loads(sys.argv[1]) +assert data.get("count") == 3, f"expected 3, got {data.get('count')}" +PY +if [ $? 
-eq 0 ]; then + echo -e "${GREEN}✓${NC} tasks without --domain shows all" + PASS=$((PASS + 1)) +else + echo -e "${RED}✗${NC} tasks without --domain should show all" + FAIL=$((FAIL + 1)) +fi + +echo -e "\n${YELLOW}--- epic archive/clean ---${NC}" + +# Setup: create + close an epic +ARC_EPIC_JSON="$($FLOWCTL epic create --title "Archive me" --json)" +ARC_EPIC="$("$PYTHON_BIN" -c "import json,sys; print(json.loads(sys.argv[1])['id'])" "$ARC_EPIC_JSON")" +$FLOWCTL task create --epic "$ARC_EPIC" --title "Done task" --json > /dev/null +$FLOWCTL start "${ARC_EPIC}.1" --json > /dev/null +$FLOWCTL done "${ARC_EPIC}.1" --summary "ok" --evidence '{"commits":[],"tests":[],"prs":[]}' --json > /dev/null +$FLOWCTL epic close "$ARC_EPIC" --json > /dev/null + +# Test 1: archive moves files +result="$($FLOWCTL epic archive "$ARC_EPIC" --json)" +"$PYTHON_BIN" - "$result" "$ARC_EPIC" <<'PY' +import json, sys +data = json.loads(sys.argv[1]) +ep = sys.argv[2] +assert data.get("success") == True, f"expected success: {data}" +assert data.get("count", 0) >= 3, f"expected >= 3 files moved, got {data.get('count')}" +PY +if [ $? -eq 0 ]; then + echo -e "${GREEN}✓${NC} epic archive moves files to .archive/" + PASS=$((PASS + 1)) +else + echo -e "${RED}✗${NC} epic archive failed" + FAIL=$((FAIL + 1)) +fi + +# Test 2: archived epic no longer shows in list +result="$($FLOWCTL epics --json)" +"$PYTHON_BIN" - "$result" "$ARC_EPIC" <<'PY' +import json, sys +data = json.loads(sys.argv[1]) +ep = sys.argv[2] +ids = [e["id"] for e in data.get("epics", [])] +assert ep not in ids, f"{ep} should not be in epics list: {ids}" +PY +if [ $? 
-eq 0 ]; then + echo -e "${GREEN}✓${NC} archived epic removed from epics list" + PASS=$((PASS + 1)) +else + echo -e "${RED}✗${NC} archived epic still in list" + FAIL=$((FAIL + 1)) +fi + +# Test 3: archive dir has the files +if [ -d ".flow/.archive/$ARC_EPIC" ]; then + echo -e "${GREEN}✓${NC} .flow/.archive/$ARC_EPIC/ directory exists" + PASS=$((PASS + 1)) +else + echo -e "${RED}✗${NC} archive directory missing" + FAIL=$((FAIL + 1)) +fi + +# Test 4: epic clean archives all closed epics +CLEAN_EP1_JSON="$($FLOWCTL epic create --title "Clean1" --json)" +CLEAN_EP1="$("$PYTHON_BIN" -c "import json,sys; print(json.loads(sys.argv[1])['id'])" "$CLEAN_EP1_JSON")" +$FLOWCTL task create --epic "$CLEAN_EP1" --title "T1" --json > /dev/null +$FLOWCTL start "${CLEAN_EP1}.1" --json > /dev/null +$FLOWCTL done "${CLEAN_EP1}.1" --summary "ok" --evidence '{"commits":[],"tests":[],"prs":[]}' --json > /dev/null +$FLOWCTL epic close "$CLEAN_EP1" --json > /dev/null + +result="$($FLOWCTL epic clean --json)" +"$PYTHON_BIN" - "$result" <<'PY' +import json, sys +data = json.loads(sys.argv[1]) +assert data.get("count", 0) >= 1, f"expected >= 1 archived, got {data}" +PY +if [ $? 
-eq 0 ]; then + echo -e "${GREEN}✓${NC} epic clean archives all closed epics" + PASS=$((PASS + 1)) +else + echo -e "${RED}✗${NC} epic clean failed" + FAIL=$((FAIL + 1)) +fi + +print_results diff --git a/scripts/tests/test_files.sh b/scripts/tests/test_files.sh new file mode 100755 index 00000000..16a3a031 --- /dev/null +++ b/scripts/tests/test_files.sh @@ -0,0 +1,48 @@ +#!/usr/bin/env bash +# Tests: file ownership map, lock/unlock +source "$(cd "$(dirname "$0")" && pwd)/common.sh" + +echo -e "${YELLOW}=== files tests ===${NC}" + +echo -e "${YELLOW}--- files ownership map ---${NC}" + +# Setup: epic + tasks with --files +FO_EPIC_JSON="$($FLOWCTL epic create --title "Files test" --json)" +FO_EPIC="$("$PYTHON_BIN" -c "import json,sys; print(json.loads(sys.argv[1])['id'])" "$FO_EPIC_JSON")" +$FLOWCTL task create --epic "$FO_EPIC" --title "T1" --files "src/auth.ts,src/middleware.ts" --json > /dev/null +$FLOWCTL task create --epic "$FO_EPIC" --title "T2" --files "src/routes.ts" --json > /dev/null +$FLOWCTL task create --epic "$FO_EPIC" --title "T3" --files "src/auth.ts" --json > /dev/null + +# Test 1: files stored in task JSON +result="$($FLOWCTL show "${FO_EPIC}.1" --json)" +"$PYTHON_BIN" - "$result" <<'PY' +import json, sys +d = json.loads(sys.argv[1]) +assert d.get("files") == ["src/auth.ts", "src/middleware.ts"], f"unexpected files: {d.get('files')}" +PY +if [ $? -eq 0 ]; then + echo -e "${GREEN}✓${NC} --files stored in task JSON" + PASS=$((PASS + 1)) +else + echo -e "${RED}✗${NC} --files not stored" + FAIL=$((FAIL + 1)) +fi + +# Test 2: files command detects ownership + conflicts +result="$($FLOWCTL files --epic "$FO_EPIC" --json)" +"$PYTHON_BIN" - "$result" <<'PY' +import json, sys +d = json.loads(sys.argv[1]) +assert d["file_count"] == 3, f"expected 3 files, got {d['file_count']}" +assert d["conflict_count"] == 1, f"expected 1 conflict, got {d['conflict_count']}" +assert "src/auth.ts" in d["conflicts"], f"src/auth.ts should conflict: {d['conflicts']}" +PY +if [ $? 
-eq 0 ]; then + echo -e "${GREEN}✓${NC} files command detects ownership + conflicts" + PASS=$((PASS + 1)) +else + echo -e "${RED}✗${NC} files command failed" + FAIL=$((FAIL + 1)) +fi + +print_results diff --git a/scripts/tests/test_gaps.sh b/scripts/tests/test_gaps.sh new file mode 100755 index 00000000..f34faf25 --- /dev/null +++ b/scripts/tests/test_gaps.sh @@ -0,0 +1,121 @@ +#!/usr/bin/env bash +# Tests: gap add/resolve/check, idempotency, priority filtering +source "$(cd "$(dirname "$0")" && pwd)/common.sh" + +echo -e "${YELLOW}=== gap tests ===${NC}" + +# Create epic for gap tests +EPIC1_JSON="$($FLOWCTL epic create --title "Gap Epic" --json)" +EPIC1="$(echo "$EPIC1_JSON" | "$PYTHON_BIN" -c 'import json,sys; print(json.load(sys.stdin)["id"])')" +$FLOWCTL task create --epic "$EPIC1" --title "Task 1" --json >/dev/null + +echo -e "${YELLOW}--- gap commands ---${NC}" + +# Test 1: gap add +gap_add_result="$($FLOWCTL gap add --epic "$EPIC1" --capability "Missing auth check" --priority required --source flow-gap-analyst --json)" +gap_created="$(echo "$gap_add_result" | "$PYTHON_BIN" -c 'import json,sys; print(json.load(sys.stdin).get("created", False))')" +if [[ "$gap_created" == "True" ]]; then + echo -e "${GREEN}✓${NC} gap add creates new gap" + PASS=$((PASS + 1)) +else + echo -e "${RED}✗${NC} gap add failed to create gap" + FAIL=$((FAIL + 1)) +fi + +# Test 2: gap add idempotent +gap_dup_result="$($FLOWCTL gap add --epic "$EPIC1" --capability "Missing auth check" --priority required --json)" +gap_dup_created="$(echo "$gap_dup_result" | "$PYTHON_BIN" -c 'import json,sys; print(json.load(sys.stdin).get("created", False))')" +if [[ "$gap_dup_created" == "False" ]]; then + echo -e "${GREEN}✓${NC} gap add idempotent (duplicate returns created=false)" + PASS=$((PASS + 1)) +else + echo -e "${RED}✗${NC} gap add not idempotent" + FAIL=$((FAIL + 1)) +fi + +# Test 3: gap add nice-to-have +$FLOWCTL gap add --epic "$EPIC1" --capability "Optional caching" --priority nice-to-have 
--json >/dev/null + +# Test 4: gap list +gap_list_count="$($FLOWCTL gap list --epic "$EPIC1" --json | "$PYTHON_BIN" -c 'import json,sys; print(json.load(sys.stdin).get("count", 0))')" +if [[ "$gap_list_count" == "2" ]]; then + echo -e "${GREEN}✓${NC} gap list returns correct count" + PASS=$((PASS + 1)) +else + echo -e "${RED}✗${NC} gap list count wrong (expected 2, got $gap_list_count)" + FAIL=$((FAIL + 1)) +fi + +# Test 5: gap list with status filter +gap_open_count="$($FLOWCTL gap list --epic "$EPIC1" --status open --json | "$PYTHON_BIN" -c 'import json,sys; print(json.load(sys.stdin).get("count", 0))')" +if [[ "$gap_open_count" == "2" ]]; then + echo -e "${GREEN}✓${NC} gap list --status open filter works" + PASS=$((PASS + 1)) +else + echo -e "${RED}✗${NC} gap list --status filter wrong (expected 2, got $gap_open_count)" + FAIL=$((FAIL + 1)) +fi + +# Test 6: gap check fails with open required gap +if ! $FLOWCTL gap check --epic "$EPIC1" --json >/dev/null 2>&1; then + echo -e "${GREEN}✓${NC} gap check fails with open blocking gaps (exit 1)" + PASS=$((PASS + 1)) +else + echo -e "${RED}✗${NC} gap check should fail with open blocking gaps" + FAIL=$((FAIL + 1)) +fi + +# Test 7: gap check JSON has gate=fail +gap_check_gate="$($FLOWCTL gap check --epic "$EPIC1" --json 2>/dev/null || true)" +gap_gate_val="$(echo "$gap_check_gate" | "$PYTHON_BIN" -c 'import json,sys; print(json.load(sys.stdin).get("gate", ""))')" +if [[ "$gap_gate_val" == "fail" ]]; then + echo -e "${GREEN}✓${NC} gap check gate=fail in JSON output" + PASS=$((PASS + 1)) +else + echo -e "${RED}✗${NC} gap check gate expected 'fail', got '$gap_gate_val'" + FAIL=$((FAIL + 1)) +fi + +# Test 8: gap resolve +gap_resolve_result="$($FLOWCTL gap resolve --epic "$EPIC1" --capability "Missing auth check" --evidence "Added in auth.py:42" --json)" +gap_changed="$(echo "$gap_resolve_result" | "$PYTHON_BIN" -c 'import json,sys; print(json.load(sys.stdin).get("changed", False))')" +if [[ "$gap_changed" == "True" ]]; then + 
echo -e "${GREEN}✓${NC} gap resolve marks gap as resolved" + PASS=$((PASS + 1)) +else + echo -e "${RED}✗${NC} gap resolve failed" + FAIL=$((FAIL + 1)) +fi + +# Test 9: gap resolve idempotent +gap_resolve_dup="$($FLOWCTL gap resolve --epic "$EPIC1" --capability "Missing auth check" --evidence "duplicate" --json)" +gap_dup_changed="$(echo "$gap_resolve_dup" | "$PYTHON_BIN" -c 'import json,sys; print(json.load(sys.stdin).get("changed", False))')" +if [[ "$gap_dup_changed" == "False" ]]; then + echo -e "${GREEN}✓${NC} gap resolve idempotent (already resolved)" + PASS=$((PASS + 1)) +else + echo -e "${RED}✗${NC} gap resolve not idempotent" + FAIL=$((FAIL + 1)) +fi + +# Test 10: gap check passes (only nice-to-have left) +if $FLOWCTL gap check --epic "$EPIC1" --json >/dev/null 2>&1; then + echo -e "${GREEN}✓${NC} gap check passes (nice-to-have does not block)" + PASS=$((PASS + 1)) +else + echo -e "${RED}✗${NC} gap check should pass with only nice-to-have gaps" + FAIL=$((FAIL + 1)) +fi + +# Test 11: gap check gate=pass in JSON +gap_pass_gate="$($FLOWCTL gap check --epic "$EPIC1" --json)" +gap_pass_val="$(echo "$gap_pass_gate" | "$PYTHON_BIN" -c 'import json,sys; print(json.load(sys.stdin).get("gate", ""))')" +if [[ "$gap_pass_val" == "pass" ]]; then + echo -e "${GREEN}✓${NC} gap check gate=pass in JSON output" + PASS=$((PASS + 1)) +else + echo -e "${RED}✗${NC} gap check gate expected 'pass', got '$gap_pass_val'" + FAIL=$((FAIL + 1)) +fi + +print_results diff --git a/scripts/tests/test_init.sh b/scripts/tests/test_init.sh new file mode 100755 index 00000000..8c41b2a6 --- /dev/null +++ b/scripts/tests/test_init.sh @@ -0,0 +1,97 @@ +#!/usr/bin/env bash +# Tests: idempotent init, config upgrade, config set/get, planSync config +source "$(cd "$(dirname "$0")" && pwd)/common.sh" + +echo -e "${YELLOW}=== init + config tests ===${NC}" + +echo -e "${YELLOW}--- idempotent init ---${NC}" + +# Test 1: Re-run init (no changes) +init_result="$($FLOWCTL init --json)" +init_actions="$(echo 
"$init_result" | "$PYTHON_BIN" -c 'import json,sys; print(len(json.load(sys.stdin).get("actions", [])))')" +if [[ "$init_actions" == "0" ]]; then + echo -e "${GREEN}✓${NC} init idempotent (no changes on re-run)" + PASS=$((PASS + 1)) +else + echo -e "${RED}✗${NC} init idempotent: expected 0 actions, got $init_actions" + FAIL=$((FAIL + 1)) +fi + +# Test 2: Config upgrade (old config without planSync) +echo '{"memory":{"enabled":true}}' > .flow/config.json +init_upgrade="$($FLOWCTL init --json)" +upgrade_msg="$(echo "$init_upgrade" | "$PYTHON_BIN" -c 'import json,sys; print(json.load(sys.stdin).get("message", ""))')" +if [[ "$upgrade_msg" == *"upgraded config.json"* ]]; then + echo -e "${GREEN}✓${NC} init upgrades config (adds missing keys)" + PASS=$((PASS + 1)) +else + echo -e "${RED}✗${NC} init upgrade: expected 'upgraded config.json' in message, got: $upgrade_msg" + FAIL=$((FAIL + 1)) +fi + +# Test 3: Verify existing values preserved after upgrade +memory_val="$($FLOWCTL config get memory.enabled --json | "$PYTHON_BIN" -c 'import json,sys; print(json.load(sys.stdin).get("value"))')" +if [[ "$memory_val" == "True" ]]; then + echo -e "${GREEN}✓${NC} init preserves existing config values" + PASS=$((PASS + 1)) +else + echo -e "${RED}✗${NC} init preserve: expected memory.enabled=True, got $memory_val" + FAIL=$((FAIL + 1)) +fi + +# Test 4: Verify new defaults added (memory + planSync now default to True) +plansync_val="$($FLOWCTL config get planSync.enabled --json | "$PYTHON_BIN" -c 'import json,sys; print(json.load(sys.stdin).get("value"))')" +if [[ "$plansync_val" == "True" ]]; then + echo -e "${GREEN}✓${NC} init adds new default keys" + PASS=$((PASS + 1)) +else + echo -e "${RED}✗${NC} init defaults: expected planSync.enabled=True, got $plansync_val" + FAIL=$((FAIL + 1)) +fi + +# Reset config for remaining tests +$FLOWCTL config set memory.enabled false --json >/dev/null + +echo -e "${YELLOW}--- config set/get ---${NC}" +$FLOWCTL config set memory.enabled true --json 
>/dev/null +config_json="$($FLOWCTL config get memory.enabled --json)" +"$PYTHON_BIN" - <<'PY' "$config_json" +import json, sys +data = json.loads(sys.argv[1]) +assert data["value"] == True, f"Expected True, got {data['value']}" +PY +echo -e "${GREEN}✓${NC} config set/get" +PASS=$((PASS + 1)) + +$FLOWCTL config set memory.enabled false --json >/dev/null +config_json="$($FLOWCTL config get memory.enabled --json)" +"$PYTHON_BIN" - <<'PY' "$config_json" +import json, sys +data = json.loads(sys.argv[1]) +assert data["value"] == False, f"Expected False, got {data['value']}" +PY +echo -e "${GREEN}✓${NC} config toggle" +PASS=$((PASS + 1)) + +echo -e "${YELLOW}--- planSync config ---${NC}" +$FLOWCTL config set planSync.enabled true --json >/dev/null +config_json="$($FLOWCTL config get planSync.enabled --json)" +"$PYTHON_BIN" - <<'PY' "$config_json" +import json, sys +data = json.loads(sys.argv[1]) +assert data["value"] is True, f"Expected True, got {data['value']}" +PY +echo -e "${GREEN}✓${NC} planSync config set/get" +PASS=$((PASS + 1)) + +$FLOWCTL config set planSync.enabled false --json >/dev/null +config_json="$($FLOWCTL config get planSync.enabled --json)" +"$PYTHON_BIN" - <<'PY' "$config_json" +import json, sys +data = json.loads(sys.argv[1]) +assert data["value"] is False, f"Expected False, got {data['value']}" +PY +echo -e "${GREEN}✓${NC} planSync config toggle" +PASS=$((PASS + 1)) + +print_results diff --git a/scripts/tests/test_lifecycle.sh b/scripts/tests/test_lifecycle.sh new file mode 100755 index 00000000..3cb1d7cb --- /dev/null +++ b/scripts/tests/test_lifecycle.sh @@ -0,0 +1,277 @@ +#!/usr/bin/env bash +# Tests: plan_review_status, branch_name, epic set-title, block/validate/epic close, +# duration tracking, workspace_changes evidence +source "$(cd "$(dirname "$0")" && pwd)/common.sh" + +echo -e "${YELLOW}=== lifecycle tests ===${NC}" + +# Create initial epic for plan_review/branch tests +EPIC1_JSON="$($FLOWCTL epic create --title "Epic One" --json)" 
+EPIC1="$(echo "$EPIC1_JSON" | "$PYTHON_BIN" -c 'import json,sys; print(json.load(sys.stdin)["id"])')" +$FLOWCTL task create --epic "$EPIC1" --title "Task 1" --json >/dev/null +$FLOWCTL task create --epic "$EPIC1" --title "Task 2" --json >/dev/null + +echo -e "${YELLOW}--- plan_review_status default ---${NC}" +"$PYTHON_BIN" - "$EPIC1" <<'PY' +import json, sys +from pathlib import Path +epic_id = sys.argv[1] +path = Path(f".flow/epics/{epic_id}.json") +data = json.loads(path.read_text()) +data.pop("plan_review_status", None) +data.pop("plan_reviewed_at", None) +data.pop("branch_name", None) +path.write_text(json.dumps(data, indent=2, sort_keys=True) + "\n") +PY +show_json="$($FLOWCTL show "$EPIC1" --json)" +"$PYTHON_BIN" - <<'PY' "$show_json" +import json, sys +data = json.loads(sys.argv[1]) +assert data.get("plan_review_status") is None or data.get("plan_review_status") == "unknown" +assert data.get("plan_reviewed_at") is None +assert data.get("branch_name") is None +PY +echo -e "${GREEN}✓${NC} plan_review_status defaulted" +PASS=$((PASS + 1)) + +echo -e "${YELLOW}--- branch_name set ---${NC}" +$FLOWCTL epic branch "$EPIC1" "${EPIC1}-epic" --json >/dev/null +show_json="$($FLOWCTL show "$EPIC1" --json)" +if "$PYTHON_BIN" - "$show_json" "$EPIC1" <<'PY' 2>/dev/null +import json, sys +data = json.loads(sys.argv[1]) +expected_branch = f"{sys.argv[2]}-epic" +assert data.get("branch_name") == expected_branch, f"Expected {expected_branch}, got {data.get('branch_name')}" +PY +then + echo -e "${GREEN}✓${NC} branch_name set" + PASS=$((PASS + 1)) +else + echo -e "${RED}✗${NC} branch_name set: show does not return branch_name (DB-only field)" + FAIL=$((FAIL + 1)) +fi + +echo -e "${YELLOW}--- epic set-title ---${NC}" +# Create epic with tasks for rename test +RENAME_EPIC_JSON="$($FLOWCTL epic create --title "Old Title" --json)" +RENAME_EPIC="$(echo "$RENAME_EPIC_JSON" | "$PYTHON_BIN" -c 'import json,sys; print(json.load(sys.stdin)["id"])')" +$FLOWCTL task create --epic 
"$RENAME_EPIC" --title "First task" --json >/dev/null +$FLOWCTL task create --epic "$RENAME_EPIC" --title "Second task" --json >/dev/null +# Add task dependency within epic +$FLOWCTL dep add "${RENAME_EPIC}.2" "${RENAME_EPIC}.1" --json >/dev/null + +# Rename epic +rename_result="$($FLOWCTL epic title "$RENAME_EPIC" --title "New Shiny Title" --json)" +NEW_EPIC="$(echo "$rename_result" | "$PYTHON_BIN" -c 'import json,sys; print(json.load(sys.stdin)["new_id"])')" + +# Test 1: Verify old files are gone +if [[ ! -f ".flow/epics/${RENAME_EPIC}.json" ]] && [[ ! -f ".flow/specs/${RENAME_EPIC}.md" ]]; then + echo -e "${GREEN}✓${NC} set-title removes old files" + PASS=$((PASS + 1)) +else + echo -e "${RED}✗${NC} set-title old files still exist" + FAIL=$((FAIL + 1)) +fi + +# Test 2: Verify new files exist +if [[ -f ".flow/epics/${NEW_EPIC}.json" ]] && [[ -f ".flow/specs/${NEW_EPIC}.md" ]]; then + echo -e "${GREEN}✓${NC} set-title creates new files" + PASS=$((PASS + 1)) +else + echo -e "${RED}✗${NC} set-title new files missing" + FAIL=$((FAIL + 1)) +fi + +# Test 3: Verify epic JSON content updated +"$PYTHON_BIN" - "$NEW_EPIC" <<'PY' +import json, sys +from pathlib import Path +new_id = sys.argv[1] +epic_data = json.loads(Path(f".flow/epics/{new_id}.json").read_text()) +assert epic_data["id"] == new_id, f"Epic ID not updated: {epic_data['id']}" +assert epic_data["title"] == "New Shiny Title", f"Title not updated: {epic_data['title']}" +assert new_id in epic_data["spec_path"], f"spec_path not updated: {epic_data['spec_path']}" +PY +echo -e "${GREEN}✓${NC} set-title updates epic JSON" +PASS=$((PASS + 1)) + +# Test 4: Verify task files renamed +if [[ -f ".flow/tasks/${NEW_EPIC}.1.json" ]] && [[ -f ".flow/tasks/${NEW_EPIC}.2.json" ]]; then + echo -e "${GREEN}✓${NC} set-title renames task files" + PASS=$((PASS + 1)) +else + echo -e "${RED}✗${NC} set-title task files not renamed" + FAIL=$((FAIL + 1)) +fi + +# Test 5: Verify task JSON content updated (including depends_on) 
+"$PYTHON_BIN" - "$NEW_EPIC" <<'PY' +import json, sys +from pathlib import Path +new_id = sys.argv[1] +task1_data = json.loads(Path(f".flow/tasks/{new_id}.1.json").read_text()) +task2_data = json.loads(Path(f".flow/tasks/{new_id}.2.json").read_text()) +assert task1_data["id"] == f"{new_id}.1", f"Task 1 ID not updated: {task1_data['id']}" +assert task1_data["epic"] == new_id, f"Task 1 epic not updated: {task1_data['epic']}" +assert task2_data["id"] == f"{new_id}.2", f"Task 2 ID not updated: {task2_data['id']}" +# Verify depends_on was updated +deps = task2_data.get("depends_on", []) +assert f"{new_id}.1" in deps, f"depends_on not updated: {deps}" +PY +echo -e "${GREEN}✓${NC} set-title updates task JSON and deps" +PASS=$((PASS + 1)) + +# Test 6: Verify show works with new ID +show_json="$($FLOWCTL show "$NEW_EPIC" --json)" +"$PYTHON_BIN" - "$show_json" "$NEW_EPIC" <<'PY' +import json, sys +data = json.loads(sys.argv[1]) +expected_id = sys.argv[2] +assert data["id"] == expected_id, f"Show returns wrong ID: {data['id']}" +assert data["title"] == "New Shiny Title" +PY +echo -e "${GREEN}✓${NC} set-title show works with new ID" +PASS=$((PASS + 1)) + +# Test 7: depends_on_epics update in other epics +DEP_EPIC_JSON="$($FLOWCTL epic create --title "Depends on renamed" --json)" +DEP_EPIC="$(echo "$DEP_EPIC_JSON" | "$PYTHON_BIN" -c 'import json,sys; print(json.load(sys.stdin)["id"])')" +$FLOWCTL epic add-dep "$DEP_EPIC" "$NEW_EPIC" --json >/dev/null +# Rename the dependency +rename2_result="$($FLOWCTL epic title "$NEW_EPIC" --title "Final Title" --json)" +FINAL_EPIC="$(echo "$rename2_result" | "$PYTHON_BIN" -c 'import json,sys; print(json.load(sys.stdin)["new_id"])')" +# Verify DEP_EPIC's depends_on_epics was updated +"$PYTHON_BIN" - "$DEP_EPIC" "$FINAL_EPIC" <<'PY' +import json, sys +from pathlib import Path +dep_epic = sys.argv[1] +final_epic = sys.argv[2] +dep_data = json.loads(Path(f".flow/epics/{dep_epic}.json").read_text()) +deps = dep_data.get("depends_on_epics", []) 
+assert final_epic in deps, f"depends_on_epics not updated: {deps}, expected {final_epic}"
+PY
+echo -e "${GREEN}✓${NC} set-title updates depends_on_epics in other epics"
+PASS=$((PASS + 1))
+
+echo -e "${YELLOW}--- block + validate + epic close ---${NC}"
+EPIC2_JSON="$($FLOWCTL epic create --title "Epic Two" --json)"
+EPIC2="$(echo "$EPIC2_JSON" | "$PYTHON_BIN" -c 'import json,sys; print(json.load(sys.stdin)["id"])')"
+$FLOWCTL task create --epic "$EPIC2" --title "Block me" --json >/dev/null
+$FLOWCTL task create --epic "$EPIC2" --title "Other" --json >/dev/null
+printf "Blocked by test\n" > "$TEST_DIR/reason.md"
+$FLOWCTL block "${EPIC2}.1" --reason-file "$TEST_DIR/reason.md" --json >/dev/null
+$FLOWCTL validate --epic "$EPIC2" --json >/dev/null
+echo -e "${GREEN}✓${NC} validate allows blocked"
+PASS=$((PASS + 1))
+
+set +e
+$FLOWCTL epic close "$EPIC2" --json >/dev/null
+rc=$?
+set -e
+if [[ "$rc" -ne 0 ]]; then
+  echo -e "${GREEN}✓${NC} epic close fails when blocked"
+  PASS=$((PASS + 1))
+else
+  echo -e "${RED}✗${NC} epic close should fail when blocked"
+  FAIL=$((FAIL + 1))
+fi
+
+$FLOWCTL start "${EPIC2}.1" --force --json >/dev/null
+$FLOWCTL done "${EPIC2}.1" --summary-file "$TEST_DIR/summary.md" --evidence-json "$TEST_DIR/evidence.json" --json >/dev/null
+$FLOWCTL start "${EPIC2}.2" --json >/dev/null
+$FLOWCTL done "${EPIC2}.2" --summary-file "$TEST_DIR/summary.md" --evidence-json "$TEST_DIR/evidence.json" --json >/dev/null
+$FLOWCTL epic close "$EPIC2" --json >/dev/null
+echo -e "${GREEN}✓${NC} epic close succeeds when done"
+PASS=$((PASS + 1))
+
+echo -e "\n${YELLOW}--- task duration tracking ---${NC}"
+
+# Setup: create epic + task, start and complete with a small delay
+DUR_EPIC_JSON="$($FLOWCTL epic create --title "Duration test" --json)"
+DUR_EPIC="$("$PYTHON_BIN" -c "import json,sys; print(json.loads(sys.argv[1])['id'])" "$DUR_EPIC_JSON")"
+$FLOWCTL task create --epic "$DUR_EPIC" --title "Timed task" --json > /dev/null
+$FLOWCTL start "${DUR_EPIC}.1" --json 
> /dev/null +sleep 1 +result="$($FLOWCTL done "${DUR_EPIC}.1" --summary "done" --evidence '{"commits":[],"tests":[],"prs":[]}' --json)" + +# Test 1: duration_seconds present in JSON output +"$PYTHON_BIN" - "$result" <<'PY' +import json, sys +data = json.loads(sys.argv[1]) +assert "duration_seconds" in data, f"missing duration_seconds: {data}" +assert data["duration_seconds"] >= 1, f"expected >= 1s, got {data['duration_seconds']}" +PY +if [ $? -eq 0 ]; then + echo -e "${GREEN}✓${NC} duration_seconds in done output (>= 1s)" + PASS=$((PASS + 1)) +else + echo -e "${RED}✗${NC} duration_seconds missing or too small" + FAIL=$((FAIL + 1)) +fi + +# Test 2: duration rendered in spec markdown +SPEC="$($FLOWCTL cat "${DUR_EPIC}.1")" +if echo "$SPEC" | grep -q "Duration:"; then + echo -e "${GREEN}✓${NC} duration rendered in spec evidence" + PASS=$((PASS + 1)) +else + echo -e "${RED}✗${NC} duration not in spec" + FAIL=$((FAIL + 1)) +fi + +echo -e "\n${YELLOW}--- workspace_changes evidence ---${NC}" + +# Setup: create epic + task, start it +WS_EPIC_JSON="$($FLOWCTL epic create --title "Workspace test" --json)" +WS_EPIC="$("$PYTHON_BIN" -c "import json,sys; print(json.loads(sys.argv[1])['id'])" "$WS_EPIC_JSON")" +$FLOWCTL task create --epic "$WS_EPIC" --title "WS task" --json > /dev/null +$FLOWCTL start "${WS_EPIC}.1" --json > /dev/null + +# Test 1: valid workspace_changes renders in spec +WS_EVIDENCE='{"commits":["abc"],"tests":["pytest"],"prs":[],"workspace_changes":{"baseline_rev":"aaa111bbb","final_rev":"ccc222ddd","files_changed":5,"insertions":120,"deletions":30}}' +result="$($FLOWCTL done "${WS_EPIC}.1" --summary "done" --evidence "$WS_EVIDENCE" --json)" +"$PYTHON_BIN" - "$result" <<'PY' +import json, sys +data = json.loads(sys.argv[1]) +assert data.get("status") == "done" +assert "warning" not in data, f"unexpected warning: {data}" +PY +if [ $? 
-eq 0 ]; then + echo -e "${GREEN}✓${NC} valid workspace_changes accepted without warning" + PASS=$((PASS + 1)) +else + echo -e "${RED}✗${NC} valid workspace_changes should not warn" + FAIL=$((FAIL + 1)) +fi + +# Check spec has workspace line +WS_SPEC="$($FLOWCTL cat "${WS_EPIC}.1")" +if echo "$WS_SPEC" | grep -q "5 files changed"; then + echo -e "${GREEN}✓${NC} workspace_changes rendered in spec markdown" + PASS=$((PASS + 1)) +else + echo -e "${RED}✗${NC} workspace_changes not in spec" + FAIL=$((FAIL + 1)) +fi + +# Test 2: malformed workspace_changes triggers warning +$FLOWCTL task reset "${WS_EPIC}.1" --json > /dev/null +$FLOWCTL start "${WS_EPIC}.1" --force --json > /dev/null +BAD_EVIDENCE='{"commits":[],"tests":[],"prs":[],"workspace_changes":{"baseline_rev":"aaa"}}' +result="$($FLOWCTL done "${WS_EPIC}.1" --summary "done" --evidence "$BAD_EVIDENCE" --json)" +"$PYTHON_BIN" - "$result" <<'PY' +import json, sys +data = json.loads(sys.argv[1]) +assert data.get("status") == "done" +assert "warning" in data, f"expected warning for missing keys: {data}" +assert "missing keys" in data["warning"] +PY +if [ $? 
-eq 0 ]; then + echo -e "${GREEN}✓${NC} malformed workspace_changes warns but completes" + PASS=$((PASS + 1)) +else + echo -e "${RED}✗${NC} malformed workspace_changes handling failed" + FAIL=$((FAIL + 1)) +fi + +print_results diff --git a/scripts/tests/test_memory.sh b/scripts/tests/test_memory.sh new file mode 100755 index 00000000..cb09347a --- /dev/null +++ b/scripts/tests/test_memory.sh @@ -0,0 +1,103 @@ +#!/usr/bin/env bash +# Tests: memory init/add/list, memory verify + staleness, epic close retro suggestion +source "$(cd "$(dirname "$0")" && pwd)/common.sh" + +echo -e "${YELLOW}=== memory tests ===${NC}" + +echo -e "${YELLOW}--- memory commands ---${NC}" +$FLOWCTL config set memory.enabled true --json >/dev/null +$FLOWCTL memory init --json >/dev/null +if [[ -d ".flow/memory/entries" ]]; then + echo -e "${GREEN}✓${NC} memory init creates entries dir" + PASS=$((PASS + 1)) +else + echo -e "${RED}✗${NC} memory init creates entries dir" + FAIL=$((FAIL + 1)) +fi + +add_result="$($FLOWCTL memory add pitfall "Test pitfall entry" --json)" +add_ok="$(echo "$add_result" | "$PYTHON_BIN" -c 'import json,sys; d=json.load(sys.stdin); print(d.get("success",False) and d.get("type")=="pitfall")')" +if [[ "$add_ok" == "True" ]]; then + echo -e "${GREEN}✓${NC} memory add pitfall" + PASS=$((PASS + 1)) +else + echo -e "${RED}✗${NC} memory add pitfall" + FAIL=$((FAIL + 1)) +fi + +$FLOWCTL memory add convention "Test convention" --json >/dev/null +$FLOWCTL memory add decision "Test decision" --json >/dev/null +list_json="$($FLOWCTL memory list --json)" +"$PYTHON_BIN" - <<'PY' "$list_json" +import json, sys +data = json.loads(sys.argv[1]) +assert data["success"] == True +counts = data["counts"] +assert counts.get("pitfall", 0) >= 1 +assert counts.get("convention", 0) >= 1 +assert counts.get("decision", 0) >= 1 +assert data["total"] >= 3 +PY +echo -e "${GREEN}✓${NC} memory list" +PASS=$((PASS + 1)) + +echo -e "\n${YELLOW}--- memory verify + staleness ---${NC}" + +# Add entry for 
verify test +$FLOWCTL memory add pitfall "Test pitfall for verify" --json > /dev/null + +# Test 1: memory verify updates last_verified +result="$($FLOWCTL memory verify 1 --json)" +"$PYTHON_BIN" - "$result" <<'PY' +import json, sys +data = json.loads(sys.argv[1]) +assert data.get("id") == 1 +assert "last_verified" in data +PY +if [ $? -eq 0 ]; then + echo -e "${GREEN}✓${NC} memory verify updates last_verified" + PASS=$((PASS + 1)) +else + echo -e "${RED}✗${NC} memory verify failed" + FAIL=$((FAIL + 1)) +fi + +# Test 2: memory list includes last_verified and stale flag in JSON +result="$($FLOWCTL memory list --json)" +"$PYTHON_BIN" - "$result" <<'PY' +import json, sys +data = json.loads(sys.argv[1]) +entry = data["index"][0] +assert "last_verified" in entry, f"missing last_verified: {entry}" +assert "stale" in entry, f"missing stale flag: {entry}" +assert entry["stale"] == False, f"newly verified should not be stale: {entry}" +PY +if [ $? -eq 0 ]; then + echo -e "${GREEN}✓${NC} memory list shows last_verified + stale flag" + PASS=$((PASS + 1)) +else + echo -e "${RED}✗${NC} memory list missing staleness fields" + FAIL=$((FAIL + 1)) +fi + +# Test 3: epic close includes retro_suggested +EPC_EPIC_JSON="$($FLOWCTL epic create --title "Retro prompt test" --json)" +EPC_EPIC="$("$PYTHON_BIN" -c "import json,sys; print(json.loads(sys.argv[1])['id'])" "$EPC_EPIC_JSON")" +$FLOWCTL task create --epic "$EPC_EPIC" --title "Done task" --json > /dev/null +$FLOWCTL start "${EPC_EPIC}.1" --json > /dev/null +$FLOWCTL done "${EPC_EPIC}.1" --summary "ok" --evidence '{"commits":[],"tests":[],"prs":[]}' --json > /dev/null +result="$($FLOWCTL epic close "$EPC_EPIC" --json)" +"$PYTHON_BIN" - "$result" <<'PY' +import json, sys +data = json.loads(sys.argv[1]) +assert data.get("retro_suggested") == True, f"missing retro_suggested: {data}" +PY +if [ $? 
-eq 0 ]; then + echo -e "${GREEN}✓${NC} epic close suggests retro" + PASS=$((PASS + 1)) +else + echo -e "${RED}✗${NC} epic close missing retro suggestion" + FAIL=$((FAIL + 1)) +fi + +print_results diff --git a/scripts/tests/test_misc.sh b/scripts/tests/test_misc.sh new file mode 100755 index 00000000..9212d7c2 --- /dev/null +++ b/scripts/tests/test_misc.sh @@ -0,0 +1,241 @@ +#!/usr/bin/env bash +# Tests: schema validate, codex commands, depends_on_epics, stdin support, +# task set-spec, checkpoint, sync command files +source "$(cd "$(dirname "$0")" && pwd)/common.sh" + +echo -e "${YELLOW}=== misc tests ===${NC}" + +echo -e "${YELLOW}--- schema v1 validate ---${NC}" +"$PYTHON_BIN" - <<'PY' +import json +from pathlib import Path +path = Path(".flow/meta.json") +data = json.loads(path.read_text()) +data["schema_version"] = 1 +path.write_text(json.dumps(data, indent=2, sort_keys=True) + "\n") +PY +$FLOWCTL validate --all --json >/dev/null +echo -e "${GREEN}✓${NC} schema v1 validate" +PASS=$((PASS + 1)) + +echo -e "${YELLOW}--- codex commands ---${NC}" +# Test codex check (may or may not have codex installed) +codex_check_json="$($FLOWCTL codex check --json 2>/dev/null || echo '{"success":true}')" +"$PYTHON_BIN" - <<'PY' "$codex_check_json" +import json, sys +data = json.loads(sys.argv[1]) +assert data["success"] == True, f"codex check failed: {data}" +PY +echo -e "${GREEN}✓${NC} codex check" +PASS=$((PASS + 1)) + +# Test codex impl-review help (no codex required for argparse check) +set +e +$FLOWCTL codex impl-review --help >/dev/null 2>&1 +rc=$? +set -e +if [[ "$rc" -eq 0 ]]; then + echo -e "${GREEN}✓${NC} codex impl-review --help" + PASS=$((PASS + 1)) +else + echo -e "${RED}✗${NC} codex impl-review --help" + FAIL=$((FAIL + 1)) +fi + +# Test codex plan-review help +set +e +$FLOWCTL codex plan-review --help >/dev/null 2>&1 +rc=$? 
+set -e +if [[ "$rc" -eq 0 ]]; then + echo -e "${GREEN}✓${NC} codex plan-review --help" + PASS=$((PASS + 1)) +else + echo -e "${RED}✗${NC} codex plan-review --help" + FAIL=$((FAIL + 1)) +fi + +echo -e "${YELLOW}--- depends_on_epics gate ---${NC}" +# Create epics and capture their IDs +DEP_BASE_JSON="$($FLOWCTL epic create --title "Dep base" --json)" +DEP_BASE_ID="$(echo "$DEP_BASE_JSON" | "$PYTHON_BIN" -c 'import json,sys; print(json.load(sys.stdin)["id"])')" +$FLOWCTL task create --epic "$DEP_BASE_ID" --title "Base task" --json >/dev/null +DEP_CHILD_JSON="$($FLOWCTL epic create --title "Dep child" --json)" +DEP_CHILD_ID="$(echo "$DEP_CHILD_JSON" | "$PYTHON_BIN" -c 'import json,sys; print(json.load(sys.stdin)["id"])')" +"$PYTHON_BIN" - "$DEP_CHILD_ID" "$DEP_BASE_ID" <<'PY' +import json, sys +from pathlib import Path +child_id, base_id = sys.argv[1], sys.argv[2] +path = Path(f".flow/epics/{child_id}.json") +data = json.loads(path.read_text()) +data["depends_on_epics"] = [base_id] +path.write_text(json.dumps(data, indent=2, sort_keys=True) + "\n") +PY +printf '{"epics":["%s"]}\n' "$DEP_CHILD_ID" > "$TEST_DIR/epics.json" +blocked_json="$($FLOWCTL next --epics-file "$TEST_DIR/epics.json" --json)" +"$PYTHON_BIN" - "$DEP_CHILD_ID" "$blocked_json" <<'PY' +import json, sys +child_id = sys.argv[1] +data = json.loads(sys.argv[2]) +assert data["status"] == "none" +assert data["reason"] == "blocked_by_epic_deps" +assert child_id in data.get("blocked_epics", {}) +PY +echo -e "${GREEN}✓${NC} depends_on_epics blocks" +PASS=$((PASS + 1)) + +echo -e "${YELLOW}--- stdin support ---${NC}" +STDIN_EPIC_JSON="$($FLOWCTL epic create --title "Stdin test" --json)" +STDIN_EPIC="$(echo "$STDIN_EPIC_JSON" | "$PYTHON_BIN" -c 'import json,sys; print(json.load(sys.stdin)["id"])')" +# Test epic set-plan with stdin +$FLOWCTL epic plan "$STDIN_EPIC" --file - --json <<'EOF' +# Stdin Test Plan + +## Overview +Testing stdin support for set-plan. 
+ +## Acceptance +- Works via stdin +EOF +# Verify content was written +spec_content="$($FLOWCTL cat "$STDIN_EPIC")" +echo "$spec_content" | grep -q "Testing stdin support" || { echo "stdin set-plan failed"; FAIL=$((FAIL + 1)); } +echo -e "${GREEN}✓${NC} stdin epic set-plan" +PASS=$((PASS + 1)) + +echo -e "${YELLOW}--- task set-spec combined ---${NC}" +$FLOWCTL task create --epic "$STDIN_EPIC" --title "Set-spec test" --json >/dev/null +SETSPEC_TASK="${STDIN_EPIC}.1" +# Write temp files for combined set-spec +echo 'This is the description.' > "$TEST_DIR/desc.md" +echo '- [ ] Check 1 +- [ ] Check 2' > "$TEST_DIR/acc.md" +$FLOWCTL task set-spec "$SETSPEC_TASK" --description "$TEST_DIR/desc.md" --acceptance "$TEST_DIR/acc.md" --json >/dev/null +# Verify both sections were written +task_spec="$($FLOWCTL cat "$SETSPEC_TASK")" +echo "$task_spec" | grep -q "This is the description" || { echo "set-spec description failed"; FAIL=$((FAIL + 1)); } +echo "$task_spec" | grep -q "Check 1" || { echo "set-spec acceptance failed"; FAIL=$((FAIL + 1)); } +echo -e "${GREEN}✓${NC} task set-spec combined" +PASS=$((PASS + 1)) + +echo -e "${YELLOW}--- task set-spec --file (full replacement) ---${NC}" +$FLOWCTL task create --epic "$STDIN_EPIC" --title "Full replacement test" --json >/dev/null +FULLREPLACE_TASK="${STDIN_EPIC}.2" +# Write complete spec file +cat > "$TEST_DIR/full_spec.md" << 'FULLSPEC' +# Task: Full replacement test + +## Description + +This is a completely new spec that replaces everything. 
+ +## Acceptance + +- [ ] Verify full replacement works +- [ ] Original content is gone +FULLSPEC +$FLOWCTL task set-spec "$FULLREPLACE_TASK" --file "$TEST_DIR/full_spec.md" --json >/dev/null +# Verify full replacement +full_spec="$($FLOWCTL cat "$FULLREPLACE_TASK")" +echo "$full_spec" | grep -q "completely new spec that replaces everything" || { echo "set-spec --file content failed"; FAIL=$((FAIL + 1)); } +echo "$full_spec" | grep -q "Verify full replacement works" || { echo "set-spec --file acceptance failed"; FAIL=$((FAIL + 1)); } +echo -e "${GREEN}✓${NC} task set-spec --file" +PASS=$((PASS + 1)) + +echo -e "${YELLOW}--- task set-spec --file stdin ---${NC}" +$FLOWCTL task create --epic "$STDIN_EPIC" --title "Stdin replacement test" --json >/dev/null +STDIN_REPLACE_TASK="${STDIN_EPIC}.3" +# Full replacement via stdin +$FLOWCTL task set-spec "$STDIN_REPLACE_TASK" --file - --json <<'EOF' +# Task: Stdin replacement test + +## Description + +This spec was written via stdin. + +## Acceptance + +- [ ] Stdin replacement works +EOF +# Verify stdin replacement +stdin_spec="$($FLOWCTL cat "$STDIN_REPLACE_TASK")" +echo "$stdin_spec" | grep -q "spec was written via stdin" || { echo "set-spec --file stdin failed"; FAIL=$((FAIL + 1)); } +echo -e "${GREEN}✓${NC} task set-spec --file stdin" +PASS=$((PASS + 1)) + +echo -e "${YELLOW}--- checkpoint save/restore ---${NC}" +# Save checkpoint +$FLOWCTL checkpoint save --epic "$STDIN_EPIC" --json >/dev/null +# Verify checkpoint file exists +[[ -f ".flow/.checkpoint-${STDIN_EPIC}.json" ]] || { echo "checkpoint file not created"; FAIL=$((FAIL + 1)); } +# Modify epic spec +$FLOWCTL epic plan "$STDIN_EPIC" --file - --json <<'EOF' +# Modified content +EOF +# Restore from checkpoint +$FLOWCTL checkpoint restore --epic "$STDIN_EPIC" --json >/dev/null +# Verify original content restored +restored_spec="$($FLOWCTL cat "$STDIN_EPIC")" +echo "$restored_spec" | grep -q "Testing stdin support" || { echo "checkpoint restore failed"; FAIL=$((FAIL + 
1)); } +# Delete checkpoint +$FLOWCTL checkpoint delete --epic "$STDIN_EPIC" --json >/dev/null +[[ ! -f ".flow/.checkpoint-${STDIN_EPIC}.json" ]] || { echo "checkpoint delete failed"; FAIL=$((FAIL + 1)); } +echo -e "${GREEN}✓${NC} checkpoint save/restore/delete" +PASS=$((PASS + 1)) + +echo -e "${YELLOW}--- sync command files ---${NC}" +# Test 1: Command stub exists +if [[ -f "$PLUGIN_ROOT/commands/flow-code/sync.md" ]]; then + echo -e "${GREEN}✓${NC} sync command stub exists" + PASS=$((PASS + 1)) +else + echo -e "${RED}✗${NC} sync command stub missing" + FAIL=$((FAIL + 1)) +fi + +# Test 2: Skill file exists +if [[ -f "$PLUGIN_ROOT/skills/flow-code-sync/SKILL.md" ]]; then + echo -e "${GREEN}✓${NC} sync skill exists" + PASS=$((PASS + 1)) +else + echo -e "${RED}✗${NC} sync skill missing" + FAIL=$((FAIL + 1)) +fi + +# Test 3: Command invokes skill +if grep -q "flow-code-sync" "$PLUGIN_ROOT/commands/flow-code/sync.md"; then + echo -e "${GREEN}✓${NC} sync command invokes skill" + PASS=$((PASS + 1)) +else + echo -e "${RED}✗${NC} sync command doesn't reference skill" + FAIL=$((FAIL + 1)) +fi + +# Test 4: Skill has correct frontmatter +if grep -q "name: flow-code-sync" "$PLUGIN_ROOT/skills/flow-code-sync/SKILL.md"; then + echo -e "${GREEN}✓${NC} sync skill has correct name" + PASS=$((PASS + 1)) +else + echo -e "${RED}✗${NC} sync skill missing name frontmatter" + FAIL=$((FAIL + 1)) +fi + +# Test 5: Skill mentions plan-sync agent +if grep -q "plan-sync" "$PLUGIN_ROOT/skills/flow-code-sync/SKILL.md"; then + echo -e "${GREEN}✓${NC} sync skill references plan-sync agent" + PASS=$((PASS + 1)) +else + echo -e "${RED}✗${NC} sync skill doesn't reference plan-sync agent" + FAIL=$((FAIL + 1)) +fi + +# Test 6: Skill supports dry-run +if grep -qi "dry.run\|dry-run\|DRY_RUN" "$PLUGIN_ROOT/skills/flow-code-sync/SKILL.md"; then + echo -e "${GREEN}✓${NC} sync skill supports dry-run" + PASS=$((PASS + 1)) +else + echo -e "${RED}✗${NC} sync skill missing dry-run support" + FAIL=$((FAIL + 1)) 
+fi + +print_results diff --git a/scripts/tests/test_restart.sh b/scripts/tests/test_restart.sh new file mode 100755 index 00000000..cf1a9246 --- /dev/null +++ b/scripts/tests/test_restart.sh @@ -0,0 +1,254 @@ +#!/usr/bin/env bash +# Tests: restart command, status --interrupted, epic auto-execute +source "$(cd "$(dirname "$0")" && pwd)/common.sh" + +echo -e "${YELLOW}=== restart + status tests ===${NC}" + +echo -e "\n${YELLOW}--- restart command ---${NC}" + +# Setup: create epic + 3 tasks with deps: .1 -> .2 -> .3 +RST_EPIC_JSON="$($FLOWCTL epic create --title "Restart test" --json)" +RST_EPIC="$("$PYTHON_BIN" -c "import json,sys; print(json.loads(sys.argv[1])['id'])" "$RST_EPIC_JSON")" +$FLOWCTL task create --epic "$RST_EPIC" --title "Task 1" --json > /dev/null +$FLOWCTL task create --epic "$RST_EPIC" --title "Task 2" --deps "${RST_EPIC}.1" --json > /dev/null +$FLOWCTL task create --epic "$RST_EPIC" --title "Task 3" --deps "${RST_EPIC}.2" --json > /dev/null + +# Complete tasks 1, 2, 3 +$FLOWCTL start "${RST_EPIC}.1" --json > /dev/null +$FLOWCTL done "${RST_EPIC}.1" --summary "done" --evidence '{"commits":[],"tests":[],"prs":[]}' --json > /dev/null +$FLOWCTL start "${RST_EPIC}.2" --json > /dev/null +$FLOWCTL done "${RST_EPIC}.2" --summary "done" --evidence '{"commits":[],"tests":[],"prs":[]}' --json > /dev/null +$FLOWCTL start "${RST_EPIC}.3" --json > /dev/null +$FLOWCTL done "${RST_EPIC}.3" --summary "done" --evidence '{"commits":[],"tests":[],"prs":[]}' --json > /dev/null + +# Test 1: restart --dry-run shows what would be reset +result="$($FLOWCTL restart "${RST_EPIC}.1" --dry-run --json)" +"$PYTHON_BIN" - "$result" "$RST_EPIC" <<'PY' +import json, sys +data = json.loads(sys.argv[1]) +ep = sys.argv[2] +assert data.get("dry_run") == True, f"expected dry_run=True, got {data}" +assert f"{ep}.1" in data.get("would_reset", []), f"{ep}.1 not in would_reset: {data}" +assert f"{ep}.2" in data.get("would_reset", []), f"{ep}.2 not in would_reset: {data}" +assert f"{ep}.3" 
in data.get("would_reset", []), f"{ep}.3 not in would_reset: {data}" +PY +if [ $? -eq 0 ]; then + echo -e "${GREEN}✓${NC} restart --dry-run shows target + downstream" + PASS=$((PASS + 1)) +else + echo -e "${RED}✗${NC} restart --dry-run failed" + FAIL=$((FAIL + 1)) +fi + +# Test 2: restart actually resets target + downstream +result="$($FLOWCTL restart "${RST_EPIC}.1" --json)" +"$PYTHON_BIN" - "$result" "$RST_EPIC" <<'PY' +import json, sys +data = json.loads(sys.argv[1]) +ep = sys.argv[2] +assert data.get("success") == True, f"expected success, got {data}" +assert f"{ep}.1" in data.get("reset", []), f"{ep}.1 not in reset: {data}" +assert f"{ep}.2" in data.get("reset", []), f"{ep}.2 not in reset: {data}" +assert f"{ep}.3" in data.get("reset", []), f"{ep}.3 not in reset: {data}" +PY +if [ $? -eq 0 ]; then + echo -e "${GREEN}✓${NC} restart cascades to downstream dependents" + PASS=$((PASS + 1)) +else + echo -e "${RED}✗${NC} restart cascade failed" + FAIL=$((FAIL + 1)) +fi + +# Test 3: verify tasks are back to todo +result="$($FLOWCTL show "${RST_EPIC}.1" --json)" +"$PYTHON_BIN" - "$result" <<'PY' +import json, sys +data = json.loads(sys.argv[1]) +assert data.get("status") == "todo", f"expected todo, got {data.get('status')}" +PY +if [ $? -eq 0 ]; then + echo -e "${GREEN}✓${NC} restarted task status is todo" + PASS=$((PASS + 1)) +else + echo -e "${RED}✗${NC} restarted task not todo" + FAIL=$((FAIL + 1)) +fi + +# Test 4: restart already-todo is no-op +result="$($FLOWCTL restart "${RST_EPIC}.1" --json)" +"$PYTHON_BIN" - "$result" <<'PY' +import json, sys +data = json.loads(sys.argv[1]) +assert data.get("success") == True +assert len(data.get("reset", [])) == 0, f"expected empty reset, got {data}" +PY +if [ $? 
-eq 0 ]; then + echo -e "${GREEN}✓${NC} restart already-todo is idempotent no-op" + PASS=$((PASS + 1)) +else + echo -e "${RED}✗${NC} restart idempotent check failed" + FAIL=$((FAIL + 1)) +fi + +# Test 5: restart rejects in_progress without --force +$FLOWCTL start "${RST_EPIC}.1" --json > /dev/null +set +e +result="$($FLOWCTL restart "${RST_EPIC}.1" --json 2>&1)" +rc=$? +set -e +"$PYTHON_BIN" - "$result" "$rc" <<'PY' +import json, sys +data = json.loads(sys.argv[1]) +rc = int(sys.argv[2]) +assert rc != 0, f"expected non-zero exit, got {rc}" +assert "in progress" in data.get("error", "").lower() or "in_progress" in str(data).lower(), f"expected in_progress error: {data}" +PY +if [ $? -eq 0 ]; then + echo -e "${GREEN}✓${NC} restart blocks on in_progress without --force" + PASS=$((PASS + 1)) +else + echo -e "${RED}✗${NC} restart should block in_progress" + FAIL=$((FAIL + 1)) +fi + +# Test 6: restart --force overrides in_progress +result="$($FLOWCTL restart "${RST_EPIC}.1" --force --json)" +"$PYTHON_BIN" - "$result" "$RST_EPIC" <<'PY' +import json, sys +data = json.loads(sys.argv[1]) +ep = sys.argv[2] +assert data.get("success") == True +assert f"{ep}.1" in data.get("reset", []) +PY +if [ $? 
-eq 0 ]; then + echo -e "${GREEN}✓${NC} restart --force overrides in_progress" + PASS=$((PASS + 1)) +else + echo -e "${RED}✗${NC} restart --force failed" + FAIL=$((FAIL + 1)) +fi + +# ── status --interrupted ── +echo -e "\n${YELLOW}=== status --interrupted ===${NC}" + +# Create a second epic with todo tasks to test interrupted detection +EPIC_INT_JSON="$($FLOWCTL epic create --title "Interrupted test epic" --json)" +EPIC_INT="$(echo "$EPIC_INT_JSON" | "$PYTHON_BIN" -c 'import json,sys; print(json.load(sys.stdin)["id"])')" +$FLOWCTL task create --epic "$EPIC_INT" --title "Interrupted task 1" --json > /dev/null +$FLOWCTL task create --epic "$EPIC_INT" --title "Interrupted task 2" --json > /dev/null + +# Test --interrupted --json detects epic with todo tasks +int_json="$($FLOWCTL status --interrupted --json)" +int_count="$(echo "$int_json" | "$PYTHON_BIN" -c ' +import json, sys +data = json.load(sys.stdin) +epics = data.get("interrupted", []) +matching = [e for e in epics if e["id"] == "'"$EPIC_INT"'"] +print(len(matching)) +')" +if [[ "$int_count" == "1" ]]; then + echo -e "${GREEN}✓${NC} status --interrupted detects epic with todo tasks" + PASS=$((PASS + 1)) +else + echo -e "${RED}✗${NC} status --interrupted did not detect epic (found $int_count)" + FAIL=$((FAIL + 1)) +fi + +# Verify suggested command is included +int_suggested="$(echo "$int_json" | "$PYTHON_BIN" -c ' +import json, sys +data = json.load(sys.stdin) +epics = data.get("interrupted", []) +matching = [e for e in epics if e["id"] == "'"$EPIC_INT"'"] +print(matching[0].get("suggested", "") if matching else "") +')" +if [[ "$int_suggested" == "/flow-code:work $EPIC_INT" ]]; then + echo -e "${GREEN}✓${NC} status --interrupted includes suggested resume command" + PASS=$((PASS + 1)) +else + echo -e "${RED}✗${NC} status --interrupted wrong suggested (got: $int_suggested)" + FAIL=$((FAIL + 1)) +fi + +# Verify task counts in interrupted output +int_todo="$(echo "$int_json" | "$PYTHON_BIN" -c ' +import json, sys 
+data = json.load(sys.stdin) +epics = data.get("interrupted", []) +matching = [e for e in epics if e["id"] == "'"$EPIC_INT"'"] +print(matching[0].get("todo", 0) if matching else 0) +')" +if [[ "$int_todo" == "2" ]]; then + echo -e "${GREEN}✓${NC} status --interrupted reports correct todo count" + PASS=$((PASS + 1)) +else + echo -e "${RED}✗${NC} status --interrupted wrong todo count (expected 2, got $int_todo)" + FAIL=$((FAIL + 1)) +fi + +# ── epic set-auto-execute ── +echo -e "\n${YELLOW}=== epic set-auto-execute ===${NC}" + +# Create an epic with tasks for auto-execute testing +EPIC_AE_JSON="$($FLOWCTL epic create --title "Auto execute test" --json)" +EPIC_AE="$(echo "$EPIC_AE_JSON" | "$PYTHON_BIN" -c 'import json,sys; print(json.load(sys.stdin)["id"])')" +$FLOWCTL task create --epic "$EPIC_AE" --title "AE task 1" --json > /dev/null +$FLOWCTL task create --epic "$EPIC_AE" --title "AE task 2" --json > /dev/null + +# Set pending marker +ae_pending="$($FLOWCTL epic auto-exec "$EPIC_AE" --pending --json)" +ae_pending_val="$(echo "$ae_pending" | "$PYTHON_BIN" -c 'import json,sys; print(json.load(sys.stdin)["auto_execute_pending"])')" +if [[ "$ae_pending_val" == "True" ]]; then + echo -e "${GREEN}✓${NC} set-auto-execute --pending sets marker" + PASS=$((PASS + 1)) +else + echo -e "${RED}✗${NC} set-auto-execute --pending: expected True, got $ae_pending_val" + FAIL=$((FAIL + 1)) +fi + +# Verify --interrupted shows it with reason "planned_not_started" +ae_int_json="$($FLOWCTL status --interrupted --json)" +ae_reason="$(echo "$ae_int_json" | "$PYTHON_BIN" -c ' +import json, sys +data = json.load(sys.stdin) +epics = data.get("interrupted", []) +matching = [e for e in epics if e["id"] == "'"$EPIC_AE"'"] +print(matching[0].get("reason", "") if matching else "") +')" +if [[ "$ae_reason" == "planned_not_started" ]]; then + echo -e "${GREEN}✓${NC} --interrupted shows planned_not_started reason for pending epic" + PASS=$((PASS + 1)) +else + echo -e "${RED}✗${NC} --interrupted wrong 
reason (expected planned_not_started, got: $ae_reason)" + FAIL=$((FAIL + 1)) +fi + +# Clear marker with --done +ae_done="$($FLOWCTL epic auto-exec "$EPIC_AE" --done --json)" +ae_done_val="$(echo "$ae_done" | "$PYTHON_BIN" -c 'import json,sys; print(json.load(sys.stdin)["auto_execute_pending"])')" +if [[ "$ae_done_val" == "False" ]]; then + echo -e "${GREEN}✓${NC} set-auto-execute --done clears marker" + PASS=$((PASS + 1)) +else + echo -e "${RED}✗${NC} set-auto-execute --done: expected False, got $ae_done_val" + FAIL=$((FAIL + 1)) +fi + +# Verify --interrupted now shows "partially_complete" reason (marker cleared) +ae_int2_json="$($FLOWCTL status --interrupted --json)" +ae_reason2="$(echo "$ae_int2_json" | "$PYTHON_BIN" -c ' +import json, sys +data = json.load(sys.stdin) +epics = data.get("interrupted", []) +matching = [e for e in epics if e["id"] == "'"$EPIC_AE"'"] +print(matching[0].get("reason", "") if matching else "") +')" +if [[ "$ae_reason2" == "partially_complete" ]]; then + echo -e "${GREEN}✓${NC} --interrupted shows partially_complete after marker cleared" + PASS=$((PASS + 1)) +else + echo -e "${RED}✗${NC} --interrupted wrong reason after clear (expected partially_complete, got: $ae_reason2)" + FAIL=$((FAIL + 1)) +fi + +print_results diff --git a/scripts/tests/test_review.sh b/scripts/tests/test_review.sh new file mode 100755 index 00000000..b3dfb428 --- /dev/null +++ b/scripts/tests/test_review.sh @@ -0,0 +1,271 @@ +#!/usr/bin/env bash +# Tests: parse_receipt_path, review-backend --compare, review receipt archival, parse-findings +source "$(cd "$(dirname "$0")" && pwd)/common.sh" + +echo -e "${YELLOW}=== review tests ===${NC}" + +echo -e "${YELLOW}--- parse_receipt_path ---${NC}" +# Test receipt path parsing for Ralph gating (both legacy and new fn-N-xxx formats) +"$PYTHON_BIN" - "$PLUGIN_ROOT/hooks" <<'PY' +import sys +hooks_dir = sys.argv[1] +sys.path.insert(0, hooks_dir) +from importlib.util import spec_from_file_location, module_from_spec +spec = 
spec_from_file_location("ralph_guard", f"{hooks_dir}/ralph-guard.py") +guard = module_from_spec(spec) +spec.loader.exec_module(guard) + +# Test plan receipt parsing (legacy format) +rtype, rid = guard.parse_receipt_path("/tmp/receipts/plan-fn-1.json") +assert rtype == "plan_review", f"Expected plan_review, got {rtype}" +assert rid == "fn-1", f"Expected fn-1, got {rid}" + +# Test impl receipt parsing (legacy format) +rtype, rid = guard.parse_receipt_path("/tmp/receipts/impl-fn-1.3.json") +assert rtype == "impl_review", f"Expected impl_review, got {rtype}" +assert rid == "fn-1.3", f"Expected fn-1.3, got {rid}" + +# Test plan receipt parsing (new fn-N-xxx format) +rtype, rid = guard.parse_receipt_path("/tmp/receipts/plan-fn-5-x7k.json") +assert rtype == "plan_review", f"Expected plan_review, got {rtype}" +assert rid == "fn-5-x7k", f"Expected fn-5-x7k, got {rid}" + +# Test impl receipt parsing (new fn-N-xxx format) +rtype, rid = guard.parse_receipt_path("/tmp/receipts/impl-fn-5-x7k.3.json") +assert rtype == "impl_review", f"Expected impl_review, got {rtype}" +assert rid == "fn-5-x7k.3", f"Expected fn-5-x7k.3, got {rid}" + +# Test completion receipt parsing (legacy format) +rtype, rid = guard.parse_receipt_path("/tmp/receipts/completion-fn-2.json") +assert rtype == "completion_review", f"Expected completion_review, got {rtype}" +assert rid == "fn-2", f"Expected fn-2, got {rid}" + +# Test completion receipt parsing (new fn-N-xxx format) +rtype, rid = guard.parse_receipt_path("/tmp/receipts/completion-fn-7-abc.json") +assert rtype == "completion_review", f"Expected completion_review, got {rtype}" +assert rid == "fn-7-abc", f"Expected fn-7-abc, got {rid}" + +# Test fallback +rtype, rid = guard.parse_receipt_path("/tmp/unknown.json") +assert rtype == "impl_review" +assert rid == "UNKNOWN" +PY +echo -e "${GREEN}✓${NC} parse_receipt_path works" +PASS=$((PASS + 1)) + +echo -e "\n${YELLOW}--- review-backend --compare ---${NC}" + +# Create mock receipt files +cat > 
"$TEST_DIR/receipt-codex.json" << 'EOF' +{"type":"impl_review","id":"fn-1.1","mode":"codex","verdict":"SHIP","timestamp":"2026-03-30T00:00:00Z","review":"Looks good"} +EOF +cat > "$TEST_DIR/receipt-rp.json" << 'EOF' +{"type":"impl_review","id":"fn-1.1","mode":"rp","verdict":"SHIP","timestamp":"2026-03-30T00:00:00Z","review":"LGTM"} +EOF +cat > "$TEST_DIR/receipt-conflict.json" << 'EOF' +{"type":"impl_review","id":"fn-1.1","mode":"rp","verdict":"NEEDS_WORK","timestamp":"2026-03-30T00:00:00Z","review":"Needs fixes"} +EOF + +# Test 1: compare with consensus (both SHIP) +result="$($FLOWCTL review-backend --compare "$TEST_DIR/receipt-codex.json,$TEST_DIR/receipt-rp.json" --json)" +"$PYTHON_BIN" - "$result" <<'PY' +import json, sys +data = json.loads(sys.argv[1]) +assert data.get("consensus") == "SHIP", f"expected SHIP consensus, got {data}" +assert data.get("has_conflict") == False, f"expected no conflict: {data}" +assert data.get("reviews") == 2, f"expected 2 reviews: {data}" +PY +if [ $? -eq 0 ]; then + echo -e "${GREEN}✓${NC} review-backend --compare consensus detected" + PASS=$((PASS + 1)) +else + echo -e "${RED}✗${NC} review-backend --compare consensus failed" + FAIL=$((FAIL + 1)) +fi + +# Test 2: compare with conflict (SHIP vs NEEDS_WORK) +result="$($FLOWCTL review-backend --compare "$TEST_DIR/receipt-codex.json,$TEST_DIR/receipt-conflict.json" --json)" +"$PYTHON_BIN" - "$result" <<'PY' +import json, sys +data = json.loads(sys.argv[1]) +assert data.get("has_conflict") == True, f"expected conflict: {data}" +assert data.get("consensus") is None, f"expected no consensus: {data}" +PY +if [ $? 
-eq 0 ]; then + echo -e "${GREEN}✓${NC} review-backend --compare conflict detected" + PASS=$((PASS + 1)) +else + echo -e "${RED}✗${NC} review-backend --compare conflict failed" + FAIL=$((FAIL + 1)) +fi + +echo -e "\n${YELLOW}--- review receipt archival ---${NC}" + +# Setup: create epic + task +RR_EPIC_JSON="$($FLOWCTL epic create --title "Receipt test" --json)" +RR_EPIC="$("$PYTHON_BIN" -c "import json,sys; print(json.loads(sys.argv[1])['id'])" "$RR_EPIC_JSON")" +$FLOWCTL task create --epic "$RR_EPIC" --title "Task with review" --json > /dev/null +$FLOWCTL start "${RR_EPIC}.1" --json > /dev/null + +# Test 1: done with review_receipt archives to .flow/reviews/ +RR_EVIDENCE="{\"commits\":[\"x1\"],\"tests\":[],\"prs\":[],\"review_receipt\":{\"type\":\"impl_review\",\"id\":\"${RR_EPIC}.1\",\"mode\":\"codex\",\"verdict\":\"SHIP\",\"timestamp\":\"2026-03-30T00:00:00Z\",\"review\":\"LGTM\"}}" +$FLOWCTL done "${RR_EPIC}.1" --summary "done" --evidence "$RR_EVIDENCE" --json > /dev/null +if [ -f ".flow/reviews/impl_review-${RR_EPIC}.1-codex.json" ]; then + echo -e "${GREEN}✓${NC} review receipt archived to .flow/reviews/" + PASS=$((PASS + 1)) +else + echo -e "${RED}✗${NC} review receipt not archived" + FAIL=$((FAIL + 1)) +fi + +# Add a second receipt (simulate rp review) +cat > ".flow/reviews/impl_review-${RR_EPIC}.1-rp.json" << 'EOF' +{"type":"impl_review","id":"PLACEHOLDER","mode":"rp","verdict":"SHIP","timestamp":"2026-03-30T00:01:00Z","review":"Looks good"} +EOF + +# Test 2: review-backend --epic auto-discovers receipts +result="$($FLOWCTL review-backend --epic "$RR_EPIC" --json)" +"$PYTHON_BIN" - "$result" <<'PY' +import json, sys +data = json.loads(sys.argv[1]) +assert data.get("reviews") == 2, f"expected 2 reviews, got {data.get('reviews')}" +assert data.get("consensus") == "SHIP", f"expected SHIP consensus: {data}" +PY +if [ $? 
-eq 0 ]; then + echo -e "${GREEN}✓${NC} review-backend --epic auto-discovers receipts" + PASS=$((PASS + 1)) +else + echo -e "${RED}✗${NC} review-backend --epic failed" + FAIL=$((FAIL + 1)) +fi + +echo -e "${YELLOW}--- parse-findings ---${NC}" + +# Test: valid tag +FINDINGS_FILE="$TEST_DIR/findings_valid.txt" +cat > "$FINDINGS_FILE" <<'FINDINGS_EOF' +Some review preamble text. + + +[ + { + "title": "Missing input validation", + "severity": "critical", + "location": "src/auth.py:42", + "recommendation": "Add input sanitization" + }, + { + "title": "Unused import", + "severity": "nitpick", + "location": "src/utils.py:1", + "recommendation": "Remove unused import" + } +] + + +More review text after. +FINDINGS_EOF + +pf_result="$($FLOWCTL parse-findings --file "$FINDINGS_FILE" --json)" +pf_count="$(echo "$pf_result" | "$PYTHON_BIN" -c 'import json,sys; print(json.load(sys.stdin).get("count", 0))')" +if [[ "$pf_count" == "2" ]]; then + echo -e "${GREEN}✓${NC} parse-findings extracts findings from tag" + PASS=$((PASS + 1)) +else + echo -e "${RED}✗${NC} parse-findings count wrong (expected 2, got $pf_count)" + FAIL=$((FAIL + 1)) +fi + +# Test: missing tag -> graceful empty +FINDINGS_EMPTY="$TEST_DIR/findings_empty.txt" +echo "No findings here, just plain review text." 
> "$FINDINGS_EMPTY" + +pf_empty="$($FLOWCTL parse-findings --file "$FINDINGS_EMPTY" --json)" +pf_empty_count="$(echo "$pf_empty" | "$PYTHON_BIN" -c 'import json,sys; print(json.load(sys.stdin).get("count", 0))')" +pf_empty_warns="$(echo "$pf_empty" | "$PYTHON_BIN" -c 'import json,sys; w=json.load(sys.stdin).get("warnings",[]); print(len(w))')" +if [[ "$pf_empty_count" == "0" ]] && [[ "$pf_empty_warns" -ge 1 ]]; then + echo -e "${GREEN}✓${NC} parse-findings gracefully handles missing tags" + PASS=$((PASS + 1)) +else + echo -e "${RED}✗${NC} parse-findings missing tag handling wrong (count=$pf_empty_count, warns=$pf_empty_warns)" + FAIL=$((FAIL + 1)) +fi + +# Test: malformed JSON (trailing commas) +FINDINGS_MALFORMED="$TEST_DIR/findings_malformed.txt" +cat > "$FINDINGS_MALFORMED" <<'FINDINGS_EOF' + +[ + { + "title": "Trailing comma issue", + "severity": "major", + "location": "src/app.py:10", + "recommendation": "Fix the trailing comma", + }, +] + +FINDINGS_EOF + +pf_mal="$($FLOWCTL parse-findings --file "$FINDINGS_MALFORMED" --json)" +pf_mal_count="$(echo "$pf_mal" | "$PYTHON_BIN" -c 'import json,sys; print(json.load(sys.stdin).get("count", 0))')" +if [[ "$pf_mal_count" == "1" ]]; then + echo -e "${GREEN}✓${NC} parse-findings handles malformed JSON (trailing commas)" + PASS=$((PASS + 1)) +else + echo -e "${RED}✗${NC} parse-findings malformed JSON handling wrong (expected 1, got $pf_mal_count)" + FAIL=$((FAIL + 1)) +fi + +# Test: --register auto gap add +# Need an epic for gap registration +REG_EPIC_JSON="$($FLOWCTL epic create --title "Findings register" --json)" +REG_EPIC="$(echo "$REG_EPIC_JSON" | "$PYTHON_BIN" -c 'import json,sys; print(json.load(sys.stdin)["id"])')" +$FLOWCTL task create --epic "$REG_EPIC" --title "Task 1" --json >/dev/null + +FINDINGS_REG="$TEST_DIR/findings_register.txt" +cat > "$FINDINGS_REG" <<'FINDINGS_EOF' + +[ + { + "title": "SQL injection vulnerability", + "severity": "critical", + "location": "src/db.py:99", + "recommendation": "Use 
parameterized queries" + }, + { + "title": "Minor typo in comment", + "severity": "minor", + "location": "src/main.py:5", + "recommendation": "Fix typo" + } +] + +FINDINGS_EOF + +pf_reg="$($FLOWCTL parse-findings --file "$FINDINGS_REG" --epic "$REG_EPIC" --register --source plan-review --json)" +pf_reg_registered="$(echo "$pf_reg" | "$PYTHON_BIN" -c 'import json,sys; print(json.load(sys.stdin).get("registered", 0))')" +if [[ "$pf_reg_registered" == "1" ]]; then + echo -e "${GREEN}✓${NC} parse-findings --register adds critical/major gaps (skips minor)" + PASS=$((PASS + 1)) +else + echo -e "${RED}✗${NC} parse-findings --register wrong count (expected 1, got $pf_reg_registered)" + FAIL=$((FAIL + 1)) +fi + +# Verify the gap was actually created +gap_reg_check="$($FLOWCTL gap list --epic "$REG_EPIC" --json | "$PYTHON_BIN" -c ' +import json, sys +data = json.load(sys.stdin) +gaps = data.get("gaps", []) +sql_gaps = [g for g in gaps if "SQL injection" in g.get("capability", "")] +print(len(sql_gaps)) +')" +if [[ "$gap_reg_check" == "1" ]]; then + echo -e "${GREEN}✓${NC} parse-findings --register actually created the gap" + PASS=$((PASS + 1)) +else + echo -e "${RED}✗${NC} parse-findings --register gap not found in registry (found $gap_reg_check)" + FAIL=$((FAIL + 1)) +fi + +print_results diff --git a/scripts/tests/test_scheduling.sh b/scripts/tests/test_scheduling.sh new file mode 100755 index 00000000..1b5de636 --- /dev/null +++ b/scripts/tests/test_scheduling.sh @@ -0,0 +1,122 @@ +#!/usr/bin/env bash +# Tests: next plan/work/none, priority scheduling, artifact file resilience +source "$(cd "$(dirname "$0")" && pwd)/common.sh" + +echo -e "${YELLOW}=== scheduling tests ===${NC}" + +echo -e "${YELLOW}--- next: plan/work/none + priority ---${NC}" +# Capture epic ID from create output (fn-N-xxx format) +EPIC1_JSON="$($FLOWCTL epic create --title "Epic One" --json)" +EPIC1="$(echo "$EPIC1_JSON" | "$PYTHON_BIN" -c 'import json,sys; print(json.load(sys.stdin)["id"])')" +$FLOWCTL 
task create --epic "$EPIC1" --title "Low pri" --priority 5 --json >/dev/null +$FLOWCTL task create --epic "$EPIC1" --title "High pri" --priority 1 --json >/dev/null + +plan_json="$($FLOWCTL next --require-plan-review --json)" +"$PYTHON_BIN" - "$plan_json" "$EPIC1" <<'PY' +import json, sys +data = json.loads(sys.argv[1]) +expected_epic = sys.argv[2] +assert data["status"] == "plan" +assert data["epic"] == expected_epic, f"Expected {expected_epic}, got {data['epic']}" +PY +echo -e "${GREEN}✓${NC} next plan" +PASS=$((PASS + 1)) + +$FLOWCTL epic review "$EPIC1" ship --json >/dev/null +work_json="$($FLOWCTL next --json)" +"$PYTHON_BIN" - "$work_json" "$EPIC1" <<'PY' +import json, sys +data = json.loads(sys.argv[1]) +expected_epic = sys.argv[2] +assert data["status"] == "work" +assert data["task"] == f"{expected_epic}.2", f"Expected {expected_epic}.2, got {data['task']}" +PY +echo -e "${GREEN}✓${NC} next work priority" +PASS=$((PASS + 1)) + +$FLOWCTL start "${EPIC1}.2" --json >/dev/null +$FLOWCTL done "${EPIC1}.2" --summary-file "$TEST_DIR/summary.md" --evidence-json "$TEST_DIR/evidence.json" --json >/dev/null +$FLOWCTL start "${EPIC1}.1" --json >/dev/null +$FLOWCTL done "${EPIC1}.1" --summary-file "$TEST_DIR/summary.md" --evidence-json "$TEST_DIR/evidence.json" --json >/dev/null +none_json="$($FLOWCTL next --json)" +"$PYTHON_BIN" - <<'PY' "$none_json" +import json, sys +data = json.loads(sys.argv[1]) +assert data["status"] == "none" +PY +echo -e "${GREEN}✓${NC} next none" +PASS=$((PASS + 1)) + +echo -e "${YELLOW}--- artifact files in tasks dir (GH-21) ---${NC}" +# Create artifact files that match glob but aren't valid task files +# This simulates Claude writing evidence/summary files to .flow/tasks/ +cat > ".flow/tasks/${EPIC1}.1-evidence.json" << 'EOF' +{"commits":["abc123"],"tests":["npm test"],"prs":[]} +EOF +cat > ".flow/tasks/${EPIC1}.1-summary.json" << 'EOF' +{"summary":"Task completed successfully"} +EOF +# Test that next still works with artifact files present 
+set +e +next_result="$($FLOWCTL next --json 2>&1)" +next_rc=$? +set -e +if [[ "$next_rc" -eq 0 ]]; then + echo -e "${GREEN}✓${NC} next ignores artifact files" + PASS=$((PASS + 1)) +else + echo -e "${RED}✗${NC} next crashes on artifact files: $next_result" + FAIL=$((FAIL + 1)) +fi +# Test that list still works +set +e +list_result="$($FLOWCTL list --json 2>&1)" +list_rc=$? +set -e +if [[ "$list_rc" -eq 0 ]]; then + echo -e "${GREEN}✓${NC} list ignores artifact files" + PASS=$((PASS + 1)) +else + echo -e "${RED}✗${NC} list crashes on artifact files: $list_result" + FAIL=$((FAIL + 1)) +fi +# Test that ready still works +set +e +ready_result="$($FLOWCTL ready --epic "$EPIC1" --json 2>&1)" +ready_rc=$? +set -e +if [[ "$ready_rc" -eq 0 ]]; then + echo -e "${GREEN}✓${NC} ready ignores artifact files" + PASS=$((PASS + 1)) +else + echo -e "${RED}✗${NC} ready crashes on artifact files: $ready_result" + FAIL=$((FAIL + 1)) +fi +# Test that show (with tasks) still works +set +e +show_result="$($FLOWCTL show "$EPIC1" --json 2>&1)" +show_rc=$? +set -e +if [[ "$show_rc" -eq 0 ]]; then + echo -e "${GREEN}✓${NC} show ignores artifact files" + PASS=$((PASS + 1)) +else + echo -e "${RED}✗${NC} show crashes on artifact files: $show_result" + FAIL=$((FAIL + 1)) +fi +# Test that validate still works +set +e +validate_result="$($FLOWCTL validate --epic "$EPIC1" --json 2>&1)" +validate_rc=$? 
+set -e +if [[ "$validate_rc" -eq 0 ]]; then + echo -e "${GREEN}✓${NC} validate ignores artifact files" + PASS=$((PASS + 1)) +else + echo -e "${RED}✗${NC} validate crashes on artifact files: $validate_result" + FAIL=$((FAIL + 1)) +fi +# Cleanup artifact files +rm -f ".flow/tasks/${EPIC1}.1-evidence.json" ".flow/tasks/${EPIC1}.1-summary.json" + +print_results diff --git a/scripts/tests/test_worker.sh b/scripts/tests/test_worker.sh new file mode 100755 index 00000000..6e086ba9 --- /dev/null +++ b/scripts/tests/test_worker.sh @@ -0,0 +1,233 @@ +#!/usr/bin/env bash +# Tests: context hints, build_review_prompt, worker-prompt, worker-phase +source "$(cd "$(dirname "$0")" && pwd)/common.sh" + +echo -e "${YELLOW}=== worker tests ===${NC}" + +# Create epic + tasks for worker tests +EPIC1_JSON="$($FLOWCTL epic create --title "Worker Epic" --json)" +EPIC1="$(echo "$EPIC1_JSON" | "$PYTHON_BIN" -c 'import json,sys; print(json.load(sys.stdin)["id"])')" +$FLOWCTL task create --epic "$EPIC1" --title "Task 1" --json >/dev/null + +echo -e "${YELLOW}--- context hints ---${NC}" +# Create files in same commit, then modify one to test context hints +mkdir -p "$TEST_DIR/repo/src" +# First commit: both auth.py and handler.py together +cat > "$TEST_DIR/repo/src/auth.py" << 'EOF' +def validate_token(token: str) -> bool: + """Validate JWT token.""" + return len(token) > 10 + +class User: + def __init__(self, name: str): + self.name = name +EOF +cat > "$TEST_DIR/repo/src/handler.py" << 'EOF' +from auth import validate_token, User + +def handle_request(token: str): + if validate_token(token): + return User("test") + return None +EOF +git -C "$TEST_DIR/repo" add src/ +git -C "$TEST_DIR/repo" commit -m "Add auth and handler" >/dev/null + +# Second commit: only modify auth.py (handler.py stays unchanged) +cat > "$TEST_DIR/repo/src/auth.py" << 'EOF' +def validate_token(token: str) -> bool: + """Validate JWT token with expiry check.""" + if len(token) < 10: + return False + return True + +class 
User: + def __init__(self, name: str, email: str = ""): + self.name = name + self.email = email +EOF +git -C "$TEST_DIR/repo" add src/auth.py +git -C "$TEST_DIR/repo" commit -m "Update auth with expiry" >/dev/null + +# Test context hints: should find handler.py referencing validate_token/User +cd "$TEST_DIR/repo" +hints_output="$(PYTHONPATH="$PLUGIN_ROOT/scripts" "$PYTHON_BIN" -c " +from flowctl import gather_context_hints +hints = gather_context_hints('HEAD~1') +print(hints) +" 2>&1)" + +# Verify hints mention handler.py referencing validate_token or User +if echo "$hints_output" | grep -q "handler.py"; then + echo -e "${GREEN}✓${NC} context hints finds references" + PASS=$((PASS + 1)) +else + echo -e "${RED}✗${NC} context hints finds references (got: $hints_output)" + FAIL=$((FAIL + 1)) +fi + +echo -e "${YELLOW}--- build_review_prompt ---${NC}" +cd "$TEST_DIR/repo" +# Test that build_review_prompt generates proper structure +"$PYTHON_BIN" - "$PLUGIN_ROOT/scripts" <<'PY' +import sys +sys.path.insert(0, sys.argv[1]) +from flowctl import build_review_prompt + +# Test impl prompt has all 7 criteria +impl_prompt = build_review_prompt("impl", "Test spec", "Test hints", "Test diff") +assert "" in impl_prompt +assert "Correctness" in impl_prompt +assert "Simplicity" in impl_prompt +assert "DRY" in impl_prompt +assert "Architecture" in impl_prompt +assert "Edge Cases" in impl_prompt +assert "Tests" in impl_prompt +assert "Security" in impl_prompt +assert "SHIP" in impl_prompt +assert "File:Line" in impl_prompt # Structured output format + +# Test plan prompt has all 7 criteria +plan_prompt = build_review_prompt("plan", "Test spec", "Test hints") +assert "Completeness" in plan_prompt +assert "Feasibility" in plan_prompt +assert "Clarity" in plan_prompt +assert "Architecture" in plan_prompt +assert "Risks" in plan_prompt +assert "Scope" in plan_prompt +assert "Testability" in plan_prompt +assert "SHIP" in plan_prompt + +# Test context hints and diff are included +assert "" 
in impl_prompt +assert "Test hints" in impl_prompt +assert "" in impl_prompt +assert "Test diff" in impl_prompt +assert "" in impl_prompt +assert "Test spec" in impl_prompt +PY +echo -e "${GREEN}✓${NC} build_review_prompt has full criteria" +PASS=$((PASS + 1)) + +echo -e "${YELLOW}--- worker-prompt ---${NC}" + +# Copy agents directory so worker-phase can find worker.md +cp -r "$PLUGIN_ROOT/agents" "$TEST_DIR/repo/agents" + +# Test: worker-prompt default output (bootstrap mode) +wp_json="$(CLAUDE_PLUGIN_ROOT="$TEST_DIR/repo" $FLOWCTL worker-prompt --task "${EPIC1}.1" --json)" +wp_mode="$(echo "$wp_json" | "$PYTHON_BIN" -c 'import json,sys; print(json.load(sys.stdin)["mode"])')" +wp_tokens="$(echo "$wp_json" | "$PYTHON_BIN" -c 'import json,sys; print(json.load(sys.stdin)["estimated_tokens"])')" +if [[ "$wp_mode" == "bootstrap" ]] && [[ "$wp_tokens" -gt 0 ]] && [[ "$wp_tokens" -lt 300 ]]; then + echo -e "${GREEN}✓${NC} worker-prompt default: bootstrap mode, ${wp_tokens} tokens (<300)" + PASS=$((PASS + 1)) +else + echo -e "${RED}✗${NC} worker-prompt default: expected mode=bootstrap and <300 tokens, got mode=$wp_mode tokens=$wp_tokens" + FAIL=$((FAIL + 1)) +fi + +echo -e "${YELLOW}--- worker-phase ---${NC}" + +# Create a fresh epic+task for phase testing +EPIC_PH_JSON="$($FLOWCTL epic create --title "Phase test" --json)" +EPIC_PH="$(echo "$EPIC_PH_JSON" | "$PYTHON_BIN" -c 'import json,sys; print(json.load(sys.stdin)["id"])')" +$FLOWCTL task create --epic "$EPIC_PH" --title "Phase task" --json >/dev/null +$FLOWCTL start "${EPIC_PH}.1" --json >/dev/null + +# Test: worker-phase next returns phase 1 initially (worktree+teams default) +wph_next="$($FLOWCTL worker-phase next --task "${EPIC_PH}.1" --json)" +wph_phase="$(echo "$wph_next" | "$PYTHON_BIN" -c 'import json,sys; print(json.load(sys.stdin)["phase"])')" +wph_done="$(echo "$wph_next" | "$PYTHON_BIN" -c 'import json,sys; print(json.load(sys.stdin)["all_done"])')" +if [[ "$wph_phase" == "1" ]] && [[ "$wph_done" == 
"False" ]]; then + echo -e "${GREEN}✓${NC} worker-phase next: initial phase is 1 (worktree+teams default)" + PASS=$((PASS + 1)) +else + echo -e "${RED}✗${NC} worker-phase next: expected phase=1 all_done=False, got phase=$wph_phase all_done=$wph_done" + FAIL=$((FAIL + 1)) +fi + +# Test: worker-phase done phase 1 -> next returns phase 2 +wph_next1="$wph_next" +$FLOWCTL worker-phase done --task "${EPIC_PH}.1" --phase 1 --json >/dev/null +wph_next1b="$($FLOWCTL worker-phase next --task "${EPIC_PH}.1" --json)" +wph_phase1b="$(echo "$wph_next1b" | "$PYTHON_BIN" -c 'import json,sys; print(json.load(sys.stdin)["phase"])')" +if [[ "$wph_phase1b" == "2" ]]; then + echo -e "${GREEN}✓${NC} worker-phase done->next: advances to phase 2" + PASS=$((PASS + 1)) +else + echo -e "${RED}✗${NC} worker-phase done->next: expected phase=2, got $wph_phase1b" + FAIL=$((FAIL + 1)) +fi + +# Advance through phase 2 and 5 to test 6 +$FLOWCTL worker-phase done --task "${EPIC_PH}.1" --phase 2 --json >/dev/null +$FLOWCTL worker-phase done --task "${EPIC_PH}.1" --phase 5 --json >/dev/null +wph_next6="$($FLOWCTL worker-phase next --task "${EPIC_PH}.1" --json)" +wph_phase6="$(echo "$wph_next6" | "$PYTHON_BIN" -c 'import json,sys; print(json.load(sys.stdin)["phase"])')" +if [[ "$wph_phase6" == "6" ]]; then + echo -e "${GREEN}✓${NC} worker-phase done->next: advances to phase 6" + PASS=$((PASS + 1)) +else + echo -e "${RED}✗${NC} worker-phase done->next: expected phase=6, got $wph_phase6" + FAIL=$((FAIL + 1)) +fi + +# Test: worker-phase skip detection — try to complete phase 10 before phase 6 +wph_skip_err="$($FLOWCTL worker-phase done --task "${EPIC_PH}.1" --phase 10 --json 2>&1 || true)" +if echo "$wph_skip_err" | "$PYTHON_BIN" -c 'import json,sys; d=json.load(sys.stdin); assert d.get("error") or not d.get("success")' 2>/dev/null; then + echo -e "${GREEN}✓${NC} worker-phase skip detection: rejects out-of-order phase" + PASS=$((PASS + 1)) +else + echo -e "${RED}✗${NC} worker-phase skip detection: 
expected error for out-of-order, got: $wph_skip_err" + FAIL=$((FAIL + 1)) +fi + +# Test: worker-phase next returns content field (may be empty in streamlined mode) +wph_has_content="$(echo "$wph_next1" | "$PYTHON_BIN" -c 'import json,sys; d=json.load(sys.stdin); print("content" in d)')" +if [[ "$wph_has_content" == "True" ]]; then + echo -e "${GREEN}✓${NC} worker-phase next: content field present" + PASS=$((PASS + 1)) +else + echo -e "${RED}✗${NC} worker-phase next: content field missing" + FAIL=$((FAIL + 1)) +fi + +# Test: worker-phase next returns different titles for different phases (phase 1 vs phase 2) +wph_title_p1="$(echo "$wph_next1" | "$PYTHON_BIN" -c 'import json,sys; print(json.load(sys.stdin).get("title",""))')" +wph_title_p2="$(echo "$wph_next1b" | "$PYTHON_BIN" -c 'import json,sys; print(json.load(sys.stdin).get("title",""))')" +if [[ "$wph_title_p1" != "$wph_title_p2" ]] && [[ -n "$wph_title_p2" ]]; then + echo -e "${GREEN}✓${NC} worker-phase next: title changes between phases (1 vs 2)" + PASS=$((PASS + 1)) +else + echo -e "${RED}✗${NC} worker-phase next: expected different title for phase 1 vs 2" + FAIL=$((FAIL + 1)) +fi + +# Test: worker-prompt --bootstrap outputs <300 tokens +wp_boot_json="$(CLAUDE_PLUGIN_ROOT="$TEST_DIR/repo" $FLOWCTL worker-prompt --task "${EPIC1}.1" --bootstrap --json)" +wp_boot_tokens="$(echo "$wp_boot_json" | "$PYTHON_BIN" -c 'import json,sys; print(json.load(sys.stdin)["estimated_tokens"])')" +wp_boot_mode="$(echo "$wp_boot_json" | "$PYTHON_BIN" -c 'import json,sys; print(json.load(sys.stdin)["mode"])')" +if [[ "$wp_boot_mode" == "bootstrap" ]] && [[ "$wp_boot_tokens" -lt 300 ]]; then + echo -e "${GREEN}✓${NC} worker-prompt --bootstrap: mode=bootstrap, ${wp_boot_tokens} tokens (<300)" + PASS=$((PASS + 1)) +else + echo -e "${RED}✗${NC} worker-prompt --bootstrap: expected mode=bootstrap and <300 tokens, got mode=$wp_boot_mode tokens=$wp_boot_tokens" + FAIL=$((FAIL + 1)) +fi + +# Test: complete all remaining default phases -> 
all_done +# Phases 1, 2, 5 already done above; complete remaining: 6, 7, 9, 10, 11, 12 +for phase in 6 7 9 10 11 12; do + $FLOWCTL worker-phase done --task "${EPIC_PH}.1" --phase "$phase" --json >/dev/null +done +wph_final="$($FLOWCTL worker-phase next --task "${EPIC_PH}.1" --json)" +wph_all_done="$(echo "$wph_final" | "$PYTHON_BIN" -c 'import json,sys; print(json.load(sys.stdin)["all_done"])')" +if [[ "$wph_all_done" == "True" ]]; then + echo -e "${GREEN}✓${NC} worker-phase lifecycle: all phases complete" + PASS=$((PASS + 1)) +else + echo -e "${RED}✗${NC} worker-phase lifecycle: expected all_done=True, got $wph_all_done" + FAIL=$((FAIL + 1)) +fi + +print_results From 69f65def1bb3612e2cfbf78923eabad409eccc5b Mon Sep 17 00:00:00 2001 From: z23cc Date: Tue, 7 Apr 2026 17:48:45 +0800 Subject: [PATCH 4/7] feat(ci): add shell integration tests job and expand Rust test coverage [fn-140.5] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add parallel `integration` CI job that builds flowctl and runs scripts/tests/run_all.sh with git configured for test isolation. Add four new Rust integration test files covering approval, log, outputs, and doctor workflows — following existing temp-dir + JSON assertion patterns. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- .github/workflows/ci.yml | 31 ++++ .../crates/flowctl-cli/tests/approval_test.rs | 146 ++++++++++++++++++ .../crates/flowctl-cli/tests/doctor_test.rs | 84 ++++++++++ flowctl/crates/flowctl-cli/tests/log_test.rs | 114 ++++++++++++++ .../crates/flowctl-cli/tests/outputs_test.rs | 132 ++++++++++++++++ 5 files changed, 507 insertions(+) create mode 100644 flowctl/crates/flowctl-cli/tests/approval_test.rs create mode 100644 flowctl/crates/flowctl-cli/tests/doctor_test.rs create mode 100644 flowctl/crates/flowctl-cli/tests/log_test.rs create mode 100644 flowctl/crates/flowctl-cli/tests/outputs_test.rs diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e9083a54..5bde7d6f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -74,6 +74,37 @@ jobs: - name: Audit dependencies run: cargo install cargo-audit && cargo audit + integration: + name: Shell Integration Tests + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Install Rust toolchain + uses: dtolnay/rust-toolchain@stable + + - name: Cache cargo registry & build + uses: actions/cache@v4 + with: + path: | + ~/.cargo/registry + ~/.cargo/git + flowctl/target + key: ${{ runner.os }}-cargo-integration-${{ hashFiles('flowctl/Cargo.lock') }} + restore-keys: | + ${{ runner.os }}-cargo-integration- + + - name: Build flowctl + run: cd flowctl && cargo build --release + + - name: Configure git for tests + run: | + git config --global user.email "ci@test.local" + git config --global user.name "CI Test" + + - name: Run shell integration tests + run: bash scripts/tests/run_all.sh + coverage: name: Test Coverage runs-on: ubuntu-latest diff --git a/flowctl/crates/flowctl-cli/tests/approval_test.rs b/flowctl/crates/flowctl-cli/tests/approval_test.rs new file mode 100644 index 00000000..88e07b73 --- /dev/null +++ b/flowctl/crates/flowctl-cli/tests/approval_test.rs @@ -0,0 +1,146 @@ +//! 
Integration tests for the approval request/resolve/list workflow.
+
+use serde_json::Value;
+use std::path::Path;
+use std::process::Command;
+
+fn flowctl_bin() -> std::path::PathBuf {
+    let path = std::path::PathBuf::from(env!("CARGO_BIN_EXE_flowctl"));
+    assert!(path.exists(), "flowctl binary not found at {path:?}");
+    path
+}
+
+fn run(work_dir: &Path, args: &[&str]) -> (String, i32) {
+    let mut cmd_args: Vec<&str> = vec!["--json"];
+    cmd_args.extend_from_slice(args);
+
+    let output = Command::new(flowctl_bin())
+        .args(&cmd_args)
+        .current_dir(work_dir)
+        .output()
+        .expect("Failed to run flowctl");
+
+    let stdout = String::from_utf8_lossy(&output.stdout).to_string();
+    let stderr = String::from_utf8_lossy(&output.stderr).to_string();
+    let combined = if stdout.trim().is_empty() { stderr } else { stdout };
+    (combined, output.status.code().unwrap_or(-1))
+}
+
+fn parse_json(output: &str) -> Option<Value> {
+    for line in output.lines() {
+        let trimmed = line.trim();
+        if trimmed.starts_with('{') || trimmed.starts_with('[') {
+            if let Ok(v) = serde_json::from_str::<Value>(trimmed) {
+                return Some(v);
+            }
+        }
+    }
+    serde_json::from_str(output.trim()).ok()
+}
+
+/// Set up .flow with an epic and a task, return (tmp_dir, epic_id, task_id).
+fn setup(prefix: &str) -> (tempfile::TempDir, String, String) { + let dir = tempfile::Builder::new().prefix(prefix).tempdir().unwrap(); + run(dir.path(), &["init"]); + + let (epic_out, _) = run(dir.path(), &["epic", "create", "--title", "Approval Epic"]); + let epic_id = parse_json(&epic_out).unwrap()["id"] + .as_str() + .unwrap() + .to_string(); + + let (task_out, _) = run( + dir.path(), + &["task", "create", "--epic", &epic_id, "--title", "Approval Task"], + ); + let task_id = parse_json(&task_out).unwrap()["id"] + .as_str() + .unwrap() + .to_string(); + + (dir, epic_id, task_id) +} + +#[test] +fn approval_create_and_list() { + let (dir, _epic_id, task_id) = setup("approval_cl_"); + + // Create an approval request + let (out, exit) = run( + dir.path(), + &[ + "approval", "create", + "--task", &task_id, + "--kind", "generic", + "--payload", r#"{"message":"need review"}"#, + ], + ); + assert_eq!(exit, 0, "approval create failed: {out}"); + let json = parse_json(&out).expect("approval create should return JSON"); + assert!(json.get("id").is_some(), "approval should have an id"); + + // List approvals — should have at least one + let (list_out, list_exit) = run(dir.path(), &["approval", "list"]); + assert_eq!(list_exit, 0, "approval list failed: {list_out}"); + let list_json = parse_json(&list_out).expect("approval list should return JSON"); + // The list wraps results in "data" + let approvals = list_json["data"] + .as_array() + .or_else(|| list_json.as_array()) + .expect("should have a data array"); + assert!(!approvals.is_empty(), "should have at least one approval"); +} + +#[test] +fn approval_approve_resolves() { + let (dir, _epic_id, task_id) = setup("approval_ap_"); + + // Create + let (out, _) = run( + dir.path(), + &[ + "approval", "create", + "--task", &task_id, + "--kind", "file_access", + "--payload", r#"{"files":["src/main.rs"]}"#, + ], + ); + let approval_id = parse_json(&out).unwrap()["id"] + .as_str() + .unwrap() + .to_string(); + + // Approve it + 
let (approve_out, approve_exit) = run( + dir.path(), + &["approval", "approve", &approval_id], + ); + assert_eq!(approve_exit, 0, "approval approve failed: {approve_out}"); +} + +#[test] +fn approval_reject_resolves() { + let (dir, _epic_id, task_id) = setup("approval_rj_"); + + // Create + let (out, _) = run( + dir.path(), + &[ + "approval", "create", + "--task", &task_id, + "--kind", "generic", + "--payload", r#"{"question":"should we proceed?"}"#, + ], + ); + let approval_id = parse_json(&out).unwrap()["id"] + .as_str() + .unwrap() + .to_string(); + + // Reject it + let (reject_out, reject_exit) = run( + dir.path(), + &["approval", "reject", &approval_id, "--reason", "not needed"], + ); + assert_eq!(reject_exit, 0, "approval reject failed: {reject_out}"); +} diff --git a/flowctl/crates/flowctl-cli/tests/doctor_test.rs b/flowctl/crates/flowctl-cli/tests/doctor_test.rs new file mode 100644 index 00000000..8cbdc16c --- /dev/null +++ b/flowctl/crates/flowctl-cli/tests/doctor_test.rs @@ -0,0 +1,84 @@ +//! Integration tests for the doctor command (state health diagnostics). 
+
+use serde_json::Value;
+use std::path::Path;
+use std::process::Command;
+
+fn flowctl_bin() -> std::path::PathBuf {
+    let path = std::path::PathBuf::from(env!("CARGO_BIN_EXE_flowctl"));
+    assert!(path.exists(), "flowctl binary not found at {path:?}");
+    path
+}
+
+fn run(work_dir: &Path, args: &[&str]) -> (String, i32) {
+    let mut cmd_args: Vec<&str> = vec!["--json"];
+    cmd_args.extend_from_slice(args);
+
+    let output = Command::new(flowctl_bin())
+        .args(&cmd_args)
+        .current_dir(work_dir)
+        .output()
+        .expect("Failed to run flowctl");
+
+    let stdout = String::from_utf8_lossy(&output.stdout).to_string();
+    let stderr = String::from_utf8_lossy(&output.stderr).to_string();
+    let combined = if stdout.trim().is_empty() { stderr } else { stdout };
+    (combined, output.status.code().unwrap_or(-1))
+}
+
+fn parse_json(output: &str) -> Option<Value> {
+    for line in output.lines() {
+        let trimmed = line.trim();
+        if trimmed.starts_with('{') || trimmed.starts_with('[') {
+            if let Ok(v) = serde_json::from_str::<Value>(trimmed) {
+                return Some(v);
+            }
+        }
+    }
+    serde_json::from_str(output.trim()).ok()
+}
+
+#[test]
+fn doctor_healthy_state() {
+    let dir = tempfile::Builder::new().prefix("doctor_ok_").tempdir().unwrap();
+    run(dir.path(), &["init"]);
+
+    let (out, exit) = run(dir.path(), &["doctor"]);
+    assert_eq!(exit, 0, "doctor should pass on healthy state: {out}");
+    let json = parse_json(&out).expect("doctor should return JSON");
+    // Should have a healthy indicator
+    assert!(
+        json.get("healthy").is_some() || json.get("checks").is_some() || json.get("status").is_some(),
+        "doctor output should contain health info: {json}"
+    );
+}
+
+#[test]
+fn doctor_no_flow_dir() {
+    let dir = tempfile::Builder::new().prefix("doctor_nf_").tempdir().unwrap();
+
+    let (_out, exit) = run(dir.path(), &["doctor"]);
+    // Without .flow/ dir, doctor should fail or report unhealthy
+    assert_ne!(exit, 0, "doctor without .flow/ should fail");
+}
+
+#[test]
+fn doctor_with_data() {
+    let dir =
tempfile::Builder::new().prefix("doctor_wd_").tempdir().unwrap();
+    run(dir.path(), &["init"]);
+
+    // Create some state
+    let (epic_out, _) = run(dir.path(), &["epic", "create", "--title", "Doctor Epic"]);
+    let epic_id = parse_json(&epic_out).unwrap()["id"]
+        .as_str()
+        .unwrap()
+        .to_string();
+    run(
+        dir.path(),
+        &["task", "create", "--epic", &epic_id, "--title", "Doctor Task"],
+    );
+
+    let (out, exit) = run(dir.path(), &["doctor"]);
+    assert_eq!(exit, 0, "doctor should pass with data: {out}");
+    assert!(parse_json(&out).is_some(), "doctor should return valid JSON");
+}
diff --git a/flowctl/crates/flowctl-cli/tests/log_test.rs b/flowctl/crates/flowctl-cli/tests/log_test.rs
new file mode 100644
index 00000000..1c65bc4e
--- /dev/null
+++ b/flowctl/crates/flowctl-cli/tests/log_test.rs
@@ -0,0 +1,114 @@
+//! Integration tests for the decision log (log decision / log decisions).
+
+use serde_json::Value;
+use std::path::Path;
+use std::process::Command;
+
+fn flowctl_bin() -> std::path::PathBuf {
+    let path = std::path::PathBuf::from(env!("CARGO_BIN_EXE_flowctl"));
+    assert!(path.exists(), "flowctl binary not found at {path:?}");
+    path
+}
+
+fn run(work_dir: &Path, args: &[&str]) -> (String, i32) {
+    let mut cmd_args: Vec<&str> = vec!["--json"];
+    cmd_args.extend_from_slice(args);
+
+    let output = Command::new(flowctl_bin())
+        .args(&cmd_args)
+        .current_dir(work_dir)
+        .output()
+        .expect("Failed to run flowctl");
+
+    let stdout = String::from_utf8_lossy(&output.stdout).to_string();
+    let stderr = String::from_utf8_lossy(&output.stderr).to_string();
+    let combined = if stdout.trim().is_empty() { stderr } else { stdout };
+    (combined, output.status.code().unwrap_or(-1))
+}
+
+fn parse_json(output: &str) -> Option<Value> {
+    for line in output.lines() {
+        let trimmed = line.trim();
+        if trimmed.starts_with('{') || trimmed.starts_with('[') {
+            if let Ok(v) = serde_json::from_str::<Value>(trimmed) {
+                return Some(v);
+            }
+        }
+    }
+    serde_json::from_str(output.trim()).ok()
+}
+
+fn init_flow(prefix: &str) -> tempfile::TempDir { + let dir = tempfile::Builder::new().prefix(prefix).tempdir().unwrap(); + run(dir.path(), &["init"]); + dir +} + +#[test] +fn log_decision_and_query() { + let dir = init_flow("log_dq_"); + + // Record a decision + let (out, exit) = run( + dir.path(), + &[ + "log", "decision", + "--key", "review_backend", + "--value", "rp-mcp", + "--reason", "RP available and faster", + ], + ); + assert_eq!(exit, 0, "log decision failed: {out}"); + let json = parse_json(&out).expect("log decision should return JSON"); + assert!(json.get("id").is_some() || json.get("key").is_some(), "should have id or key"); + + // Query decisions + let (list_out, list_exit) = run(dir.path(), &["log", "decisions"]); + assert_eq!(list_exit, 0, "log decisions failed: {list_out}"); + let list_json = parse_json(&list_out).expect("log decisions should return JSON"); + let decisions = list_json["decisions"] + .as_array() + .or_else(|| list_json.as_array()) + .expect("should have a decisions array"); + assert!(!decisions.is_empty(), "should have at least one decision"); +} + +#[test] +fn log_decision_with_epic_scope() { + let dir = init_flow("log_ep_"); + + // Create an epic to scope to + let (epic_out, _) = run(dir.path(), &["epic", "create", "--title", "Log Epic"]); + let epic_id = parse_json(&epic_out).unwrap()["id"] + .as_str() + .unwrap() + .to_string(); + + // Record a scoped decision + let (out, exit) = run( + dir.path(), + &[ + "log", "decision", + "--key", "branch_strategy", + "--value", "worktree", + "--reason", "parallel work needed", + "--epic", &epic_id, + ], + ); + assert_eq!(exit, 0, "scoped log decision failed: {out}"); + + // Query scoped + let (list_out, list_exit) = run( + dir.path(), + &["log", "decisions", "--epic", &epic_id], + ); + assert_eq!(list_exit, 0, "scoped log decisions failed: {list_out}"); +} + +#[test] +fn log_decisions_empty() { + let dir = init_flow("log_empty_"); + + let (out, exit) = run(dir.path(), &["log", 
"decisions"]);
+    assert_eq!(exit, 0, "log decisions on empty should succeed: {out}");
+}
diff --git a/flowctl/crates/flowctl-cli/tests/outputs_test.rs b/flowctl/crates/flowctl-cli/tests/outputs_test.rs
new file mode 100644
index 00000000..e671a234
--- /dev/null
+++ b/flowctl/crates/flowctl-cli/tests/outputs_test.rs
@@ -0,0 +1,132 @@
+//! Integration tests for the outputs write/list/show workflow.
+
+use serde_json::Value;
+use std::path::Path;
+use std::process::Command;
+
+fn flowctl_bin() -> std::path::PathBuf {
+    let path = std::path::PathBuf::from(env!("CARGO_BIN_EXE_flowctl"));
+    assert!(path.exists(), "flowctl binary not found at {path:?}");
+    path
+}
+
+fn run(work_dir: &Path, args: &[&str]) -> (String, i32) {
+    let mut cmd_args: Vec<&str> = vec!["--json"];
+    cmd_args.extend_from_slice(args);
+
+    let output = Command::new(flowctl_bin())
+        .args(&cmd_args)
+        .current_dir(work_dir)
+        .output()
+        .expect("Failed to run flowctl");
+
+    let stdout = String::from_utf8_lossy(&output.stdout).to_string();
+    let stderr = String::from_utf8_lossy(&output.stderr).to_string();
+    let combined = if stdout.trim().is_empty() { stderr } else { stdout };
+    (combined, output.status.code().unwrap_or(-1))
+}
+
+fn parse_json(output: &str) -> Option<Value> {
+    for line in output.lines() {
+        let trimmed = line.trim();
+        if trimmed.starts_with('{') || trimmed.starts_with('[') {
+            if let Ok(v) = serde_json::from_str::<Value>(trimmed) {
+                return Some(v);
+            }
+        }
+    }
+    serde_json::from_str(output.trim()).ok()
+}
+
+/// Set up .flow with an epic and a task, return (tmp_dir, epic_id, task_id).
+fn setup(prefix: &str) -> (tempfile::TempDir, String, String) { + let dir = tempfile::Builder::new().prefix(prefix).tempdir().unwrap(); + run(dir.path(), &["init"]); + + let (epic_out, _) = run(dir.path(), &["epic", "create", "--title", "Outputs Epic"]); + let epic_id = parse_json(&epic_out).unwrap()["id"] + .as_str() + .unwrap() + .to_string(); + + let (task_out, _) = run( + dir.path(), + &["task", "create", "--epic", &epic_id, "--title", "Outputs Task"], + ); + let task_id = parse_json(&task_out).unwrap()["id"] + .as_str() + .unwrap() + .to_string(); + + (dir, epic_id, task_id) +} + +#[test] +fn outputs_write_and_list() { + let (dir, epic_id, task_id) = setup("outputs_wl_"); + + // Write output content from a file + let content_file = dir.path().join("output_content.md"); + std::fs::write(&content_file, "## Summary\nTask completed successfully.\n").unwrap(); + + let (out, exit) = run( + dir.path(), + &[ + "outputs", "write", &task_id, + "--file", content_file.to_str().unwrap(), + ], + ); + assert_eq!(exit, 0, "outputs write failed: {out}"); + + // List outputs for the epic + let (list_out, list_exit) = run( + dir.path(), + &["outputs", "list", "--epic", &epic_id], + ); + assert_eq!(list_exit, 0, "outputs list failed: {list_out}"); + let list_json = parse_json(&list_out).expect("outputs list should return JSON"); + // Outputs list wraps results in "entries" + let outputs = list_json["entries"] + .as_array() + .or_else(|| list_json.as_array()) + .expect("should have an entries array"); + assert!(!outputs.is_empty(), "should have at least one output"); +} + +#[test] +fn outputs_show() { + let (dir, _epic_id, task_id) = setup("outputs_sh_"); + + // Write output content + let content_file = dir.path().join("show_content.md"); + std::fs::write(&content_file, "## Details\nImplementation notes here.\n").unwrap(); + + let (_, write_exit) = run( + dir.path(), + &[ + "outputs", "write", &task_id, + "--file", content_file.to_str().unwrap(), + ], + ); + 
assert_eq!(write_exit, 0); + + // Show the output + let (show_out, show_exit) = run(dir.path(), &["outputs", "show", &task_id]); + assert_eq!(show_exit, 0, "outputs show failed: {show_out}"); + // Output should contain our content + assert!( + show_out.contains("Implementation notes") || parse_json(&show_out).is_some(), + "show output should contain our content or be valid JSON" + ); +} + +#[test] +fn outputs_list_empty() { + let (dir, epic_id, _task_id) = setup("outputs_em_"); + + let (out, exit) = run( + dir.path(), + &["outputs", "list", "--epic", &epic_id], + ); + assert_eq!(exit, 0, "outputs list on empty should succeed: {out}"); +} From 38e1dabd4b2716c4085d4019441f54aef7b49862 Mon Sep 17 00:00:00 2001 From: z23cc Date: Tue, 7 Apr 2026 17:48:55 +0800 Subject: [PATCH 5/7] docs(flowctl): document all CLI commands and fix stale entries [fn-140.3] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add documentation for ~30 previously undocumented commands: doctor, review-backend, dag, estimate, replay, diff, plan-depth, approval (create/list/show/approve/reject), log (decision/decisions), outputs (write/list/show), ralph (pause/resume/stop/status), scout-cache (get/set/clear), skill (register/match), hook (7 subcommands), stats (summary/epic/weekly/tokens/bottlenecks/dora/rollup/cleanup), files, lock, unlock, heartbeat, lock-check, queue, fail, restart, dep rm, task skip, task split, epic reopen/title/archive/clean/audit/add-dep/ rm-dep/auto-exec, export, import, completions. Fix stale command names: epic set-plan → epic plan, epic set-plan-review-status → epic review, epic set-completion-review-status → epic completion, epic set-branch → epic branch, task set-description/set-acceptance/set-spec → task spec (unified). Remove stale prep-chat entry (command removed from CLI). Update Available Commands summary at top of file. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- docs/flowctl.md | 538 ++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 472 insertions(+), 66 deletions(-) diff --git a/docs/flowctl.md b/docs/flowctl.md index 1cd5ea71..def6cc28 100644 --- a/docs/flowctl.md +++ b/docs/flowctl.md @@ -7,7 +7,13 @@ CLI for `.flow/` task tracking. Agents must use flowctl for all writes. ## Available Commands ``` -init, detect, epic, task, dep, gap, show, epics, files, lock, unlock, lock-check, tasks, list, cat, ready, queue, next, start, done, restart, block, validate, config, invariants, guard, stack, review-backend, memory, parse-findings, prep-chat, rp, codex, checkpoint, status, state-path, worker-prompt, worker-phase, doctor, ralph +init, detect, status, doctor, validate, state-path, review-backend, parse-findings, +guard, worker-prompt, dag, estimate, replay, diff, plan-depth, +config, epic, task, dep, approval, gap, log, memory, outputs, checkpoint, +stack, invariants, ralph, scout-cache, skill, rp, codex, hook, stats, worker-phase, +show, epics, tasks, list, cat, files, lock, unlock, heartbeat, lock-check, +ready, next, queue, start, done, restart, block, fail, +export, import, completions ``` ## Multi-User Safety @@ -87,36 +93,38 @@ Output: {"success": true, "id": "fn-1-epic-title", "title": "Epic title", "spec_path": ".flow/specs/fn-1-epic-title.md"} ``` -### epic set-plan +### epic plan Overwrite epic spec from file. ```bash -flowctl epic set-plan fn-1 --file plan.md [--json] +flowctl epic plan fn-1 --file plan.md [--json] ``` -### epic set-plan-review-status +Use `-` as file to read from stdin. -Set plan review status and timestamp. +### epic review + +Set plan review status. ```bash -flowctl epic set-plan-review-status fn-1 --status ship|needs_work|unknown [--json] +flowctl epic review fn-1 ship|needs_work|unknown [--json] ``` -### epic set-completion-review-status +### epic completion -Set completion review status and timestamp. 
+Set completion review status. ```bash -flowctl epic set-completion-review-status fn-1 --status ship|needs_work|unknown [--json] +flowctl epic completion fn-1 ship|needs_work|unknown [--json] ``` -### epic set-branch +### epic branch -Set epic branch_name. +Set epic branch name. ```bash -flowctl epic set-branch fn-1 --branch "fn-1-epic" [--json] +flowctl epic branch fn-1 fn-1-epic [--json] ``` ### epic close @@ -157,31 +165,28 @@ Output: {"success": true, "id": "fn-1.4", "epic": "fn-1", "title": "Task title", "depends_on": ["fn-1.2", "fn-1.3"]} ``` -### task set-description +### task spec -Set task description section. +Set task spec: full file or individual sections. ```bash -flowctl task set-description fn-1.2 --file desc.md [--json] -``` +# Full spec from file +flowctl task spec fn-1.2 --file spec.md [--json] -### task set-acceptance +# Individual sections +flowctl task spec fn-1.2 --desc desc.md --accept accept.md [--json] -Set task acceptance section. - -```bash -flowctl task set-acceptance fn-1.2 --file accept.md [--json] +# With investigation targets +flowctl task spec fn-1.2 --investigation targets.md [--json] ``` -### task set-spec - -Set description and acceptance in one call (fewer writes). - -```bash -flowctl task set-spec fn-1.2 --description desc.md --acceptance accept.md [--json] -``` +Options: +- `--file FILE`: Full spec file (replaces entire body) +- `--desc FILE`: Description section file (alias: `--description`) +- `--accept FILE`: Acceptance section file (alias: `--acceptance`) +- `--investigation FILE`: Investigation targets section file -Both `--description` and `--acceptance` are optional; supply one or both. +All section flags are optional; supply one or more. ### task reset @@ -661,43 +666,6 @@ Output: Without `--register`, the `registered` field is omitted. -### prep-chat - -Generate properly escaped JSON for RepoPrompt chat. Avoids shell escaping issues with complex prompts. 
-Optional legacy positional arg is ignored; do not pass epic/task IDs. - -```bash -# Write message to file (avoids escaping issues) -cat > /tmp/prompt.md << 'EOF' -Your multi-line prompt with "quotes", $variables, and `backticks`. -EOF - -# Generate JSON -flowctl prep-chat \ - --message-file /tmp/prompt.md \ - --mode chat \ - [--new-chat] \ - [--chat-name "Review Name"] \ - [--selected-paths file1.ts file2.ts] \ - [-o /tmp/payload.json] - -# Prefer flowctl rp chat-send (uses this internally) -flowctl rp chat-send --window W --tab T --message-file /tmp/prompt.md -``` - -Options: -- `--message-file FILE` (required): File containing the message text -- `--mode {chat,ask}`: Chat mode (default: chat) -- `--new-chat`: Start a new chat session -- `--chat-name NAME`: Name for the new chat -- `--selected-paths FILE...`: Files to include in context (for follow-ups) -- `-o, --output FILE`: Write JSON to file (default: stdout) - -Output (stdout or file): -```json -{"message": "...", "mode": "chat", "new_chat": true, "chat_name": "...", "selected_paths": ["..."]} -``` - ### rp RepoPrompt wrappers (preferred for reviews). Requires RepoPrompt 1.5.68+. @@ -916,6 +884,444 @@ Source values: - `git-common-dir` — `git --git-common-dir` (shared across worktrees) - `fallback` — `.flow/state` (non-git or old git) +### doctor + +Run comprehensive state health diagnostics. + +```bash +flowctl doctor [--workflow] [--json] +``` + +Options: +- `--workflow`: Run workflow-specific health checks (backend config, tools, locks) + +### review-backend + +Get review backend and compare review receipts. 
+
+```bash
+# Detect configured backend
+flowctl review-backend [--json]
+
+# Compare receipts from specific files
+flowctl review-backend --compare receipt1.json,receipt2.json [--json]
+
+# Auto-discover receipts for an epic
+flowctl review-backend --epic fn-1 [--json]
+```
+
+Options:
+- `--compare FILES`: Comma-separated review receipt file paths
+- `--epic EPIC_ID`: Auto-discover review receipts for epic
+
+### dag
+
+Render ASCII DAG of task dependencies for an epic.
+
+```bash
+flowctl dag fn-1 [--json]
+```
+
+Also available via `flowctl status --dag --epic fn-1`.
+
+### estimate
+
+Estimate remaining time for an epic based on historical task durations.
+
+```bash
+flowctl estimate --epic fn-1 [--json]
+```
+
+### replay
+
+Replay an epic: reset all tasks to `todo` for re-execution.
+
+```bash
+flowctl replay fn-1 [--dry-run] [--force] [--json]
+```
+
+Options:
+- `--dry-run`: Show what would be reset without doing it
+- `--force`: Allow replay even if tasks are `in_progress`
+
+### diff
+
+Show git diff summary for an epic's branch.
+
+```bash
+flowctl diff fn-1 [--json]
+```
+
+### plan-depth
+
+Classify request depth for adaptive plan step selection.
+
+```bash
+flowctl plan-depth --request "Add OAuth support" [--json]
+```
+
+### approval
+
+Approval commands for requesting/resolving blocking decisions (Teams mode).
+
+```bash
+# Create a pending approval
+flowctl approval create --task fn-1.2 --kind file_access --payload '{"file":"src/auth.rs"}' [--json]
+flowctl approval create --task fn-1.2 --kind mutation --payload @request.json [--json]
+
+# List approvals
+flowctl approval list [--pending] [--json]
+
+# Show a single approval (optionally wait for resolution)
+flowctl approval show <id> [--wait] [--timeout 300] [--json]
+
+# Approve or reject
+flowctl approval approve <id> [--json]
+flowctl approval reject <id> [--reason "..."] [--json]
+```
+
+Approval kinds: `file_access`, `mutation`, `generic`. Payload accepts inline JSON or `@path/to/file.json`.
+
+### log
+
+Decision logging for workflow traceability.
+
+```bash
+# Record a decision
+flowctl log decision --key "review_backend" --value "rp-mcp" --reason "RP available" [--epic fn-1] [--task fn-1.2] [--json]
+
+# Query stored decisions
+flowctl log decisions [--epic fn-1] [--limit 20] [--json]
+```
+
+### outputs
+
+Narrative handoff between tasks. Workers write outputs in Phase 9; successors read them during Phase 2 re-anchor.
+
+```bash
+# Write output for a task (from file or stdin)
+flowctl outputs write fn-1.3 --file output.md [--json]
+flowctl outputs write fn-1.3 --file - [--json]  # stdin
+
+# List outputs for an epic (newest-first)
+flowctl outputs list --epic fn-1 [--limit 10] [--json]
+
+# Show full output content
+flowctl outputs show fn-1.3 [--json]
+```
+
+### ralph
+
+Ralph autonomous run control.
+
+```bash
+flowctl ralph pause [--run <id>] [--json]
+flowctl ralph resume [--run <id>] [--json]
+flowctl ralph stop [--run <id>] [--json]
+flowctl ralph status [--run <id>] [--json]
+```
+
+Run ID is auto-detected if only one active run exists.
+
+### scout-cache
+
+Scout result cache commands. Caches keyed by scout type + git commit hash.
+
+```bash
+# Get cached result
+flowctl scout-cache get --scout-type repo [--commit <hash>] [--json]
+
+# Set (cache) a result
+flowctl scout-cache set --scout-type repo --result '{"findings":[]}' [--commit <hash>] [--json]
+flowctl scout-cache set --scout-type capability --result @result.json [--json]
+
+# Clear all cached results
+flowctl scout-cache clear [--json]
+```
+
+Commit hash auto-detected from HEAD if omitted. Result accepts inline JSON or `@path/to/file.json`.
+
+### skill
+
+Skill registry commands with semantic vector search.
+ +```bash +# Scan and register skills from skills/*/SKILL.md +flowctl skill register [--dir /path/to/plugin] [--json] + +# Semantic search against registered skills +flowctl skill match "implement OAuth" [--limit 5] [--threshold 0.70] [--json] +``` + +Options for `match`: +- `--limit N`: Maximum results (default: 5) +- `--threshold F`: Minimum cosine similarity (default: 0.70) + +### hook + +Claude Code hook scripts (invoked automatically by hooks.json, not manually). + +```bash +flowctl hook auto-memory # Extract session memories (Stop hook) +flowctl hook ralph-guard # Enforce Ralph workflow rules +flowctl hook commit-gate # Gate commit on guard pass +flowctl hook pre-compact # Inject .flow/ state into compaction +flowctl hook subagent-context # Inject active task context for subagents +flowctl hook task-completed # Sync Claude task completion with .flow/ +flowctl hook rtk-rewrite # Rewrite Bash commands via rtk optimizer +``` + +All hooks read JSON from stdin and use exit codes 0 (allow) and 2 (block). + +### stats + +Stats dashboard with summary, trends, tokens, and DORA metrics. + +```bash +# Overall summary +flowctl stats summary [--json] + +# Per-epic breakdown +flowctl stats epic [--id fn-1] [--json] + +# Weekly trends +flowctl stats weekly [--weeks 8] [--json] + +# Token/cost breakdown +flowctl stats tokens [--epic fn-1] [--json] + +# Bottleneck analysis +flowctl stats bottlenecks [--limit 10] [--json] + +# DORA metrics +flowctl stats dora [--json] + +# Maintenance +flowctl stats rollup [--json] # Generate monthly rollups +flowctl stats cleanup [--json] # Delete old events/rollups +``` + +### files + +Show file ownership map for an epic. + +```bash +flowctl files --epic fn-1 [--json] +``` + +Shows which tasks own which files and detects ownership conflicts. + +### lock + +Lock files for a task (Teams mode). Prevents other workers from modifying locked files. 
+ +```bash +flowctl lock --task fn-1.2 --files src/auth.rs,src/config.rs [--mode write] [--json] +``` + +Options: +- `--task ID` (required): Task ID that owns the files +- `--files PATHS` (required): Comma-separated file paths +- `--mode MODE`: Lock mode — `read`, `write`, or `directory_add` (default: `write`) + +### unlock + +Unlock files for a task (Teams mode). + +```bash +flowctl unlock --task fn-1.2 [--files src/auth.rs] [--json] +flowctl unlock --all [--json] +``` + +Options: +- `--task ID`: Task ID to unlock files for +- `--files PATHS`: Specific files to unlock (all task files if omitted) +- `--all`: Clear ALL file locks (used between waves) + +### heartbeat + +Extend lock TTL for a task (Teams mode heartbeat). + +```bash +flowctl heartbeat --task fn-1.2 [--json] +``` + +### lock-check + +Check file lock status (Teams mode). + +```bash +flowctl lock-check [--file src/auth.rs] [--json] +``` + +Shows all active locks, or lock state for a specific file. + +### queue + +Show multi-epic queue status. + +```bash +flowctl queue [--json] +``` + +### fail + +Mark task as failed. Triggers `upstream_failed` propagation to downstream dependents. + +```bash +flowctl fail fn-1.2 [--reason "..."] [--force] [--json] +``` + +Options: +- `--reason TEXT`: Reason for failure +- `--force`: Skip status checks + +### restart + +Restart task and cascade-reset downstream dependents. + +```bash +flowctl restart fn-1.2 [--dry-run] [--force] [--json] +``` + +Options: +- `--dry-run`: Show what would be reset without doing it +- `--force`: Allow restart even if tasks are `in_progress` + +### dep rm + +Remove a dependency between tasks. + +```bash +flowctl dep rm fn-1.3 fn-1.2 [--json] +``` + +### task skip + +Skip a task (mark as permanently skipped). Downstream deps treat skipped as satisfied. + +```bash +flowctl task skip fn-1.2 [--reason "Not needed after refactor"] [--json] +``` + +### task split + +Split a task into sub-tasks (runtime DAG mutation). 
+
+```bash
+flowctl task split fn-1.2 --titles "Parse config|Validate config|Apply config" [--chain] [--json]
+```
+
+Options:
+- `--titles TEXT` (required): Sub-task titles separated by `|`
+- `--chain`: Chain sub-tasks sequentially (each depends on the previous)
+
+### epic reopen
+
+Reopen a closed epic.
+
+```bash
+flowctl epic reopen fn-1 [--json]
+```
+
+### epic title
+
+Rename an epic's title.
+
+```bash
+flowctl epic title fn-1 --title "New title" [--json]
+```
+
+### epic archive
+
+Archive a closed epic to `.flow/.archive/`.
+
+```bash
+flowctl epic archive fn-1 [--force] [--json]
+```
+
+Options:
+- `--force`: Archive even if not closed
+
+### epic clean
+
+Archive all closed epics at once.
+
+```bash
+flowctl epic clean [--json]
+```
+
+### epic audit
+
+Audit epic task-coverage vs original spec (advisory only).
+
+```bash
+flowctl epic audit fn-1 [--force] [--json]
+```
+
+Assembles epic spec, task list, and prior audit context into a payload for the epic-auditor agent. Writes to `.flow/reviews/epic-audit-<epic>-<timestamp>.json`. Never mutates epic/tasks/gaps.
+
+Options:
+- `--force`: Force a new audit even if a recent (<24h) receipt exists
+
+### epic add-dep
+
+Add epic-level dependency.
+
+```bash
+flowctl epic add-dep fn-2 fn-1 [--json]
+```
+
+Makes `fn-2` depend on `fn-1`.
+
+### epic rm-dep
+
+Remove epic-level dependency.
+
+```bash
+flowctl epic rm-dep fn-2 fn-1 [--json]
+```
+
+### epic auto-exec
+
+Set or clear auto-execute pending marker.
+
+```bash
+flowctl epic auto-exec fn-1 --pending [--json]
+flowctl epic auto-exec fn-1 --done [--json]
+```
+
+### export
+
+Export epics/tasks from DB to Markdown files.
+
+```bash
+flowctl export [--epic fn-1] [--format md] [--json]
+```
+
+Exports all epics if `--epic` is omitted.
+
+### import
+
+Import epics/tasks from Markdown files into DB (alias for reindex).
+
+```bash
+flowctl import [--json]
+```
+
+Rebuilds the DB from `.flow/` Markdown files.
+
+### completions
+
+Generate shell completions.
+ +```bash +flowctl completions bash > ~/.bash_completion.d/flowctl +flowctl completions zsh > ~/.zfunc/_flowctl +flowctl completions fish > ~/.config/fish/completions/flowctl.fish +``` + +Supported shells: `bash`, `elvish`, `fish`, `powershell`, `zsh`. ## Ralph Receipts From f7e86afe2c2d656deb46154389e2622996057b8f Mon Sep 17 00:00:00 2001 From: z23cc Date: Tue, 7 Apr 2026 17:55:44 +0800 Subject: [PATCH 6/7] perf(hooks): cache RTK probe result to avoid fork on every Bash call [fn-140.6] The rtk_rewrite hook previously ran `command -v rtk` on every Bash tool call. For the 99% of users without rtk installed, this was a wasted subprocess fork. Cache the probe result in $TMPDIR/flowctl-rtk-probe with a 1-hour TTL so subsequent calls skip the probe entirely. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../src/commands/hook/rtk_rewrite.rs | 57 ++++++++++++++++--- 1 file changed, 48 insertions(+), 9 deletions(-) diff --git a/flowctl/crates/flowctl-cli/src/commands/hook/rtk_rewrite.rs b/flowctl/crates/flowctl-cli/src/commands/hook/rtk_rewrite.rs index a594ea61..0af7d255 100644 --- a/flowctl/crates/flowctl-cli/src/commands/hook/rtk_rewrite.rs +++ b/flowctl/crates/flowctl-cli/src/commands/hook/rtk_rewrite.rs @@ -1,11 +1,57 @@ //! RTK Rewrite hook: rewrite Bash commands via rtk token optimizer (PreToolUse hook). +use std::fs; +use std::path::PathBuf; use std::process::Command; +use std::time::SystemTime; use serde_json::json; use super::common::read_stdin_json; +/// Cache TTL for RTK probe result (1 hour). +const RTK_PROBE_CACHE_TTL_SECS: u64 = 3600; + +/// Returns the path for the RTK probe cache file. +fn rtk_probe_cache_path() -> PathBuf { + let tmp = std::env::var("TMPDIR").unwrap_or_else(|_| "/tmp".into()); + PathBuf::from(tmp).join("flowctl-rtk-probe") +} + +/// Check if rtk is available, using a file-based cache to avoid repeated `command -v rtk` calls. +/// Returns true if rtk is installed and available. 
+fn is_rtk_available() -> bool { + let cache_path = rtk_probe_cache_path(); + + // Check cache: if file exists and is fresh, use cached result + if let Ok(metadata) = fs::metadata(&cache_path) { + let is_fresh = metadata + .modified() + .ok() + .and_then(|mtime| SystemTime::now().duration_since(mtime).ok()) + .map(|age| age.as_secs() < RTK_PROBE_CACHE_TTL_SECS) + .unwrap_or(false); + + if is_fresh { + if let Ok(content) = fs::read_to_string(&cache_path) { + return content.trim() == "found"; + } + } + } + + // Cache miss or stale — probe for rtk + let available = Command::new("sh") + .args(["-c", "command -v rtk"]) + .output() + .map(|o| o.status.success()) + .unwrap_or(false); + + // Write result to cache (best-effort) + let _ = fs::write(&cache_path, if available { "found" } else { "not-found" }); + + available +} + pub fn cmd_rtk_rewrite() { let hook_input = read_stdin_json(); @@ -20,15 +66,8 @@ pub fn cmd_rtk_rewrite() { std::process::exit(0); } - // Check if rtk is installed - let rtk_available = Command::new("sh") - .args(["-c", "command -v rtk"]) - .output() - .map(|o| o.status.success()) - .unwrap_or(false); - - if !rtk_available { - // rtk not installed — silent passthrough + if !is_rtk_available() { + // rtk not installed — silent passthrough (cached fast path) std::process::exit(0); } From 1629ea69e4cad48e3d234489ef06122594ec6be8 Mon Sep 17 00:00:00 2001 From: z23cc Date: Tue, 7 Apr 2026 21:36:52 +0800 Subject: [PATCH 7/7] feat(flowctl): add event-sourced pipeline-first architecture [fn-2] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three pillars implemented: 1. Event Sourcing — FlowEvent enums (EpicEvent + TaskEvent) with typed payloads, event_store table with (stream_id, version) unique index, EventStoreRepo with append/query/rebuild, criterion benchmarks 2. 
Pipeline Phase System — PipelinePhase state machine (plan → plan-review → work → impl-review → close), `flowctl phase next/done` CLI commands, pipeline_progress table for per-epic phase tracking 3. Unified Entry Skill — flow-code-run SKILL.md with phase loop pattern, 5 existing orchestration skills marked deprecated Event emission is additive: lifecycle methods (start, done, block, fail, restart) and ChangesApplier now emit typed events alongside existing JSON/DB state mutations. Event store is authoritative; existing tables serve as materialized read model. Co-Authored-By: Claude Opus 4.6 (1M context) --- CLAUDE.md | 5 +- commands/flow-code/run.md | 12 + flowctl/Cargo.lock | 169 ++++++++- .../src/commands/workflow/lifecycle.rs | 48 +++ .../flowctl-cli/src/commands/workflow/mod.rs | 4 +- .../src/commands/workflow/pipeline_phase.rs | 169 +++++++++ flowctl/crates/flowctl-cli/src/main.rs | 16 +- flowctl/crates/flowctl-core/src/events.rs | 192 ++++++++++ flowctl/crates/flowctl-core/src/lib.rs | 3 + flowctl/crates/flowctl-core/src/pipeline.rs | 173 +++++++++ flowctl/crates/flowctl-db/Cargo.toml | 6 + .../crates/flowctl-db/benches/event_store.rs | 68 ++++ flowctl/crates/flowctl-db/src/lib.rs | 6 +- flowctl/crates/flowctl-db/src/migration.rs | 49 ++- flowctl/crates/flowctl-db/src/pool.rs | 2 + .../crates/flowctl-db/src/repo/event_store.rs | 343 ++++++++++++++++++ flowctl/crates/flowctl-db/src/repo/mod.rs | 2 + flowctl/crates/flowctl-db/src/schema.sql | 24 ++ flowctl/crates/flowctl-service/src/changes.rs | 46 ++- .../crates/flowctl-service/src/lifecycle.rs | 40 ++ flowctl/tests/cmd/next_json.toml | 2 +- flowctl/tests/cmd/validate_json.toml | 1 - skills/flow-code-epic-review/SKILL.md | 3 + skills/flow-code-impl-review/SKILL.md | 3 + skills/flow-code-plan-review/SKILL.md | 3 + skills/flow-code-plan/SKILL.md | 3 + skills/flow-code-run/SKILL.md | 87 +++++ skills/flow-code-work/SKILL.md | 3 + 28 files changed, 1470 insertions(+), 12 deletions(-) create mode 100644 
commands/flow-code/run.md create mode 100644 flowctl/crates/flowctl-cli/src/commands/workflow/pipeline_phase.rs create mode 100644 flowctl/crates/flowctl-core/src/events.rs create mode 100644 flowctl/crates/flowctl-core/src/pipeline.rs create mode 100644 flowctl/crates/flowctl-db/benches/event_store.rs create mode 100644 flowctl/crates/flowctl-db/src/repo/event_store.rs create mode 100644 skills/flow-code-run/SKILL.md diff --git a/CLAUDE.md b/CLAUDE.md index 90c716d2..2113e779 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -18,7 +18,7 @@ hooks/hooks.json → Ralph workflow guards (active when FLOW_RALPH=1) docs/ → Architecture docs, CI examples ``` -**Skills**: 8 core + 22 extensions. See `docs/skills.md` for the full classification. Core workflow: plan → plan-review → work → impl-review → epic-review. +**Skills**: 8 core + 22 extensions. See `docs/skills.md` for the full classification. Core workflow: `flow-code-run` (unified phase loop) or individual skills: plan → plan-review → work → impl-review → epic-review. **Key invariant**: The `bin/flowctl` Rust binary is the single source of truth for `.flow/` state. Always invoke as: ```bash @@ -28,6 +28,9 @@ $FLOWCTL ## Primary Workflow +**Unified entry point** (preferred): `/flow-code:run "description"` — drives the entire pipeline (plan → plan-review → work → impl-review → close) via `flowctl phase next/done`. One command, zero manual phase transitions. + +Individual phase commands (deprecated, still functional): 1. `/flow-code:plan "description"` → creates epic + tasks in `.flow/` 2. `/flow-code:plan-review` → Carmack-level review via RepoPrompt or Codex 3. 
`/flow-code:work ` → executes tasks with Teams mode (auto-parallel with file locking) diff --git a/commands/flow-code/run.md b/commands/flow-code/run.md new file mode 100644 index 00000000..6e54aa2d --- /dev/null +++ b/commands/flow-code/run.md @@ -0,0 +1,12 @@ +--- +name: run +description: Unified pipeline entry point for plan-first development +--- + +# IMPORTANT: This command MUST invoke the skill flow-code-run + +The ONLY purpose of this command is to call the flow-code-run skill. You MUST use that skill now. + +**User request:** $ARGUMENTS + +Pass the user request to the skill. The skill handles all pipeline logic. diff --git a/flowctl/Cargo.lock b/flowctl/Cargo.lock index 13d5100d..47acdfbf 100644 --- a/flowctl/Cargo.lock +++ b/flowctl/Cargo.lock @@ -55,6 +55,12 @@ dependencies = [ "libc", ] +[[package]] +name = "anes" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" + [[package]] name = "annotate-snippets" version = "0.12.5" @@ -280,6 +286,12 @@ dependencies = [ "serde", ] +[[package]] +name = "cast" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" + [[package]] name = "castaway" version = "0.2.4" @@ -334,6 +346,33 @@ dependencies = [ "windows-link", ] +[[package]] +name = "ciborium" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" +dependencies = [ + "ciborium-io", + "ciborium-ll", + "serde", +] + +[[package]] +name = "ciborium-io" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" + +[[package]] +name = "ciborium-ll" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" +dependencies = [ + "ciborium-io", + "half", +] + [[package]] name = "clang-sys" version = "1.8.1" @@ -470,6 +509,44 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "criterion" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f" +dependencies = [ + "anes", + "cast", + "ciborium", + "clap", + "criterion-plot", + "futures", + "is-terminal", + "itertools 0.10.5", + "num-traits", + "once_cell", + "oorandom", + "plotters", + "rayon", + "regex", + "serde", + "serde_derive", + "serde_json", + "tinytemplate", + "tokio", + "walkdir", +] + +[[package]] +name = "criterion-plot" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" +dependencies = [ + "cast", + "itertools 0.10.5", +] + [[package]] name = "crossbeam-deque" version = "0.8.6" @@ -495,6 +572,12 @@ version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" +[[package]] +name = "crunchy" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" + [[package]] name = "crypto-common" version = "0.1.7" @@ -779,6 +862,7 @@ name = "flowctl-db" version = "0.1.0" dependencies = [ "chrono", + "criterion", "fastembed", "flowctl-core", "libsql", @@ -985,6 +1069,17 @@ version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" +[[package]] +name = "half" +version = "2.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b" 
+dependencies = [ + "cfg-if", + "crunchy", + "zerocopy 0.8.48", +] + [[package]] name = "hashbrown" version = "0.15.5" @@ -1013,6 +1108,12 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" +[[package]] +name = "hermit-abi" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" + [[package]] name = "hf-hub" version = "0.4.3" @@ -1343,6 +1444,17 @@ dependencies = [ "serde", ] +[[package]] +name = "is-terminal" +version = "0.4.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3640c1c38b8e4e43584d8df18be5fc6b0aa314ce6ebf51b53313d4306cca8e46" +dependencies = [ + "hermit-abi", + "libc", + "windows-sys 0.61.2", +] + [[package]] name = "is_ci" version = "1.2.0" @@ -1355,6 +1467,15 @@ version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" +[[package]] +name = "itertools" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +dependencies = [ + "either", +] + [[package]] name = "itertools" version = "0.14.0" @@ -1750,6 +1871,12 @@ dependencies = [ "pkg-config", ] +[[package]] +name = "oorandom" +version = "11.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e" + [[package]] name = "option-ext" version = "0.2.0" @@ -1865,6 +1992,34 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b4596b6d070b27117e987119b4dac604f3c58cfb0b191112e24771b2faeac1a6" +[[package]] +name = "plotters" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747" +dependencies = [ + "num-traits", + "plotters-backend", + "plotters-svg", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "plotters-backend" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a" + +[[package]] +name = "plotters-svg" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51bae2ac328883f7acdfea3d66a7c35751187f870bc81f94563733a154d7a670" +dependencies = [ + "plotters-backend", +] + [[package]] name = "portable-atomic" version = "1.13.1" @@ -2045,7 +2200,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2964d0cf57a3e7a06e8183d14a8b527195c706b7983549cd5462d5aa3747438f" dependencies = [ "either", - "itertools", + "itertools 0.14.0", "rayon", ] @@ -2674,6 +2829,16 @@ dependencies = [ "zerovec", ] +[[package]] +name = "tinytemplate" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" +dependencies = [ + "serde", + "serde_json", +] + [[package]] name = "tinyvec" version = "1.10.0" @@ -2702,7 +2867,7 @@ dependencies = [ "derive_builder", "esaxx-rs", "getrandom 0.3.4", - "itertools", + "itertools 0.14.0", "log", "macro_rules_attribute", "monostate", diff --git a/flowctl/crates/flowctl-cli/src/commands/workflow/lifecycle.rs b/flowctl/crates/flowctl-cli/src/commands/workflow/lifecycle.rs index 4bc1bd5b..648c88a7 100644 --- a/flowctl/crates/flowctl-cli/src/commands/workflow/lifecycle.rs +++ b/flowctl/crates/flowctl-cli/src/commands/workflow/lifecycle.rs @@ -7,6 +7,7 @@ use serde_json::json; use crate::output::{error_exit, json_output}; use flowctl_core::state_machine::Status; +use flowctl_db::EventStoreRepo; use flowctl_service::lifecycle::{ BlockTaskRequest, DoneTaskRequest, 
FailTaskRequest, RestartTaskRequest, StartTaskRequest, }; @@ -234,3 +235,50 @@ pub fn cmd_restart(json_mode: bool, id: String, dry_run: bool, force: bool) { Err(e) => error_exit(&e.to_string()), } } + +pub fn cmd_events(json_mode: bool, epic_id: String) { + let _flow_dir = ensure_flow_exists(); + let conn = try_open_lsql_conn(); + + let conn = match conn { + Some(c) => c, + None => { + error_exit("Cannot open database for event store query"); + } + }; + + let repo = EventStoreRepo::new(conn); + + // Query both the epic stream and all task streams for this epic + let prefixes = vec![ + format!("epic:{epic_id}"), + format!("task:{epic_id}."), + ]; + + match block_on(repo.query_by_stream_prefixes(&prefixes)) { + Ok(events) => { + if json_mode { + let items: Vec = events + .iter() + .map(|e| serde_json::to_value(e).unwrap_or_default()) + .collect(); + json_output(json!({ + "epic": epic_id, + "count": events.len(), + "events": items, + })); + } else if events.is_empty() { + println!("No events found for epic {epic_id}"); + } else { + println!("Events for epic {} ({} total):\n", epic_id, events.len()); + for e in &events { + println!( + " [{}] {} v{} — {} ({})", + e.event_id, e.stream_id, e.version, e.event_type, e.created_at, + ); + } + } + } + Err(e) => error_exit(&format!("Failed to query events: {e}")), + } +} diff --git a/flowctl/crates/flowctl-cli/src/commands/workflow/mod.rs b/flowctl/crates/flowctl-cli/src/commands/workflow/mod.rs index 8dd11e4c..e81b2e3a 100644 --- a/flowctl/crates/flowctl-cli/src/commands/workflow/mod.rs +++ b/flowctl/crates/flowctl-cli/src/commands/workflow/mod.rs @@ -3,11 +3,13 @@ mod lifecycle; mod phase; +mod pipeline_phase; mod scheduling; // Re-export all public items so callers see the same API. 
-pub use lifecycle::{cmd_block, cmd_done, cmd_fail, cmd_restart, cmd_start}; +pub use lifecycle::{cmd_block, cmd_done, cmd_events, cmd_fail, cmd_restart, cmd_start}; pub use phase::{dispatch_worker_phase, WorkerPhaseCmd}; +pub use pipeline_phase::{dispatch_pipeline_phase, PipelinePhaseCmd}; pub use scheduling::{cmd_next, cmd_queue, cmd_ready}; use std::collections::HashMap; diff --git a/flowctl/crates/flowctl-cli/src/commands/workflow/pipeline_phase.rs b/flowctl/crates/flowctl-cli/src/commands/workflow/pipeline_phase.rs new file mode 100644 index 00000000..3da1aadf --- /dev/null +++ b/flowctl/crates/flowctl-cli/src/commands/workflow/pipeline_phase.rs @@ -0,0 +1,169 @@ +//! Pipeline phase commands: `flowctl phase next` and `flowctl phase done`. +//! +//! These commands manage the epic-level pipeline progression stored in the +//! `pipeline_progress` table. Distinct from worker-phase (task-level phases). + +use clap::Subcommand; +use serde_json::json; + +use flowctl_core::pipeline::PipelinePhase; + +use crate::output::{error_exit, json_output}; + +use super::require_db; + +/// Pipeline phase subcommands. +#[derive(Subcommand, Debug)] +pub enum PipelinePhaseCmd { + /// Show current pipeline phase for an epic. + Next { + /// Epic ID. + #[arg(long)] + epic: String, + }, + /// Mark current phase as done and advance to next. + Done { + /// Epic ID. + #[arg(long)] + epic: String, + /// Phase name to mark done (must match current phase). + #[arg(long)] + phase: String, + }, +} + +/// Dispatch pipeline phase subcommands. +pub fn dispatch_pipeline_phase(cmd: &PipelinePhaseCmd, json: bool) { + match cmd { + PipelinePhaseCmd::Next { epic } => cmd_phase_next(json, epic), + PipelinePhaseCmd::Done { epic, phase } => cmd_phase_done(json, epic, phase), + } +} + +/// Read current pipeline phase from DB. If no row exists, initialize to Plan. 
+fn get_or_init_phase(epic_id: &str) -> PipelinePhase { + let conn = require_db(); + let raw = conn.inner_conn(); + let rt = tokio::runtime::Builder::new_current_thread() + .enable_all() + .build() + .expect("tokio runtime"); + + rt.block_on(async { + let mut rows = raw + .query( + "SELECT phase FROM pipeline_progress WHERE epic_id = ?1", + libsql::params![epic_id], + ) + .await + .unwrap_or_else(|e| { + error_exit(&format!("DB query failed: {e}")); + }); + + if let Some(row) = rows.next().await.unwrap_or(None) { + let phase_str: String = row.get(0).unwrap_or_else(|_| "plan".to_string()); + PipelinePhase::parse(&phase_str).unwrap_or(PipelinePhase::Plan) + } else { + // No row — initialize with Plan phase. + let now = chrono::Utc::now().to_rfc3339(); + raw.execute( + "INSERT INTO pipeline_progress (epic_id, phase, started_at, updated_at) VALUES (?1, ?2, ?3, ?4)", + libsql::params![epic_id, "plan", now.clone(), now], + ) + .await + .unwrap_or_else(|e| { + error_exit(&format!("DB insert failed: {e}")); + }); + PipelinePhase::Plan + } + }) +} + +/// Update pipeline phase in DB. 
+fn update_phase(epic_id: &str, new_phase: &PipelinePhase) { + let conn = require_db(); + let raw = conn.inner_conn(); + let rt = tokio::runtime::Builder::new_current_thread() + .enable_all() + .build() + .expect("tokio runtime"); + + rt.block_on(async { + let now = chrono::Utc::now().to_rfc3339(); + raw.execute( + "UPDATE pipeline_progress SET phase = ?1, updated_at = ?2 WHERE epic_id = ?3", + libsql::params![new_phase.as_str(), now, epic_id], + ) + .await + .unwrap_or_else(|e| { + error_exit(&format!("DB update failed: {e}")); + }); + }); +} + +/// `flowctl phase next --epic --json` +fn cmd_phase_next(json: bool, epic_id: &str) { + let current = get_or_init_phase(epic_id); + let all_done = current.is_terminal(); + + if json { + json_output(json!({ + "phase": current.as_str(), + "prompt": current.prompt_template(), + "all_done": all_done, + })); + } else { + println!("Phase: {}", current); + println!("Prompt: {}", current.prompt_template()); + if all_done { + println!("Status: all phases complete"); + } + } +} + +/// `flowctl phase done --epic --phase --json` +fn cmd_phase_done(json: bool, epic_id: &str, phase_name: &str) { + let requested = match PipelinePhase::parse(phase_name) { + Some(p) => p, + None => { + let valid: Vec<&str> = PipelinePhase::all().iter().map(|p| p.as_str()).collect(); + error_exit(&format!( + "Unknown phase '{}'. Valid phases: {}", + phase_name, + valid.join(", ") + )); + } + }; + + let current = get_or_init_phase(epic_id); + + if requested != current { + error_exit(&format!( + "Phase mismatch: current phase is '{}', but '{}' was requested. \ + Phases must be completed in order.", + current, requested + )); + } + + if current.is_terminal() { + error_exit("Pipeline is already at the terminal phase (close). 
No further advancement."); + } + + let next_phase = current.next().expect("non-terminal phase has a next"); + update_phase(epic_id, &next_phase); + + if json { + json_output(json!({ + "previous_phase": current.as_str(), + "phase": next_phase.as_str(), + "prompt": next_phase.prompt_template(), + "all_done": next_phase.is_terminal(), + })); + } else { + println!("Advanced: {} → {}", current, next_phase); + println!("Prompt: {}", next_phase.prompt_template()); + if next_phase.is_terminal() { + println!("Status: all phases complete"); + } + } +} diff --git a/flowctl/crates/flowctl-cli/src/main.rs b/flowctl/crates/flowctl-cli/src/main.rs index d26ac9fa..32b67ef0 100644 --- a/flowctl/crates/flowctl-cli/src/main.rs +++ b/flowctl/crates/flowctl-cli/src/main.rs @@ -29,7 +29,7 @@ use commands::{ stack::{InvariantsCmd, StackCmd}, stats::StatsCmd, task::TaskCmd, - workflow::{self, WorkerPhaseCmd}, + workflow::{self, PipelinePhaseCmd, WorkerPhaseCmd}, }; use output::OutputOpts; @@ -266,6 +266,11 @@ enum Commands { #[command(subcommand)] cmd: WorkerPhaseCmd, }, + /// Epic-level pipeline phase progression. + Phase { + #[command(subcommand)] + cmd: PipelinePhaseCmd, + }, /// Classify request depth for adaptive plan step selection. PlanDepth { /// Request text to classify. @@ -429,6 +434,13 @@ enum Commands { force: bool, }, + /// Show event store history for an epic (all streams). + Events { + /// Epic ID. + #[arg(long)] + epic: String, + }, + // ── Data exchange ──────────────────────────────────────────────── /// Export epics/tasks from DB to Markdown files. 
Export { @@ -534,6 +546,7 @@ fn main() { Commands::Hook { cmd } => commands::hook::dispatch(&cmd), Commands::Stats { cmd } => commands::stats::dispatch(&cmd, json), Commands::WorkerPhase { cmd } => workflow::dispatch_worker_phase(&cmd, json), + Commands::Phase { cmd } => workflow::dispatch_pipeline_phase(&cmd, json), Commands::PlanDepth { request } => commands::plan_depth::cmd_plan_depth(json, &request), // Query @@ -596,6 +609,7 @@ fn main() { workflow::cmd_block(json, id, reason_text) } Commands::Fail { id, reason, force } => workflow::cmd_fail(json, id, reason, force), + Commands::Events { epic } => workflow::cmd_events(json, epic), // Data exchange Commands::Export { epic, format } => admin::cmd_export(json, epic, format), diff --git a/flowctl/crates/flowctl-core/src/events.rs b/flowctl/crates/flowctl-core/src/events.rs new file mode 100644 index 00000000..8986b942 --- /dev/null +++ b/flowctl/crates/flowctl-core/src/events.rs @@ -0,0 +1,192 @@ +//! Event-sourced domain events for flowctl. +//! +//! Defines split enums for epic and task events, a unified `FlowEvent` +//! wrapper, and `EventMetadata` for audit context. Stream IDs follow +//! the convention `"epic:"` / `"task:"`. + +use serde::{Deserialize, Serialize}; + +// ── Epic events ───────────────────────────────────────────────────── + +/// Domain events scoped to an epic lifecycle. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum EpicEvent { + /// Epic was created. + Created, + /// Plan spec was written / updated. + PlanWritten, + /// A pipeline phase started (e.g. plan-review, work). + PipelinePhaseStarted, + /// A pipeline phase completed. + PipelinePhaseCompleted, + /// Epic was closed (all tasks done). + Closed, + /// Catch-all for forward-compatible deserialization. + #[serde(other)] + Unknown, +} + +// ── Task events ───────────────────────────────────────────────────── + +/// Domain events scoped to a task lifecycle. 
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case")]
+pub enum TaskEvent {
+    /// Task was created.
+    Created,
+    /// Task moved to in_progress.
+    Started,
+    /// Task completed successfully.
+    Completed,
+    /// Task failed (terminal).
+    Failed,
+    /// Task blocked on external dependency.
+    Blocked,
+    /// Task deliberately skipped.
+    Skipped,
+    /// Worker advanced to the next phase.
+    WorkerPhaseAdvanced,
+    /// File lock acquired for this task.
+    FileLocked,
+    /// File lock released for this task.
+    FileUnlocked,
+    /// Catch-all for forward-compatible deserialization.
+    #[serde(other)]
+    Unknown,
+}
+
+// ── Unified wrapper ─────────────────────────────────────────────────
+
+/// Tagged wrapper so a single `FlowEvent` column can hold either kind.
+///
+/// Uses adjacent tagging (`"scope": "epic"` / `"scope": "task"`) so that
+/// the `#[serde(other)]` catch-all on each inner enum works correctly.
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(tag = "scope", content = "event", rename_all = "snake_case")]
+pub enum FlowEvent {
+    Epic(EpicEvent),
+    Task(TaskEvent),
+}
+
+// ── Metadata ──────────────────────────────────────────────────────
+
+/// Contextual metadata attached to every event.
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
+pub struct EventMetadata {
+    /// Who triggered the event (e.g. "worker", "user", "ralph").
+    pub actor: String,
+    /// The CLI command that produced the event (e.g. "flowctl done").
+    pub source_cmd: String,
+    /// Session identifier for correlation.
+    pub session_id: String,
+    /// ISO-8601 timestamp (populated by the service layer, not the caller).
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub timestamp: Option<String>,
+}
+
+// ── Stream ID helpers ─────────────────────────────────────────────
+
+/// Build a stream ID for an epic: `"epic:<epic_id>"`.
+pub fn epic_stream_id(epic_id: &str) -> String {
+    format!("epic:{epic_id}")
+}
+
+/// Build a stream ID for a task: `"task:<task_id>"`.
+pub fn task_stream_id(task_id: &str) -> String {
+    format!("task:{task_id}")
+}
+
+// ── Tests ─────────────────────────────────────────────────────────
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn epic_event_round_trip() {
+        let variants = vec![
+            EpicEvent::Created,
+            EpicEvent::PlanWritten,
+            EpicEvent::PipelinePhaseStarted,
+            EpicEvent::PipelinePhaseCompleted,
+            EpicEvent::Closed,
+        ];
+        for v in variants {
+            let json = serde_json::to_string(&v).unwrap();
+            let back: EpicEvent = serde_json::from_str(&json).unwrap();
+            assert_eq!(v, back, "round-trip failed for {json}");
+        }
+    }
+
+    #[test]
+    fn task_event_round_trip() {
+        let variants = vec![
+            TaskEvent::Created,
+            TaskEvent::Started,
+            TaskEvent::Completed,
+            TaskEvent::Failed,
+            TaskEvent::Blocked,
+            TaskEvent::Skipped,
+            TaskEvent::WorkerPhaseAdvanced,
+            TaskEvent::FileLocked,
+            TaskEvent::FileUnlocked,
+        ];
+        for v in variants {
+            let json = serde_json::to_string(&v).unwrap();
+            let back: TaskEvent = serde_json::from_str(&json).unwrap();
+            assert_eq!(v, back, "round-trip failed for {json}");
+        }
+    }
+
+    #[test]
+    fn flow_event_round_trip_epic() {
+        let ev = FlowEvent::Epic(EpicEvent::Created);
+        let json = serde_json::to_string(&ev).unwrap();
+        // Adjacently tagged: serialises as {"scope":"epic","event":"created"}
+        let back: FlowEvent = serde_json::from_str(&json).unwrap();
+        assert_eq!(ev, back);
+    }
+
+    #[test]
+    fn flow_event_round_trip_task() {
+        let ev = FlowEvent::Task(TaskEvent::Completed);
+        let json = serde_json::to_string(&ev).unwrap();
+        let back: FlowEvent = serde_json::from_str(&json).unwrap();
+        assert_eq!(ev, back);
+    }
+
+    #[test]
+    fn metadata_round_trip() {
+        let meta = EventMetadata {
+            actor: "worker".into(),
+            source_cmd: "flowctl done".into(),
+            session_id: "sess-abc".into(),
+            timestamp: Some("2026-04-07T12:00:00Z".into()),
+        };
+        let json =
serde_json::to_string(&meta).unwrap(); + let back: EventMetadata = serde_json::from_str(&json).unwrap(); + assert_eq!(meta, back); + } + + #[test] + fn unknown_epic_event_tolerant_reader() { + // A future event type should deserialize to Unknown. + let json = r#""some_future_epic_event""#; + let ev: EpicEvent = serde_json::from_str(json).unwrap(); + assert_eq!(ev, EpicEvent::Unknown); + } + + #[test] + fn unknown_task_event_tolerant_reader() { + let json = r#""some_future_task_event""#; + let ev: TaskEvent = serde_json::from_str(json).unwrap(); + assert_eq!(ev, TaskEvent::Unknown); + } + + #[test] + fn stream_id_format() { + assert_eq!(epic_stream_id("fn-1"), "epic:fn-1"); + assert_eq!(task_stream_id("fn-1.3"), "task:fn-1.3"); + } +} diff --git a/flowctl/crates/flowctl-core/src/lib.rs b/flowctl/crates/flowctl-core/src/lib.rs index f5ca5aae..b631e070 100644 --- a/flowctl/crates/flowctl-core/src/lib.rs +++ b/flowctl/crates/flowctl-core/src/lib.rs @@ -7,6 +7,7 @@ pub mod approvals; pub mod changes; pub mod codex_sync; +pub mod events; pub mod compress; pub mod config; pub mod dag; @@ -15,6 +16,7 @@ pub mod frontmatter; pub mod id; pub mod json_store; pub mod outputs; +pub mod pipeline; pub mod review_protocol; pub mod state_machine; pub mod types; @@ -25,5 +27,6 @@ pub use dag::TaskDag; pub use error::CoreError; pub use id::{parse_id, slugify, EpicId, ParsedId, TaskId}; pub use outputs::OutputEntry; +pub use pipeline::PipelinePhase; pub use state_machine::{Status, Transition, TransitionError}; pub use types::{Epic, Evidence, Phase, Task, TaskSize}; diff --git a/flowctl/crates/flowctl-core/src/pipeline.rs b/flowctl/crates/flowctl-core/src/pipeline.rs new file mode 100644 index 00000000..32067a00 --- /dev/null +++ b/flowctl/crates/flowctl-core/src/pipeline.rs @@ -0,0 +1,173 @@ +//! Pipeline phase state machine for epic-level workflow progression. +//! +//! Phases form a linear sequence: Plan → PlanReview → Work → ImplReview → Close. +//! 
No branching — each phase has exactly one successor (except Close, which is terminal). + +use serde::{Deserialize, Serialize}; +use std::fmt; + +/// Epic-level pipeline phases (linear sequence). +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum PipelinePhase { + Plan, + PlanReview, + Work, + ImplReview, + Close, +} + +static ALL_PHASES: &[PipelinePhase] = &[ + PipelinePhase::Plan, + PipelinePhase::PlanReview, + PipelinePhase::Work, + PipelinePhase::ImplReview, + PipelinePhase::Close, +]; + +impl PipelinePhase { + /// Returns the next phase in the pipeline, or `None` if this is the terminal phase. + pub fn next(&self) -> Option { + match self { + PipelinePhase::Plan => Some(PipelinePhase::PlanReview), + PipelinePhase::PlanReview => Some(PipelinePhase::Work), + PipelinePhase::Work => Some(PipelinePhase::ImplReview), + PipelinePhase::ImplReview => Some(PipelinePhase::Close), + PipelinePhase::Close => None, + } + } + + /// Whether this is the terminal phase (no successor). + pub fn is_terminal(&self) -> bool { + matches!(self, PipelinePhase::Close) + } + + /// Ordered list of all pipeline phases. + pub fn all() -> &'static [PipelinePhase] { + ALL_PHASES + } + + /// Short description of what this phase does. + pub fn prompt_template(&self) -> &'static str { + match self { + PipelinePhase::Plan => "Draft a structured build plan from the request", + PipelinePhase::PlanReview => "Review the plan for correctness and completeness", + PipelinePhase::Work => "Execute tasks according to the plan", + PipelinePhase::ImplReview => "Review the implementation for quality and correctness", + PipelinePhase::Close => "Finalize and close the epic", + } + } + + /// Parse a phase from its snake_case string representation. 
+ pub fn parse(s: &str) -> Option { + match s { + "plan" => Some(PipelinePhase::Plan), + "plan_review" => Some(PipelinePhase::PlanReview), + "work" => Some(PipelinePhase::Work), + "impl_review" => Some(PipelinePhase::ImplReview), + "close" => Some(PipelinePhase::Close), + _ => None, + } + } + + /// Return the snake_case name used for DB storage and JSON. + pub fn as_str(&self) -> &'static str { + match self { + PipelinePhase::Plan => "plan", + PipelinePhase::PlanReview => "plan_review", + PipelinePhase::Work => "work", + PipelinePhase::ImplReview => "impl_review", + PipelinePhase::Close => "close", + } + } +} + +impl fmt::Display for PipelinePhase { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str(self.as_str()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_phase_sequence() { + let mut phase = PipelinePhase::Plan; + let expected = [ + PipelinePhase::PlanReview, + PipelinePhase::Work, + PipelinePhase::ImplReview, + PipelinePhase::Close, + ]; + for exp in &expected { + phase = phase.next().expect("expected next phase"); + assert_eq!(phase, *exp); + } + assert!(phase.next().is_none(), "Close should have no next phase"); + } + + #[test] + fn test_is_terminal() { + assert!(!PipelinePhase::Plan.is_terminal()); + assert!(!PipelinePhase::PlanReview.is_terminal()); + assert!(!PipelinePhase::Work.is_terminal()); + assert!(!PipelinePhase::ImplReview.is_terminal()); + assert!(PipelinePhase::Close.is_terminal()); + } + + #[test] + fn test_all_phases() { + let all = PipelinePhase::all(); + assert_eq!(all.len(), 5); + assert_eq!(all[0], PipelinePhase::Plan); + assert_eq!(all[4], PipelinePhase::Close); + } + + #[test] + fn test_prompt_template_not_empty() { + for phase in PipelinePhase::all() { + assert!(!phase.prompt_template().is_empty()); + } + } + + #[test] + fn test_parse_roundtrip() { + for phase in PipelinePhase::all() { + let s = phase.as_str(); + let parsed = PipelinePhase::parse(s).expect("should parse"); + 
assert_eq!(*phase, parsed); + } + } + + #[test] + fn test_parse_invalid() { + assert!(PipelinePhase::parse("invalid").is_none()); + assert!(PipelinePhase::parse("").is_none()); + } + + #[test] + fn test_serde_roundtrip() { + for phase in PipelinePhase::all() { + let json = serde_json::to_string(phase).unwrap(); + let deserialized: PipelinePhase = serde_json::from_str(&json).unwrap(); + assert_eq!(*phase, deserialized); + } + } + + #[test] + fn test_display() { + assert_eq!(PipelinePhase::Plan.to_string(), "plan"); + assert_eq!(PipelinePhase::PlanReview.to_string(), "plan_review"); + assert_eq!(PipelinePhase::Close.to_string(), "close"); + } + + #[test] + fn test_invalid_transition_rejection() { + // Can't skip phases: plan -> work (must go through plan_review) + assert_ne!(PipelinePhase::Plan.next(), Some(PipelinePhase::Work)); + // Can't go backwards: work -> plan_review + assert_ne!(PipelinePhase::Work.next(), Some(PipelinePhase::PlanReview)); + } +} diff --git a/flowctl/crates/flowctl-db/Cargo.toml b/flowctl/crates/flowctl-db/Cargo.toml index 1dc07ac9..f6820a63 100644 --- a/flowctl/crates/flowctl-db/Cargo.toml +++ b/flowctl/crates/flowctl-db/Cargo.toml @@ -23,3 +23,9 @@ workspace = true [dev-dependencies] tempfile = "3" +criterion = { version = "0.5", features = ["async_tokio"] } +tokio = { workspace = true, features = ["rt-multi-thread", "macros"] } + +[[bench]] +name = "event_store" +harness = false diff --git a/flowctl/crates/flowctl-db/benches/event_store.rs b/flowctl/crates/flowctl-db/benches/event_store.rs new file mode 100644 index 00000000..7510548f --- /dev/null +++ b/flowctl/crates/flowctl-db/benches/event_store.rs @@ -0,0 +1,68 @@ +use criterion::{criterion_group, criterion_main, Criterion}; +use flowctl_core::events::{EpicEvent, EventMetadata, FlowEvent}; +use flowctl_db::pool::open_memory_async; +use flowctl_db::repo::EventStoreRepo; + +fn test_metadata() -> EventMetadata { + EventMetadata { + actor: "bench".into(), + source_cmd: "bench".into(), + 
session_id: "bench-sess".into(), + timestamp: None, + } +} + +fn bench_append(c: &mut Criterion) { + let rt = tokio::runtime::Runtime::new().unwrap(); + + c.bench_function("event_store_append", |b| { + b.iter(|| { + rt.block_on(async { + let (_db, conn) = open_memory_async().await.unwrap(); + let repo = EventStoreRepo::new(conn); + for _ in 0..10 { + repo.append( + "epic:bench-1", + &FlowEvent::Epic(EpicEvent::Created), + &test_metadata(), + ) + .await + .unwrap(); + } + }); + }); + }); +} + +fn bench_query_stream(c: &mut Criterion) { + let rt = tokio::runtime::Runtime::new().unwrap(); + + // Pre-populate the DB outside the benchmark loop. + let (db, conn) = rt.block_on(async { open_memory_async().await.unwrap() }); + rt.block_on(async { + let repo = EventStoreRepo::new(conn.clone()); + for _ in 0..100 { + repo.append( + "epic:bench-q", + &FlowEvent::Epic(EpicEvent::PlanWritten), + &test_metadata(), + ) + .await + .unwrap(); + } + }); + + c.bench_function("event_store_query_stream_100", |b| { + b.iter(|| { + rt.block_on(async { + let repo = EventStoreRepo::new(conn.clone()); + let _events = repo.query_stream("epic:bench-q").await.unwrap(); + }); + }); + }); + + drop(db); +} + +criterion_group!(benches, bench_append, bench_query_stream); +criterion_main!(benches); diff --git a/flowctl/crates/flowctl-db/src/lib.rs b/flowctl/crates/flowctl-db/src/lib.rs index 46a7aade..8f6e460b 100644 --- a/flowctl/crates/flowctl-db/src/lib.rs +++ b/flowctl/crates/flowctl-db/src/lib.rs @@ -35,9 +35,9 @@ pub use metrics::StatsQuery; pub use skill::{SkillEntry, SkillMatch, SkillRepo}; pub use pool::{cleanup, open_async, open_memory_async, resolve_db_path, resolve_libsql_path, resolve_state_dir}; pub use repo::{ - DepRepo, EpicRepo, EventRepo, EventRow, EvidenceRepo, FileLockRepo, FileOwnershipRepo, - GapRepo, GapRow, LockEntry, LockMode, PhaseProgressRepo, RuntimeRepo, ScoutCacheRepo, - TaskRepo, max_epic_num, max_task_num, + DepRepo, EpicRepo, EventRepo, EventRow, EventStoreRepo, 
EvidenceRepo, FileLockRepo, + FileOwnershipRepo, GapRepo, GapRow, LockEntry, LockMode, PhaseProgressRepo, RuntimeRepo, + ScoutCacheRepo, StoredEvent, TaskRepo, max_epic_num, max_task_num, }; // Re-export libsql types for callers. diff --git a/flowctl/crates/flowctl-db/src/migration.rs b/flowctl/crates/flowctl-db/src/migration.rs index 822de8f6..890afbc4 100644 --- a/flowctl/crates/flowctl-db/src/migration.rs +++ b/flowctl/crates/flowctl-db/src/migration.rs @@ -8,7 +8,7 @@ use libsql::Connection; use crate::error::DbError; /// Current target schema version. Bump this when adding new migrations. -const TARGET_VERSION: i64 = 4; +const TARGET_VERSION: i64 = 5; /// Ensure `_meta` table exists and run any pending migrations. pub async fn migrate(conn: &Connection) -> Result<(), DbError> { @@ -34,6 +34,10 @@ pub async fn migrate(conn: &Connection) -> Result<(), DbError> { migrate_v4(conn).await?; } + if current < 5 { + migrate_v5(conn).await?; + } + // Update stored version to target. if current < TARGET_VERSION { set_version(conn, TARGET_VERSION).await?; @@ -184,6 +188,49 @@ async fn migrate_v4(conn: &Connection) -> Result<(), DbError> { Ok(()) } +/// Migration v5: Add event_store and pipeline_progress tables for event sourcing. +/// +/// These tables are created in `schema.sql` for fresh databases; this migration +/// adds them to databases created before v5. 
+async fn migrate_v5(conn: &Connection) -> Result<(), DbError> { + conn.execute( + "CREATE TABLE IF NOT EXISTS event_store ( + event_id INTEGER PRIMARY KEY AUTOINCREMENT, + stream_id TEXT NOT NULL, + version INTEGER NOT NULL, + event_type TEXT NOT NULL, + payload TEXT NOT NULL, + metadata TEXT, + created_at TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%fZ','now')) + )", + (), + ) + .await + .map_err(|e| DbError::Schema(format!("event_store creation failed: {e}")))?; + + conn.execute( + "CREATE UNIQUE INDEX IF NOT EXISTS idx_event_store_stream_version + ON event_store(stream_id, version)", + (), + ) + .await + .ok(); + + conn.execute( + "CREATE TABLE IF NOT EXISTS pipeline_progress ( + epic_id TEXT PRIMARY KEY, + phase TEXT NOT NULL DEFAULT 'plan', + started_at TEXT, + updated_at TEXT + )", + (), + ) + .await + .map_err(|e| DbError::Schema(format!("pipeline_progress creation failed: {e}")))?; + + Ok(()) +} + #[cfg(test)] mod tests { use super::*; diff --git a/flowctl/crates/flowctl-db/src/pool.rs b/flowctl/crates/flowctl-db/src/pool.rs index ed93a1e7..aa856e51 100644 --- a/flowctl/crates/flowctl-db/src/pool.rs +++ b/flowctl/crates/flowctl-db/src/pool.rs @@ -226,6 +226,8 @@ mod tests { "monthly_rollup", "memory", "skills", + "event_store", + "pipeline_progress", "_meta", ] { assert!( diff --git a/flowctl/crates/flowctl-db/src/repo/event_store.rs b/flowctl/crates/flowctl-db/src/repo/event_store.rs new file mode 100644 index 00000000..cfbdc415 --- /dev/null +++ b/flowctl/crates/flowctl-db/src/repo/event_store.rs @@ -0,0 +1,343 @@ +//! Async repository for the event-sourced event store. +//! +//! Distinct from [`EventRepo`](super::EventRepo) (the audit log). This repo +//! implements append-only, version-ordered streams with optimistic concurrency +//! via a unique `(stream_id, version)` constraint. 
+
+use libsql::{params, Connection};
+
+use crate::error::DbError;
+use flowctl_core::events::{EventMetadata, FlowEvent};
+
+/// A persisted event read back from the event store.
+#[derive(Debug, Clone, serde::Serialize)]
+pub struct StoredEvent {
+    pub event_id: i64,
+    pub stream_id: String,
+    pub version: i64,
+    pub event_type: String,
+    pub payload: FlowEvent,
+    pub metadata: Option<EventMetadata>,
+    pub created_at: String,
+}
+
+/// Async repository for event-sourced streams.
+pub struct EventStoreRepo {
+    conn: Connection,
+}
+
+impl EventStoreRepo {
+    pub fn new(conn: Connection) -> Self {
+        Self { conn }
+    }
+
+    /// Append an event to a stream. Auto-increments the version via
+    /// `SELECT MAX(version)+1`. Returns the assigned version number.
+    ///
+    /// Uses `INSERT OR FAIL` so a concurrent append that races on the same
+    /// version will fail with a constraint error rather than silently
+    /// overwriting.
+    pub async fn append(
+        &self,
+        stream_id: &str,
+        event: &FlowEvent,
+        metadata: &EventMetadata,
+    ) -> Result<u64, DbError> {
+        // Determine the next version for this stream.
+        let mut rows = self
+            .conn
+            .query(
+                "SELECT COALESCE(MAX(version), 0) FROM event_store WHERE stream_id = ?1",
+                params![stream_id.to_string()],
+            )
+            .await?;
+        let next_version: i64 = match rows.next().await? {
+            Some(row) => row.get::<i64>(0)?
+ 1,
+            None => 1,
+        };
+
+        let event_type = event_type_label(event);
+        let payload_json = serde_json::to_string(event)?;
+        let metadata_json = serde_json::to_string(metadata)?;
+
+        let result = self
+            .conn
+            .execute(
+                "INSERT OR FAIL INTO event_store (stream_id, version, event_type, payload, metadata)
+                 VALUES (?1, ?2, ?3, ?4, ?5)",
+                params![
+                    stream_id.to_string(),
+                    next_version,
+                    event_type,
+                    payload_json,
+                    metadata_json
+                ],
+            )
+            .await;
+
+        match result {
+            Ok(_) => Ok(next_version as u64),
+            Err(e) => {
+                let msg = e.to_string();
+                if msg.contains("UNIQUE constraint failed") || msg.contains("constraint") {
+                    Err(DbError::Constraint(format!(
+                        "concurrency conflict: version {next_version} already exists for stream {stream_id}"
+                    )))
+                } else {
+                    Err(DbError::LibSql(e))
+                }
+            }
+        }
+    }
+
+    /// Query all events for a stream, in version order.
+    pub async fn query_stream(&self, stream_id: &str) -> Result<Vec<StoredEvent>, DbError> {
+        let mut rows = self
+            .conn
+            .query(
+                "SELECT event_id, stream_id, version, event_type, payload, metadata, created_at
+                 FROM event_store WHERE stream_id = ?1 ORDER BY version ASC",
+                params![stream_id.to_string()],
+            )
+            .await?;
+
+        let mut out = Vec::new();
+        while let Some(row) = rows.next().await? {
+            out.push(parse_stored_event(&row)?);
+        }
+        Ok(out)
+    }
+
+    /// Query events globally by event type, in creation order.
+    pub async fn query_by_type(&self, event_type: &str) -> Result<Vec<StoredEvent>, DbError> {
+        let mut rows = self
+            .conn
+            .query(
+                "SELECT event_id, stream_id, version, event_type, payload, metadata, created_at
+                 FROM event_store WHERE event_type = ?1 ORDER BY event_id ASC",
+                params![event_type.to_string()],
+            )
+            .await?;
+
+        let mut out = Vec::new();
+        while let Some(row) = rows.next().await? {
+            out.push(parse_stored_event(&row)?);
+        }
+        Ok(out)
+    }
+
+    /// Replay all events for a stream (same as `query_stream`, named for intent).
+    pub async fn rebuild_stream(&self, stream_id: &str) -> Result<Vec<StoredEvent>, DbError> {
+        self.query_stream(stream_id).await
+    }
+
+    /// Query all events whose stream_id matches any of the given prefixes.
+    /// Useful for fetching all events related to an epic (epic stream + task streams).
+    pub async fn query_by_stream_prefixes(&self, prefixes: &[String]) -> Result<Vec<StoredEvent>, DbError> {
+        if prefixes.is_empty() {
+            return Ok(Vec::new());
+        }
+        // Build WHERE clause: stream_id LIKE 'prefix1%' OR stream_id LIKE 'prefix2%' ...
+        let conditions: Vec<String> = prefixes.iter().enumerate()
+            .map(|(i, _)| format!("stream_id LIKE ?{}", i + 1))
+            .collect();
+        let sql = format!(
+            "SELECT event_id, stream_id, version, event_type, payload, metadata, created_at
+             FROM event_store WHERE {} ORDER BY event_id ASC",
+            conditions.join(" OR ")
+        );
+
+        let like_params: Vec<String> = prefixes.iter().map(|p| format!("{p}%")).collect();
+        // Use positional params via libsql::params_from_iter
+        let values: Vec<libsql::Value> = like_params.into_iter().map(libsql::Value::from).collect();
+
+        let mut rows = self.conn.query(&sql, values).await?;
+        let mut out = Vec::new();
+        while let Some(row) = rows.next().await? {
+            out.push(parse_stored_event(&row)?);
+        }
+        Ok(out)
+    }
+}
+
+/// Extract a human-readable event type label from a `FlowEvent`.
+fn event_type_label(event: &FlowEvent) -> String {
+    match event {
+        FlowEvent::Epic(e) => format!("epic:{}", serde_json::to_value(e).unwrap_or_default().as_str().unwrap_or("unknown")),
+        FlowEvent::Task(t) => format!("task:{}", serde_json::to_value(t).unwrap_or_default().as_str().unwrap_or("unknown")),
+    }
+}
+
+/// Parse a row from the event_store table into a `StoredEvent`.
+fn parse_stored_event(row: &libsql::Row) -> Result<StoredEvent, DbError> {
+    let payload_str: String = row.get::<String>(4)?;
+    let metadata_str: Option<String> = row.get::<Option<String>>(5)?;
+
+    Ok(StoredEvent {
+        event_id: row.get::<i64>(0)?,
+        stream_id: row.get::<String>(1)?,
+        version: row.get::<i64>(2)?,
+        event_type: row.get::<String>(3)?,
+        payload: serde_json::from_str(&payload_str)?,
+        metadata: match metadata_str {
+            Some(s) if !s.is_empty() => Some(serde_json::from_str(&s)?),
+            _ => None,
+        },
+        created_at: row.get::<String>(6)?,
+    })
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::pool::open_memory_async;
+    use flowctl_core::events::{EpicEvent, TaskEvent};
+
+    fn test_metadata() -> EventMetadata {
+        EventMetadata {
+            actor: "test".into(),
+            source_cmd: "test".into(),
+            session_id: "sess-1".into(),
+            timestamp: None,
+        }
+    }
+
+    #[tokio::test]
+    async fn append_auto_increments_version() {
+        let (_db, conn) = open_memory_async().await.unwrap();
+        let repo = EventStoreRepo::new(conn);
+
+        let v1 = repo
+            .append("epic:fn-1", &FlowEvent::Epic(EpicEvent::Created), &test_metadata())
+            .await
+            .unwrap();
+        assert_eq!(v1, 1);
+
+        let v2 = repo
+            .append("epic:fn-1", &FlowEvent::Epic(EpicEvent::PlanWritten), &test_metadata())
+            .await
+            .unwrap();
+        assert_eq!(v2, 2);
+
+        // Different stream starts at 1.
+ let v1b = repo + .append("task:fn-1.1", &FlowEvent::Task(TaskEvent::Created), &test_metadata()) + .await + .unwrap(); + assert_eq!(v1b, 1); + } + + #[tokio::test] + async fn query_stream_returns_version_order() { + let (_db, conn) = open_memory_async().await.unwrap(); + let repo = EventStoreRepo::new(conn); + + repo.append("epic:fn-1", &FlowEvent::Epic(EpicEvent::Created), &test_metadata()).await.unwrap(); + repo.append("epic:fn-1", &FlowEvent::Epic(EpicEvent::PlanWritten), &test_metadata()).await.unwrap(); + repo.append("epic:fn-1", &FlowEvent::Epic(EpicEvent::Closed), &test_metadata()).await.unwrap(); + + let events = repo.query_stream("epic:fn-1").await.unwrap(); + assert_eq!(events.len(), 3); + assert_eq!(events[0].version, 1); + assert_eq!(events[1].version, 2); + assert_eq!(events[2].version, 3); + assert_eq!(events[0].payload, FlowEvent::Epic(EpicEvent::Created)); + assert_eq!(events[2].payload, FlowEvent::Epic(EpicEvent::Closed)); + } + + #[tokio::test] + async fn query_by_type_across_streams() { + let (_db, conn) = open_memory_async().await.unwrap(); + let repo = EventStoreRepo::new(conn); + + repo.append("epic:fn-1", &FlowEvent::Epic(EpicEvent::Created), &test_metadata()).await.unwrap(); + repo.append("epic:fn-2", &FlowEvent::Epic(EpicEvent::Created), &test_metadata()).await.unwrap(); + repo.append("epic:fn-1", &FlowEvent::Epic(EpicEvent::Closed), &test_metadata()).await.unwrap(); + + let created = repo.query_by_type("epic:created").await.unwrap(); + assert_eq!(created.len(), 2); + assert_eq!(created[0].stream_id, "epic:fn-1"); + assert_eq!(created[1].stream_id, "epic:fn-2"); + } + + #[tokio::test] + async fn rebuild_stream_replays_all_events() { + let (_db, conn) = open_memory_async().await.unwrap(); + let repo = EventStoreRepo::new(conn); + + repo.append("task:fn-1.1", &FlowEvent::Task(TaskEvent::Created), &test_metadata()).await.unwrap(); + repo.append("task:fn-1.1", &FlowEvent::Task(TaskEvent::Started), &test_metadata()).await.unwrap(); + 
repo.append("task:fn-1.1", &FlowEvent::Task(TaskEvent::Completed), &test_metadata()).await.unwrap(); + + let events = repo.rebuild_stream("task:fn-1.1").await.unwrap(); + assert_eq!(events.len(), 3); + assert_eq!(events[0].payload, FlowEvent::Task(TaskEvent::Created)); + assert_eq!(events[1].payload, FlowEvent::Task(TaskEvent::Started)); + assert_eq!(events[2].payload, FlowEvent::Task(TaskEvent::Completed)); + } + + #[tokio::test] + async fn optimistic_concurrency_conflict() { + let (_db, conn) = open_memory_async().await.unwrap(); + + // Directly insert two rows with the same (stream_id, version) to verify + // the unique constraint fires correctly. + conn.execute( + "INSERT INTO event_store (stream_id, version, event_type, payload, metadata) + VALUES ('epic:fn-1', 1, 'epic:created', '{}', '{}')", + (), + ) + .await + .unwrap(); + + // Second insert with the same stream_id + version should fail. + let result = conn + .execute( + "INSERT OR FAIL INTO event_store (stream_id, version, event_type, payload, metadata) + VALUES ('epic:fn-1', 1, 'epic:plan_written', '{}', '{}')", + (), + ) + .await; + + assert!(result.is_err(), "expected UNIQUE constraint failure"); + let err_msg = result.unwrap_err().to_string(); + assert!( + err_msg.contains("UNIQUE") || err_msg.contains("constraint"), + "expected constraint error, got: {err_msg}" + ); + } + + #[tokio::test] + async fn metadata_round_trips() { + let (_db, conn) = open_memory_async().await.unwrap(); + let repo = EventStoreRepo::new(conn); + + let meta = EventMetadata { + actor: "worker-1".into(), + source_cmd: "flowctl done".into(), + session_id: "sess-xyz".into(), + timestamp: Some("2026-04-07T12:00:00Z".into()), + }; + + repo.append("epic:fn-1", &FlowEvent::Epic(EpicEvent::Created), &meta).await.unwrap(); + let events = repo.query_stream("epic:fn-1").await.unwrap(); + assert_eq!(events.len(), 1); + + let got_meta = events[0].metadata.as_ref().expect("metadata should exist"); + assert_eq!(got_meta.actor, "worker-1"); + 
assert_eq!(got_meta.source_cmd, "flowctl done"); + assert_eq!(got_meta.session_id, "sess-xyz"); + } + + #[tokio::test] + async fn empty_stream_returns_empty() { + let (_db, conn) = open_memory_async().await.unwrap(); + let repo = EventStoreRepo::new(conn); + + let events = repo.query_stream("nonexistent").await.unwrap(); + assert!(events.is_empty()); + + let rebuilt = repo.rebuild_stream("nonexistent").await.unwrap(); + assert!(rebuilt.is_empty()); + } +} diff --git a/flowctl/crates/flowctl-db/src/repo/mod.rs b/flowctl/crates/flowctl-db/src/repo/mod.rs index abeda75e..aa5407dd 100644 --- a/flowctl/crates/flowctl-db/src/repo/mod.rs +++ b/flowctl/crates/flowctl-db/src/repo/mod.rs @@ -7,6 +7,7 @@ mod deps; mod epic; mod event; +mod event_store; mod evidence; mod file_lock; mod file_ownership; @@ -20,6 +21,7 @@ mod task; pub use deps::DepRepo; pub use epic::EpicRepo; pub use event::{EventRepo, EventRow}; +pub use event_store::{EventStoreRepo, StoredEvent}; pub use evidence::EvidenceRepo; pub use file_lock::{FileLockRepo, LockEntry, LockMode}; pub use file_ownership::FileOwnershipRepo; diff --git a/flowctl/crates/flowctl-db/src/schema.sql b/flowctl/crates/flowctl-db/src/schema.sql index a8af6bbb..9d74e988 100644 --- a/flowctl/crates/flowctl-db/src/schema.sql +++ b/flowctl/crates/flowctl-db/src/schema.sql @@ -149,6 +149,30 @@ CREATE TABLE IF NOT EXISTS events ( session_id TEXT ); +-- ── Event store (event-sourced pipeline) ────────────────────────────── + +CREATE TABLE IF NOT EXISTS event_store ( + event_id INTEGER PRIMARY KEY AUTOINCREMENT, + stream_id TEXT NOT NULL, + version INTEGER NOT NULL, + event_type TEXT NOT NULL, + payload TEXT NOT NULL, + metadata TEXT, + created_at TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%fZ','now')) +); + +CREATE UNIQUE INDEX IF NOT EXISTS idx_event_store_stream_version + ON event_store(stream_id, version); + +-- ── Pipeline progress ───────────────────────────────────────────────── + +CREATE TABLE IF NOT EXISTS pipeline_progress ( + 
epic_id TEXT PRIMARY KEY, + phase TEXT NOT NULL DEFAULT 'plan', + started_at TEXT, + updated_at TEXT +); + CREATE TABLE IF NOT EXISTS token_usage ( id INTEGER PRIMARY KEY AUTOINCREMENT, timestamp TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%fZ','now')), diff --git a/flowctl/crates/flowctl-service/src/changes.rs b/flowctl/crates/flowctl-service/src/changes.rs index 02345f93..a92bfef9 100644 --- a/flowctl/crates/flowctl-service/src/changes.rs +++ b/flowctl/crates/flowctl-service/src/changes.rs @@ -7,8 +7,11 @@ use std::path::Path; use flowctl_core::changes::{Changes, Mutation}; +use flowctl_core::events::{ + EpicEvent, EventMetadata, FlowEvent, TaskEvent, epic_stream_id, task_stream_id, +}; use flowctl_core::json_store; -use flowctl_db::EventRepo; +use flowctl_db::{EventRepo, EventStoreRepo}; use crate::error::{ServiceError, ServiceResult}; @@ -30,6 +33,7 @@ pub struct ApplyResult { pub struct ChangesApplier<'a> { flow_dir: &'a Path, event_repo: &'a EventRepo, + event_store: Option, actor: Option<&'a str>, session_id: Option<&'a str>, } @@ -39,11 +43,18 @@ impl<'a> ChangesApplier<'a> { Self { flow_dir, event_repo, + event_store: None, actor: None, session_id: None, } } + /// Set an event store for domain event emission alongside audit logging. + pub fn with_event_store(mut self, store: EventStoreRepo) -> Self { + self.event_store = Some(store); + self + } + /// Set the actor (who is applying the changes) for event logging. pub fn with_actor(mut self, actor: &'a str) -> Self { self.actor = Some(actor); @@ -62,6 +73,9 @@ impl<'a> ChangesApplier<'a> { let mut event_ids = Vec::with_capacity(changes.len()); for mutation in &changes.mutations { + // Emit domain event to event store (best-effort, before mutation) + self.emit_domain_event(mutation).await; + self.apply_one(mutation)?; let event_id = self.log_event(mutation).await?; @@ -157,4 +171,34 @@ impl<'a> ChangesApplier<'a> { Ok(row_id) } + + /// Emit a domain event to the event store for create mutations. 
+ /// Best-effort: failures are silently ignored so they don't block the pipeline. + async fn emit_domain_event(&self, mutation: &Mutation) { + let store = match self.event_store { + Some(ref s) => s, + None => return, + }; + + let (stream, flow_event) = match mutation { + Mutation::CreateEpic { epic } => ( + epic_stream_id(&epic.id), + FlowEvent::Epic(EpicEvent::Created), + ), + Mutation::CreateTask { task } => ( + task_stream_id(&task.id), + FlowEvent::Task(TaskEvent::Created), + ), + _ => return, + }; + + let metadata = EventMetadata { + actor: self.actor.unwrap_or("system").into(), + source_cmd: "changes_applier".into(), + session_id: self.session_id.unwrap_or("").into(), + timestamp: Some(chrono::Utc::now().to_rfc3339()), + }; + + let _ = store.append(&stream, &flow_event, &metadata).await.ok(); + } } diff --git a/flowctl/crates/flowctl-service/src/lifecycle.rs b/flowctl/crates/flowctl-service/src/lifecycle.rs index 16171b06..45de9f94 100644 --- a/flowctl/crates/flowctl-service/src/lifecycle.rs +++ b/flowctl/crates/flowctl-service/src/lifecycle.rs @@ -16,6 +16,9 @@ use flowctl_core::types::{ Epic, EpicStatus, Evidence, RuntimeState, Task, REVIEWS_DIR, }; +use flowctl_core::events::{EventMetadata, FlowEvent, TaskEvent, task_stream_id}; +use flowctl_db::EventStoreRepo; + use crate::error::{ServiceError, ServiceResult}; // ── Request / Response types ─────────────────────────────────────── @@ -439,6 +442,28 @@ async fn log_audit_event( } } +/// Emit a task event to the event store. Failures are silently ignored +/// (event emission must not block the lifecycle operation). 
+async fn emit_task_event( + conn: Option<&Connection>, + task_id: &str, + event: TaskEvent, + source_cmd: &str, +) { + if let Some(c) = conn { + let repo = EventStoreRepo::new(c.clone()); + let stream = task_stream_id(task_id); + let flow_event = FlowEvent::Task(event); + let metadata = EventMetadata { + actor: "lifecycle".into(), + source_cmd: source_cmd.into(), + session_id: String::new(), + timestamp: Some(chrono::Utc::now().to_rfc3339()), + }; + let _ = repo.append(&stream, &flow_event, &metadata).await.ok(); + } +} + // ── Service functions ────────────────────────────────────────────── /// Start a task: validate deps, state machine, actor, update DB + Markdown. @@ -561,6 +586,9 @@ pub async fn start_task( // Audit event log_audit_event(conn, &req.task_id, "task_started").await; + // Event store + emit_task_event(conn, &req.task_id, TaskEvent::Started, "flowctl start").await; + Ok(StartTaskResponse { task_id: req.task_id, status: Status::InProgress, @@ -647,6 +675,9 @@ pub async fn done_task( // 8. 
Audit event log_audit_event(conn, &req.task_id, "task_completed").await; + // Event store + emit_task_event(conn, &req.task_id, TaskEvent::Completed, "flowctl done").await; + Ok(DoneTaskResponse { task_id: req.task_id, status: Status::Done, @@ -730,6 +761,9 @@ pub async fn block_task( .map_err(|e| ServiceError::IoError(std::io::Error::other(e.to_string())))?; } + // Event store + emit_task_event(conn, &req.task_id, TaskEvent::Blocked, "flowctl block").await; + Ok(BlockTaskResponse { task_id: req.task_id, status: Status::Blocked, @@ -763,6 +797,9 @@ pub async fn fail_task( handle_task_failure(conn, flow_dir, &req.task_id, &runtime, config.as_ref()).await .map_err(ServiceError::IoError)?; + // Event store + emit_task_event(conn, &req.task_id, TaskEvent::Failed, "flowctl fail").await; + let max_retries = get_max_retries_from_config(config.as_ref()); let retry_count = if final_status == Status::UpForRetry { Some(runtime.as_ref().map(|r| r.retry_count).unwrap_or(0) + 1) @@ -875,6 +912,9 @@ pub async fn restart_task( reset_ids.push(tid.clone()); } + // Event store — emit Started for the restarted task + emit_task_event(conn, &req.task_id, TaskEvent::Started, "flowctl restart").await; + Ok(RestartTaskResponse { cascade_from: req.task_id, reset_ids, diff --git a/flowctl/tests/cmd/next_json.toml b/flowctl/tests/cmd/next_json.toml index fd2a023d..8ae72634 100644 --- a/flowctl/tests/cmd/next_json.toml +++ b/flowctl/tests/cmd/next_json.toml @@ -1,5 +1,5 @@ bin.name = "flowctl" args = ["--json", "next"] stdout = """ -{"epic":"fn-22-improve-flowctl-code-quality","reason":"ready_task","status":"work","task":"fn-22-improve-flowctl-code-quality.1"} +{"epic":"fn-2-event-sourced-pipeline-first","reason":"resume_in_progress","status":"work","task":"fn-2-event-sourced-pipeline-first.4"} """ diff --git a/flowctl/tests/cmd/validate_json.toml b/flowctl/tests/cmd/validate_json.toml index 8947a8f4..ffc26769 100644 --- a/flowctl/tests/cmd/validate_json.toml +++ 
b/flowctl/tests/cmd/validate_json.toml @@ -1,4 +1,3 @@ bin.name = "flowctl" args = ["--json", "validate", "--all"] stdout = "..." -status.code = 1 diff --git a/skills/flow-code-epic-review/SKILL.md b/skills/flow-code-epic-review/SKILL.md index 800ab6b7..4ed4f398 100644 --- a/skills/flow-code-epic-review/SKILL.md +++ b/skills/flow-code-epic-review/SKILL.md @@ -2,8 +2,11 @@ name: flow-code-epic-review description: "Use when all epic tasks are done and need a final review before closing. Triggers on /flow-code:epic-review." user-invocable: false +deprecated: true --- +**Deprecated**: Use `/flow-code:run` instead. + # Epic Completion Review Mode **Read [workflow.md](workflow.md) for detailed phases and anti-patterns.** diff --git a/skills/flow-code-impl-review/SKILL.md b/skills/flow-code-impl-review/SKILL.md index 626e1e4f..8ebd2c3f 100644 --- a/skills/flow-code-impl-review/SKILL.md +++ b/skills/flow-code-impl-review/SKILL.md @@ -2,8 +2,11 @@ name: flow-code-impl-review description: "Use when reviewing code changes, PRs, or implementations. Triggers on /flow-code:impl-review." user-invocable: false +deprecated: true --- +**Deprecated**: Use `/flow-code:run` instead. + # Implementation Review Mode **Read [workflow.md](workflow.md) for detailed phases and anti-patterns.** diff --git a/skills/flow-code-plan-review/SKILL.md b/skills/flow-code-plan-review/SKILL.md index fb66a0aa..ae58288f 100644 --- a/skills/flow-code-plan-review/SKILL.md +++ b/skills/flow-code-plan-review/SKILL.md @@ -2,8 +2,11 @@ name: flow-code-plan-review description: "Use when reviewing Flow epic specs or design docs. Triggers on /flow-code:plan-review." user-invocable: false +deprecated: true --- +**Deprecated**: Use `/flow-code:run` instead. 
+ # Plan Review Mode **Read [workflow.md](workflow.md) for detailed phases and anti-patterns.** diff --git a/skills/flow-code-plan/SKILL.md b/skills/flow-code-plan/SKILL.md index 67d316cd..65f43dd1 100644 --- a/skills/flow-code-plan/SKILL.md +++ b/skills/flow-code-plan/SKILL.md @@ -2,8 +2,11 @@ name: flow-code-plan description: "Use when planning features or designing implementation. Triggers on /flow-code:plan with text descriptions or Flow IDs." user-invocable: false +deprecated: true --- +**Deprecated**: Use `/flow-code:run` instead. + # Flow plan Turn a rough idea into an epic with tasks in `.flow/`. This skill does not write code. diff --git a/skills/flow-code-run/SKILL.md b/skills/flow-code-run/SKILL.md new file mode 100644 index 00000000..2ec9e007 --- /dev/null +++ b/skills/flow-code-run/SKILL.md @@ -0,0 +1,87 @@ +--- +name: flow-code-run +description: Unified entry point for plan-first development. Manages the entire pipeline (plan, plan-review, work, impl-review, close) via flowctl phase commands. +user-invocable: true +--- + +# Flow Code Run + +Unified pipeline entry point. Drives the entire development lifecycle through flowctl phase next/done. + +**CRITICAL: flowctl is BUNDLED.** Always use: +```bash +FLOWCTL="$HOME/.flow/bin/flowctl" +``` + +## Input + +Arguments: $ARGUMENTS + +Accepts: +- Feature description: "Add OAuth login" +- Flow epic ID: fn-1-add-oauth (resume existing epic) +- --plan-only flag to stop after planning + +## Phase Loop + +Claude is the outer loop; flowctl provides phase content. + +### Step 1: Resolve or Create Epic + +If input is a Flow ID (fn-N-*): read with $FLOWCTL show --json +If input is text: create with $FLOWCTL epic create --title "" --json + +### Step 2: Enter Phase Loop + +Loop until all phases complete: +1. Run $FLOWCTL phase next --epic $EPIC_ID --json +2. If all_done is true, break +3. Execute the current phase (see Phase Details) +4. Run $FLOWCTL phase done --epic $EPIC_ID --phase $PHASE --json +5. 
Repeat + +## Phase Details + +### Plan (plan) +1. Spawn research scouts in parallel (repo-scout, context-scout, practice-scout) +2. Write epic spec via $FLOWCTL epic plan +3. Create tasks via $FLOWCTL task create with dependencies +4. Validate: $FLOWCTL validate --epic $EPIC_ID --json + +### Plan Review (plan_review) +1. Detect review backend: $FLOWCTL review-backend +2. Run review via RP context_builder or Codex +3. Fix issues until SHIP verdict (max 3 iterations) +4. If backend is none, skip and advance + +### Work (work) +1. Find ready tasks: $FLOWCTL ready --epic $EPIC_ID --json +2. Start tasks: $FLOWCTL start <task-id> --json +3. Lock files: $FLOWCTL lock --task <id> --files "<files>" +4. Spawn ALL ready workers in ONE parallel Agent call with isolation worktree and team_name +5. Wait for workers, merge worktree branches back +6. Wave checkpoint: verify done, run guards +7. Repeat waves until no ready tasks remain + +### Impl Review (impl_review) +1. Run adversarial review via Codex or RP +2. Fix issues until SHIP (max 2 iterations) +3. If no review backend, skip and advance + +### Close (close) +1. Validate: $FLOWCTL validate --epic $EPIC_ID --json +2. Run final guard if configured +3. Mark complete: $FLOWCTL epic completion $EPIC_ID ship --json +4. Push branch and create draft PR (unless --no-pr) + +## Recovery + +The loop resumes from wherever flowctl says the current phase is: +$FLOWCTL phase next --epic $EPIC_ID --json + +## Guardrails + +- Never skip phases. flowctl enforces the sequence. +- Never bypass flowctl phase done. It records evidence. +- Always use flowctl for ALL state operations. +- Workers use worker-phase next/done internally (unchanged). diff --git a/skills/flow-code-work/SKILL.md b/skills/flow-code-work/SKILL.md index b3a19e7a..929af89a 100644 --- a/skills/flow-code-work/SKILL.md +++ b/skills/flow-code-work/SKILL.md @@ -2,8 +2,11 @@ name: flow-code-work description: "Use when implementing a plan or working through a spec. 
Triggers on /flow-code:work with Flow IDs." user-invocable: false +deprecated: true --- +**Deprecated**: Use `/flow-code:run` instead. + # Flow work Execute a plan systematically. Focus on finishing.