From 75c852e31753b0f31a52c490cfc328de92771f99 Mon Sep 17 00:00:00 2001 From: z23cc Date: Mon, 6 Apr 2026 20:58:00 +0800 Subject: [PATCH 1/6] refactor: unify FLOWCTL paths to $HOME/.flow/bin/flowctl Replace all ${DROID_PLUGIN_ROOT:-${CLAUDE_PLUGIN_ROOT}}/bin/flowctl references in skills/ with $HOME/.flow/bin/flowctl for cross-platform consistency. Also add YAML frontmatter to cross-model-reviewer.md agent. Co-Authored-By: Claude Opus 4.6 (1M context) --- agents/cross-model-reviewer.md | 7 +++++++ skills/_shared/rp-review-protocol.md | 2 +- skills/flow-code-auto-improve/SKILL.md | 2 +- skills/flow-code-brainstorm/SKILL.md | 4 ++-- skills/flow-code-deps/SKILL.md | 10 +++++----- skills/flow-code-epic-review/SKILL.md | 4 ++-- skills/flow-code-epic-review/workflow.md | 4 ++-- skills/flow-code-export-context/SKILL.md | 2 +- skills/flow-code-impl-review/SKILL.md | 4 ++-- skills/flow-code-impl-review/workflow.md | 2 +- skills/flow-code-interview/SKILL.md | 4 ++-- skills/flow-code-plan-review/SKILL.md | 4 ++-- skills/flow-code-plan-review/workflow.md | 2 +- skills/flow-code-plan/SKILL.md | 2 +- skills/flow-code-plan/steps.md | 2 +- skills/flow-code-ralph-init/SKILL.md | 4 ++-- skills/flow-code-retro/SKILL.md | 2 +- skills/flow-code-sync/SKILL.md | 6 +++--- skills/flow-code-work/SKILL.md | 2 +- skills/flow-code-work/phases.md | 2 +- skills/flow-code/SKILL.md | 2 +- 21 files changed, 40 insertions(+), 33 deletions(-) diff --git a/agents/cross-model-reviewer.md b/agents/cross-model-reviewer.md index ef84c534..f58e4cdb 100644 --- a/agents/cross-model-reviewer.md +++ b/agents/cross-model-reviewer.md @@ -1,3 +1,10 @@ +--- +name: cross-model-reviewer +description: Runs both Codex adversarial AND Claude review, then computes consensus +model: opus +disallowedTools: Edit, Write, Task +--- + # Cross-Model Reviewer Agent Orchestrates adversarial code review across multiple AI models (Codex + Claude) and computes consensus. 
diff --git a/skills/_shared/rp-review-protocol.md b/skills/_shared/rp-review-protocol.md index f671ad6c..5034c3b6 100644 --- a/skills/_shared/rp-review-protocol.md +++ b/skills/_shared/rp-review-protocol.md @@ -31,7 +31,7 @@ PARSE_SOURCE — source tag for parse-findings ("plan-review" | "impl-revie ```bash set -e -FLOWCTL="${DROID_PLUGIN_ROOT:-${CLAUDE_PLUGIN_ROOT}}/bin/flowctl" +FLOWCTL="$HOME/.flow/bin/flowctl" REPO_ROOT="$(git rev-parse --show-toplevel 2>/dev/null || pwd)" # Priority: --review flag > env > config (flag parsed in SKILL.md) diff --git a/skills/flow-code-auto-improve/SKILL.md b/skills/flow-code-auto-improve/SKILL.md index 9d54a03f..db54aea9 100644 --- a/skills/flow-code-auto-improve/SKILL.md +++ b/skills/flow-code-auto-improve/SKILL.md @@ -41,7 +41,7 @@ Full request: $ARGUMENTS ```bash PLUGIN_ROOT="${DROID_PLUGIN_ROOT:-${CLAUDE_PLUGIN_ROOT}}" TEMPLATES="$PLUGIN_ROOT/skills/flow-code-auto-improve/templates" -FLOWCTL="${DROID_PLUGIN_ROOT:-${CLAUDE_PLUGIN_ROOT}}/bin/flowctl" +FLOWCTL="$HOME/.flow/bin/flowctl" mkdir -p scripts/auto-improve/runs diff --git a/skills/flow-code-brainstorm/SKILL.md b/skills/flow-code-brainstorm/SKILL.md index 31b9568f..676bfe30 100644 --- a/skills/flow-code-brainstorm/SKILL.md +++ b/skills/flow-code-brainstorm/SKILL.md @@ -12,7 +12,7 @@ Explore and pressure-test an idea before committing to a plan. Outputs a require **CRITICAL: flowctl is BUNDLED — NOT installed globally.** `which flowctl` will fail (expected). Always use: ```bash -FLOWCTL="${DROID_PLUGIN_ROOT:-${CLAUDE_PLUGIN_ROOT}}/bin/flowctl" +FLOWCTL="$HOME/.flow/bin/flowctl" $FLOWCTL ``` @@ -52,7 +52,7 @@ Examples: Analyze the request and the codebase to gauge complexity: ```bash -FLOWCTL="${DROID_PLUGIN_ROOT:-${CLAUDE_PLUGIN_ROOT}}/bin/flowctl" +FLOWCTL="$HOME/.flow/bin/flowctl" ``` Read relevant code, git log, and project structure to understand the scope. 
diff --git a/skills/flow-code-deps/SKILL.md b/skills/flow-code-deps/SKILL.md index 2d6d1568..1c96b9c6 100644 --- a/skills/flow-code-deps/SKILL.md +++ b/skills/flow-code-deps/SKILL.md @@ -10,7 +10,7 @@ Visualize epic dependencies, blocking chains, and execution phases. ## Setup ```bash -FLOWCTL="${DROID_PLUGIN_ROOT:-${CLAUDE_PLUGIN_ROOT}}/bin/flowctl" +FLOWCTL="$HOME/.flow/bin/flowctl" $FLOWCTL detect --json | jq -e '.exists' >/dev/null && echo "OK: .flow/ exists" || echo "ERROR: run $FLOWCTL init" command -v jq >/dev/null 2>&1 && echo "OK: jq installed" || echo "ERROR: brew install jq" ``` @@ -20,7 +20,7 @@ command -v jq >/dev/null 2>&1 && echo "OK: jq installed" || echo "ERROR: brew in Build a consolidated view of all epics with their dependencies: ```bash -FLOWCTL="${DROID_PLUGIN_ROOT:-${CLAUDE_PLUGIN_ROOT}}/bin/flowctl" +FLOWCTL="$HOME/.flow/bin/flowctl" # Get all epic IDs epic_ids=$($FLOWCTL epics --json | jq -r '.epics[].id') @@ -42,7 +42,7 @@ done Determine which epics are ready vs blocked (pure jq, works on any shell): ```bash -FLOWCTL="${DROID_PLUGIN_ROOT:-${CLAUDE_PLUGIN_ROOT}}/bin/flowctl" +FLOWCTL="$HOME/.flow/bin/flowctl" # Collect all epic data with deps epics_json=$($FLOWCTL epics --json | jq -r '.epics[].id' | while read id; do @@ -74,7 +74,7 @@ echo "$epics_json" | jq -r ' Group epics into parallel execution phases: ```bash -FLOWCTL="${DROID_PLUGIN_ROOT:-${CLAUDE_PLUGIN_ROOT}}/bin/flowctl" +FLOWCTL="$HOME/.flow/bin/flowctl" # Collect all epic data epics_json=$($FLOWCTL epics --json | jq -r '.epics[].id' | while read id; do @@ -144,7 +144,7 @@ fn-1-add-auth → fn-2-add-oauth → fn-3-user-profile (3 phases) For a fast dependency check: ```bash -FLOWCTL="${DROID_PLUGIN_ROOT:-${CLAUDE_PLUGIN_ROOT}}/bin/flowctl" +FLOWCTL="$HOME/.flow/bin/flowctl" $FLOWCTL epics --json | jq -r '.epics[] | select(.status != "done") | "\(.id): \(.title) [\(.status)]"' ``` diff --git a/skills/flow-code-epic-review/SKILL.md b/skills/flow-code-epic-review/SKILL.md index 
cf3db630..4ed8474a 100644 --- a/skills/flow-code-epic-review/SKILL.md +++ b/skills/flow-code-epic-review/SKILL.md @@ -15,7 +15,7 @@ Verify that the combined implementation of all epic tasks satisfies the spec req **CRITICAL: flowctl is BUNDLED — NOT installed globally.** `which flowctl` will fail (expected). Always use: ```bash -FLOWCTL="${DROID_PLUGIN_ROOT:-${CLAUDE_PLUGIN_ROOT}}/bin/flowctl" +FLOWCTL="$HOME/.flow/bin/flowctl" ``` ## Backend Selection @@ -85,7 +85,7 @@ Format: ` [--review=rp|codex|none]` **See [workflow.md](workflow.md) for full details on each backend.** ```bash -FLOWCTL="${DROID_PLUGIN_ROOT:-${CLAUDE_PLUGIN_ROOT}}/bin/flowctl" +FLOWCTL="$HOME/.flow/bin/flowctl" REPO_ROOT="$(git rev-parse --show-toplevel 2>/dev/null || pwd)" ``` diff --git a/skills/flow-code-epic-review/workflow.md b/skills/flow-code-epic-review/workflow.md index 16898b57..83e19668 100644 --- a/skills/flow-code-epic-review/workflow.md +++ b/skills/flow-code-epic-review/workflow.md @@ -50,7 +50,7 @@ If no checklist file exists, fall back to narrative review (existing behavior). **Run before backend detection. Failing gaps block the review.** ```bash -FLOWCTL="${DROID_PLUGIN_ROOT:-${CLAUDE_PLUGIN_ROOT}}/bin/flowctl" +FLOWCTL="$HOME/.flow/bin/flowctl" # Check for unresolved blocking gaps GAP_RESULT="$($FLOWCTL gap check --epic "$EPIC_ID" --json 2>/dev/null || true)" @@ -85,7 +85,7 @@ If no gaps exist (empty array), this passes silently. 
```bash set -e -FLOWCTL="${DROID_PLUGIN_ROOT:-${CLAUDE_PLUGIN_ROOT}}/bin/flowctl" +FLOWCTL="$HOME/.flow/bin/flowctl" REPO_ROOT="$(git rev-parse --show-toplevel 2>/dev/null || pwd)" # Priority: --review flag > env > config (flag parsed in SKILL.md) diff --git a/skills/flow-code-export-context/SKILL.md b/skills/flow-code-export-context/SKILL.md index 0f769d3b..a0db08b8 100644 --- a/skills/flow-code-export-context/SKILL.md +++ b/skills/flow-code-export-context/SKILL.md @@ -19,7 +19,7 @@ Arguments: $ARGUMENTS — Format: ` [focus areas]` ### Step 1: Gather Content ```bash -FLOWCTL="${DROID_PLUGIN_ROOT:-${CLAUDE_PLUGIN_ROOT}}/bin/flowctl" +FLOWCTL="$HOME/.flow/bin/flowctl" OUTPUT_FILE="prompt-exports/$(date +%Y%m%d-%H%M%S)-export.md" mkdir -p prompt-exports ``` diff --git a/skills/flow-code-impl-review/SKILL.md b/skills/flow-code-impl-review/SKILL.md index 0e26e932..149a6d41 100644 --- a/skills/flow-code-impl-review/SKILL.md +++ b/skills/flow-code-impl-review/SKILL.md @@ -15,7 +15,7 @@ Conduct a John Carmack-level review of implementation changes on the current bra **CRITICAL: flowctl is BUNDLED — NOT installed globally.** `which flowctl` will fail (expected). Always use: ```bash -FLOWCTL="${DROID_PLUGIN_ROOT:-${CLAUDE_PLUGIN_ROOT}}/bin/flowctl" +FLOWCTL="$HOME/.flow/bin/flowctl" ``` ## Backend Selection @@ -91,7 +91,7 @@ Format: `[task ID] [--base ] [focus areas]` **See [workflow.md](workflow.md) for full details on each backend.** ```bash -FLOWCTL="${DROID_PLUGIN_ROOT:-${CLAUDE_PLUGIN_ROOT}}/bin/flowctl" +FLOWCTL="$HOME/.flow/bin/flowctl" REPO_ROOT="$(git rev-parse --show-toplevel 2>/dev/null || pwd)" ``` diff --git a/skills/flow-code-impl-review/workflow.md b/skills/flow-code-impl-review/workflow.md index 96a106da..2474a176 100644 --- a/skills/flow-code-impl-review/workflow.md +++ b/skills/flow-code-impl-review/workflow.md @@ -14,7 +14,7 @@ The reviewer model only sees selected files. 
RepoPrompt's Builder discovers cont ```bash set -e -FLOWCTL="${DROID_PLUGIN_ROOT:-${CLAUDE_PLUGIN_ROOT}}/bin/flowctl" +FLOWCTL="$HOME/.flow/bin/flowctl" REPO_ROOT="$(git rev-parse --show-toplevel 2>/dev/null || pwd)" # Priority: --review flag > env > config (flag parsed in SKILL.md) diff --git a/skills/flow-code-interview/SKILL.md b/skills/flow-code-interview/SKILL.md index 0cf3aeb6..c3c19633 100644 --- a/skills/flow-code-interview/SKILL.md +++ b/skills/flow-code-interview/SKILL.md @@ -12,7 +12,7 @@ Conduct an extremely thorough interview about a task/spec and write refined deta **CRITICAL: flowctl is BUNDLED — NOT installed globally.** `which flowctl` will fail (expected). Always use: ```bash -FLOWCTL="${DROID_PLUGIN_ROOT:-${CLAUDE_PLUGIN_ROOT}}/bin/flowctl" +FLOWCTL="$HOME/.flow/bin/flowctl" $FLOWCTL ``` @@ -55,7 +55,7 @@ If empty, ask: "What should I interview you about? Give me a Flow ID (e.g., fn-1 ## Setup ```bash -FLOWCTL="${DROID_PLUGIN_ROOT:-${CLAUDE_PLUGIN_ROOT}}/bin/flowctl" +FLOWCTL="$HOME/.flow/bin/flowctl" ``` ## Detect Input Type diff --git a/skills/flow-code-plan-review/SKILL.md b/skills/flow-code-plan-review/SKILL.md index 7c3147fb..dc883b08 100644 --- a/skills/flow-code-plan-review/SKILL.md +++ b/skills/flow-code-plan-review/SKILL.md @@ -15,7 +15,7 @@ Conduct a John Carmack-level review of epic plans. **CRITICAL: flowctl is BUNDLED — NOT installed globally.** `which flowctl` will fail (expected). 
Always use: ```bash -FLOWCTL="${DROID_PLUGIN_ROOT:-${CLAUDE_PLUGIN_ROOT}}/bin/flowctl" +FLOWCTL="$HOME/.flow/bin/flowctl" ``` ## Backend Selection @@ -111,7 +111,7 @@ Include the capability-gaps.md contents (if present) in the context sent to the **See [workflow.md](workflow.md) for full details on each backend.** ```bash -FLOWCTL="${DROID_PLUGIN_ROOT:-${CLAUDE_PLUGIN_ROOT}}/bin/flowctl" +FLOWCTL="$HOME/.flow/bin/flowctl" REPO_ROOT="$(git rev-parse --show-toplevel 2>/dev/null || pwd)" ``` diff --git a/skills/flow-code-plan-review/workflow.md b/skills/flow-code-plan-review/workflow.md index c4e01e29..9e5a7b9d 100644 --- a/skills/flow-code-plan-review/workflow.md +++ b/skills/flow-code-plan-review/workflow.md @@ -14,7 +14,7 @@ The reviewer model only sees selected files. RepoPrompt's Builder discovers cont ```bash set -e -FLOWCTL="${DROID_PLUGIN_ROOT:-${CLAUDE_PLUGIN_ROOT}}/bin/flowctl" +FLOWCTL="$HOME/.flow/bin/flowctl" REPO_ROOT="$(git rev-parse --show-toplevel 2>/dev/null || pwd)" # Priority: --review flag > env > config (flag parsed in SKILL.md) diff --git a/skills/flow-code-plan/SKILL.md b/skills/flow-code-plan/SKILL.md index 2f44570d..b01eeff0 100644 --- a/skills/flow-code-plan/SKILL.md +++ b/skills/flow-code-plan/SKILL.md @@ -14,7 +14,7 @@ Follow this skill and linked workflows exactly. Deviations cause drift, bad gate **CRITICAL: flowctl is BUNDLED — NOT installed globally.** `which flowctl` will fail (expected). 
Always use: ```bash -FLOWCTL="${DROID_PLUGIN_ROOT:-${CLAUDE_PLUGIN_ROOT}}/bin/flowctl" +FLOWCTL="$HOME/.flow/bin/flowctl" $FLOWCTL ``` diff --git a/skills/flow-code-plan/steps.md b/skills/flow-code-plan/steps.md index eee822ed..a7f2c9ec 100644 --- a/skills/flow-code-plan/steps.md +++ b/skills/flow-code-plan/steps.md @@ -40,7 +40,7 @@ Use **T-shirt sizes** based on observable metrics — not token estimates (model ```bash # Get flowctl path -FLOWCTL="${DROID_PLUGIN_ROOT:-${CLAUDE_PLUGIN_ROOT}}/bin/flowctl" +FLOWCTL="$HOME/.flow/bin/flowctl" # Ensure .flow exists $FLOWCTL init --json diff --git a/skills/flow-code-ralph-init/SKILL.md b/skills/flow-code-ralph-init/SKILL.md index bf98fb7d..4bedf93a 100644 --- a/skills/flow-code-ralph-init/SKILL.md +++ b/skills/flow-code-ralph-init/SKILL.md @@ -60,7 +60,7 @@ Scaffold or update repo-local Ralph harness. Opt-in only. cp "${DROID_PLUGIN_ROOT:-${CLAUDE_PLUGIN_ROOT}}/skills/flow-code-ralph-init/templates/prompt_work.md" scripts/ralph/ cp "${DROID_PLUGIN_ROOT:-${CLAUDE_PLUGIN_ROOT}}/skills/flow-code-ralph-init/templates/prompt_completion.md" scripts/ralph/ cp "${DROID_PLUGIN_ROOT:-${CLAUDE_PLUGIN_ROOT}}/skills/flow-code-ralph-init/templates/watch-filter.py" scripts/ralph/ - cp "${DROID_PLUGIN_ROOT:-${CLAUDE_PLUGIN_ROOT}}/bin/flowctl" scripts/ralph/flowctl + cp "$HOME/.flow/bin/flowctl" scripts/ralph/flowctl chmod +x scripts/ralph/ralph.sh scripts/ralph/ralph_once.sh scripts/ralph/flowctl # Restore config.env @@ -71,7 +71,7 @@ Scaffold or update repo-local Ralph harness. Opt-in only. ```bash mkdir -p scripts/ralph/runs cp -R "${DROID_PLUGIN_ROOT:-${CLAUDE_PLUGIN_ROOT}}/skills/flow-code-ralph-init/templates/." scripts/ralph/ - cp "${DROID_PLUGIN_ROOT:-${CLAUDE_PLUGIN_ROOT}}/bin/flowctl" scripts/ralph/flowctl + cp "$HOME/.flow/bin/flowctl" scripts/ralph/flowctl chmod +x scripts/ralph/ralph.sh scripts/ralph/ralph_once.sh scripts/ralph/flowctl ``` Note: `cp -R templates/.` copies all files including dotfiles (.gitignore). 
diff --git a/skills/flow-code-retro/SKILL.md b/skills/flow-code-retro/SKILL.md index 9f9f6af8..9076e294 100644 --- a/skills/flow-code-retro/SKILL.md +++ b/skills/flow-code-retro/SKILL.md @@ -19,7 +19,7 @@ Structured post-epic review that extracts actionable lessons and persists them t ### 1. Gather Evidence ```bash -FLOWCTL="${DROID_PLUGIN_ROOT:-${CLAUDE_PLUGIN_ROOT}}/bin/flowctl" +FLOWCTL="$HOME/.flow/bin/flowctl" # Epic summary $FLOWCTL show --json diff --git a/skills/flow-code-sync/SKILL.md b/skills/flow-code-sync/SKILL.md index dc1aa797..14fff16d 100644 --- a/skills/flow-code-sync/SKILL.md +++ b/skills/flow-code-sync/SKILL.md @@ -10,7 +10,7 @@ Manually trigger plan-sync to update downstream task specs. **CRITICAL: flowctl is BUNDLED - NOT installed globally.** Always use: ```bash -FLOWCTL="${DROID_PLUGIN_ROOT:-${CLAUDE_PLUGIN_ROOT}}/bin/flowctl" +FLOWCTL="$HOME/.flow/bin/flowctl" ``` ## Input @@ -26,7 +26,7 @@ Format: ` [--dry-run]` ### Step 1: Parse Arguments ```bash -FLOWCTL="${DROID_PLUGIN_ROOT:-${CLAUDE_PLUGIN_ROOT}}/bin/flowctl" +FLOWCTL="$HOME/.flow/bin/flowctl" REPO_ROOT="$(git rev-parse --show-toplevel 2>/dev/null || pwd)" ``` @@ -102,7 +102,7 @@ Build context and spawn via Task tool: Sync task specs from to downstream tasks. COMPLETED_TASK_ID: -FLOWCTL: ${DROID_PLUGIN_ROOT:-${CLAUDE_PLUGIN_ROOT}}/bin/flowctl +FLOWCTL: $HOME/.flow/bin/flowctl EPIC_ID: DOWNSTREAM_TASK_IDS: DRY_RUN: diff --git a/skills/flow-code-work/SKILL.md b/skills/flow-code-work/SKILL.md index 9de29b8c..aa6ccf0c 100644 --- a/skills/flow-code-work/SKILL.md +++ b/skills/flow-code-work/SKILL.md @@ -14,7 +14,7 @@ Follow this skill and linked workflows exactly. Deviations cause drift, bad gate **CRITICAL: flowctl is BUNDLED — NOT installed globally.** `which flowctl` will fail (expected). 
Always use: ```bash -FLOWCTL="${DROID_PLUGIN_ROOT:-${CLAUDE_PLUGIN_ROOT}}/bin/flowctl" +FLOWCTL="$HOME/.flow/bin/flowctl" $FLOWCTL ``` diff --git a/skills/flow-code-work/phases.md b/skills/flow-code-work/phases.md index 39701852..6e74f649 100644 --- a/skills/flow-code-work/phases.md +++ b/skills/flow-code-work/phases.md @@ -16,7 +16,7 @@ **CRITICAL: flowctl is BUNDLED — NOT installed globally.** `which flowctl` will fail (expected). Always use: ```bash -FLOWCTL="${DROID_PLUGIN_ROOT:-${CLAUDE_PLUGIN_ROOT}}/bin/flowctl" +FLOWCTL="$HOME/.flow/bin/flowctl" ``` ## Phase 1: Resolve Input diff --git a/skills/flow-code/SKILL.md b/skills/flow-code/SKILL.md index 2ad8c980..3ce2491c 100644 --- a/skills/flow-code/SKILL.md +++ b/skills/flow-code/SKILL.md @@ -12,7 +12,7 @@ Quick task operations in `.flow/`. For planning features use `/flow-code:plan`, **CRITICAL: flowctl is BUNDLED — NOT installed globally.** `which flowctl` will fail (expected). Always use: ```bash -FLOWCTL="${DROID_PLUGIN_ROOT:-${CLAUDE_PLUGIN_ROOT}}/bin/flowctl" +FLOWCTL="$HOME/.flow/bin/flowctl" ``` Then run commands with `$FLOWCTL `. From 3f6afc9df4c2d590579ed55d0d2164da9bbcd2a2 Mon Sep 17 00:00:00 2001 From: z23cc Date: Mon, 6 Apr 2026 20:59:04 +0800 Subject: [PATCH 2/6] feat(codex): add .codex-plugin manifest and marketplace JSON Co-Authored-By: Claude Opus 4.6 (1M context) --- .agents/plugins/marketplace.json | 15 +++++++++++++++ .codex-plugin/plugin.json | 9 +++++++++ scripts/bump-version.sh | 20 ++++++++++++++++++-- 3 files changed, 42 insertions(+), 2 deletions(-) create mode 100644 .agents/plugins/marketplace.json create mode 100644 .codex-plugin/plugin.json diff --git a/.agents/plugins/marketplace.json b/.agents/plugins/marketplace.json new file mode 100644 index 00000000..91e2503e --- /dev/null +++ b/.agents/plugins/marketplace.json @@ -0,0 +1,15 @@ +{ + "plugins": [ + { + "name": "flow-code", + "source": { + "source": "local", + "path": "." 
+ }, + "policy": { + "installation": "AVAILABLE", + "authentication": "ON_INSTALL" + } + } + ] +} diff --git a/.codex-plugin/plugin.json b/.codex-plugin/plugin.json new file mode 100644 index 00000000..2cedb24d --- /dev/null +++ b/.codex-plugin/plugin.json @@ -0,0 +1,9 @@ +{ + "name": "flow-code", + "version": "0.1.31", + "description": "Zero-dependency planning + execution with .flow/ task tracking and Ralph autonomous mode (multi-model review gates). Worker subagent per task with git worktree isolation for parallel execution. Three-layer quality system (guard + RP plan-review + Codex adversarial). Full-auto by default — AI decides from context, zero questions. Teams-default with file locking, DAG mutation, Codex-driven conflict resolution, auto draft-PR. Auto-detected stack profiles with one-command guard (test/lint/typecheck). Enhanced agent definitions with permissionMode/maxTurns/effort. Lifecycle hooks with state preservation (PreCompact injects .flow state into compaction, TaskCompleted auto-unlocks files, SubagentStart context injection). Memory v2 with atomic entries, dedup, and progressive disclosure. TDD enforcement mode. Multi-epic queue with dependency visualization. Includes 20 subagents, 25+ commands, 23 skills.", + "author": { + "name": "z23cc", + "url": "https://github.com/z23cc" + } +} diff --git a/scripts/bump-version.sh b/scripts/bump-version.sh index c5a085fe..83da9ce7 100755 --- a/scripts/bump-version.sh +++ b/scripts/bump-version.sh @@ -11,6 +11,7 @@ # 2. .claude-plugin/flowctl-version (v-prefixed) # 3. .claude-plugin/plugin.json (bare semver) # 4. .claude-plugin/marketplace.json (bare semver × 3) +# 5. .codex-plugin/plugin.json (bare semver) # # After running: commit + tag v + push to trigger GitHub Release. 
@@ -23,24 +24,27 @@ CARGO="flowctl/crates/flowctl-cli/Cargo.toml" PIN=".claude-plugin/flowctl-version" PLUGIN=".claude-plugin/plugin.json" MARKET=".claude-plugin/marketplace.json" +CODEX_PLUGIN=".codex-plugin/plugin.json" # ── Read current versions ─────────────────────────────────────────── current_cargo() { awk -F'"' '/^version = /{print $2; exit}' "$CARGO"; } current_pin() { tr -d ' \t\n\r' < "$PIN" | sed 's/^v//'; } current_plugin() { python3 -c 'import json,sys;print(json.load(open(sys.argv[1]))["version"])' "$PLUGIN"; } current_market() { python3 -c 'import json,sys;d=json.load(open(sys.argv[1]));print(d["version"])' "$MARKET"; } +current_codex() { python3 -c 'import json,sys;print(json.load(open(sys.argv[1]))["version"])' "$CODEX_PLUGIN"; } print_current() { printf "%-50s %s\n" "$CARGO" "$(current_cargo)" printf "%-50s %s\n" "$PIN" "v$(current_pin)" printf "%-50s %s\n" "$PLUGIN" "$(current_plugin)" printf "%-50s %s\n" "$MARKET" "$(current_market)" + printf "%-50s %s\n" "$CODEX_PLUGIN" "$(current_codex)" } # ── Check mode ────────────────────────────────────────────────────── if [ "${1:-}" = "--check" ]; then - c="$(current_cargo)"; p="$(current_pin)"; pl="$(current_plugin)"; m="$(current_market)" - if [ "$c" = "$p" ] && [ "$p" = "$pl" ] && [ "$pl" = "$m" ]; then + c="$(current_cargo)"; p="$(current_pin)"; pl="$(current_plugin)"; m="$(current_market)"; cx="$(current_codex)" + if [ "$c" = "$p" ] && [ "$p" = "$pl" ] && [ "$pl" = "$m" ] && [ "$m" = "$cx" ]; then echo "✓ all files agree on v$c" exit 0 fi @@ -108,6 +112,18 @@ with open(path, 'w') as f: f.write('\n') PY +# 5. 
.codex-plugin/plugin.json — update "version" field
+python3 - "$CODEX_PLUGIN" "$NEW" <<'PY'
+import json, sys
+path, new = sys.argv[1], sys.argv[2]
+with open(path) as f:
+    data = json.load(f)
+data["version"] = new
+with open(path, 'w') as f:
+    json.dump(data, f, indent=2, ensure_ascii=False)
+    f.write('\n')
+PY
+
 echo ""
 echo "updated files:"
 print_current
From c6dd3dd13f0a573a6e410ba7f2c1c6dc6412d39b Mon Sep 17 00:00:00 2001
From: z23cc
Date: Mon, 6 Apr 2026 21:01:14 +0800
Subject: [PATCH 3/6] feat(codex): add codex_sync core module for agent TOML generation

Co-Authored-By: Claude Opus 4.6 (1M context)
---
 flowctl/crates/flowctl-core/src/codex_sync.rs | 514 ++++++++++++++++++
 flowctl/crates/flowctl-core/src/lib.rs        |   1 +
 2 files changed, 515 insertions(+)
 create mode 100644 flowctl/crates/flowctl-core/src/codex_sync.rs

diff --git a/flowctl/crates/flowctl-core/src/codex_sync.rs b/flowctl/crates/flowctl-core/src/codex_sync.rs
new file mode 100644
index 00000000..19f8d505
--- /dev/null
+++ b/flowctl/crates/flowctl-core/src/codex_sync.rs
@@ -0,0 +1,514 @@
+//! Codex sync: convert Claude Code agent `.md` files to OpenAI Codex `.toml` format.
+//!
+//! Used by `flowctl codex sync` to generate a `.codex-plugin/` directory from
+//! the plugin's `agents/` Markdown definitions.
+
+use std::path::Path;
+
+use serde::Deserialize;
+
+use crate::error::CoreError;
+use crate::frontmatter;
+
+// ---------------------------------------------------------------------------
+// Types
+// ---------------------------------------------------------------------------
+
+/// Frontmatter fields expected in an agent `.md` file.
+#[derive(Debug, Clone, Deserialize)]
+pub struct AgentFrontmatter {
+    pub name: String,
+    pub description: String,
+    pub model: String,
+    #[serde(rename = "disallowedTools", default)]
+    pub disallowed_tools: Option<String>, // raw `disallowedTools` value; absent => None
+    pub color: Option<String>,
+}
+
+/// Result of mapping a Claude model string to Codex equivalents.
+#[derive(Debug, Clone)]
+pub struct ModelMapping {
+    pub codex_model: String, // Codex model name; may be empty
+    pub reasoning_effort: Option<String>,
+}
+
+/// Summary returned by [`sync_all`].
+#[derive(Debug, Default)]
+pub struct SyncSummary {
+    pub agents_generated: usize,
+    pub agents_skipped: usize,
+    pub hooks_generated: bool,
+    pub errors: Vec<String>, // one human-readable message per non-fatal failure
+}
+
+// ---------------------------------------------------------------------------
+// Intelligent-scout list
+// ---------------------------------------------------------------------------
+
+/// Scouts that should use the *intelligent* model even when their agent
+/// definition says "sonnet".
+const INTELLIGENT_SCOUTS: &[&str] = &["epic-scout", "claude-md-scout", "docs-gap-scout"];
+
+// ---------------------------------------------------------------------------
+// Model mapping
+// ---------------------------------------------------------------------------
+/// Model for the intelligent tier: `$CODEX_MODEL_INTELLIGENT` if set, else `gpt-5.4`.
+fn intelligent_model() -> String {
+    std::env::var("CODEX_MODEL_INTELLIGENT").unwrap_or_else(|_| "gpt-5.4".to_string())
+}
+/// Model for the fast tier: `$CODEX_MODEL_FAST` if set, else `gpt-5.4-mini`.
+fn fast_model() -> String {
+    std::env::var("CODEX_MODEL_FAST").unwrap_or_else(|_| "gpt-5.4-mini".to_string())
+}
+
+/// Map a Claude model identifier to a Codex model + optional reasoning effort.
+pub fn map_model(claude_model: &str, agent_name: &str) -> ModelMapping {
+    let model = claude_model.to_lowercase();
+    // A family matches by short alias ("opus") or by full-id prefix ("claude-opus...").
+    let is_family =
+        |family: &str| model == family || model.starts_with(&format!("claude-{family}"));
+    let intelligent = || ModelMapping {
+        codex_model: intelligent_model(),
+        reasoning_effort: Some("high".to_string()),
+    };
+    let fast = || ModelMapping {
+        codex_model: fast_model(),
+        reasoning_effort: None,
+    };
+
+    if is_family("opus") {
+        return intelligent();
+    }
+    if is_family("sonnet") {
+        // Listed scouts get the intelligent tier even though they say "sonnet".
+        return if INTELLIGENT_SCOUTS.contains(&agent_name) {
+            intelligent()
+        } else {
+            fast()
+        };
+    }
+    if is_family("haiku") {
+        return fast();
+    }
+    if model == "inherit" || model.is_empty() {
+        return ModelMapping {
+            codex_model: String::new(),
+            reasoning_effort: None,
+        };
+    }
+    // Any other identifier falls back to the intelligent tier.
+    intelligent()
+}
+
+// ---------------------------------------------------------------------------
+// Sandbox
+// ---------------------------------------------------------------------------
+
+/// Sandbox mode for agent `name`: write access for `worker`/`plan-sync`, else read-only.
+pub fn sandbox_for(name: &str) -> &'static str {
+    if matches!(name, "worker" | "plan-sync") {
+        return "workspace-write";
+    }
+    "read-only"
+}
+
+// ---------------------------------------------------------------------------
+// TOML generation
+// ---------------------------------------------------------------------------
+
+/// Generate the `.toml` content for a single agent.
+///
+/// Built manually (not via serde) because `developer_instructions` uses
+/// TOML multi-line basic strings (`"""`).
+pub fn generate_agent_toml(
+    fm: &AgentFrontmatter,
+    body: &str,
+    mapping: &ModelMapping,
+) -> String {
+    // Escape `\` then `"` so the body cannot close the `"""` string or inject escapes.
+    let escaped_body = body.replace('\\', "\\\\").replace('"', "\\\"");
+    let sandbox = sandbox_for(&fm.name);
+    let mut out = String::with_capacity(256 + escaped_body.len());
+    out.push_str("# Auto-generated by flowctl codex sync — do not edit manually\n");
+    out.push_str(&format!("name = {:?}\n", fm.name));
+    out.push_str(&format!("description = {:?}\n", fm.description));
+
+    if !mapping.codex_model.is_empty() {
+        out.push_str(&format!("model = {:?}\n", mapping.codex_model));
+    }
+    if let Some(ref effort) = mapping.reasoning_effort {
+        out.push_str(&format!("model_reasoning_effort = {:?}\n", effort));
+    }
+
+    out.push_str(&format!("sandbox_mode = {:?}\n", sandbox));
+    out.push_str(&format!(
+        "\ndeveloper_instructions = \"\"\"\n{}\"\"\"\n",
+        escaped_body
+    ));
+    out
+}
+
+// ---------------------------------------------------------------------------
+// Plugin / marketplace JSON
+// ---------------------------------------------------------------------------
+
+/// Generate `.codex-plugin/plugin.json`.
+pub fn generate_plugin_json(version: &str, name: &str, description: &str) -> String {
+    serde_json::json!({
+        "schema_version": "v1",
+        "name": name,
+        "description": description,
+        "version": version
+    })
+    .to_string()
+}
+
+/// Generate `.agents/plugins/marketplace.json`.
+pub fn generate_marketplace_json(plugin_name: &str) -> String {
+    serde_json::json!({
+        "plugins": [
+            { "name": plugin_name, "path": ".codex-plugin" }
+        ]
+    })
+    .to_string()
+}
+
+// ---------------------------------------------------------------------------
+// Hooks patching
+// ---------------------------------------------------------------------------
+
+/// Patch a Claude Code `hooks.json` for Codex: replace `"Bash"` matchers
+/// with `"Bash|Execute"`.
+pub fn generate_hooks_json(source_hooks: &str) -> Result<String, CoreError> {
+    let mut value: serde_json::Value = serde_json::from_str(source_hooks)
+        .map_err(|e| CoreError::FrontmatterParse(format!("hooks JSON parse error: {e}")))?;
+
+    patch_bash_matchers(&mut value);
+
+    serde_json::to_string_pretty(&value)
+        .map_err(|e| CoreError::FrontmatterParse(format!("hooks JSON serialize error: {e}")))
+}
+
+/// Recursively walk the JSON value and replace `"Bash"` strings with
+/// `"Bash|Execute"` wherever they appear.
+fn patch_bash_matchers(value: &mut serde_json::Value) {
+    match value {
+        serde_json::Value::String(s) if s == "Bash" => {
+            *s = "Bash|Execute".to_string();
+        }
+        serde_json::Value::Array(arr) => {
+            for item in arr.iter_mut() {
+                patch_bash_matchers(item);
+            }
+        }
+        serde_json::Value::Object(map) => {
+            for v in map.values_mut() {
+                patch_bash_matchers(v);
+            }
+        }
+        _ => {} // numbers, bools, null and non-matching strings are left untouched
+    }
+}
+
+// ---------------------------------------------------------------------------
+// sync_all
+// ---------------------------------------------------------------------------
+
+/// Synchronise all agent `.md` files into Codex `.toml` files.
+///
+/// If `dry_run` is true, no files are written but the summary still reflects
+/// what *would* happen.
+pub fn sync_all(
+    agents_dir: &Path,
+    hooks_path: Option<&Path>,
+    output_dir: &Path,
+    dry_run: bool,
+) -> Result<SyncSummary, CoreError> {
+    let mut summary = SyncSummary::default();
+
+    let agents_out = output_dir.join("agents");
+    if !dry_run {
+        std::fs::create_dir_all(&agents_out).map_err(|e| {
+            CoreError::FrontmatterParse(format!("cannot create output dir: {e}"))
+        })?;
+    }
+
+    // Read all .md files from agents_dir
+    let entries = std::fs::read_dir(agents_dir).map_err(|e| {
+        CoreError::FrontmatterParse(format!("cannot read agents dir: {e}"))
+    })?;
+
+    for entry in entries {
+        let entry = match entry {
+            Ok(e) => e,
+            Err(e) => {
+                summary.errors.push(format!("readdir error: {e}"));
+                summary.agents_skipped += 1;
+                continue;
+            }
+        };
+
+        let path = entry.path();
+        if path.extension().and_then(|s| s.to_str()) != Some("md") {
+            continue;
+        }
+
+        let content = match std::fs::read_to_string(&path) {
+            Ok(c) => c,
+            Err(e) => {
+                summary
+                    .errors
+                    .push(format!("{}: read error: {e}", path.display()));
+                summary.agents_skipped += 1;
+                continue;
+            }
+        };
+
+        let doc = match frontmatter::parse::<AgentFrontmatter>(&content) {
+            Ok(d) => d,
+            Err(e) => {
+                summary
+                    .errors
+                    .push(format!("{}: parse error: {e}", path.display()));
+                summary.agents_skipped += 1;
+                continue;
+            }
+        };
+
+        let mapping = map_model(&doc.frontmatter.model, &doc.frontmatter.name);
+        let toml_content = generate_agent_toml(&doc.frontmatter, &doc.body, &mapping);
+
+        if !dry_run {
+            let out_path = agents_out.join(format!("{}.toml", doc.frontmatter.name));
+            std::fs::write(&out_path, &toml_content).map_err(|e| {
+                CoreError::FrontmatterParse(format!(
+                    "cannot write {}: {e}",
+                    out_path.display()
+                ))
+            })?;
+        }
+
+        summary.agents_generated += 1;
+    }
+
+    // Hooks
+    if let Some(hp) = hooks_path {
+        if hp.exists() {
+            match std::fs::read_to_string(hp) {
+                Ok(src) => match generate_hooks_json(&src) {
+                    Ok(patched) => {
+                        if !dry_run {
+                            let hooks_out = output_dir.join("hooks.json");
+                            std::fs::write(&hooks_out, &patched).map_err(|e| {
+                                CoreError::FrontmatterParse(format!(
+                                    "cannot write hooks: {e}"
+                                ))
+                            })?;
+                        }
+                        summary.hooks_generated = true;
+                    }
+                    Err(e) => {
+                        summary.errors.push(format!("hooks patch error: {e}"));
+                    }
+                },
+                Err(e) => {
+                    summary.errors.push(format!("hooks read error: {e}"));
+                }
+            }
+        }
+    }
+
+    Ok(summary)
+}
+
+// ---------------------------------------------------------------------------
+// Tests
+// ---------------------------------------------------------------------------
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_map_model_opus() {
+        let m = map_model("opus", "worker");
+        assert_eq!(m.codex_model, "gpt-5.4");
+        assert_eq!(m.reasoning_effort.as_deref(), Some("high"));
+    }
+
+    #[test]
+    fn test_map_model_sonnet_fast() {
+        let m = map_model("sonnet", "worker");
+        assert_eq!(m.codex_model, "gpt-5.4-mini");
+        assert!(m.reasoning_effort.is_none());
+    }
+
+    #[test]
+    fn test_map_model_sonnet_intelligent_scout() {
+        let m = map_model("sonnet", "epic-scout");
+        assert_eq!(m.codex_model, "gpt-5.4");
+        assert_eq!(m.reasoning_effort.as_deref(), Some("high"));
+    }
+
+    #[test]
+    fn test_map_model_haiku() {
+        let m = map_model("haiku", "worker");
+        assert_eq!(m.codex_model, "gpt-5.4-mini");
+        assert!(m.reasoning_effort.is_none());
+    }
+
+    #[test]
+    fn test_map_model_inherit() {
+        let m = map_model("inherit", "worker");
+        assert!(m.codex_model.is_empty());
+        assert!(m.reasoning_effort.is_none());
+    }
+
+    #[test]
+    fn test_map_model_empty() {
+        let m = map_model("", "worker");
+        assert!(m.codex_model.is_empty());
+        assert!(m.reasoning_effort.is_none());
+    }
+
+    #[test]
+    fn test_sandbox_for_worker() {
+        assert_eq!(sandbox_for("worker"), "workspace-write");
+        assert_eq!(sandbox_for("plan-sync"), "workspace-write");
+    }
+
+    #[test]
+    fn test_sandbox_for_scout() {
+        assert_eq!(sandbox_for("epic-scout"), "read-only");
+        assert_eq!(sandbox_for("anything"), "read-only");
+    }
+
+    #[test]
+    fn test_generate_agent_toml_basic() {
+        let fm = AgentFrontmatter {
+            name: "test-agent".to_string(),
+            description: "A test agent".to_string(),
+            model: "opus".to_string(),
+            disallowed_tools: None,
+            color: None,
+        };
+        let mapping = ModelMapping {
+            codex_model: "gpt-5.4".to_string(),
+            reasoning_effort: Some("high".to_string()),
+        };
+        let toml = generate_agent_toml(&fm, "Do stuff.\n", &mapping);
+        assert!(toml.contains("name = \"test-agent\""));
+        assert!(toml.contains("model = \"gpt-5.4\""));
+        assert!(toml.contains("model_reasoning_effort = \"high\""));
+        assert!(toml.contains("sandbox_mode = \"read-only\""));
+        assert!(toml.contains("Do stuff."));
+    }
+
+    #[test]
+    fn test_generate_agent_toml_no_reasoning() {
+        let fm = AgentFrontmatter {
+            name: "worker".to_string(),
+            description: "Worker".to_string(),
+            model: "sonnet".to_string(),
+            disallowed_tools: None,
+            color: None,
+        };
+        let mapping = ModelMapping {
+            codex_model: "gpt-5.4-mini".to_string(),
+            reasoning_effort: None,
+        };
+        let toml = generate_agent_toml(&fm, "body\n", &mapping);
+        assert!(!toml.contains("model_reasoning_effort"));
+        assert!(toml.contains("sandbox_mode = \"workspace-write\""));
+    }
+
+    #[test]
+    fn test_generate_agent_toml_escapes_backslashes() {
+        let fm = AgentFrontmatter {
+            name: "esc".to_string(),
+            description: "d".to_string(),
+            model: "opus".to_string(),
+            disallowed_tools: None,
+            color: None,
+        };
+        let mapping = ModelMapping {
+            codex_model: "gpt-5.4".to_string(),
+            reasoning_effort: None,
+        };
+        let toml = generate_agent_toml(&fm, "path\\to\\thing\n", &mapping);
+        assert!(toml.contains("path\\\\to\\\\thing"));
+    }
+
+    #[test]
+    fn test_generate_plugin_json() {
+        let json = generate_plugin_json("1.0.0", "flow-code", "A plugin");
+        assert!(json.contains("flow-code"));
+        assert!(json.contains("1.0.0"));
+    }
+
+    #[test]
+    fn test_generate_marketplace_json() {
+        let json = generate_marketplace_json("flow-code");
+        assert!(json.contains("flow-code"));
+        assert!(json.contains(".codex-plugin"));
+    }
+
+    #[test]
+    fn test_generate_hooks_json_patches_bash() {
+        let input = r#"{"hooks":[{"matcher":"Bash","command":"echo hi"}]}"#;
+        let out = generate_hooks_json(input).unwrap();
+        assert!(out.contains("Bash|Execute"));
+        assert!(!out.contains("\"Bash\""));
+    }
+
+    #[test]
+    fn test_generate_hooks_json_invalid() {
+        let result = generate_hooks_json("not json");
+        assert!(result.is_err());
+    }
+
+    #[test]
+    fn test_sync_all_dry_run() {
+        let dir = tempfile::tempdir().unwrap();
+        let agents = dir.path().join("agents");
+        std::fs::create_dir(&agents).unwrap();
+
+        // Write a valid agent md
+        std::fs::write(
+            agents.join("test-scout.md"),
+            "---\nname: test-scout\ndescription: A scout\nmodel: sonnet\n---\nBody here.\n",
+        )
+        .unwrap();
+
+        // Write an invalid file (should be skipped)
+        std::fs::write(agents.join("bad.md"), "no frontmatter").unwrap();
+
+        let out = dir.path().join("out");
+        let summary = sync_all(&agents, None, &out, true).unwrap();
+        assert_eq!(summary.agents_generated, 1);
+        assert_eq!(summary.agents_skipped, 1);
+        // Dry run: no output files
+        assert!(!out.exists());
+    }
+
+    #[test]
+    fn test_sync_all_writes_files() {
+        let dir = tempfile::tempdir().unwrap();
+        let agents = dir.path().join("agents");
+        std::fs::create_dir(&agents).unwrap();
+
+        std::fs::write(
+            agents.join("worker.md"),
+            "---\nname: worker\ndescription: Worker agent\nmodel: inherit\n---\nDo work.\n",
+        )
+        .unwrap();
+
+        let out = dir.path().join("out");
+        let summary = sync_all(&agents, None, &out, false).unwrap();
+        assert_eq!(summary.agents_generated, 1);
+        assert!(out.join("agents/worker.toml").exists());
+
+        let content = std::fs::read_to_string(out.join("agents/worker.toml")).unwrap();
+        assert!(content.contains("name = \"worker\""));
+    }
+}
diff --git a/flowctl/crates/flowctl-core/src/lib.rs b/flowctl/crates/flowctl-core/src/lib.rs
index 5a2fc1c4..9bb8717f 100644
--- a/flowctl/crates/flowctl-core/src/lib.rs
+++ b/flowctl/crates/flowctl-core/src/lib.rs
@@ -5,6 +5,7 @@ //! 
other flowctl crates. pub mod approvals; +pub mod codex_sync; pub mod compress; pub mod dag; pub mod error; From 99ecbf0520d7a50f73b8d5e991baf8207effe862 Mon Sep 17 00:00:00 2001 From: z23cc Date: Mon, 6 Apr 2026 21:13:21 +0800 Subject: [PATCH 4/6] refactor(codex): split codex.rs into module directory and add sync subcommand Co-Authored-By: Claude Opus 4.6 (1M context) --- .../flowctl-cli/src/commands/codex/mod.rs | 379 ++++++++++++++++++ .../commands/{codex.rs => codex/review.rs} | 375 +---------------- .../flowctl-cli/src/commands/codex/sync.rs | 65 +++ 3 files changed, 463 insertions(+), 356 deletions(-) create mode 100644 flowctl/crates/flowctl-cli/src/commands/codex/mod.rs rename flowctl/crates/flowctl-cli/src/commands/{codex.rs => codex/review.rs} (64%) create mode 100644 flowctl/crates/flowctl-cli/src/commands/codex/sync.rs diff --git a/flowctl/crates/flowctl-cli/src/commands/codex/mod.rs b/flowctl/crates/flowctl-cli/src/commands/codex/mod.rs new file mode 100644 index 00000000..816c2691 --- /dev/null +++ b/flowctl/crates/flowctl-cli/src/commands/codex/mod.rs @@ -0,0 +1,379 @@ +//! Codex CLI wrapper commands. +//! +//! Spawns the `codex` CLI for code review operations. All review variants +//! delegate to `codex exec` with appropriate prompts and sandbox settings. + +mod review; +mod sync; + +use std::env; +use std::process::Command; + +use clap::Subcommand; +use regex::Regex; +use serde_json::json; + + + +#[derive(Subcommand, Debug)] +pub enum CodexCmd { + /// Check codex availability. + Check, + /// Implementation review. + ImplReview { + /// Task ID (optional for standalone). + task: Option, + /// Base branch for diff. + #[arg(long)] + base: String, + /// Focus areas (comma-separated). + #[arg(long)] + focus: Option, + /// Receipt file path. + #[arg(long)] + receipt: Option, + /// Sandbox mode. 
+ #[arg(long, default_value = "auto", value_parser = ["read-only", "workspace-write", "danger-full-access", "auto"])] + sandbox: String, + /// Model reasoning effort level. + #[arg(long, default_value = "high", value_parser = ["low", "medium", "high"])] + effort: String, + }, + /// Plan review. + PlanReview { + /// Epic ID. + epic: String, + /// Comma-separated file paths for context. + #[arg(long)] + files: String, + /// Base branch for context. + #[arg(long, default_value = "main")] + base: String, + /// Receipt file path. + #[arg(long)] + receipt: Option, + /// Sandbox mode. + #[arg(long, default_value = "auto", value_parser = ["read-only", "workspace-write", "danger-full-access", "auto"])] + sandbox: String, + /// Model reasoning effort level. + #[arg(long, default_value = "high", value_parser = ["low", "medium", "high"])] + effort: String, + }, + /// Adversarial review -- tries to break the code. + Adversarial { + /// Base branch for diff. + #[arg(long, default_value = "main")] + base: String, + /// Specific area to pressure-test. + #[arg(long)] + focus: Option, + /// Sandbox mode. + #[arg(long, default_value = "auto")] + sandbox: String, + /// Model reasoning effort level. + #[arg(long, default_value = "high", value_parser = ["low", "medium", "high"])] + effort: String, + }, + /// Cross-model review: runs both Codex adversarial AND Claude review, + /// then computes consensus. + CrossModel { + /// Base branch for diff. + #[arg(long, default_value = "main")] + base: String, + /// Specific area to pressure-test. + #[arg(long)] + focus: Option, + /// Sandbox mode for Codex. + #[arg(long, default_value = "auto")] + sandbox: String, + /// Model reasoning effort level. + #[arg(long, default_value = "high", value_parser = ["low", "medium", "high"])] + effort: String, + }, + /// Epic completion review. + CompletionReview { + /// Epic ID. + epic: String, + /// Base branch for diff. + #[arg(long, default_value = "main")] + base: String, + /// Receipt file path. 
+ #[arg(long)] + receipt: Option, + /// Sandbox mode. + #[arg(long, default_value = "auto", value_parser = ["read-only", "workspace-write", "danger-full-access", "auto"])] + sandbox: String, + /// Model reasoning effort level. + #[arg(long, default_value = "high", value_parser = ["low", "medium", "high"])] + effort: String, + }, + /// Sync agent .md files to Codex artifacts. + Sync { + /// Directory containing agent .md files. + #[arg(long, default_value = "agents")] + agents_dir: String, + /// Output directory for generated Codex artifacts. + #[arg(long, default_value = "codex")] + output_dir: String, + /// Source hooks.json file to patch. + #[arg(long, default_value = "hooks/hooks.json")] + hooks: String, + /// Validate without writing files. + #[arg(long)] + dry_run: bool, + /// Show per-file details. + #[arg(long)] + verbose: bool, + }, +} + +// ── Helpers ───────────────────────────────────────────────────────── + +/// Locate `codex` in PATH, returning the full path or None. +fn find_codex() -> Option { + which::which("codex").ok().map(|p| p.to_string_lossy().to_string()) +} + +/// Get codex version string (e.g. "0.1.2") or None. +fn get_codex_version() -> Option { + let codex = find_codex()?; + let output = Command::new(&codex) + .arg("--version") + .output() + .ok()?; + if !output.status.success() { + return None; + } + let text = String::from_utf8_lossy(&output.stdout); + let re = Regex::new(r"(\d+\.\d+\.\d+)").unwrap(); + re.captures(text.trim()) + .map(|c| c[1].to_string()) + .or_else(|| Some(text.trim().to_string())) +} + +/// Resolve sandbox mode: CLI flag > CODEX_SANDBOX env > platform default. +/// Never returns "auto". 
+fn resolve_sandbox(sandbox: &str) -> String { + let s = sandbox.trim(); + + // Explicit non-auto value from CLI + if !s.is_empty() && s != "auto" { + return s.to_string(); + } + + // Check CODEX_SANDBOX env var + if let Ok(env_val) = env::var("CODEX_SANDBOX") { + let ev = env_val.trim().to_string(); + if !ev.is_empty() && ev != "auto" { + return ev; + } + } + + // Platform default + if cfg!(windows) { + "danger-full-access".to_string() + } else { + "read-only".to_string() + } +} + +/// Run `codex exec` with the given prompt (passed via stdin). +/// Returns (stdout, thread_id, exit_code, stderr). +fn run_codex_exec( + prompt: &str, + session_id: Option<&str>, + sandbox: &str, + effort: &str, +) -> (String, Option, i32, String) { + let codex = match find_codex() { + Some(c) => c, + None => return (String::new(), None, 2, "codex not found in PATH".to_string()), + }; + + let timeout_secs: u64 = env::var("FLOW_CODEX_TIMEOUT") + .ok() + .and_then(|v| v.parse().ok()) + .unwrap_or(600); + + let model = env::var("FLOW_CODEX_MODEL").unwrap_or_else(|_| "gpt-5.4".to_string()); + + // Try resume if session_id is provided + if let Some(sid) = session_id { + let result = Command::new(&codex) + .args(["exec", "resume", sid, "-"]) + .stdin(std::process::Stdio::piped()) + .stdout(std::process::Stdio::piped()) + .stderr(std::process::Stdio::piped()) + .spawn(); + + if let Ok(mut child) = result { + use std::io::Write; + if let Some(ref mut stdin) = child.stdin { + let _ = stdin.write_all(prompt.as_bytes()); + } + // Drop stdin to close it + drop(child.stdin.take()); + + match child.wait_with_output() { + Ok(output) if output.status.success() => { + let stdout = String::from_utf8_lossy(&output.stdout).to_string(); + let stderr = String::from_utf8_lossy(&output.stderr).to_string(); + return (stdout, Some(sid.to_string()), 0, stderr); + } + _ => { + eprintln!("WARNING: Codex resume failed, starting new session"); + } + } + } + } + + // New session + let effort_config = 
format!("model_reasoning_effort=\"{}\"", effort); + let mut cmd = Command::new(&codex); + cmd.args([ + "exec", + "--model", &model, + "-c", &effort_config, + "--sandbox", sandbox, + "--skip-git-repo-check", + "--json", + "-", + ]); + cmd.stdin(std::process::Stdio::piped()); + cmd.stdout(std::process::Stdio::piped()); + cmd.stderr(std::process::Stdio::piped()); + + let result = cmd.spawn(); + match result { + Ok(mut child) => { + use std::io::Write; + if let Some(ref mut stdin) = child.stdin { + let _ = stdin.write_all(prompt.as_bytes()); + } + drop(child.stdin.take()); + + // Wait with timeout + let _timeout = std::time::Duration::from_secs(timeout_secs); + match child.wait_with_output() { + Ok(output) => { + let stdout = String::from_utf8_lossy(&output.stdout).to_string(); + let stderr = String::from_utf8_lossy(&output.stderr).to_string(); + let code = output.status.code().unwrap_or(1); + let thread_id = parse_thread_id(&stdout); + (stdout, thread_id, code, stderr) + } + Err(e) => (String::new(), None, 2, format!("codex exec error: {e}")), + } + } + Err(e) => (String::new(), None, 2, format!("failed to spawn codex: {e}")), + } +} + +/// Extract thread_id from codex --json JSONL output. +fn parse_thread_id(output: &str) -> Option { + for line in output.lines() { + if let Ok(data) = serde_json::from_str::(line) { + if data.get("type").and_then(|v| v.as_str()) == Some("thread.started") { + if let Some(tid) = data.get("thread_id").and_then(|v| v.as_str()) { + return Some(tid.to_string()); + } + } + } + } + None +} + +/// Extract verdict from codex output: SHIP etc. +fn parse_verdict(output: &str) -> Option { + let re = Regex::new(r"(SHIP|NEEDS_WORK|MAJOR_RETHINK)").unwrap(); + re.captures(output).map(|c| c[1].to_string()) +} + +/// Load receipt session_id for re-review continuity. 
+fn load_receipt(path: Option<&str>) -> (Option, bool) { + let path = match path { + Some(p) if !p.is_empty() => p, + _ => return (None, false), + }; + let content = match std::fs::read_to_string(path) { + Ok(c) => c, + Err(_) => return (None, false), + }; + match serde_json::from_str::(&content) { + Ok(data) => { + let sid = data.get("session_id").and_then(|v| v.as_str()).map(std::string::ToString::to_string); + let is_rereview = sid.is_some(); + (sid, is_rereview) + } + Err(_) => (None, false), + } +} + +/// Save receipt JSON for ralph-compatible review tracking. +#[allow(clippy::too_many_arguments)] +fn save_receipt( + path: &str, + review_type: &str, + review_id: &str, + verdict: &str, + session_id: Option<&str>, + output: &str, + base_branch: Option<&str>, + focus: Option<&str>, +) { + let mut data = json!({ + "type": review_type, + "id": review_id, + "mode": "codex", + "verdict": verdict, + "session_id": session_id, + "timestamp": chrono::Utc::now().to_rfc3339(), + "review": output, + }); + if let Some(base) = base_branch { + data["base"] = json!(base); + } + if let Some(f) = focus { + data["focus"] = json!(f); + } + if let Ok(iter_str) = env::var("RALPH_ITERATION") { + if let Ok(iter) = iter_str.parse::() { + data["iteration"] = json!(iter); + } + } + let content = serde_json::to_string_pretty(&data).unwrap_or_default(); + let _ = std::fs::write(path, format!("{content}\n")); +} + +/// Delete a stale receipt on failure. 
+fn delete_stale_receipt(path: Option<&str>) { + if let Some(p) = path { + let _ = std::fs::remove_file(p); + } +} + +// ── Dispatch ──────────────────────────────────────────────────────── + +pub fn dispatch(cmd: &CodexCmd, json: bool) { + match cmd { + CodexCmd::Check => review::cmd_check(json), + CodexCmd::ImplReview { + task, base, focus, receipt, sandbox, effort, + } => review::cmd_impl_review(json, task.as_deref(), base, focus.as_deref(), receipt.as_deref(), sandbox, effort), + CodexCmd::PlanReview { + epic, files, base, receipt, sandbox, effort, + } => review::cmd_plan_review(json, epic, files, base, receipt.as_deref(), sandbox, effort), + CodexCmd::Adversarial { + base, focus, sandbox, effort, + } => review::cmd_adversarial(json, base, focus.as_deref(), sandbox, effort), + CodexCmd::CrossModel { + base, focus, sandbox, effort, + } => review::cmd_cross_model(json, base, focus.as_deref(), sandbox, effort), + CodexCmd::CompletionReview { + epic, base, receipt, sandbox, effort, + } => review::cmd_completion_review(json, epic, base, receipt.as_deref(), sandbox, effort), + CodexCmd::Sync { + agents_dir, output_dir, hooks, dry_run, verbose, + } => sync::cmd_sync(json, agents_dir, output_dir, hooks, *dry_run, *verbose), + } +} diff --git a/flowctl/crates/flowctl-cli/src/commands/codex.rs b/flowctl/crates/flowctl-cli/src/commands/codex/review.rs similarity index 64% rename from flowctl/crates/flowctl-cli/src/commands/codex.rs rename to flowctl/crates/flowctl-cli/src/commands/codex/review.rs index e3abb390..822db31b 100644 --- a/flowctl/crates/flowctl-cli/src/commands/codex.rs +++ b/flowctl/crates/flowctl-cli/src/commands/codex/review.rs @@ -1,12 +1,10 @@ -//! Codex CLI wrapper commands. +//! Codex review command implementations. //! -//! Spawns the `codex` CLI for code review operations. All review variants -//! delegate to `codex exec` with appropriate prompts and sandbox settings. +//! Contains the `cmd_*` functions for impl-review, plan-review, adversarial, +//! 
cross-model, and completion-review, plus their parsing helpers. use std::env; -use std::process::Command; -use clap::Subcommand; use regex::Regex; use serde_json::json; @@ -17,353 +15,16 @@ use flowctl_core::review_protocol::{ use crate::output::{error_exit, json_output}; -#[derive(Subcommand, Debug)] -pub enum CodexCmd { - /// Check codex availability. - Check, - /// Implementation review. - ImplReview { - /// Task ID (optional for standalone). - task: Option, - /// Base branch for diff. - #[arg(long)] - base: String, - /// Focus areas (comma-separated). - #[arg(long)] - focus: Option, - /// Receipt file path. - #[arg(long)] - receipt: Option, - /// Sandbox mode. - #[arg(long, default_value = "auto", value_parser = ["read-only", "workspace-write", "danger-full-access", "auto"])] - sandbox: String, - /// Model reasoning effort level. - #[arg(long, default_value = "high", value_parser = ["low", "medium", "high"])] - effort: String, - }, - /// Plan review. - PlanReview { - /// Epic ID. - epic: String, - /// Comma-separated file paths for context. - #[arg(long)] - files: String, - /// Base branch for context. - #[arg(long, default_value = "main")] - base: String, - /// Receipt file path. - #[arg(long)] - receipt: Option, - /// Sandbox mode. - #[arg(long, default_value = "auto", value_parser = ["read-only", "workspace-write", "danger-full-access", "auto"])] - sandbox: String, - /// Model reasoning effort level. - #[arg(long, default_value = "high", value_parser = ["low", "medium", "high"])] - effort: String, - }, - /// Adversarial review -- tries to break the code. - Adversarial { - /// Base branch for diff. - #[arg(long, default_value = "main")] - base: String, - /// Specific area to pressure-test. - #[arg(long)] - focus: Option, - /// Sandbox mode. - #[arg(long, default_value = "auto")] - sandbox: String, - /// Model reasoning effort level. 
- #[arg(long, default_value = "high", value_parser = ["low", "medium", "high"])] - effort: String, - }, - /// Cross-model review: runs both Codex adversarial AND Claude review, - /// then computes consensus. - CrossModel { - /// Base branch for diff. - #[arg(long, default_value = "main")] - base: String, - /// Specific area to pressure-test. - #[arg(long)] - focus: Option, - /// Sandbox mode for Codex. - #[arg(long, default_value = "auto")] - sandbox: String, - /// Model reasoning effort level. - #[arg(long, default_value = "high", value_parser = ["low", "medium", "high"])] - effort: String, - }, - /// Epic completion review. - CompletionReview { - /// Epic ID. - epic: String, - /// Base branch for diff. - #[arg(long, default_value = "main")] - base: String, - /// Receipt file path. - #[arg(long)] - receipt: Option, - /// Sandbox mode. - #[arg(long, default_value = "auto", value_parser = ["read-only", "workspace-write", "danger-full-access", "auto"])] - sandbox: String, - /// Model reasoning effort level. - #[arg(long, default_value = "high", value_parser = ["low", "medium", "high"])] - effort: String, - }, -} - -// ── Helpers ───────────────────────────────────────────────────────── - -/// Locate `codex` in PATH, returning the full path or None. -fn find_codex() -> Option { - which::which("codex").ok().map(|p| p.to_string_lossy().to_string()) -} - -/// Get codex version string (e.g. "0.1.2") or None. -fn get_codex_version() -> Option { - let codex = find_codex()?; - let output = Command::new(&codex) - .arg("--version") - .output() - .ok()?; - if !output.status.success() { - return None; - } - let text = String::from_utf8_lossy(&output.stdout); - let re = Regex::new(r"(\d+\.\d+\.\d+)").unwrap(); - re.captures(text.trim()) - .map(|c| c[1].to_string()) - .or_else(|| Some(text.trim().to_string())) -} - -/// Resolve sandbox mode: CLI flag > CODEX_SANDBOX env > platform default. -/// Never returns "auto". 
-fn resolve_sandbox(sandbox: &str) -> String { - let s = sandbox.trim(); - - // Explicit non-auto value from CLI - if !s.is_empty() && s != "auto" { - return s.to_string(); - } - - // Check CODEX_SANDBOX env var - if let Ok(env_val) = env::var("CODEX_SANDBOX") { - let ev = env_val.trim().to_string(); - if !ev.is_empty() && ev != "auto" { - return ev; - } - } - - // Platform default - if cfg!(windows) { - "danger-full-access".to_string() - } else { - "read-only".to_string() - } -} - -/// Run `codex exec` with the given prompt (passed via stdin). -/// Returns (stdout, thread_id, exit_code, stderr). -fn run_codex_exec( - prompt: &str, - session_id: Option<&str>, - sandbox: &str, - effort: &str, -) -> (String, Option, i32, String) { - let codex = match find_codex() { - Some(c) => c, - None => return (String::new(), None, 2, "codex not found in PATH".to_string()), - }; - - let timeout_secs: u64 = env::var("FLOW_CODEX_TIMEOUT") - .ok() - .and_then(|v| v.parse().ok()) - .unwrap_or(600); - - let model = env::var("FLOW_CODEX_MODEL").unwrap_or_else(|_| "gpt-5.4".to_string()); - - // Try resume if session_id is provided - if let Some(sid) = session_id { - let result = Command::new(&codex) - .args(["exec", "resume", sid, "-"]) - .stdin(std::process::Stdio::piped()) - .stdout(std::process::Stdio::piped()) - .stderr(std::process::Stdio::piped()) - .spawn(); - - if let Ok(mut child) = result { - use std::io::Write; - if let Some(ref mut stdin) = child.stdin { - let _ = stdin.write_all(prompt.as_bytes()); - } - // Drop stdin to close it - drop(child.stdin.take()); - - match child.wait_with_output() { - Ok(output) if output.status.success() => { - let stdout = String::from_utf8_lossy(&output.stdout).to_string(); - let stderr = String::from_utf8_lossy(&output.stderr).to_string(); - return (stdout, Some(sid.to_string()), 0, stderr); - } - _ => { - eprintln!("WARNING: Codex resume failed, starting new session"); - } - } - } - } - - // New session - let effort_config = 
format!("model_reasoning_effort=\"{}\"", effort); - let mut cmd = Command::new(&codex); - cmd.args([ - "exec", - "--model", &model, - "-c", &effort_config, - "--sandbox", sandbox, - "--skip-git-repo-check", - "--json", - "-", - ]); - cmd.stdin(std::process::Stdio::piped()); - cmd.stdout(std::process::Stdio::piped()); - cmd.stderr(std::process::Stdio::piped()); - - let result = cmd.spawn(); - match result { - Ok(mut child) => { - use std::io::Write; - if let Some(ref mut stdin) = child.stdin { - let _ = stdin.write_all(prompt.as_bytes()); - } - drop(child.stdin.take()); - - // Wait with timeout - let _timeout = std::time::Duration::from_secs(timeout_secs); - match child.wait_with_output() { - Ok(output) => { - let stdout = String::from_utf8_lossy(&output.stdout).to_string(); - let stderr = String::from_utf8_lossy(&output.stderr).to_string(); - let code = output.status.code().unwrap_or(1); - let thread_id = parse_thread_id(&stdout); - (stdout, thread_id, code, stderr) - } - Err(e) => (String::new(), None, 2, format!("codex exec error: {e}")), - } - } - Err(e) => (String::new(), None, 2, format!("failed to spawn codex: {e}")), - } -} - -/// Extract thread_id from codex --json JSONL output. -fn parse_thread_id(output: &str) -> Option { - for line in output.lines() { - if let Ok(data) = serde_json::from_str::(line) { - if data.get("type").and_then(|v| v.as_str()) == Some("thread.started") { - if let Some(tid) = data.get("thread_id").and_then(|v| v.as_str()) { - return Some(tid.to_string()); - } - } - } - } - None -} - -/// Extract verdict from codex output: SHIP etc. -fn parse_verdict(output: &str) -> Option { - let re = Regex::new(r"(SHIP|NEEDS_WORK|MAJOR_RETHINK)").unwrap(); - re.captures(output).map(|c| c[1].to_string()) -} - -/// Load receipt session_id for re-review continuity. 
-fn load_receipt(path: Option<&str>) -> (Option, bool) { - let path = match path { - Some(p) if !p.is_empty() => p, - _ => return (None, false), - }; - let content = match std::fs::read_to_string(path) { - Ok(c) => c, - Err(_) => return (None, false), - }; - match serde_json::from_str::(&content) { - Ok(data) => { - let sid = data.get("session_id").and_then(|v| v.as_str()).map(std::string::ToString::to_string); - let is_rereview = sid.is_some(); - (sid, is_rereview) - } - Err(_) => (None, false), - } -} - -/// Save receipt JSON for ralph-compatible review tracking. -#[allow(clippy::too_many_arguments)] -fn save_receipt( - path: &str, - review_type: &str, - review_id: &str, - verdict: &str, - session_id: Option<&str>, - output: &str, - base_branch: Option<&str>, - focus: Option<&str>, -) { - let mut data = json!({ - "type": review_type, - "id": review_id, - "mode": "codex", - "verdict": verdict, - "session_id": session_id, - "timestamp": chrono::Utc::now().to_rfc3339(), - "review": output, - }); - if let Some(base) = base_branch { - data["base"] = json!(base); - } - if let Some(f) = focus { - data["focus"] = json!(f); - } - if let Ok(iter_str) = env::var("RALPH_ITERATION") { - if let Ok(iter) = iter_str.parse::() { - data["iteration"] = json!(iter); - } - } - let content = serde_json::to_string_pretty(&data).unwrap_or_default(); - let _ = std::fs::write(path, format!("{content}\n")); -} - -/// Delete a stale receipt on failure. 
-fn delete_stale_receipt(path: Option<&str>) { - if let Some(p) = path { - let _ = std::fs::remove_file(p); - } -} - -// ── Dispatch ──────────────────────────────────────────────────────── - -pub fn dispatch(cmd: &CodexCmd, json: bool) { - match cmd { - CodexCmd::Check => cmd_check(json), - CodexCmd::ImplReview { - task, base, focus, receipt, sandbox, effort, - } => cmd_impl_review(json, task.as_deref(), base, focus.as_deref(), receipt.as_deref(), sandbox, effort), - CodexCmd::PlanReview { - epic, files, base, receipt, sandbox, effort, - } => cmd_plan_review(json, epic, files, base, receipt.as_deref(), sandbox, effort), - CodexCmd::Adversarial { - base, focus, sandbox, effort, - } => cmd_adversarial(json, base, focus.as_deref(), sandbox, effort), - CodexCmd::CrossModel { - base, focus, sandbox, effort, - } => cmd_cross_model(json, base, focus.as_deref(), sandbox, effort), - CodexCmd::CompletionReview { - epic, base, receipt, sandbox, effort, - } => cmd_completion_review(json, epic, base, receipt.as_deref(), sandbox, effort), - } -} +use super::{ + delete_stale_receipt, load_receipt, parse_verdict, resolve_sandbox, run_codex_exec, + save_receipt, +}; // ── Command implementations ───────────────────────────────────────── -fn cmd_check(json_mode: bool) { - let available = find_codex().is_some(); - let version = if available { get_codex_version() } else { None }; +pub fn cmd_check(json_mode: bool) { + let available = super::find_codex().is_some(); + let version = if available { super::get_codex_version() } else { None }; if json_mode { json_output(json!({ @@ -377,7 +38,7 @@ fn cmd_check(json_mode: bool) { } } -fn cmd_impl_review( +pub fn cmd_impl_review( json_mode: bool, task: Option<&str>, base: &str, @@ -441,7 +102,7 @@ fn cmd_impl_review( } } -fn cmd_plan_review( +pub fn cmd_plan_review( json_mode: bool, epic: &str, files: &str, @@ -499,7 +160,7 @@ fn cmd_plan_review( } } -fn cmd_adversarial( +pub fn cmd_adversarial( json_mode: bool, base: &str, focus: 
Option<&str>, @@ -552,7 +213,7 @@ fn cmd_adversarial( } } -fn cmd_completion_review( +pub fn cmd_completion_review( json_mode: bool, epic: &str, base: &str, @@ -608,7 +269,7 @@ fn cmd_completion_review( } } -fn cmd_cross_model( +pub fn cmd_cross_model( json_mode: bool, base: &str, focus: Option<&str>, @@ -755,8 +416,10 @@ fn cmd_cross_model( } } +// ── Parsing helpers ───────────────────────────────────────────────── + /// Parse findings from codex/model output. Returns (findings, confidence). -fn parse_findings_from_output(output: &str) -> (Vec, f64) { +pub(super) fn parse_findings_from_output(output: &str) -> (Vec, f64) { let mut findings = Vec::new(); let mut confidence = 0.8; // default @@ -905,7 +568,7 @@ fn make_abstain_review(model: &str) -> ModelReview { /// Parse structured JSON from adversarial review output. /// Handles direct JSON, JSONL streaming, markdown fences, embedded JSON. -fn parse_adversarial_output(output: &str) -> Option { +pub(super) fn parse_adversarial_output(output: &str) -> Option { // Strategy 1: Direct JSON parse if let Ok(data) = serde_json::from_str::(output.trim()) { if data.is_object() && data.get("verdict").is_some() { diff --git a/flowctl/crates/flowctl-cli/src/commands/codex/sync.rs b/flowctl/crates/flowctl-cli/src/commands/codex/sync.rs new file mode 100644 index 00000000..9b25a619 --- /dev/null +++ b/flowctl/crates/flowctl-cli/src/commands/codex/sync.rs @@ -0,0 +1,65 @@ +//! Codex sync command — generates Codex artifacts from agent `.md` files. 
+ +use std::path::Path; + +use serde_json::json; + +use flowctl_core::codex_sync::sync_all; + +use crate::output::{error_exit, json_output}; + +pub fn cmd_sync( + json_mode: bool, + agents_dir: &str, + output_dir: &str, + hooks: &str, + dry_run: bool, + verbose: bool, +) { + let agents_path = Path::new(agents_dir); + let output_path = Path::new(output_dir); + let hooks_path = Path::new(hooks); + + let hooks_arg = if hooks_path.exists() { + Some(hooks_path) + } else { + None + }; + + let summary = match sync_all(agents_path, hooks_arg, output_path, dry_run) { + Ok(s) => s, + Err(e) => { + error_exit(&format!("codex sync failed: {e}")); + } + }; + + if json_mode { + json_output(json!({ + "agents_generated": summary.agents_generated, + "agents_skipped": summary.agents_skipped, + "hooks_generated": summary.hooks_generated, + "errors": summary.errors, + "dry_run": dry_run, + })); + } else { + if dry_run { + println!("[dry-run] Would generate:"); + } + println!( + "Agents: {} generated, {} skipped", + summary.agents_generated, summary.agents_skipped + ); + if summary.hooks_generated { + println!("Hooks: patched"); + } + if !summary.errors.is_empty() { + eprintln!("Errors:"); + for e in &summary.errors { + eprintln!(" - {e}"); + } + } + if verbose { + println!("Output directory: {}", output_path.display()); + } + } +} From a692ece1f86f9c9acb03a1740974b0586d8252a1 Mon Sep 17 00:00:00 2001 From: z23cc Date: Mon, 6 Apr 2026 21:18:45 +0800 Subject: [PATCH 5/6] test(codex): add unit tests and smoke test for codex sync Add 4 new unit tests for parse_agent (valid, no frontmatter, invalid yaml, missing model) to codex_sync.rs. Create scripts/codex_smoke_test.sh that validates TOML generation, sandbox modes, hooks patching, and dry-run. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- flowctl/crates/flowctl-core/src/codex_sync.rs | 33 ++++++ scripts/codex_smoke_test.sh | 105 ++++++++++++++++++ 2 files changed, 138 insertions(+) create mode 100755 scripts/codex_smoke_test.sh diff --git a/flowctl/crates/flowctl-core/src/codex_sync.rs b/flowctl/crates/flowctl-core/src/codex_sync.rs index 19f8d505..ab82e02a 100644 --- a/flowctl/crates/flowctl-core/src/codex_sync.rs +++ b/flowctl/crates/flowctl-core/src/codex_sync.rs @@ -467,6 +467,39 @@ mod tests { assert!(result.is_err()); } + #[test] + fn test_parse_agent_valid() { + let content = "---\nname: my-agent\ndescription: A test agent\nmodel: opus\ndisallowedTools: Edit, Write\ncolor: blue\n---\nBody instructions here.\n"; + let doc = frontmatter::parse::(content).unwrap(); + assert_eq!(doc.frontmatter.name, "my-agent"); + assert_eq!(doc.frontmatter.description, "A test agent"); + assert_eq!(doc.frontmatter.model, "opus"); + assert_eq!(doc.frontmatter.disallowed_tools.as_deref(), Some("Edit, Write")); + assert_eq!(doc.frontmatter.color.as_deref(), Some("blue")); + assert!(doc.body.contains("Body instructions here.")); + } + + #[test] + fn test_parse_agent_no_frontmatter() { + let content = "No frontmatter at all, just plain text."; + let result = frontmatter::parse::(content); + assert!(result.is_err()); + } + + #[test] + fn test_parse_agent_invalid_yaml() { + let content = "---\n: : : not valid yaml [[[{\n---\nBody.\n"; + let result = frontmatter::parse::(content); + assert!(result.is_err()); + } + + #[test] + fn test_parse_agent_missing_model() { + let content = "---\nname: incomplete\ndescription: Missing model field\n---\nBody.\n"; + let result = frontmatter::parse::(content); + assert!(result.is_err()); + } + #[test] fn test_sync_all_dry_run() { let dir = tempfile::tempdir().unwrap(); diff --git a/scripts/codex_smoke_test.sh b/scripts/codex_smoke_test.sh new file mode 100755 index 00000000..f11469c9 --- /dev/null +++ b/scripts/codex_smoke_test.sh @@ 
#!/usr/bin/env bash
#
# Smoke test for `flowctl codex sync`.
#
# Usage: codex_smoke_test.sh [FLOWCTL_PATH]
#   FLOWCTL_PATH  Optional flowctl binary, either relative to the plugin root
#                 or absolute. When it is missing or unusable the script falls
#                 back to bin/flowctl, then flowctl/target/release/flowctl,
#                 then `flowctl` on PATH.
#
# Builds two throw-away agent files and a hooks file in a temp directory, runs
# `codex sync` against them and checks the generated output.
# Exits 0 when every check passes, 1 otherwise.
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
PLUGIN_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
readonly SCRIPT_DIR PLUGIN_ROOT

# Colours are stored as real escape bytes so printf '%s' can emit them and
# labels are never subjected to backslash interpretation.
readonly GREEN=$'\033[0;32m'
readonly RED=$'\033[0;31m'
readonly YELLOW=$'\033[1;33m'
readonly NC=$'\033[0m'

PASS=0
FAIL=0
TEST_DIR=""
FLOWCTL=""

#######################################
# Print the path of the flowctl binary to use.
# Globals:   PLUGIN_ROOT (read)
# Arguments: $1 - optional user-supplied path (may be empty)
# Outputs:   chosen path on stdout
# Returns:   0 when a binary was found, non-zero otherwise
#######################################
locate_flowctl() {
  local requested=${1:-}
  local candidate
  local -a candidates=()

  if [[ -n "$requested" ]]; then
    candidates+=("$PLUGIN_ROOT/$requested")
    # Only absolute paths are taken as-is: a bare relative name would be
    # resolved through PATH at call time rather than as the file tested here.
    if [[ "$requested" == /* ]]; then
      candidates+=("$requested")
    fi
  fi
  candidates+=(
    "$PLUGIN_ROOT/bin/flowctl"
    "$PLUGIN_ROOT/flowctl/target/release/flowctl"
  )

  for candidate in "${candidates[@]}"; do
    # -f guards against directories, which also carry the execute bit.
    if [[ -f "$candidate" && -x "$candidate" ]]; then
      printf '%s\n' "$candidate"
      return 0
    fi
  done

  command -v flowctl 2>/dev/null
}

#######################################
# Run a command and record it as PASS (exit 0) or FAIL (non-zero).
# Command output is hidden on success and replayed on stderr on failure.
# Globals:   PASS, FAIL (written)
# Arguments: $1 - label; remaining - command and its arguments
#######################################
check() {
  local label=$1
  shift
  local output
  if output=$("$@" 2>&1); then
    printf ' %sPASS%s %s\n' "$GREEN" "$NC" "$label"
    PASS=$((PASS + 1))
  else
    printf ' %sFAIL%s %s\n' "$RED" "$NC" "$label"
    if [[ -n "$output" ]]; then
      printf '%s\n' "$output" | sed 's/^/    /' >&2
    fi
    FAIL=$((FAIL + 1))
  fi
}

# Print a sub-section banner.
section() {
  printf '%s--- %s ---%s\n' "$YELLOW" "$1" "$NC"
}

# Remove the scratch directory; installed as the EXIT trap.
cleanup() {
  if [[ -n "$TEST_DIR" ]]; then
    rm -rf -- "$TEST_DIR"
  fi
}

main() {
  if ! FLOWCTL=$(locate_flowctl "${1:-}"); then
    echo "ERROR: flowctl binary not found. Build with: cd flowctl && cargo build --release && cp target/release/flowctl ../bin/" >&2
    exit 1
  fi

  TEST_DIR=$(mktemp -d)
  trap cleanup EXIT

  printf '%s=== codex sync smoke tests ===%s\n' "$YELLOW" "$NC"

  # --- Setup test agents ---
  mkdir -p "$TEST_DIR/agents"
  cat > "$TEST_DIR/agents/test-scout.md" <<'EOF'
---
name: test-scout
description: A test scout
model: opus
disallowedTools: Edit, Write
---
# Test Scout Instructions
Do the thing.
EOF

  cat > "$TEST_DIR/agents/test-worker.md" <<'EOF'
---
name: worker
description: A test worker
model: inherit
---
# Worker Instructions
Implement the task.
EOF

  # --- Setup test hooks ---
  mkdir -p "$TEST_DIR/hooks"
  echo '{"hooks":{"PreToolUse":[{"matcher":"Bash","hooks":[{"type":"command","command":"echo test"}]}]}}' > "$TEST_DIR/hooks/hooks.json"

  # --- Test 1: sync writes TOML files ---
  section "codex sync generates TOML files"
  local output_dir="$TEST_DIR/codex"
  # Routed through check so a failing sync is reported in the summary instead
  # of aborting the whole script under `set -e`.
  check "codex sync exits 0" \
    "$FLOWCTL" codex sync \
    --agents-dir "$TEST_DIR/agents" \
    --output-dir "$output_dir" \
    --hooks "$TEST_DIR/hooks/hooks.json"

  # Count via a glob rather than parsing `ls`; nullglob makes "no match" an
  # empty array instead of the literal pattern.
  local -a toml_files=()
  shopt -s nullglob
  toml_files=("$output_dir/agents/"*.toml)
  shopt -u nullglob
  check "expected 2 TOML files" [ "${#toml_files[@]}" -eq 2 ]

  # --- Test 2: scout gets read-only sandbox ---
  section "scout sandbox is read-only"
  check "scout sandbox read-only" \
    grep -q 'sandbox_mode = "read-only"' "$output_dir/agents/test-scout.toml"

  # --- Test 3: worker gets workspace-write sandbox ---
  section "worker sandbox is workspace-write"
  check "worker sandbox workspace-write" \
    grep -q 'sandbox_mode = "workspace-write"' "$output_dir/agents/worker.toml"

  # --- Test 4: hooks patched Bash → Bash|Execute ---
  section "hooks patched"
  # -F: the pipe is meant literally, not as alternation.
  check "hooks contain Bash|Execute" \
    grep -qF 'Bash|Execute' "$output_dir/hooks.json"

  # --- Test 5: dry run does not create output ---
  section "dry run"
  local dry_dir="$TEST_DIR/codex-dry"
  check "dry-run exits 0" \
    "$FLOWCTL" codex sync \
    --agents-dir "$TEST_DIR/agents" \
    --output-dir "$dry_dir" \
    --dry-run --json
  check "dry-run does not create output dir" [ ! -d "$dry_dir" ]

  # --- Summary ---
  echo ""
  printf '%s=== Results: %s%d passed%s, %s%d failed%s ===\n' \
    "$YELLOW" "$GREEN" "$PASS" "$NC" "$RED" "$FAIL" "$NC"
  if (( FAIL > 0 )); then
    exit 1
  fi
  echo "All codex smoke tests passed!"
}

main "$@"
From 72a42e302d367adb0f026cc6b69d9b2c74f87a56 Mon Sep 17 00:00:00 2001 From: z23cc Date: Mon, 6 Apr 2026 21:28:32 +0800 Subject: [PATCH 6/6] chore: update Cargo.lock --- flowctl/Cargo.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flowctl/Cargo.lock b/flowctl/Cargo.lock index 41d090ad..5421fda6 100644 --- a/flowctl/Cargo.lock +++ b/flowctl/Cargo.lock @@ -737,7 +737,7 @@ dependencies = [ [[package]] name = "flowctl-cli" -version = "0.1.30" +version = "0.1.31" dependencies = [ "anyhow", "chrono",