From d8a23c51a5a9cb71f2406c5f81e4cc9b9f807612 Mon Sep 17 00:00:00 2001 From: Kalil Bouzigues Date: Wed, 6 May 2026 00:01:45 +0000 Subject: [PATCH 1/2] feat(cli): agent --dry-run for safe pre-flight validation (issue #62 item 5) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Lets agents and CI wrappers sanity-check inputs (prompt, url, config, env vars, deps, output dir writability) without launching a browser or burning LLM tokens. Implies --json since --dry-run is fundamentally about machine-parseable validation. Output shape: same top-level fields as `agent --json` (schema_version, status, run_id=null, prompt, url, mode="dry-run", error, error_kind) plus: - `would_run` sub-object: agent_provider, sdk, model, output_dir, headless — what an actual `agent` invocation would do - `checks` array: one entry per validation step with name, status (ok | warn | error), and a human message Validations: 1. prompt non-empty 2. url is http(s):// if provided 3. agent_provider in {auto, chrome-mcp} 4. SDK env var present (warn — SDK may resolve auth elsewhere) 5. node binary in PATH (required by both MCP servers) + version 6. chrome-mcp without --headless: warn that auto-connect needs Chrome 146+ with remote-debugging enabled (not auto-checkable) 7. output_dir writable (probe-write-and-delete) Aggregate status is `error` if any check is `error`, otherwise `ok`. Error kinds are `misuse` for prompt/url issues, `config_invalid` for env/deps/output_dir issues — matching the schema-v1 error_kind enum. Tests: 6 new in TestAgentDryRun covering: ok path, missing prompt (misuse), bad url (misuse), unwritable output_dir (config_invalid), that run_agent_capture is NOT called under --dry-run, and --help mentions the flag with "Implies --json". Closes the last medium-priority blocker on issue #62. Item #8 (`run --json` wrapped) intentionally deferred — orthogonal surface. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/reverse_api/cli.py | 166 +++++++++++++++++++++++++++++++++++- tests/test_cli_followups.py | 120 ++++++++++++++++++++++++++ 2 files changed, 285 insertions(+), 1 deletion(-) diff --git a/src/reverse_api/cli.py b/src/reverse_api/cli.py index 5b59910..65bc3d9 100644 --- a/src/reverse_api/cli.py +++ b/src/reverse_api/cli.py @@ -1,5 +1,6 @@ import asyncio import json +import os import random import sys from contextlib import contextmanager @@ -175,6 +176,153 @@ def _quiet_consoles_for_json(): c._file = original_inner +def _build_dry_run_payload( + *, + prompt: str | None, + url: str | None, + model: str | None, + output_dir: str | None, + headless: bool, +) -> dict: + """Validate config / env / deps without launching a browser. + + Returns a payload with the same top-level shape as `_build_agent_payload` + plus a `checks` array (each item: name, status: ok|warn|error, message) + and a `would_run` sub-object showing what an actual `agent` invocation + would do with these inputs. Status is `error` if any check is `error`, + otherwise `ok`. Misuse errors (missing prompt) get `error_kind=misuse`; + runtime issues (missing API key, missing node) get `config_invalid`. + """ + import shutil + import subprocess + + checks: list[dict] = [] + + # 1. Prompt + if not prompt or not prompt.strip(): + checks.append({"name": "prompt", "status": "error", "message": "--prompt is required"}) + else: + checks.append({"name": "prompt", "status": "ok", "message": f"{len(prompt)} chars"}) + + # 2. URL (optional, but if given it must look reasonable) + if url: + if url.startswith("http://") or url.startswith("https://"): + checks.append({"name": "url", "status": "ok", "message": url}) + else: + checks.append({ + "name": "url", + "status": "error", + "message": f"url must start with http:// or https://, got {url!r}", + }) + else: + checks.append({"name": "url", "status": "ok", "message": "(none — agent will pick a start)"}) + + # 3. Agent provider + agent_provider = config_manager.get("agent_provider", "auto") + if agent_provider in ("auto", "chrome-mcp"): + checks.append({"name": "agent_provider", "status": "ok", "message": agent_provider}) + else: + checks.append({ + "name": "agent_provider", + "status": "error", + "message": f"unknown agent_provider {agent_provider!r}; expected 'auto' or 'chrome-mcp'", + }) + + # 4. SDK + API key presence (we only check env var existence, not validity) + sdk = config_manager.get("sdk", "claude") + sdk_env_var = { + "claude": "ANTHROPIC_API_KEY", + "opencode": "OPENCODE_API_KEY", + "copilot": "GITHUB_COPILOT_TOKEN", + }.get(sdk) + if sdk_env_var is None: + checks.append({ + "name": "sdk", + "status": "error", + "message": f"unknown sdk {sdk!r}; expected 'claude', 'opencode', or 'copilot'", + }) + elif not os.environ.get(sdk_env_var): + checks.append({ + "name": f"sdk:{sdk}", + "status": "warn", + "message": f"{sdk_env_var} not set in env (the SDK may still resolve auth via a config file)", + }) + else: + checks.append({"name": f"sdk:{sdk}", "status": "ok", "message": f"{sdk_env_var} present"}) + + # 5. Node.js for MCP servers (both auto and chrome-mcp use npx) + node = shutil.which("node") + if node is None: + checks.append({ + "name": "node", + "status": "error", + "message": "node not found in PATH; required by both auto (rae-playwright-mcp) and chrome-mcp", + }) + else: + try: + ver = subprocess.run( + [node, "--version"], capture_output=True, text=True, timeout=5 + ).stdout.strip() + checks.append({"name": "node", "status": "ok", "message": ver}) + except Exception as e: + checks.append({"name": "node", "status": "warn", "message": f"could not query version: {e}"}) + + # 6. Headed chrome-mcp requires the user has a real Chrome with auto-connect + if agent_provider == "chrome-mcp" and not headless: + checks.append({ + "name": "chrome-mcp:auto-connect", + "status": "warn", + "message": "chrome-mcp without --headless requires Chrome 146+ with auto-connect enabled at chrome://inspect/#remote-debugging — this is not auto-checkable", + }) + + # 7. Output dir writability + base = Path(output_dir or config_manager.get("output_dir") or "~/.reverse-api/runs").expanduser() + try: + base.mkdir(parents=True, exist_ok=True) + probe = base / ".dry_run_write_probe" + probe.write_text("") + probe.unlink() + checks.append({"name": "output_dir", "status": "ok", "message": str(base)}) + except Exception as e: + checks.append({ + "name": "output_dir", + "status": "error", + "message": f"{base}: {e}", + }) + + # Aggregate + has_error = any(c["status"] == "error" for c in checks) + final_error = None + error_kind = None + if has_error: + first_err = next(c for c in checks if c["status"] == "error") + final_error = f"{first_err['name']}: {first_err['message']}" + # Misuse for prompt/url; config_invalid otherwise + error_kind = "misuse" if first_err["name"] in ("prompt", "url") else "config_invalid" + + return { + "schema_version": AGENT_JSON_SCHEMA_VERSION, + "status": "error" if has_error else "ok", + "run_id": None, + "prompt": prompt, + "url": url, + "mode": "dry-run", + "har_path": None, + "script_path": None, + "usage": {}, + "error": final_error, + "error_kind": error_kind, + "would_run": { + "agent_provider": agent_provider, + "sdk": sdk, + "model": model or config_manager.get("claude_code_model", "claude-sonnet-4-6"), + "output_dir": str(base), + "headless": headless, + }, + "checks": checks, + } + + def _build_agent_payload( result: dict | None, *, @@ -1218,12 +1366,28 @@ def manual(prompt, url, reverse_engineer, model, output_dir): is_flag=True, help="Launch the MCP-controlled browser in headless mode (required on machines without an X server). For chrome-mcp this drops --autoConnect since auto-connect requires a headed Chrome instance.", ) -def agent(prompt, url, model, output_dir, no_interactive, as_json, headless): +@click.option( + "--dry-run", + is_flag=True, + help="Validate prompt/url/config/env without launching the browser. Emits a manifest of what would run + check results. Implies --json. Exits 0 if all checks pass, 1 if any error.", +) +def agent(prompt, url, model, output_dir, no_interactive, as_json, headless, dry_run): """Run autonomous agent browser session. Agent mode runs an integrated capture + reverse-engineering pipeline; if you only want a HAR recording, use `manual --no-engineer` instead. """ + if dry_run: + # --dry-run is fundamentally about emitting machine-parseable validation + # results, so it implies --json (and therefore --no-interactive). + with _quiet_consoles_for_json() as real_stdout: + payload = _build_dry_run_payload( + prompt=prompt, url=url, model=model, output_dir=output_dir, headless=headless + ) + real_stdout.write(json.dumps(payload) + "\n") + real_stdout.flush() + sys.exit(0 if payload["status"] == "ok" else 1) + no_interactive = no_interactive or as_json if no_interactive and not (prompt and prompt.strip()): diff --git a/tests/test_cli_followups.py b/tests/test_cli_followups.py index e359da5..46ed07c 100644 --- a/tests/test_cli_followups.py +++ b/tests/test_cli_followups.py @@ -308,6 +308,126 @@ def test_root_flag_advertised_in_help(self): assert "--json-schema-version" in result.output +class TestAgentDryRun: + """`agent --dry-run` validates without launching the browser.""" + + def test_dry_run_ok_path(self, tmp_path): + """All checks pass → status=ok, exit 0, full payload + checks array.""" + from reverse_api.cli import agent as agent_cmd + + runner = CliRunner() + # Patch config_manager so we don't depend on the user's real config + with patch("reverse_api.cli.config_manager") as cm, \ + patch.dict("os.environ", {"ANTHROPIC_API_KEY": "fake"}, clear=False): + cm.get.side_effect = lambda key, default=None: { + "agent_provider": "auto", + "sdk": "claude", + "claude_code_model": "claude-sonnet-4-6", + "output_dir": str(tmp_path), + }.get(key, default) + result = runner.invoke( + agent_cmd, ["--dry-run", "-p", "fetch jobs", "-u", "https://example.com"] + ) + + assert result.exit_code == 0, result.output + payload = json.loads(result.stdout.strip()) + assert payload["status"] == "ok" + assert payload["mode"] == "dry-run" + assert payload["run_id"] is None + assert payload["error"] is None + assert payload["would_run"]["agent_provider"] == "auto" + assert payload["would_run"]["sdk"] == "claude" + assert payload["would_run"]["headless"] is False + # Checks include prompt, url, agent_provider, sdk, node, output_dir + check_names = {c["name"] for c in payload["checks"]} + assert "prompt" in check_names + assert "url" in check_names + assert "agent_provider" in check_names + assert "node" in check_names + assert "output_dir" in check_names + + def test_dry_run_missing_prompt_is_misuse(self, tmp_path): + """Missing --prompt → error_kind=misuse, exit 1, no browser launched.""" + from reverse_api.cli import agent as agent_cmd + + runner = CliRunner() + with patch("reverse_api.cli.config_manager") as cm: + cm.get.side_effect = lambda key, default=None: { + "agent_provider": "auto", + "sdk": "claude", + "output_dir": str(tmp_path), + }.get(key, default) + result = runner.invoke(agent_cmd, ["--dry-run"]) + + assert result.exit_code == 1 + payload = json.loads(result.stdout.strip()) + assert payload["status"] == "error" + assert payload["error_kind"] == "misuse" + assert any(c["name"] == "prompt" and c["status"] == "error" for c in payload["checks"]) + + def test_dry_run_bad_url_is_misuse(self, tmp_path): + """A url that doesn't start with http(s):// is flagged.""" + from reverse_api.cli import agent as agent_cmd + + runner = CliRunner() + with patch("reverse_api.cli.config_manager") as cm: + cm.get.side_effect = lambda key, default=None: { + "agent_provider": "auto", + "sdk": "claude", + "output_dir": str(tmp_path), + }.get(key, default) + result = runner.invoke(agent_cmd, ["--dry-run", "-p", "x", "-u", "ftp://nope"]) + + assert result.exit_code == 1 + payload = json.loads(result.stdout.strip()) + assert payload["error_kind"] == "misuse" + assert any(c["name"] == "url" and c["status"] == "error" for c in payload["checks"]) + + def test_dry_run_unwritable_output_dir_is_config_invalid(self): + """Unwritable output_dir → error_kind=config_invalid, not misuse.""" + from reverse_api.cli import agent as agent_cmd + + runner = CliRunner() + with patch("reverse_api.cli.config_manager") as cm: + cm.get.side_effect = lambda key, default=None: { + "agent_provider": "auto", + "sdk": "claude", + "output_dir": "/sys/forbidden", + }.get(key, default) + result = runner.invoke( + agent_cmd, ["--dry-run", "-p", "x", "--output-dir", "/sys/forbidden"] + ) + + assert result.exit_code == 1 + payload = json.loads(result.stdout.strip()) + assert payload["error_kind"] == "config_invalid" + assert any(c["name"] == "output_dir" and c["status"] == "error" for c in payload["checks"]) + + def test_dry_run_does_not_launch_browser(self, tmp_path): + """--dry-run must NOT call run_agent_capture (no browser, no LLM, no cost).""" + from reverse_api.cli import agent as agent_cmd + + runner = CliRunner() + with patch("reverse_api.cli.config_manager") as cm, \ + patch("reverse_api.cli.run_agent_capture") as mock_run: + cm.get.side_effect = lambda key, default=None: { + "agent_provider": "auto", + "sdk": "claude", + "output_dir": str(tmp_path), + }.get(key, default) + runner.invoke(agent_cmd, ["--dry-run", "-p", "x", "-u", "https://example.com"]) + mock_run.assert_not_called() + + def test_dry_run_help_mentions_implies_json(self): + from reverse_api.cli import agent as agent_cmd + + runner = CliRunner() + result = runner.invoke(agent_cmd, ["--help"]) + assert "--dry-run" in result.output + # Click reflows whitespace, so "Implies\n--json" or "Implies --json" + assert "Implies" in result.output and "--json" in result.output + + class TestRootHelpMentionsScripted: """Item #6 partial: root --help should advertise scripted features.""" From 060e5271f7674cfddb10d44a2ca697fdca2491a9 Mon Sep 17 00:00:00 2001 From: Kalil Bouzigues Date: Wed, 6 May 2026 00:20:45 +0000 Subject: [PATCH 2/2] fix(cli): address review feedback on agent --dry-run (PR #67) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three P2 issues flagged by cubic-dev-ai: 1. Check npx availability separately from node MCP servers shell out to `npx ` (not just node), so a minimal Docker image with node-but-no-npx would pass dry-run and then fail the real run. Added a dedicated `npx` check. 2. Use a unique probe filename for output_dir writability The fixed `.dry_run_write_probe` could legitimately exist in a user's output dir and would be deleted by the probe. Now uses `.rae_dry_run_probe_{pid}_{8hex}` and refuses to touch any path that already exists, guaranteeing we never clobber user data. 3. Resolve `would_run.model` per-SDK Previously always read `claude_code_model` regardless of which SDK was configured, so an opencode/copilot session would have a misleading manifest. Now branches on `sdk` to pick `opencode_model` / `copilot_model` / `claude_code_model` with the matching default — mirroring the live capture path. Adds 4 regression tests in TestAgentDryRun: - test_dry_run_checks_npx_separately_from_node (mocks shutil.which) - test_dry_run_probe_does_not_clobber_existing_files (creates a canary file at the OLD probe path, asserts it survives) - test_dry_run_resolves_correct_model_per_sdk (opencode case) - test_dry_run_copilot_model_resolution (copilot case) 10/10 in TestAgentDryRun, 706 passing on full suite (5 pre-existing). Co-Authored-By: Claude Opus 4.7 (1M context) --- src/reverse_api/cli.py | 42 +++++++++++++-- tests/test_cli_followups.py | 104 ++++++++++++++++++++++++++++++++++++ 2 files changed, 142 insertions(+), 4 deletions(-) diff --git a/src/reverse_api/cli.py b/src/reverse_api/cli.py index 65bc3d9..1bb86e3 100644 --- a/src/reverse_api/cli.py +++ b/src/reverse_api/cli.py @@ -250,7 +250,9 @@ def _build_dry_run_payload( else: checks.append({"name": f"sdk:{sdk}", "status": "ok", "message": f"{sdk_env_var} present"}) - # 5. Node.js for MCP servers (both auto and chrome-mcp use npx) + # 5. Node.js + npx for MCP servers (both auto and chrome-mcp shell out to + # `npx `; minimal Docker images sometimes ship `node` without + # `npx`, so checking only `node` would lull dry-run into a false ok). node = shutil.which("node") if node is None: checks.append({ @@ -267,6 +269,16 @@ def _build_dry_run_payload( except Exception as e: checks.append({"name": "node", "status": "warn", "message": f"could not query version: {e}"}) + npx = shutil.which("npx") + if npx is None: + checks.append({ + "name": "npx", + "status": "error", + "message": "npx not found in PATH; both MCP servers are launched via `npx `", + }) + else: + checks.append({"name": "npx", "status": "ok", "message": npx}) + # 6. Headed chrome-mcp requires the user has a real Chrome with auto-connect if agent_provider == "chrome-mcp" and not headless: checks.append({ @@ -275,11 +287,18 @@ def _build_dry_run_payload( "message": "chrome-mcp without --headless requires Chrome 146+ with auto-connect enabled at chrome://inspect/#remote-debugging — this is not auto-checkable", }) - # 7. Output dir writability + # 7. Output dir writability — probe with a unique filename so we never + # clobber a real user file (a fixed name like `.dry_run_write_probe` + # could legitimately exist in someone's output dir). + import secrets + base = Path(output_dir or config_manager.get("output_dir") or "~/.reverse-api/runs").expanduser() try: base.mkdir(parents=True, exist_ok=True) - probe = base / ".dry_run_write_probe" + probe = base / f".rae_dry_run_probe_{os.getpid()}_{secrets.token_hex(4)}" + # If somehow this exact filename already exists, refuse to touch it. + if probe.exists(): + raise FileExistsError(f"unique probe path collision: {probe}") probe.write_text("") probe.unlink() checks.append({"name": "output_dir", "status": "ok", "message": str(base)}) @@ -300,6 +319,21 @@ def _build_dry_run_payload( # Misuse for prompt/url; config_invalid otherwise error_kind = "misuse" if first_err["name"] in ("prompt", "url") else "config_invalid" + # `would_run.model` resolution mirrors the live capture path: each SDK + # has its own model config key, so picking `claude_code_model` for an + # opencode/copilot session would misreport what would actually run. + sdk_model_key = { + "claude": "claude_code_model", + "opencode": "opencode_model", + "copilot": "copilot_model", + }.get(sdk, "claude_code_model") + sdk_default_model = { + "claude": "claude-sonnet-4-6", + "opencode": "claude-opus-4-6", + "copilot": "gpt-5", + }.get(sdk, "claude-sonnet-4-6") + resolved_model = model or config_manager.get(sdk_model_key, sdk_default_model) + return { "schema_version": AGENT_JSON_SCHEMA_VERSION, "status": "error" if has_error else "ok", @@ -315,7 +349,7 @@ def _build_dry_run_payload( "would_run": { "agent_provider": agent_provider, "sdk": sdk, - "model": model or config_manager.get("claude_code_model", "claude-sonnet-4-6"), + "model": resolved_model, "output_dir": str(base), "headless": headless, }, diff --git a/tests/test_cli_followups.py b/tests/test_cli_followups.py index 46ed07c..a3933e4 100644 --- a/tests/test_cli_followups.py +++ b/tests/test_cli_followups.py @@ -427,6 +427,110 @@ def test_dry_run_help_mentions_implies_json(self): # Click reflows whitespace, so "Implies\n--json" or "Implies --json" assert "Implies" in result.output and "--json" in result.output + def test_dry_run_checks_npx_separately_from_node(self, tmp_path): + """cubic-dev-ai PR #67 review (P2): MCP servers shell out to `npx`, + so dry-run must check npx availability — not just node — otherwise + a minimal Docker image with node-but-no-npx passes dry-run and then + fails the real run.""" + from reverse_api.cli import agent as agent_cmd + + runner = CliRunner() + + # Pretend npx is missing while node is present + def fake_which(name): + if name == "node": + return "/usr/bin/node" + if name == "npx": + return None + return None + + with patch("reverse_api.cli.config_manager") as cm, \ + patch("shutil.which", side_effect=fake_which): + cm.get.side_effect = lambda key, default=None: { + "agent_provider": "auto", + "sdk": "claude", + "output_dir": str(tmp_path), + }.get(key, default) + result = runner.invoke(agent_cmd, ["--dry-run", "-p", "x"]) + + assert result.exit_code == 1 + payload = json.loads(result.stdout.strip()) + assert payload["error_kind"] == "config_invalid" + npx_check = next(c for c in payload["checks"] if c["name"] == "npx") + assert npx_check["status"] == "error" + assert "npx not found" in npx_check["message"] + + def test_dry_run_probe_does_not_clobber_existing_files(self, tmp_path): + """cubic-dev-ai PR #67 review (P2): a fixed probe filename like + `.dry_run_write_probe` could legitimately exist in a user's output + dir and would be deleted by the probe. We use a unique filename + with PID + random hex so collisions are astronomically unlikely, + and refuse to touch any path that already exists.""" + from reverse_api.cli import agent as agent_cmd + + # Pre-populate the output dir with a file that would collide with + # the OLD fixed probe name. The dry-run must not delete it. + canary = tmp_path / ".dry_run_write_probe" + canary.write_text("user data — do not delete") + + runner = CliRunner() + with patch("reverse_api.cli.config_manager") as cm: + cm.get.side_effect = lambda key, default=None: { + "agent_provider": "auto", + "sdk": "claude", + "output_dir": str(tmp_path), + }.get(key, default) + result = runner.invoke(agent_cmd, ["--dry-run", "-p", "x"]) + + assert result.exit_code == 0, result.output + # The user's pre-existing file is untouched + assert canary.exists() + assert canary.read_text() == "user data — do not delete" + + def test_dry_run_resolves_correct_model_per_sdk(self, tmp_path): + """cubic-dev-ai PR #67 review (P2): when sdk=opencode the live agent + uses `opencode_model`, not `claude_code_model`. would_run.model must + reflect what would actually run, otherwise the manifest lies.""" + from reverse_api.cli import agent as agent_cmd + + runner = CliRunner() + + # Configure opencode SDK with a custom opencode_model + with patch("reverse_api.cli.config_manager") as cm: + cm.get.side_effect = lambda key, default=None: { + "agent_provider": "auto", + "sdk": "opencode", + "opencode_model": "claude-opus-4-6-custom", + "claude_code_model": "claude-sonnet-4-6-irrelevant", + "output_dir": str(tmp_path), + }.get(key, default) + result = runner.invoke(agent_cmd, ["--dry-run", "-p", "x"]) + + assert result.exit_code == 0, result.output + payload = json.loads(result.stdout.strip()) + assert payload["would_run"]["sdk"] == "opencode" + assert payload["would_run"]["model"] == "claude-opus-4-6-custom" + # And NOT the claude_code_model that the old code would have grabbed + assert "irrelevant" not in payload["would_run"]["model"] + + def test_dry_run_copilot_model_resolution(self, tmp_path): + from reverse_api.cli import agent as agent_cmd + + runner = CliRunner() + with patch("reverse_api.cli.config_manager") as cm: + cm.get.side_effect = lambda key, default=None: { + "agent_provider": "auto", + "sdk": "copilot", + "copilot_model": "gpt-5-custom", + "output_dir": str(tmp_path), + }.get(key, default) + result = runner.invoke(agent_cmd, ["--dry-run", "-p", "x"]) + + assert result.exit_code == 0, result.output + payload = json.loads(result.stdout.strip()) + assert payload["would_run"]["sdk"] == "copilot" + assert payload["would_run"]["model"] == "gpt-5-custom" + class TestRootHelpMentionsScripted: """Item #6 partial: root --help should advertise scripted features."""