From 51680c9fc0857ebb511c0f4d2bf8fb2c2bad9ea0 Mon Sep 17 00:00:00 2001 From: Kalil Bouzigues Date: Tue, 5 May 2026 10:51:23 +0000 Subject: [PATCH 1/8] feat: agent-friendly CLI surface Add scriptable / non-interactive flags so other agents can drive the CLI for reverse engineering without TTY prompts. - `agent` gains `--no-interactive` and `--json`; --json suppresses Rich output to stderr and emits a single stable JSON object on stdout (schema_version, status, run_id, prompt, url, mode, har_path, script_path, usage, error). Missing --prompt under --json/--no-interactive errors out with exit code 2 instead of opening questionary - `run_auto_capture` now returns a normalized dict so the JSON payload has a stable shape regardless of the underlying engineer SDK - `list --json` emits `[]` on empty history (was a human-readable "No runs found." line); `show --json` emits a structured error and exits 1 when the run can't be found - `run` gains `--no-interactive` (fail instead of opening a script picker) and `--auto-install` (install missing deps without confirm) - Documented scripted usage and exit-code contract in the README - Added tests/test_cli_agent_json.py covering the JSON payload shape and CLI-level edge cases (10 new tests) Co-Authored-By: Claude Opus 4.7 (1M context) --- README.md | 36 ++++++++ src/reverse_api/cli.py | 165 +++++++++++++++++++++++++++++++++-- tests/test_cli_agent_json.py | 138 +++++++++++++++++++++++++++++ 3 files changed, 330 insertions(+), 9 deletions(-) create mode 100644 tests/test_cli_agent_json.py diff --git a/README.md b/README.md index 99db5ea..c65e249 100644 --- a/README.md +++ b/README.md @@ -356,6 +356,42 @@ Use these slash commands while in the CLI: - `/help` - Show all commands - `/exit` - Quit +### Scripted / Agent Usage + +The CLI subcommands are scriptable. Pass `--no-interactive` (and/or `--json`) so they fail fast instead of opening prompts, and pipe the structured output into `jq` or another agent. + +```bash +# Run an autonomous agent capture and get a single JSON result on stdout +reverse-api-engineer agent \ + --prompt "capture the public jobs api" \ + --url https://example.com/jobs \ + --json | jq + +# Output (logs go to stderr; one JSON object on stdout): +# { +# "schema_version": 1, +# "status": "ok", +# "run_id": "deadbeef0001", +# "prompt": "capture the public jobs api", +# "url": "https://example.com/jobs", +# "mode": "auto", +# "har_path": "/.../recording.har", +# "script_path": "/.../api_client.py", +# "usage": { ... }, +# "error": null +# } + +# List runs / inspect a run as JSON (empty history -> []) +reverse-api-engineer list --json +reverse-api-engineer show --json + +# Run a generated script non-interactively (errors on multiple-script picker +# or missing-dep prompt instead of blocking) +reverse-api-engineer run --no-interactive --auto-install -- --org acme +``` + +Exit codes: `0` success, `1` runtime error, `2` misuse (missing required args). + ## 🔌 Claude Code Plugin Install the plugin in [Claude Code](https://claude.com/claude-code): diff --git a/src/reverse_api/cli.py b/src/reverse_api/cli.py index 8c6f32d..831515a 100644 --- a/src/reverse_api/cli.py +++ b/src/reverse_api/cli.py @@ -2,6 +2,8 @@ import json import random import re +import sys +from contextlib import contextmanager from pathlib import Path import click @@ -59,6 +61,66 @@ config_manager = ConfigManager(get_config_path()) session_manager = SessionManager(get_history_path()) +# Stable schema version for --json outputs consumed by other agents/scripts. +AGENT_JSON_SCHEMA_VERSION = 1 + + +@contextmanager +def _quiet_consoles_for_json(): + """Reserve stdout for the final JSON payload; route Rich output to stderr. + + Yields the original stdout file object so the caller can write JSON to it + after the wrapped block exits cleanly. Rich Console.file is a property that + lazily resolves to sys.stdout, so we save/restore the underlying _file slot + rather than the resolved file object. + """ + real_stdout = sys.stdout + redirected_consoles: list[tuple[Console, object]] = [] + sys.stdout = sys.stderr + for mod_name, mod in list(sys.modules.items()): + if not mod_name.startswith("reverse_api") or mod is None: + continue + candidate = getattr(mod, "console", None) + if isinstance(candidate, Console): + redirected_consoles.append((candidate, candidate._file)) + candidate.file = sys.stderr + try: + yield real_stdout + finally: + sys.stdout = real_stdout + for c, original_inner in redirected_consoles: + c._file = original_inner + + +def _build_agent_payload( + result: dict | None, + *, + prompt: str | None, + url: str | None, + error: str | None = None, +) -> dict: + """Normalize an agent capture result into a stable JSON shape.""" + result = result or {} + run_id = result.get("run_id") + inner_error = result.get("error") + final_error = error or inner_error or (None if run_id else "agent capture produced no run") + har_path = None + if run_id: + candidate = get_har_dir(run_id, None) / "recording.har" + har_path = str(candidate) if candidate.exists() else None + return { + "schema_version": AGENT_JSON_SCHEMA_VERSION, + "status": "error" if final_error else "ok", + "run_id": run_id, + "prompt": prompt, + "url": url, + "mode": result.get("mode"), + "har_path": har_path, + "script_path": result.get("script_path"), + "usage": result.get("usage") or {}, + "error": final_error, + } + # Mode definitions MODES = ["agent", "manual", "engineer", "collector"] MODE_DESCRIPTIONS = { @@ -1261,9 +1323,55 @@ def manual(prompt, url, reverse_engineer, model, output_dir): default=None, ) @click.option("--output-dir", "-o", default=None, help="Custom output directory.") -def agent(prompt, url, reverse_engineer, model, output_dir): +@click.option( + "--no-interactive", + is_flag=True, + help="Fail fast instead of prompting for missing inputs (intended for scripted/agent usage).", +) +@click.option( + "--json", + "as_json", + is_flag=True, + help="Emit a single JSON result on stdout (logs go to stderr). Implies --no-interactive.", +) +def agent(prompt, url, reverse_engineer, model, output_dir, no_interactive, as_json): """Run autonomous agent browser session.""" - run_agent_capture(prompt=prompt, url=url, reverse_engineer=reverse_engineer, model=model, output_dir=output_dir) + no_interactive = no_interactive or as_json + + if no_interactive and not (prompt and prompt.strip()): + if as_json: + click.echo(json.dumps({ + "schema_version": AGENT_JSON_SCHEMA_VERSION, + "status": "error", + "error": "--prompt is required in non-interactive/--json mode", + })) + else: + click.echo("error: --prompt is required when --no-interactive is set", err=True) + sys.exit(2) + + if not as_json: + run_agent_capture(prompt=prompt, url=url, reverse_engineer=reverse_engineer, model=model, output_dir=output_dir) + return + + payload: dict + with _quiet_consoles_for_json() as real_stdout: + try: + result = run_agent_capture( + prompt=prompt, + url=url, + reverse_engineer=reverse_engineer, + model=model, + output_dir=output_dir, + ) + payload = _build_agent_payload(result, prompt=prompt, url=url) + except KeyboardInterrupt: + payload = _build_agent_payload({}, prompt=prompt, url=url, error="interrupted") + except Exception as e: + payload = _build_agent_payload({}, prompt=prompt, url=url, error=str(e)) + + real_stdout.write(json.dumps(payload) + "\n") + real_stdout.flush() + sys.exit(0 if payload["status"] == "ok" else 1) def run_manual_capture(prompt=None, url=None, reverse_engineer=True, model=None, output_dir=None): @@ -1637,7 +1745,12 @@ def run_auto_capture(prompt=None, url=None, model=None, output_dir=None, agent_p paths={"script_path": result.get("script_path")}, ) - return result + return { + "run_id": run_id, + "mode": mode_label, + "script_path": (result or {}).get("script_path"), + "usage": (result or {}).get("usage", {}), + } except Exception as e: console.print(f" [red]auto mode error: {e}[/red]") @@ -1645,7 +1758,13 @@ def run_auto_capture(prompt=None, url=None, model=None, output_dir=None, agent_p import traceback traceback.print_exc() - return None + return { + "run_id": run_id, + "mode": mode_label, + "script_path": None, + "usage": {}, + "error": str(e), + } def run_playwright_codegen(run_id: str, prompt: str, output_dir: str | None = None, start_url: str | None = None): @@ -1897,7 +2016,9 @@ def list_runs(as_json, full, limit, mode, model, search): runs = [r for r in runs if search.lower() in (r.get("prompt") or "").lower()] if not runs: - if not session_manager.history: + if as_json: + click.echo(json.dumps([])) + elif not session_manager.history: console.print("No runs found.", style="dim") else: console.print("No matching runs found.") @@ -1968,10 +2089,16 @@ def show_run(run_id, as_json): elif session_manager.history: run = session_manager.history[0] else: + if as_json: + click.echo(json.dumps({"error": "no runs found"})) + sys.exit(1) console.print("No runs found.", style="dim") return if run is None: + if as_json: + click.echo(json.dumps({"error": "run not found", "run_id": run_id})) + sys.exit(1) console.print(f"Run not found: {run_id}") return @@ -2066,8 +2193,18 @@ def _path_val(path_str: str, exists: bool) -> Text: @click.argument("script_args", nargs=-1, type=click.UNPROCESSED) @click.option("--file", "-f", "file_name", default=None, help="Script filename to run (e.g. api_client.py).") @click.option("--ls", "list_scripts", is_flag=True, help="List available scripts without executing.") +@click.option( + "--no-interactive", + is_flag=True, + help="Fail fast on script-picker / missing-dependency prompts (intended for scripted/agent usage).", +) +@click.option( + "--auto-install", + is_flag=True, + help="Auto-install missing dependencies on retry without prompting (implies --no-interactive for the picker).", +) @click.pass_context -def run_script(ctx, identifier, script_args, file_name, list_scripts): +def run_script(ctx, identifier, script_args, file_name, list_scripts, no_interactive, auto_install): """Run a generated script from a previous run. IDENTIFIER is a run ID or search term to match against prompts. @@ -2132,6 +2269,11 @@ def run_script(ctx, identifier, script_args, file_name, list_scripts): script = matching[0] elif len(scripts) == 1: script = scripts[0] + elif no_interactive or auto_install: + available = ", ".join(s.name for s in scripts) + raise click.ClickException( + f"multiple scripts found for run {run_id}; pass --file to choose. Available: {available}" + ) else: # Interactive picker choices = [questionary.Choice(title=s.name, value=s) for s in scripts] @@ -2181,9 +2323,14 @@ def run_script(ctx, identifier, script_args, file_name, list_scripts): missing = match.group(1).split(".")[0] console.print(f"[yellow]Missing dependency: {missing}[/yellow]") - install = questionary.confirm( - f"Install '{missing}' and retry?", default=True - ).ask() + if auto_install: + install = True + elif no_interactive: + install = False + else: + install = questionary.confirm( + f"Install '{missing}' and retry?", default=True + ).ask() if install: subprocess.run([str(venv_pip), "install", "-q", missing], check=True) console.print(f"Installed [green]{missing}[/green]. Retrying...") diff --git a/tests/test_cli_agent_json.py b/tests/test_cli_agent_json.py new file mode 100644 index 0000000..f8dca12 --- /dev/null +++ b/tests/test_cli_agent_json.py @@ -0,0 +1,138 @@ +"""Tests for the agent-friendly CLI surface (--json, --no-interactive, payload shape).""" + +import json +from unittest.mock import patch + +from click.testing import CliRunner + +from reverse_api.cli import ( + AGENT_JSON_SCHEMA_VERSION, + _build_agent_payload, + agent, + list_runs, + show_run, +) + + +class TestBuildAgentPayload: + """Stable shape for the `agent --json` payload.""" + + def test_success_shape(self, tmp_path): + payload = _build_agent_payload( + { + "run_id": "abc123", + "mode": "auto", + "script_path": str(tmp_path / "scripts" / "api_client.py"), + "usage": {"input_tokens": 1, "output_tokens": 2, "total_cost": 0.001}, + }, + prompt="capture the X api", + url="https://example.com", + ) + assert payload["schema_version"] == AGENT_JSON_SCHEMA_VERSION + # No run produced a HAR yet on disk in this test, so har_path is None + assert payload["status"] == "ok" + assert payload["run_id"] == "abc123" + assert payload["mode"] == "auto" + assert payload["prompt"] == "capture the X api" + assert payload["url"] == "https://example.com" + assert payload["usage"]["total_cost"] == 0.001 + assert payload["error"] is None + # Must be JSON-serializable (no Path objects sneaking through) + json.dumps(payload) + + def test_no_run_id_is_error(self): + payload = _build_agent_payload({}, prompt="x", url=None) + assert payload["status"] == "error" + assert payload["error"] + assert payload["run_id"] is None + + def test_explicit_error_overrides(self): + payload = _build_agent_payload( + {"run_id": "abc", "mode": "auto"}, + prompt="x", + url=None, + error="boom", + ) + assert payload["status"] == "error" + assert payload["error"] == "boom" + + def test_inner_error_propagates(self): + payload = _build_agent_payload( + {"run_id": "abc", "mode": "auto", "error": "inner"}, + prompt="x", + url=None, + ) + assert payload["status"] == "error" + assert payload["error"] == "inner" + + +class TestAgentCommandJson: + """`agent` click command's --json / --no-interactive behavior.""" + + def test_json_without_prompt_exits_2(self): + runner = CliRunner() + result = runner.invoke(agent, ["--json"]) + assert result.exit_code == 2 + payload = json.loads(result.stdout.strip()) + assert payload["status"] == "error" + assert "prompt" in payload["error"].lower() + + def test_no_interactive_without_prompt_exits_2(self): + runner = CliRunner() + result = runner.invoke(agent, ["--no-interactive"]) + assert result.exit_code == 2 + # Plain text on stderr, not JSON, since --json wasn't requested + assert "prompt" in (result.stderr or result.output).lower() + + def test_json_emits_payload_on_success(self): + runner = CliRunner() + fake_result = { + "run_id": "deadbeef0001", + "mode": "auto", + "script_path": None, + "usage": {"total_cost": 0.0}, + } + with patch("reverse_api.cli.run_agent_capture", return_value=fake_result): + result = runner.invoke(agent, ["--json", "-p", "capture the X api"]) + assert result.exit_code == 0, result.output + payload = json.loads(result.stdout.strip().splitlines()[-1]) + assert payload["status"] == "ok" + assert payload["run_id"] == "deadbeef0001" + assert payload["prompt"] == "capture the X api" + + def test_json_emits_error_payload_on_exception(self): + runner = CliRunner() + with patch("reverse_api.cli.run_agent_capture", side_effect=RuntimeError("boom")): + result = runner.invoke(agent, ["--json", "-p", "x"]) + assert result.exit_code == 1 + payload = json.loads(result.stdout.strip().splitlines()[-1]) + assert payload["status"] == "error" + assert payload["error"] == "boom" + + +class TestListJsonEmpty: + """`list --json` should emit a JSON array even when there is no history.""" + + def test_empty_history_emits_empty_array(self, tmp_path): + runner = CliRunner() + # Repoint session_manager.history to empty list for the test + with patch("reverse_api.cli.session_manager") as sm: + sm.history = [] + result = runner.invoke(list_runs, ["--json"]) + assert result.exit_code == 0 + assert json.loads(result.stdout.strip()) == [] + + +class TestShowJsonNotFound: + """`show --json` should emit a structured error and exit non-zero.""" + + def test_unknown_run_id_emits_error(self): + runner = CliRunner() + with patch("reverse_api.cli.session_manager") as sm: + sm.get_run.return_value = None + sm.history = [] + result = runner.invoke(show_run, ["doesnotexist", "--json"]) + assert result.exit_code == 1 + payload = json.loads(result.stdout.strip()) + assert "error" in payload + assert payload["run_id"] == "doesnotexist" From ab5b579637166b5a7010a7bd1abe61d10de99550 Mon Sep 17 00:00:00 2001 From: Kalil Bouzigues Date: Tue, 5 May 2026 10:58:22 +0000 Subject: [PATCH 2/8] feat(cli): expand agent-friendly tests, --help epilogs and README - Add 7 more tests covering: run --no-interactive (multi-script error, --file passthrough, refusal to install missing deps), --auto-install (skips questionary.confirm), agent --json KeyboardInterrupt path, and stdout-purity (Rich noise lands on stderr, not stdout) - Add --help epilogs to agent / list / show / run with examples, JSON schema, and exit-code documentation so an agent inspecting --help can self-discover the scripted contract - Expand README "Scripted / Agent Usage" section with a per-field JSON schema table and an exit-code table Co-Authored-By: Claude Opus 4.7 (1M context) --- README.md | 51 +++++++---- src/reverse_api/cli.py | 72 ++++++++++++++- tests/test_cli_agent_json.py | 171 ++++++++++++++++++++++++++++++++++- 3 files changed, 270 insertions(+), 24 deletions(-) diff --git a/README.md b/README.md index c65e249..e5dc7af 100644 --- a/README.md +++ b/README.md @@ -358,7 +358,9 @@ Use these slash commands while in the CLI: ### Scripted / Agent Usage -The CLI subcommands are scriptable. Pass `--no-interactive` (and/or `--json`) so they fail fast instead of opening prompts, and pipe the structured output into `jq` or another agent. +The CLI subcommands can be driven by another agent or a wrapper script. Pass `--no-interactive` (and/or `--json`) so they fail fast instead of opening questionary prompts, and pipe the structured output into `jq`. + +When `--json` is set: stdout contains exactly one JSON document (the final result), Rich logs and progress are diverted to stderr, and the process exits with a stable code. ```bash # Run an autonomous agent capture and get a single JSON result on stdout @@ -367,30 +369,41 @@ reverse-api-engineer agent \ --url https://example.com/jobs \ --json | jq -# Output (logs go to stderr; one JSON object on stdout): -# { -# "schema_version": 1, -# "status": "ok", -# "run_id": "deadbeef0001", -# "prompt": "capture the public jobs api", -# "url": "https://example.com/jobs", -# "mode": "auto", -# "har_path": "/.../recording.har", -# "script_path": "/.../api_client.py", -# "usage": { ... }, -# "error": null -# } - # List runs / inspect a run as JSON (empty history -> []) reverse-api-engineer list --json reverse-api-engineer show --json -# Run a generated script non-interactively (errors on multiple-script picker -# or missing-dep prompt instead of blocking) -reverse-api-engineer run --no-interactive --auto-install -- --org acme +# Run a generated script non-interactively +# --no-interactive : never open the script-picker / install confirm +# --auto-install : install missing deps on retry without asking +reverse-api-engineer run --file api_client.py \ + --no-interactive --auto-install -- --org acme ``` -Exit codes: `0` success, `1` runtime error, `2` misuse (missing required args). +#### `agent --json` output schema + +| Field | Type | Notes | +|------------------|---------------------|------------------------------------------------------------------------| +| `schema_version` | `int` | Currently `1`. Bumped on breaking changes. | +| `status` | `"ok"` \| `"error"` | Top-level result. | +| `run_id` | `string` \| `null` | Stable id for follow-up `show` / `engineer` / `run` calls. | +| `prompt` | `string` | The prompt that was passed in. | +| `url` | `string` \| `null` | Optional starting URL. | +| `mode` | `string` \| `null` | Provider used (`"auto"` for Playwright MCP, `"chrome-mcp"`). | +| `har_path` | `string` \| `null` | Absolute path to the captured HAR (`recording.har`). | +| `script_path` | `string` \| `null` | Absolute path to the generated client when reverse engineering ran. | +| `usage` | `object` | Token + cost usage from the engineer SDK (`{input_tokens, output_tokens, total_cost}`).| +| `error` | `string` \| `null` | Human-readable error message when `status == "error"`. | + +#### Exit codes + +| Code | Meaning | +|------|---------------------------------------------------------------------------| +| `0` | Success. | +| `1` | Runtime error (capture or engineering failed; details in `error`). | +| `2` | Misuse — required arg missing under `--no-interactive` / `--json`. | + +For `run`, the exit code is the underlying script's return code on success, or `1` if no script was found, or non-zero if `--no-interactive` would otherwise have to prompt. ## 🔌 Claude Code Plugin diff --git a/src/reverse_api/cli.py b/src/reverse_api/cli.py index 831515a..0038ba3 100644 --- a/src/reverse_api/cli.py +++ b/src/reverse_api/cli.py @@ -1307,7 +1307,35 @@ def manual(prompt, url, reverse_engineer, model, output_dir): run_manual_capture(prompt, url, reverse_engineer, model, output_dir) -@main.command() +@main.command( + epilog="""\b +Examples: + reverse-api-engineer agent + reverse-api-engineer agent -p "capture the jobs api" -u https://example.com --no-interactive + reverse-api-engineer agent -p "capture the jobs api" --json | jq + +\b +JSON output schema (--json): + { + "schema_version": 1, + "status": "ok" | "error", + "run_id": "" | null, + "prompt": "...", + "url": "..." | null, + "mode": "auto" | "chrome-mcp" | null, + "har_path": "/abs/path/recording.har" | null, + "script_path": "/abs/path/api_client.py" | null, + "usage": { "input_tokens": ..., "output_tokens": ..., "total_cost": ... }, + "error": "" | null + } + +\b +Exit codes: + 0 success + 1 runtime error (capture or engineering failed) + 2 misuse (e.g. --prompt missing under --no-interactive / --json) +""" +) @click.option("--prompt", "-p", default=None, help="Instruction for the autonomous agent.") @click.option("--url", "-u", default=None, help="Optional starting URL.") @click.option( @@ -1995,7 +2023,16 @@ def _get_run_details(run: dict) -> dict: } -@main.command("list") +@main.command( + "list", + epilog="""\b +Examples: + reverse-api-engineer list + reverse-api-engineer list --mode auto --search jobs --json | jq + +JSON output (--json) is always a flat array (possibly empty []). +""", +) @click.option("--json", "as_json", is_flag=True, help="Output as flat JSON array.") @click.option("--full", is_flag=True, help="Show all columns (default: compact view).") @click.option("--limit", "-n", type=int, default=None, help="Limit number of results.") @@ -2076,7 +2113,19 @@ def list_runs(as_json, full, limit, mode, model, search): console.print(table) -@main.command("show") +@main.command( + "show", + epilog="""\b +Examples: + reverse-api-engineer show + reverse-api-engineer show --json | jq + +\b +Exit codes (--json): + 0 run found + 1 run not found / no history +""", +) @click.argument("run_id", required=False) @click.option("--json", "as_json", is_flag=True, help="Output as JSON object.") def show_run(run_id, as_json): @@ -2188,7 +2237,22 @@ def _path_val(path_str: str, exists: bool) -> Text: console.print(table) -@main.command("run") +@main.command( + "run", + epilog="""\b +Examples: + reverse-api-engineer run a450e520ca30 + reverse-api-engineer run ashby --ls + reverse-api-engineer run ashby --file api_client.py -- --org acme --limit 10 + reverse-api-engineer run a450e520ca30 --file api_client.py --no-interactive --auto-install + +\b +Exit codes: + the underlying script's return code (0 on success) + 1 no script found for the run + non-zero missing --file when multiple scripts exist under --no-interactive +""", +) @click.argument("identifier") @click.argument("script_args", nargs=-1, type=click.UNPROCESSED) @click.option("--file", "-f", "file_name", default=None, help="Script filename to run (e.g. api_client.py).") diff --git a/tests/test_cli_agent_json.py b/tests/test_cli_agent_json.py index f8dca12..72672d0 100644 --- a/tests/test_cli_agent_json.py +++ b/tests/test_cli_agent_json.py @@ -1,8 +1,9 @@ """Tests for the agent-friendly CLI surface (--json, --no-interactive, payload shape).""" import json -from unittest.mock import patch +from unittest.mock import MagicMock, patch +import pytest from click.testing import CliRunner from reverse_api.cli import ( @@ -10,8 +11,10 @@ _build_agent_payload, agent, list_runs, + main, show_run, ) +from reverse_api.session import SessionManager class TestBuildAgentPayload: @@ -136,3 +139,169 @@ def test_unknown_run_id_emits_error(self): payload = json.loads(result.stdout.strip()) assert "error" in payload assert payload["run_id"] == "doesnotexist" + + +# --------------------------------------------------------------------------- +# `run` command: --no-interactive and --auto-install behavior +# --------------------------------------------------------------------------- + + +@pytest.fixture +def multi_script_env(tmp_path): + """Fake a run with multiple .py scripts on disk so the picker would normally fire.""" + history_path = tmp_path / "history.json" + sm = SessionManager(history_path) + sm.add_run( + "abc123def456", + "capture the ashby jobs api", + mode="manual", + paths={"script_path": "/scripts/abc123def456/api_client.py"}, + ) + scripts_dir = tmp_path / "scripts" / "abc123def456" + scripts_dir.mkdir(parents=True) + (scripts_dir / "api_client.py").write_text("print('client')") + (scripts_dir / "example_usage.py").write_text("print('example')") + + patches = [ + patch("reverse_api.cli.session_manager", sm), + patch("reverse_api.cli.config_manager", MagicMock(get=MagicMock(return_value=None))), + patch("reverse_api.utils.get_base_output_dir", return_value=tmp_path), + ] + for p in patches: + p.start() + try: + yield tmp_path, sm + finally: + for p in patches: + p.stop() + + +class TestRunCommandNonInteractive: + """`run` should never block on questionary when --no-interactive or --auto-install is set.""" + + def test_multiple_scripts_with_no_interactive_errors(self, multi_script_env): + runner = CliRunner() + with patch("questionary.select") as mock_select: + result = runner.invoke(main, ["run", "abc123def456", "--no-interactive"]) + assert result.exit_code != 0 + # Picker must NOT have been opened + mock_select.assert_not_called() + assert "multiple scripts" in result.output.lower() or "available" in result.output.lower() + + def test_multiple_scripts_with_auto_install_errors_on_picker(self, multi_script_env): + """--auto-install also implies non-interactive for the picker.""" + runner = CliRunner() + with patch("questionary.select") as mock_select: + result = runner.invoke(main, ["run", "abc123def456", "--auto-install"]) + assert result.exit_code != 0 + mock_select.assert_not_called() + + def test_no_interactive_with_file_flag_runs(self, multi_script_env): + """--file disambiguates and the script runs without prompting.""" + runner = CliRunner() + ok = MagicMock(returncode=0, stdout="", stderr="") + with patch("subprocess.run", return_value=ok) as mock_sub, patch("questionary.select") as mock_select: + result = runner.invoke( + main, ["run", "abc123def456", "--file", "api_client.py", "--no-interactive"] + ) + assert result.exit_code == 0 + mock_select.assert_not_called() + assert mock_sub.called + + def test_auto_install_skips_questionary_confirm(self, multi_script_env): + """When the script raises ModuleNotFoundError, --auto-install skips the confirm prompt.""" + runner = CliRunner() + # First subprocess.run returns missing-module error; subsequent ones (venv setup, pip install, retry) + # all succeed. + first = MagicMock(returncode=1, stdout="", stderr="ModuleNotFoundError: No module named 'foo'") + ok = MagicMock(returncode=0, stdout="", stderr="") + + def fake_subprocess(cmd, *args, **kwargs): + # Detect the script execution call by looking for the .py path + cmd_str = " ".join(str(c) for c in cmd) + if cmd_str.endswith("api_client.py") or "api_client.py" in cmd_str: + # First and second invocation of the script path + fake_subprocess.script_calls += 1 + if fake_subprocess.script_calls == 1: + return first + return ok + + fake_subprocess.script_calls = 0 + + with patch("subprocess.run", side_effect=fake_subprocess), patch("questionary.confirm") as mock_confirm: + result = runner.invoke( + main, ["run", "abc123def456", "--file", "api_client.py", "--auto-install"] + ) + # questionary.confirm must never have been called + mock_confirm.assert_not_called() + # Final exit code should be from the retry (success) + assert result.exit_code == 0 + + def test_no_interactive_skips_confirm_and_does_not_install(self, multi_script_env): + """--no-interactive (without --auto-install) must NOT install missing deps; it surfaces the failure.""" + runner = CliRunner() + first = MagicMock(returncode=1, stdout="", stderr="ModuleNotFoundError: No module named 'foo'") + ok = MagicMock(returncode=0, stdout="", stderr="") + + def fake_subprocess(cmd, *args, **kwargs): + cmd_str = " ".join(str(c) for c in cmd) + if "api_client.py" in cmd_str: + fake_subprocess.script_calls += 1 + if fake_subprocess.script_calls == 1: + return first + # pip install must NOT happen — assert it + if "pip" in cmd_str and "install" in cmd_str and "foo" in cmd_str: + pytest.fail("pip should not have been called to install 'foo' under --no-interactive") + return ok + + fake_subprocess.script_calls = 0 + + with patch("subprocess.run", side_effect=fake_subprocess), patch("questionary.confirm") as mock_confirm: + result = runner.invoke( + main, ["run", "abc123def456", "--file", "api_client.py", "--no-interactive"] + ) + mock_confirm.assert_not_called() + # Exit propagates the original failure + assert result.exit_code != 0 + + +# --------------------------------------------------------------------------- +# Agent --json: interruption + stdout-purity +# --------------------------------------------------------------------------- + + +class TestAgentJsonInterruption: + def test_keyboard_interrupt_emits_error_payload(self): + runner = CliRunner() + with patch("reverse_api.cli.run_agent_capture", side_effect=KeyboardInterrupt): + result = runner.invoke(agent, ["--json", "-p", "x"]) + assert result.exit_code == 1 + payload = json.loads(result.stdout.strip().splitlines()[-1]) + assert payload["status"] == "error" + assert payload["error"] == "interrupted" + + +class TestAgentJsonStdoutPurity: + """Stdout should contain exactly one JSON line; Rich noise must go to stderr.""" + + def test_rich_console_output_does_not_contaminate_stdout(self): + from reverse_api.cli import console as cli_console + + # Click 8.2+ separates stdout/stderr by default + runner = CliRunner() + + def fake_capture(**kwargs): + cli_console.print("noisy human-readable status") + return {"run_id": "abc", "mode": "auto", "script_path": None, "usage": {}} + + with patch("reverse_api.cli.run_agent_capture", side_effect=fake_capture): + result = runner.invoke(agent, ["--json", "-p", "x"]) + + assert result.exit_code == 0 + # stdout is exactly one JSON object — nothing else + stdout = result.stdout.strip() + payload = json.loads(stdout) + assert payload["status"] == "ok" + assert "noisy" not in stdout + # The noise landed on stderr instead + assert "noisy" in result.stderr From c91d1fad76fe270b250c3a0f1bc7c81819ef2d1c Mon Sep 17 00:00:00 2001 From: Kalil Bouzigues Date: Tue, 5 May 2026 19:12:19 +0000 Subject: [PATCH 3/8] fix(cli): address review feedback on agent --json contract MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three issues flagged by cubic-dev-ai on PR #61: P1 — Interrupted auto-capture silently reported as ok When KeyboardInterrupt was caught inside run_auto_capture, the function returned a dict with no error flag, so _build_agent_payload saw a run_id and emitted status=ok. Now the inner KeyboardInterrupt bubbles up an "error: interrupted" key in the result dict, which _build_agent_payload propagates to status=error. P2 — har_path resolved against the wrong output_dir _build_agent_payload computed the HAR path with the default config root, ignoring the user's --output-dir. Threaded output_dir through the agent click handler to _build_agent_payload so HAR resolution matches the actual capture location. P2 — misuse JSON emitted incomplete schema When --json was set and --prompt was missing, the early-exit path printed only {schema_version, status, error}. Agents parsing the output expected every documented field. Now uses _build_agent_payload so misuse responses contain the full schema (with nulls). Adds 3 regression tests covering each fix. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/reverse_api/cli.py | 35 ++++++++++++------ tests/test_cli_agent_json.py | 69 ++++++++++++++++++++++++++++++++++++ 2 files changed, 94 insertions(+), 10 deletions(-) diff --git a/src/reverse_api/cli.py b/src/reverse_api/cli.py index 0038ba3..4850246 100644 --- a/src/reverse_api/cli.py +++ b/src/reverse_api/cli.py @@ -97,16 +97,21 @@ def _build_agent_payload( *, prompt: str | None, url: str | None, + output_dir: str | None = None, error: str | None = None, ) -> dict: - """Normalize an agent capture result into a stable JSON shape.""" + """Normalize an agent capture result into a stable JSON shape. + + `output_dir` must match the value passed to the underlying capture so the + HAR path is resolved against the user's chosen run root, not the default. + """ result = result or {} run_id = result.get("run_id") inner_error = result.get("error") final_error = error or inner_error or (None if run_id else "agent capture produced no run") har_path = None if run_id: - candidate = get_har_dir(run_id, None) / "recording.har" + candidate = get_har_dir(run_id, output_dir) / "recording.har" har_path = str(candidate) if candidate.exists() else None return { "schema_version": AGENT_JSON_SCHEMA_VERSION, @@ -1368,11 +1373,14 @@ def agent(prompt, url, reverse_engineer, model, output_dir, no_interactive, as_j if no_interactive and not (prompt and prompt.strip()): if as_json: - click.echo(json.dumps({ - "schema_version": AGENT_JSON_SCHEMA_VERSION, - "status": "error", - "error": "--prompt is required in non-interactive/--json mode", - })) + misuse = _build_agent_payload( + {}, + prompt=prompt, + url=url, + output_dir=output_dir, + error="--prompt is required in non-interactive/--json mode", + ) + click.echo(json.dumps(misuse)) else: click.echo("error: --prompt is required when --no-interactive is set", err=True) sys.exit(2) @@ -1391,11 +1399,15 @@ def agent(prompt, url, reverse_engineer, model, output_dir, no_interactive, as_j model=model, output_dir=output_dir, ) - payload = _build_agent_payload(result, prompt=prompt, url=url) + payload = _build_agent_payload(result, prompt=prompt, url=url, output_dir=output_dir) except KeyboardInterrupt: - payload = _build_agent_payload({}, prompt=prompt, url=url, error="interrupted") + payload = _build_agent_payload( + {}, prompt=prompt, url=url, output_dir=output_dir, error="interrupted" + ) except Exception as e: - payload = _build_agent_payload({}, prompt=prompt, url=url, error=str(e)) + payload = _build_agent_payload( + {}, prompt=prompt, url=url, output_dir=output_dir, error=str(e) + ) real_stdout.write(json.dumps(payload) + "\n") real_stdout.flush() @@ -1757,10 +1769,12 @@ def run_auto_capture(prompt=None, url=None, model=None, output_dir=None, agent_p # Start sync before analysis engineer.start_sync() + interrupted = False try: result = asyncio.run(engineer.analyze_and_generate()) except KeyboardInterrupt: result = None + interrupted = True finally: # Always stop sync when done engineer.stop_sync() @@ -1778,6 +1792,7 @@ def run_auto_capture(prompt=None, url=None, model=None, output_dir=None, agent_p "mode": mode_label, "script_path": (result or {}).get("script_path"), "usage": (result or {}).get("usage", {}), + **({"error": "interrupted"} if interrupted else {}), } except Exception as e: diff --git a/tests/test_cli_agent_json.py b/tests/test_cli_agent_json.py index 72672d0..f15d27d 100644 --- a/tests/test_cli_agent_json.py +++ b/tests/test_cli_agent_json.py @@ -17,6 +17,20 @@ from reverse_api.session import SessionManager +EXPECTED_PAYLOAD_KEYS = { + "schema_version", + "status", + "run_id", + "prompt", + "url", + "mode", + "har_path", + "script_path", + "usage", + "error", +} + + class TestBuildAgentPayload: """Stable shape for the `agent --json` payload.""" @@ -68,6 +82,28 @@ def test_inner_error_propagates(self): assert payload["status"] == "error" assert payload["error"] == "inner" + def test_har_path_resolves_against_provided_output_dir(self, tmp_path): + """har_path must use the user's --output-dir, not the default config root. + + Regression for cubic-dev-ai PR #61 review (P2). + """ + run_id = "deadbeef0001" + run_har_dir = tmp_path / "har" / run_id + run_har_dir.mkdir(parents=True) + (run_har_dir / "recording.har").write_text("{}") + + with patch("reverse_api.cli.get_har_dir") as gethar: + gethar.side_effect = lambda rid, odir: tmp_path / "har" / rid + payload = _build_agent_payload( + {"run_id": run_id, "mode": "auto"}, + prompt="x", + url=None, + output_dir=str(tmp_path), + ) + + gethar.assert_called_with(run_id, str(tmp_path)) + assert payload["har_path"] == str(run_har_dir / "recording.har") + class TestAgentCommandJson: """`agent` click command's --json / --no-interactive behavior.""" @@ -80,6 +116,16 @@ def test_json_without_prompt_exits_2(self): assert payload["status"] == "error" assert "prompt" in payload["error"].lower() + def test_json_without_prompt_emits_full_schema(self): + """Misuse JSON must contain every documented field (nulled), not a 3-key shortcut. + + Regression for cubic-dev-ai PR #61 review (P2). + """ + runner = CliRunner() + result = runner.invoke(agent, ["--json"]) + payload = json.loads(result.stdout.strip()) + assert set(payload.keys()) == EXPECTED_PAYLOAD_KEYS + def test_no_interactive_without_prompt_exits_2(self): runner = CliRunner() result = runner.invoke(agent, ["--no-interactive"]) @@ -280,6 +326,29 @@ def test_keyboard_interrupt_emits_error_payload(self): assert payload["status"] == "error" assert payload["error"] == "interrupted" + def test_inner_keyboard_interrupt_does_not_silently_succeed(self): + """When run_agent_capture returns a dict with `error: "interrupted"` (the + scenario where KeyboardInterrupt is caught inside run_auto_capture rather + than propagating up), the agent payload must report status=error. + + Regression for cubic-dev-ai PR #61 review (P1). + """ + runner = CliRunner() + captured = { + "run_id": "abc12345", + "mode": "auto", + "script_path": None, + "usage": {}, + "error": "interrupted", + } + with patch("reverse_api.cli.run_agent_capture", return_value=captured): + result = runner.invoke(agent, ["--json", "-p", "x"]) + assert result.exit_code == 1 + payload = json.loads(result.stdout.strip().splitlines()[-1]) + assert payload["status"] == "error" + assert payload["error"] == "interrupted" + assert payload["run_id"] == "abc12345" + class TestAgentJsonStdoutPurity: """Stdout should contain exactly one JSON line; Rich noise must go to stderr.""" From d2c4e49b8692a1f81cd1b77f8cfa4dfa9e27017b Mon Sep 17 00:00:00 2001 From: Kalil Bouzigues Date: Tue, 5 May 2026 20:21:59 +0000 Subject: [PATCH 4/8] feat(cli): TTY-detect at REPL entry + engineer --json/--no-interactive Implements the first two items of the agent-friendliness backlog (#62): #1 TTY-detect at REPL entry Without a TTY and no subcommand, the prompt_toolkit REPL would block on stdin forever. Now detects `not sys.stdin.isatty()`, prints `--help` to stderr, and exits 2 (misuse). Subcommands are unaffected. #2 engineer --json / --no-interactive Mirrors the agent-mode contract (PR #61): --json redirects Rich output to stderr and emits a stable JSON payload on stdout. New helper `_build_engineer_payload` produces the schema {schema_version, status, run_id, prompt, fresh, script_path, usage, error}. KeyboardInterrupt surfaces as `error: "interrupted"`. Exit codes: 0 ok, 1 runtime error. --no-interactive is reserved for symmetry (engineer mode has no questionary prompts internally). Also surfaces `--json` / `--no-interactive` in the root --help (item #6 partial). Adds tests/test_cli_followups.py (14 tests) covering TTY-detect end-to- end via subprocess, _build_engineer_payload edge cases, and the click wiring for engineer --json (success, not-found, KeyboardInterrupt, exception, prompt-vs-fresh threading). Co-Authored-By: Claude Opus 4.7 (1M context) --- src/reverse_api/cli.py | 131 +++++++++++++++++++++-- tests/test_cli_followups.py | 202 ++++++++++++++++++++++++++++++++++++ 2 files changed, 323 insertions(+), 10 deletions(-) create mode 100644 tests/test_cli_followups.py diff --git a/src/reverse_api/cli.py b/src/reverse_api/cli.py index 8cc0a14..fca2eef 100644 --- a/src/reverse_api/cli.py +++ b/src/reverse_api/cli.py @@ -119,6 +119,36 @@ def _build_agent_payload( "error": final_error, } + +def _build_engineer_payload( + result: dict | None, + *, + run_id: str, + prompt: str | None, + fresh: bool, + error: str | None = None, +) -> dict: + """Normalize an engineer-mode result into a stable JSON shape. + + Mirrors `_build_agent_payload`'s contract minus the agent-specific fields + (no `url`, no `mode`, no `har_path`). `prompt` is the value the user passed + to --prompt (which may have been used as either a full replacement or as + additional instructions depending on --fresh). + """ + result = result if result is not None else {} + inner_error = result.get("error") if isinstance(result, dict) else None + final_error = error or inner_error or (None if result else "engineering produced no result") + return { + "schema_version": AGENT_JSON_SCHEMA_VERSION, + "status": "error" if final_error else "ok", + "run_id": run_id, + "prompt": prompt, + "fresh": fresh, + "script_path": result.get("script_path") if isinstance(result, dict) else None, + "usage": (result.get("usage") if isinstance(result, dict) else None) or {}, + "error": final_error, + } + # Mode definitions MODES = ["agent", "manual", "engineer", "collector"] MODE_DESCRIPTIONS = { @@ -391,8 +421,23 @@ def main(ctx: click.Context): Run without a subcommand to start the interactive REPL; use agent, manual, or engineer for CLI mode. + + Most subcommands accept --json and --no-interactive for scripted use; see + ` --help` for per-command details. """ if ctx.invoked_subcommand is None: + # Refuse to drop into the prompt_toolkit REPL when stdin is not a TTY: + # without an interactive terminal the REPL would block on stdin + # forever (e.g. CI invocations, agent wrappers that forgot the + # subcommand). Print --help to stderr and exit 2 (misuse). + if not sys.stdin.isatty(): + click.echo(ctx.get_help(), err=True) + click.echo( + "\nerror: no subcommand given and stdin is not a TTY; " + "the interactive REPL requires a terminal.", + err=True, + ) + sys.exit(2) repl_loop() @@ -1366,7 +1411,34 @@ def run_auto_capture(prompt=None, url=None, model=None, output_dir=None, agent_p -@main.command() +@main.command( + epilog="""\b +Examples: + reverse-api-engineer engineer abc123def456 + reverse-api-engineer engineer abc123def456 -p "add pagination support" + reverse-api-engineer engineer abc123def456 --fresh -p "extract auth flow only" + reverse-api-engineer engineer abc123def456 --json | jq + +\b +JSON output schema (--json): + { + "schema_version": 1, + "status": "ok" | "error", + "run_id": "", + "prompt": "..." | null, + "fresh": false, + "script_path": "/abs/path/api_client.py" | null, + "usage": { "input_tokens": ..., "output_tokens": ..., "total_cost": ... }, + "error": "" | null + } + +\b +Exit codes: + 0 success + 1 runtime error (engineering failed, run not found) + 2 misuse +""" +) @click.argument("run_id") @click.option( "--prompt", @@ -1389,20 +1461,59 @@ def run_auto_capture(prompt=None, url=None, model=None, output_dir=None, agent_p default=None, ) @click.option("--output-dir", "-o", default=None, help="Custom output directory.") -def engineer(run_id, prompt, fresh, model, output_dir): +@click.option( + "--no-interactive", + is_flag=True, + help="Reserved for symmetry with `agent`/`run`; engineer mode is non-interactive by design.", +) +@click.option( + "--json", + "as_json", + is_flag=True, + help="Emit a single JSON result on stdout (logs go to stderr). Implies --no-interactive.", +) +def engineer(run_id, prompt, fresh, model, output_dir, no_interactive, as_json): """Run reverse engineering on a previous run.""" # --fresh treats --prompt as a full replacement of the original goal; # without --fresh, --prompt is additive so the captured run's context is preserved. main_prompt = prompt if fresh else None additional = prompt if (prompt and not fresh) else None - run_engineer( - run_id, - prompt=main_prompt, - additional_instructions=additional, - model=model, - output_dir=output_dir, - is_fresh=fresh, - ) + + if not as_json: + run_engineer( + run_id, + prompt=main_prompt, + additional_instructions=additional, + model=model, + output_dir=output_dir, + is_fresh=fresh, + ) + return + + payload: dict + with _quiet_consoles_for_json() as real_stdout: + try: + result = run_engineer( + run_id, + prompt=main_prompt, + additional_instructions=additional, + model=model, + output_dir=output_dir, + is_fresh=fresh, + ) + payload = _build_engineer_payload(result, run_id=run_id, prompt=prompt, fresh=fresh) + except KeyboardInterrupt: + payload = _build_engineer_payload( + None, run_id=run_id, prompt=prompt, fresh=fresh, error="interrupted" + ) + except Exception as e: + payload = _build_engineer_payload( + None, run_id=run_id, prompt=prompt, fresh=fresh, error=str(e) + ) + + real_stdout.write(json.dumps(payload) + "\n") + real_stdout.flush() + sys.exit(0 if payload["status"] == "ok" else 1) def run_engineer( diff --git a/tests/test_cli_followups.py b/tests/test_cli_followups.py new file mode 100644 index 0000000..08d96da --- /dev/null +++ b/tests/test_cli_followups.py @@ -0,0 +1,202 @@ +"""Tests for the issue #62 follow-up items. + +Covers: +1. TTY-detect at REPL entry (no subcommand + non-TTY stdin → exit 2 + help) +2. `engineer --json` / `--no-interactive` parity with `agent --json` +""" + +import json +import subprocess +from unittest.mock import patch + +from click.testing import CliRunner + +from reverse_api.cli import ( + AGENT_JSON_SCHEMA_VERSION, + _build_engineer_payload, + engineer, + main, +) + +EXPECTED_ENGINEER_KEYS = { + "schema_version", + "status", + "run_id", + "prompt", + "fresh", + "script_path", + "usage", + "error", +} + + +# --------------------------------------------------------------------------- +# Item #1: TTY-detect at REPL entry +# --------------------------------------------------------------------------- + + +class TestTtyDetectionAtReplEntry: + """Without a TTY and no subcommand, the REPL must NOT block on prompt_toolkit.""" + + def test_no_tty_no_subcommand_exits_2(self): + """End-to-end: invoke the installed binary with stdin redirected from /dev/null.""" + result = subprocess.run( + ["uv", "run", "reverse-api-engineer"], + stdin=subprocess.DEVNULL, + capture_output=True, + text=True, + timeout=10, + ) + assert result.returncode == 2 + assert "stdin is not a TTY" in result.stderr + # Help is printed too so a wrapper can self-discover the subcommands + assert "agent" in result.stderr + assert "engineer" in result.stderr + + def test_no_tty_with_subcommand_does_not_trip(self): + """Subcommands work fine without a TTY (this is the whole point of agent --json).""" + result = subprocess.run( + ["uv", "run", "reverse-api-engineer", "list", "--json"], + stdin=subprocess.DEVNULL, + capture_output=True, + text=True, + timeout=10, + ) + # list --json on empty history should succeed with [] + assert result.returncode == 0 + # Nothing about TTY in the output + assert "TTY" not in result.stderr + + +# --------------------------------------------------------------------------- +# Item #2: engineer --json +# --------------------------------------------------------------------------- + + +class TestBuildEngineerPayload: + """Stable shape for the `engineer --json` payload.""" + + def test_success_shape(self): + payload = _build_engineer_payload( + {"script_path": "/abs/api_client.py", "usage": {"total_cost": 0.001}}, + run_id="abc123", + prompt="add pagination", + fresh=False, + ) + assert payload["schema_version"] == AGENT_JSON_SCHEMA_VERSION + assert payload["status"] == "ok" + assert payload["run_id"] == "abc123" + assert payload["prompt"] == "add pagination" + assert payload["fresh"] is False + assert payload["script_path"] == "/abs/api_client.py" + assert payload["usage"]["total_cost"] == 0.001 + assert payload["error"] is None + assert set(payload.keys()) == EXPECTED_ENGINEER_KEYS + + def test_none_result_is_error(self): + payload = _build_engineer_payload(None, run_id="abc", prompt=None, fresh=False) + assert payload["status"] == "error" + assert payload["error"] + assert set(payload.keys()) == EXPECTED_ENGINEER_KEYS + + def test_explicit_error_overrides(self): + payload = _build_engineer_payload( + {"script_path": "/x.py"}, run_id="abc", prompt=None, fresh=False, error="boom" + ) + assert payload["status"] == "error" + assert payload["error"] == "boom" + + def test_inner_error_propagates(self): + payload = _build_engineer_payload( + {"error": "engine crashed"}, run_id="abc", prompt=None, fresh=False + ) + assert payload["status"] == "error" + assert payload["error"] == "engine crashed" + + +class TestEngineerCommandJson: + """`engineer` click command --json wiring.""" + + def test_json_emits_payload_on_success(self): + runner = CliRunner() + fake_result = {"script_path": "/abs/api_client.py", "usage": {"total_cost": 0.0}} + with patch("reverse_api.cli.run_engineer", return_value=fake_result): + result = runner.invoke(engineer, ["abc123", "--json"]) + assert result.exit_code == 0, result.output + payload = json.loads(result.stdout.strip().splitlines()[-1]) + assert payload["status"] == "ok" + assert payload["run_id"] == "abc123" + assert payload["script_path"] == "/abs/api_client.py" + assert set(payload.keys()) == EXPECTED_ENGINEER_KEYS + + def test_json_emits_error_on_not_found(self): + runner = CliRunner() + # run_engineer returns None when the run can't be located + with patch("reverse_api.cli.run_engineer", return_value=None): + result = runner.invoke(engineer, ["doesnotexist", "--json"]) + assert result.exit_code == 1 + payload = json.loads(result.stdout.strip().splitlines()[-1]) + assert payload["status"] == "error" + assert payload["run_id"] == "doesnotexist" + assert payload["error"] + assert set(payload.keys()) == EXPECTED_ENGINEER_KEYS + + def test_json_emits_error_on_keyboard_interrupt(self): + runner = CliRunner() + with patch("reverse_api.cli.run_engineer", side_effect=KeyboardInterrupt): + result = runner.invoke(engineer, ["abc123", "--json"]) + assert result.exit_code == 1 + payload = json.loads(result.stdout.strip().splitlines()[-1]) + assert payload["status"] == "error" + assert payload["error"] == "interrupted" + + def test_json_emits_error_on_exception(self): + runner = CliRunner() + with patch("reverse_api.cli.run_engineer", side_effect=RuntimeError("boom")): + result = runner.invoke(engineer, ["abc123", "--json"]) + assert result.exit_code == 1 + payload = json.loads(result.stdout.strip().splitlines()[-1]) + assert payload["status"] == "error" + assert payload["error"] == "boom" + + def test_prompt_without_fresh_threaded_as_additional(self): + """--prompt without --fresh must reach run_engineer as additional_instructions + even on the JSON path (regression for the cubic-dev-ai catch on PR #63).""" + runner = CliRunner() + with patch("reverse_api.cli.run_engineer", return_value={"script_path": "/x.py"}) as mock_run: + result = runner.invoke(engineer, ["abc123", "--json", "-p", "add pagination"]) + assert result.exit_code == 0, result.output + kwargs = mock_run.call_args.kwargs + assert kwargs["prompt"] is None + assert kwargs["additional_instructions"] == "add pagination" + assert kwargs["is_fresh"] is False + + def test_fresh_with_prompt_threaded_as_main(self): + runner = CliRunner() + with patch("reverse_api.cli.run_engineer", return_value={"script_path": "/x.py"}) as mock_run: + result = runner.invoke( + engineer, ["abc123", "--json", "--fresh", "-p", "extract auth"] + ) + assert result.exit_code == 0, result.output + kwargs = mock_run.call_args.kwargs + assert kwargs["prompt"] == "extract auth" + assert kwargs["additional_instructions"] is None + assert kwargs["is_fresh"] is True + + def test_no_interactive_flag_accepted(self): + """--no-interactive is reserved for symmetry; should not crash.""" + runner = CliRunner() + with patch("reverse_api.cli.run_engineer", return_value={"script_path": "/x.py"}): + result = runner.invoke(engineer, ["abc123", "--no-interactive"]) + assert result.exit_code == 0, result.output + + +class TestRootHelpMentionsScripted: + """Item #6 partial: root --help should advertise scripted features.""" + + def test_root_help_mentions_json_and_no_interactive(self): + runner = CliRunner() + result = runner.invoke(main, ["--help"]) + assert result.exit_code == 0 + assert "--json" in result.output + assert "--no-interactive" in result.output From ea9ea77e3025452f0efff0730ed549ff62b447b2 Mon Sep 17 00:00:00 2001 From: Kalil Bouzigues Date: Tue, 5 May 2026 20:31:46 +0000 Subject: [PATCH 5/8] fix(cli): suppress follow-up prompt in --json / --no-interactive mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After the first successful generation, ClaudeEngineer (and ClaudeAuto- Engineer) loops on `BaseEngineer._prompt_follow_up()` which awaits `input(" > ")` via an executor. With `--json` or `--no-interactive` that block on stdin defeats the scripted contract: pipelines like `engineer --json | jq` would hang before emitting the JSON. - Add `interactive: bool = True` to BaseEngineer; `_prompt_follow_up` now returns None immediately when interactive is False (still flushes sync so partial output reaches disk). - Thread `interactive` through `run_reverse_engineering` and all three SDK constructors (Claude, OpenCode, Copilot — the latter two don't use the follow-up loop today but are wired for symmetry / future). - Thread `interactive` through `cli.run_engineer`, `cli.run_auto_capture`, `cli.run_agent_capture`, and the auto-engineer constructors. - The `agent` and `engineer` click commands derive `interactive = not (as_json or no_interactive)` and pass it down. Adds 6 regression tests in test_cli_followups.py covering: the base short-circuit (input() raises if reached), default interactive=True (REPL UX preserved), and that all three click-command flag combinations (--json, --no-interactive, default) thread the right value through run_engineer / run_agent_capture. Reported by chatgpt-codex-connector on PR #65 (P2). Co-Authored-By: Claude Opus 4.7 (1M context) --- src/reverse_api/base_engineer.py | 16 ++++- src/reverse_api/cli.py | 28 +++++++- src/reverse_api/engineer.py | 4 ++ tests/test_cli_followups.py | 117 +++++++++++++++++++++++++++++++ 4 files changed, 160 insertions(+), 5 deletions(-) diff --git a/src/reverse_api/base_engineer.py b/src/reverse_api/base_engineer.py index dba7d56..0f83ae1 100644 --- a/src/reverse_api/base_engineer.py +++ b/src/reverse_api/base_engineer.py @@ -42,6 +42,7 @@ def __init__( is_fresh: bool = False, output_language: str = "python", output_mode: str = "client", + interactive: bool = True, ): self.run_id = run_id self.har_path = har_path @@ -67,6 +68,10 @@ def __init__( self.sync_watcher: FileSyncWatcher | None = None self.local_scripts_dir: Path | None = None self._stderr_error_shown = False + # When False, _prompt_follow_up() returns None immediately so the + # conversation loop in subclasses ends after the first generation. + # Set this from --json / --no-interactive entry points. + self.interactive = interactive def _handle_cli_stderr(self, line: str) -> None: """Filter CLI subprocess stderr. Shows full output in DEBUG mode, otherwise shows a single clean error.""" @@ -279,9 +284,16 @@ async def _ask_user_interactive(self, questions: list[dict[str, Any]]) -> dict[s async def _prompt_follow_up(self) -> str | None: """Prompt user for a follow-up message. Returns None to finish. - Uses plain input() via executor instead of questionary to avoid - terminal state issues after the SDK subprocess exits. + In non-interactive mode (e.g. --json / --no-interactive) returns None + immediately so the conversation loop terminates after the first + generation. Otherwise uses plain input() via executor instead of + questionary to avoid terminal state issues after the SDK subprocess + exits. """ + if not self.interactive: + # Still flush sync so any partial output reaches disk before we exit. + self.flush_sync() + return None # Ensure all files are synced locally before waiting for user input self.flush_sync() self.ui.console.print() diff --git a/src/reverse_api/cli.py b/src/reverse_api/cli.py index fca2eef..ee2e093 100644 --- a/src/reverse_api/cli.py +++ b/src/reverse_api/cli.py @@ -1111,8 +1111,14 @@ def agent(prompt, url, model, output_dir, no_interactive, as_json): click.echo("error: --prompt is required when --no-interactive is set", err=True) sys.exit(2) + # Either flag must suppress the post-generation follow-up prompt that would + # otherwise block on stdin (`input(" > ")`) inside ClaudeAutoEngineer. + interactive = not (as_json or no_interactive) + if not as_json: - run_agent_capture(prompt=prompt, url=url, model=model, output_dir=output_dir) + run_agent_capture( + prompt=prompt, url=url, model=model, output_dir=output_dir, interactive=interactive + ) return payload: dict @@ -1123,6 +1129,7 @@ def agent(prompt, url, model, output_dir, no_interactive, as_json): url=url, model=model, output_dir=output_dir, + interactive=interactive, ) payload = _build_agent_payload(result, prompt=prompt, url=url, output_dir=output_dir) except KeyboardInterrupt: @@ -1198,7 +1205,7 @@ def run_manual_capture(prompt=None, url=None, reverse_engineer=True, model=None, console.print(f" [dim]>[/dim] [dim]use 'reverse-api-engineer engineer {run_id}' to engineer later[/dim]\n") -def run_agent_capture(prompt=None, url=None, model=None, output_dir=None): +def run_agent_capture(prompt=None, url=None, model=None, output_dir=None, interactive=True): """Shared logic for agent capture mode.""" output_dir = output_dir or config_manager.get("output_dir") @@ -1222,6 +1229,7 @@ def run_agent_capture(prompt=None, url=None, model=None, output_dir=None): model=model, output_dir=output_dir, agent_provider=agent_provider, + interactive=interactive, ) @@ -1274,7 +1282,9 @@ def run_collector(prompt=None, model=None, output_dir=None): traceback.print_exc() -def run_auto_capture(prompt=None, url=None, model=None, output_dir=None, agent_provider="auto"): +def run_auto_capture( + prompt=None, url=None, model=None, output_dir=None, agent_provider="auto", interactive=True +): """Auto mode: LLM-driven browser automation + real-time reverse engineering.""" output_dir = output_dir or config_manager.get("output_dir") @@ -1338,6 +1348,7 @@ def run_auto_capture(prompt=None, url=None, model=None, output_dir=None, agent_p enable_sync=config_manager.get("real_time_sync", False), sdk=sdk, output_language=output_language, + interactive=interactive, ) elif sdk == "copilot": from .auto_engineer import CopilotAutoEngineer @@ -1351,6 +1362,7 @@ def run_auto_capture(prompt=None, url=None, model=None, output_dir=None, agent_p enable_sync=config_manager.get("real_time_sync", False), sdk=sdk, output_language=output_language, + interactive=interactive, ) else: from .auto_engineer import ClaudeAutoEngineer @@ -1364,6 +1376,7 @@ def run_auto_capture(prompt=None, url=None, model=None, output_dir=None, agent_p enable_sync=config_manager.get("real_time_sync", False), sdk=sdk, output_language=output_language, + interactive=interactive, ) # Start sync before analysis @@ -1478,6 +1491,9 @@ def engineer(run_id, prompt, fresh, model, output_dir, no_interactive, as_json): # without --fresh, --prompt is additive so the captured run's context is preserved. main_prompt = prompt if fresh else None additional = prompt if (prompt and not fresh) else None + # Either flag must suppress the post-generation follow-up prompt that + # would otherwise block on stdin (`input(" > ")`) inside ClaudeEngineer. + interactive = not (as_json or no_interactive) if not as_json: run_engineer( @@ -1487,6 +1503,7 @@ def engineer(run_id, prompt, fresh, model, output_dir, no_interactive, as_json): model=model, output_dir=output_dir, is_fresh=fresh, + interactive=interactive, ) return @@ -1500,6 +1517,7 @@ def engineer(run_id, prompt, fresh, model, output_dir, no_interactive, as_json): model=model, output_dir=output_dir, is_fresh=fresh, + interactive=interactive, ) payload = _build_engineer_payload(result, run_id=run_id, prompt=prompt, fresh=fresh) except KeyboardInterrupt: @@ -1525,6 +1543,7 @@ def run_engineer( additional_instructions=None, is_fresh=False, output_mode="client", + interactive=True, ): """Shared logic for reverse engineering.""" if not har_path or not prompt: @@ -1566,6 +1585,7 @@ def run_engineer( is_fresh=is_fresh, output_language=output_language, output_mode=output_mode, + interactive=interactive, ) elif sdk == "copilot": result = run_reverse_engineering( @@ -1581,6 +1601,7 @@ def run_engineer( is_fresh=is_fresh, output_language=output_language, output_mode=output_mode, + interactive=interactive, ) else: result = run_reverse_engineering( @@ -1595,6 +1616,7 @@ def run_engineer( is_fresh=is_fresh, output_language=output_language, output_mode=output_mode, + interactive=interactive, ) if result: diff --git a/src/reverse_api/engineer.py b/src/reverse_api/engineer.py index b958f4f..2a11792 100644 --- a/src/reverse_api/engineer.py +++ b/src/reverse_api/engineer.py @@ -214,6 +214,7 @@ def run_reverse_engineering( is_fresh: bool = False, output_language: str = "python", output_mode: str = "client", + interactive: bool = True, ) -> dict[str, Any] | None: """Run reverse engineering with the specified SDK. @@ -245,6 +246,7 @@ def run_reverse_engineering( is_fresh=is_fresh, output_language=output_language, output_mode=output_mode, + interactive=interactive, ) elif sdk == "copilot": from .copilot_engineer import CopilotEngineer @@ -263,6 +265,7 @@ def run_reverse_engineering( output_language=output_language, output_mode=output_mode, copilot_model=copilot_model, + interactive=interactive, ) else: engineer = ClaudeEngineer( @@ -278,6 +281,7 @@ def run_reverse_engineering( is_fresh=is_fresh, output_language=output_language, output_mode=output_mode, + interactive=interactive, ) # Start sync before analysis diff --git a/tests/test_cli_followups.py b/tests/test_cli_followups.py index 08d96da..6939cfb 100644 --- a/tests/test_cli_followups.py +++ b/tests/test_cli_followups.py @@ -200,3 +200,120 @@ def test_root_help_mentions_json_and_no_interactive(self): assert result.exit_code == 0 assert "--json" in result.output assert "--no-interactive" in result.output + + +# --------------------------------------------------------------------------- +# Follow-up suppression in non-interactive mode +# --------------------------------------------------------------------------- + + +class TestFollowUpPromptSuppressed: + """Regression for chatgpt-codex-connector PR #65 review (P2): + + `BaseEngineer._prompt_follow_up()` blocks on `input(" > ")` after the + first generation. In --json / --no-interactive mode the conversation loop + must terminate immediately so stdin is never read; otherwise scripted + invocations like `engineer --json | jq` hang before emitting the + payload. + """ + + def test_prompt_follow_up_returns_none_when_not_interactive(self, tmp_path): + """The base engineer's follow-up prompt short-circuits when + interactive=False, without ever touching stdin.""" + import asyncio + from unittest.mock import MagicMock, patch + + from reverse_api.base_engineer import BaseEngineer + + class _Eng(BaseEngineer): + def _build_prompts(self): + return ("", "") + + async def analyze_and_generate(self): + return None + + har_path = tmp_path / "test.har" + har_path.touch() + + with patch("reverse_api.base_engineer.get_scripts_dir", return_value=tmp_path): + with patch("reverse_api.base_engineer.MessageStore"): + with patch("reverse_api.base_engineer.SessionManager") as mock_sm: + mock_sm.return_value.get_run.return_value = None + eng = _Eng( + run_id="test123", + har_path=har_path, + prompt="x", + interactive=False, + ) + + # Patch input() at the builtin level — the test fails loudly if it's + # ever reached, proving the short-circuit happens before stdin access. + def _explode(*_args, **_kwargs): + raise AssertionError("input() must not be called when interactive=False") + + with patch("builtins.input", side_effect=_explode): + result = asyncio.run(eng._prompt_follow_up()) + + assert result is None + + def test_engineer_default_interactive_true(self, tmp_path): + """Sanity: the default value of interactive on BaseEngineer is True + so existing REPL UX is unchanged.""" + from reverse_api.base_engineer import BaseEngineer + + class _Eng(BaseEngineer): + def _build_prompts(self): + return ("", "") + + async def analyze_and_generate(self): + return None + + har_path = tmp_path / "test.har" + har_path.touch() + + with patch("reverse_api.base_engineer.get_scripts_dir", return_value=tmp_path): + with patch("reverse_api.base_engineer.MessageStore"): + with patch("reverse_api.base_engineer.SessionManager") as mock_sm: + mock_sm.return_value.get_run.return_value = None + eng = _Eng(run_id="test123", har_path=har_path, prompt="x") + assert eng.interactive is True + + def test_engineer_command_threads_interactive_through_run_engineer(self): + """`engineer --json` must pass interactive=False to run_engineer so + BaseEngineer drops the follow-up loop in the SDK.""" + runner = CliRunner() + with patch("reverse_api.cli.run_engineer", return_value={"script_path": "/x.py"}) as mock_run: + result = runner.invoke(engineer, ["abc123", "--json"]) + assert result.exit_code == 0, result.output + assert mock_run.call_args.kwargs["interactive"] is False + + def test_engineer_command_no_interactive_threads_through(self): + """`engineer --no-interactive` (without --json) also disables follow-up.""" + runner = CliRunner() + with patch("reverse_api.cli.run_engineer", return_value={"script_path": "/x.py"}) as mock_run: + result = runner.invoke(engineer, ["abc123", "--no-interactive"]) + assert result.exit_code == 0, result.output + assert mock_run.call_args.kwargs["interactive"] is False + + def test_engineer_default_threads_interactive_true(self): + """Without flags, run_engineer is called with interactive=True + (preserves the current REPL UX).""" + runner = CliRunner() + with patch("reverse_api.cli.run_engineer", return_value={"script_path": "/x.py"}) as mock_run: + result = runner.invoke(engineer, ["abc123"]) + assert result.exit_code == 0, result.output + assert mock_run.call_args.kwargs["interactive"] is True + + def test_agent_command_threads_interactive_through_run_agent_capture(self): + """`agent --json` must propagate interactive=False so + ClaudeAutoEngineer drops the follow-up loop too (same code path).""" + from reverse_api.cli import agent as agent_cmd + + runner = CliRunner() + with patch( + "reverse_api.cli.run_agent_capture", + return_value={"run_id": "abc", "mode": "auto", "script_path": None, "usage": {}}, + ) as mock_run: + result = runner.invoke(agent_cmd, ["--json", "-p", "x"]) + assert result.exit_code == 0, result.output + assert mock_run.call_args.kwargs["interactive"] is False From af107417680f96fbc10460773efc298bc6a3b01c Mon Sep 17 00:00:00 2001 From: Kalil Bouzigues Date: Tue, 5 May 2026 21:07:11 +0000 Subject: [PATCH 6/8] chore(test): drop unused MagicMock import in test_cli_followups Caught by ruff F401 during end-to-end smoke testing of PR #65. --- tests/test_cli_followups.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_cli_followups.py b/tests/test_cli_followups.py index 6939cfb..92d18b9 100644 --- a/tests/test_cli_followups.py +++ b/tests/test_cli_followups.py @@ -221,7 +221,6 @@ def test_prompt_follow_up_returns_none_when_not_interactive(self, tmp_path): """The base engineer's follow-up prompt short-circuits when interactive=False, without ever touching stdin.""" import asyncio - from unittest.mock import MagicMock, patch from reverse_api.base_engineer import BaseEngineer From 3edf1519d8a29802cd645c7d53bb784de60ac1da Mon Sep 17 00:00:00 2001 From: Kalil Bouzigues Date: Tue, 5 May 2026 21:50:40 +0000 Subject: [PATCH 7/8] feat(cli): add --headless flag for manual and agent MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Required for scripted/CI/VPS use cases where no X server is available. Threads through the full chain to both ManualBrowser (Playwright launch) and the MCP-spawned browser (auto and chrome-mcp providers). Behavior: - `manual --headless`: Playwright launches Chromium with headless=True. Also drops `--no-sandbox` from `ignore_default_args` so Chromium can start on hosts without unprivileged user namespaces (Ubuntu 24.04+ with AppArmor restrictions). Headed mode keeps the existing stealth default (sandbox stripped for fingerprint realism). - `agent --headless` (auto provider): rae-playwright-mcp spawned with `--headless` so the MCP-controlled Chromium runs without UI. - `agent --headless` (chrome-mcp provider): drops `--autoConnect` from the MCP args because auto-connect requires a real headed Chrome with remote-debugging enabled. Adds `--headless` so the MCP spawns its own headless Chromium instead. The "auto-connect setup" preamble is replaced by a one-line note explaining the headless path. All three SDK paths (Claude/OpenCode/Copilot, both ClaudeAutoEngineer and the OpenCode/Copilot variants) accept and propagate `headless` via kwargs to their auto-engineer constructors. 8 new tests in test_cli_followups.py covering: click flag wiring on manual + agent (default false, --headless threads true), and the MCP arg construction for all four headed/headless × playwright/chrome-mcp combinations. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/reverse_api/auto_engineer.py | 94 +++++++++++++++++++++++--------- src/reverse_api/browser.py | 13 ++++- src/reverse_api/cli.py | 49 ++++++++++++++--- tests/test_cli_followups.py | 90 ++++++++++++++++++++++++++++++ 4 files changed, 208 insertions(+), 38 deletions(-) diff --git a/src/reverse_api/auto_engineer.py b/src/reverse_api/auto_engineer.py index 71311d1..05191f8 100644 --- a/src/reverse_api/auto_engineer.py +++ b/src/reverse_api/auto_engineer.py @@ -37,6 +37,9 @@ def __init__( **kwargs, ): """Initialize auto engineer with expected HAR path (created by MCP).""" + # `headless` is auto-engineer specific (controls the MCP-spawned browser), + # not BaseEngineer concept; pop before super() to avoid an unknown kwarg. + headless = kwargs.pop("headless", False) har_dir = get_har_dir(run_id, output_dir) har_path = har_dir / "recording.har" @@ -50,6 +53,7 @@ def __init__( ) self.mcp_run_id = run_id self.agent_provider = agent_provider + self.headless = headless def _build_auto_prompts(self) -> tuple[str, str]: """Build (system_prompt, user_message) for auto mode. @@ -112,22 +116,35 @@ async def _handle_tool_permission(self, tool_name: str, input_data: dict[str, An return PermissionResultAllow(updated_input=input_data) def _get_mcp_config(self) -> tuple[str, dict]: - """Return (server_name, mcp_config) based on agent_provider.""" + """Return (server_name, mcp_config) based on agent_provider. + + Auto-connect requires a real headed Chrome instance with a remote + debugging server, so it is dropped in headless mode and the MCP + spawns its own headless Chromium instead. + """ if self.agent_provider == "chrome-mcp": + args = ["chrome-devtools-mcp@latest", "--no-usage-statistics"] + if self.headless: + args.append("--headless") + else: + args.append("--autoConnect") return "chrome-devtools", { "type": "stdio", "command": "npx", - "args": ["chrome-devtools-mcp@latest", "--autoConnect", "--no-usage-statistics"], + "args": args, } + playwright_args = [ + "rae-playwright-mcp@latest", + "run-mcp-server", + "--run-id", + self.mcp_run_id, + ] + if self.headless: + playwright_args.append("--headless") return "playwright", { "type": "stdio", "command": "npx", - "args": [ - "rae-playwright-mcp@latest", - "run-mcp-server", - "--run-id", - self.mcp_run_id, - ], + "args": playwright_args, } async def analyze_and_generate(self) -> dict[str, Any] | None: @@ -210,6 +227,7 @@ class OpenCodeAutoEngineer(OpenCodeEngineer): def __init__(self, run_id: str, prompt: str, output_dir: str | None = None, agent_provider: str = "auto", **kwargs): """Initialize auto engineer with expected HAR path (created by MCP).""" + headless = kwargs.pop("headless", False) har_dir = get_har_dir(run_id, output_dir) har_path = har_dir / "recording.har" @@ -223,36 +241,50 @@ def __init__(self, run_id: str, prompt: str, output_dir: str | None = None, agen self.mcp_run_id = run_id self.agent_provider = agent_provider self.mcp_name = None + self.headless = headless def _get_active_prompts(self) -> tuple[str, str]: return ClaudeAutoEngineer._build_auto_prompts(self) def _get_opencode_mcp_config(self) -> dict: - """Return OpenCode MCP registration payload based on agent_provider.""" + """Return OpenCode MCP registration payload based on agent_provider. + + Auto-connect requires a headed Chrome with a remote debugging server, + so it is dropped in headless mode in favor of an MCP-spawned headless + Chromium. + """ if self.agent_provider == "chrome-mcp": self.mcp_name = f"chrome-devtools-{self._session_id}" + cmd = ["npx", "-y", "chrome-devtools-mcp@latest", "--no-usage-statistics"] + if self.headless: + cmd.append("--headless") + else: + cmd.append("--autoConnect") return { "name": self.mcp_name, "config": { "type": "local", - "command": ["npx", "-y", "chrome-devtools-mcp@latest", "--autoConnect", "--no-usage-statistics"], + "command": cmd, "enabled": True, "timeout": 30000, }, } self.mcp_name = f"playwright-{self._session_id}" + cmd = [ + "npx", + "-y", + "rae-playwright-mcp@latest", + "run-mcp-server", + "--run-id", + self.mcp_run_id, + ] + if self.headless: + cmd.append("--headless") return { "name": self.mcp_name, "config": { "type": "local", - "command": [ - "npx", - "-y", - "rae-playwright-mcp@latest", - "run-mcp-server", - "--run-id", - self.mcp_run_id, - ], + "command": cmd, "enabled": True, "timeout": 30000, }, @@ -450,6 +482,7 @@ def __init__( ): from .copilot_engineer import CopilotEngineer + headless = kwargs.pop("headless", False) har_dir = get_har_dir(run_id, output_dir) har_path = har_dir / "recording.har" @@ -463,6 +496,7 @@ def __init__( ) self.mcp_run_id = run_id self.agent_provider = agent_provider + self.headless = headless def start_sync(self) -> None: self._engineer.start_sync() @@ -526,25 +560,33 @@ def on_event(event: Any) -> None: if self.agent_provider == "chrome-mcp": mcp_server_name = "chrome-devtools" + chrome_args = ["-y", "chrome-devtools-mcp@latest", "--no-usage-statistics"] + if self.headless: + chrome_args.append("--headless") + else: + chrome_args.append("--autoConnect") mcp_config = { "type": "local", "command": "npx", - "args": ["-y", "chrome-devtools-mcp@latest", "--autoConnect", "--no-usage-statistics"], + "args": chrome_args, "tools": ["*"], "timeout": 30000, } else: mcp_server_name = "playwright" + pw_args = [ + "-y", + "rae-playwright-mcp@latest", + "run-mcp-server", + "--run-id", + self.mcp_run_id, + ] + if self.headless: + pw_args.append("--headless") mcp_config = { "type": "local", "command": "npx", - "args": [ - "-y", - "rae-playwright-mcp@latest", - "run-mcp-server", - "--run-id", - self.mcp_run_id, - ], + "args": pw_args, "tools": ["*"], "timeout": 30000, } diff --git a/src/reverse_api/browser.py b/src/reverse_api/browser.py index 8986147..eae7771 100644 --- a/src/reverse_api/browser.py +++ b/src/reverse_api/browser.py @@ -187,11 +187,13 @@ def __init__( prompt: str, output_dir: str | None = None, use_real_chrome: bool = True, # New option to use real Chrome + headless: bool = False, ): self.run_id = run_id self.prompt = prompt self.output_dir = output_dir self.use_real_chrome = use_real_chrome + self.headless = headless self.har_dir = get_har_dir(run_id, output_dir) self.har_path = self.har_dir / "recording.har" @@ -249,7 +251,7 @@ def _start_with_real_chrome(self, start_url: str | None = None) -> Path: self._context = self._playwright.chromium.launch_persistent_context( user_data_dir=str(temp_profile_dir), channel="chrome", # Use real Chrome binary - headless=False, + headless=self.headless, record_har_path=str(self.har_path), record_har_content="embed", no_viewport=True, @@ -326,10 +328,15 @@ def _start_with_stealth_chromium(self, start_url: str | None = None) -> Path: "--use-mock-keychain", ] + # In headless mode (scripted / CI / VPS), keep Playwright's default + # `--no-sandbox` so Chromium can launch on hosts without unprivileged + # user namespaces. In headed mode we still strip it for a more + # realistic browser fingerprint. + ignore = ["--enable-automation"] if self.headless else ["--enable-automation", "--no-sandbox"] self._browser = self._playwright.chromium.launch( - headless=False, + headless=self.headless, args=chrome_args, - ignore_default_args=["--enable-automation", "--no-sandbox"], + ignore_default_args=ignore, ) # Create context with HAR recording and realistic settings diff --git a/src/reverse_api/cli.py b/src/reverse_api/cli.py index ee2e093..6c73cfd 100644 --- a/src/reverse_api/cli.py +++ b/src/reverse_api/cli.py @@ -1035,9 +1035,14 @@ def handle_messages(run_id: str, mode_color=THEME_PRIMARY): default=None, ) @click.option("--output-dir", "-o", default=None, help="Custom output directory.") -def manual(prompt, url, reverse_engineer, model, output_dir): +@click.option( + "--headless", + is_flag=True, + help="Launch the browser in headless mode (no UI). Required on machines without an X server (CI / VPS).", +) +def manual(prompt, url, reverse_engineer, model, output_dir, headless): """Start a manual browser session.""" - run_manual_capture(prompt, url, reverse_engineer, model, output_dir) + run_manual_capture(prompt, url, reverse_engineer, model, output_dir, headless=headless) @main.command( @@ -1089,7 +1094,12 @@ def manual(prompt, url, reverse_engineer, model, output_dir): is_flag=True, help="Emit a single JSON result on stdout (logs go to stderr). Implies --no-interactive.", ) -def agent(prompt, url, model, output_dir, no_interactive, as_json): +@click.option( + "--headless", + is_flag=True, + help="Launch the MCP-controlled browser in headless mode (required on machines without an X server). For chrome-mcp this drops --autoConnect since auto-connect requires a headed Chrome instance.", +) +def agent(prompt, url, model, output_dir, no_interactive, as_json, headless): """Run autonomous agent browser session. Agent mode runs an integrated capture + reverse-engineering pipeline; if you @@ -1117,7 +1127,12 @@ def agent(prompt, url, model, output_dir, no_interactive, as_json): if not as_json: run_agent_capture( - prompt=prompt, url=url, model=model, output_dir=output_dir, interactive=interactive + prompt=prompt, + url=url, + model=model, + output_dir=output_dir, + interactive=interactive, + headless=headless, ) return @@ -1130,6 +1145,7 @@ def agent(prompt, url, model, output_dir, no_interactive, as_json): model=model, output_dir=output_dir, interactive=interactive, + headless=headless, ) payload = _build_agent_payload(result, prompt=prompt, url=url, output_dir=output_dir) except KeyboardInterrupt: @@ -1146,7 +1162,7 @@ def agent(prompt, url, model, output_dir, no_interactive, as_json): sys.exit(0 if payload["status"] == "ok" else 1) -def run_manual_capture(prompt=None, url=None, reverse_engineer=True, model=None, output_dir=None): +def run_manual_capture(prompt=None, url=None, reverse_engineer=True, model=None, output_dir=None, headless=False): """Shared logic for manual capture.""" output_dir = output_dir or config_manager.get("output_dir") @@ -1180,7 +1196,7 @@ def run_manual_capture(prompt=None, url=None, reverse_engineer=True, model=None, paths={"har_dir": str(get_har_dir(run_id, output_dir))}, ) - browser = ManualBrowser(run_id=run_id, prompt=prompt, output_dir=output_dir) + browser = ManualBrowser(run_id=run_id, prompt=prompt, output_dir=output_dir, headless=headless) har_path = browser.start(start_url=url) if reverse_engineer: @@ -1205,7 +1221,7 @@ def run_manual_capture(prompt=None, url=None, reverse_engineer=True, model=None, console.print(f" [dim]>[/dim] [dim]use 'reverse-api-engineer engineer {run_id}' to engineer later[/dim]\n") -def run_agent_capture(prompt=None, url=None, model=None, output_dir=None, interactive=True): +def run_agent_capture(prompt=None, url=None, model=None, output_dir=None, interactive=True, headless=False): """Shared logic for agent capture mode.""" output_dir = output_dir or config_manager.get("output_dir") @@ -1230,6 +1246,7 @@ def run_agent_capture(prompt=None, url=None, model=None, output_dir=None, intera output_dir=output_dir, agent_provider=agent_provider, interactive=interactive, + headless=headless, ) @@ -1283,7 +1300,13 @@ def run_collector(prompt=None, model=None, output_dir=None): def run_auto_capture( - prompt=None, url=None, model=None, output_dir=None, agent_provider="auto", interactive=True + prompt=None, + url=None, + model=None, + output_dir=None, + agent_provider="auto", + interactive=True, + headless=False, ): """Auto mode: LLM-driven browser automation + real-time reverse engineering.""" output_dir = output_dir or config_manager.get("output_dir") @@ -1301,7 +1324,7 @@ def run_auto_capture( url = options.get("url") model = options["model"] - if agent_provider == "chrome-mcp": + if agent_provider == "chrome-mcp" and not headless: console.print() console.print(" [dim]chrome devtools mcp (auto-connect)[/dim]") console.print(" [dim]controlling your real chrome browser[/dim]") @@ -1316,6 +1339,11 @@ def run_auto_capture( console.print(" [dim]warning: the agent will execute actions on your browser[/dim]") console.print(" [dim]avoid browsing sensitive sites during the session[/dim]") console.print() + elif agent_provider == "chrome-mcp" and headless: + console.print() + console.print(" [dim]chrome devtools mcp (headless)[/dim]") + console.print(" [dim]auto-connect disabled — mcp will spawn its own headless chrome[/dim]") + console.print() run_id = generate_run_id() timestamp = get_timestamp() @@ -1349,6 +1377,7 @@ def run_auto_capture( sdk=sdk, output_language=output_language, interactive=interactive, + headless=headless, ) elif sdk == "copilot": from .auto_engineer import CopilotAutoEngineer @@ -1363,6 +1392,7 @@ def run_auto_capture( sdk=sdk, output_language=output_language, interactive=interactive, + headless=headless, ) else: from .auto_engineer import ClaudeAutoEngineer @@ -1377,6 +1407,7 @@ def run_auto_capture( sdk=sdk, output_language=output_language, interactive=interactive, + headless=headless, ) # Start sync before analysis diff --git a/tests/test_cli_followups.py b/tests/test_cli_followups.py index 92d18b9..1c8b920 100644 --- a/tests/test_cli_followups.py +++ b/tests/test_cli_followups.py @@ -316,3 +316,93 @@ def test_agent_command_threads_interactive_through_run_agent_capture(self): result = runner.invoke(agent_cmd, ["--json", "-p", "x"]) assert result.exit_code == 0, result.output assert mock_run.call_args.kwargs["interactive"] is False + + +# --------------------------------------------------------------------------- +# --headless flag (CI / VPS / scripted) +# --------------------------------------------------------------------------- + + +class TestHeadlessFlag: + """`--headless` wires through to ManualBrowser / auto-engineer constructors + and adjusts MCP args (drops --autoConnect for chrome-mcp, adds --headless + for both chrome-mcp and playwright).""" + + def test_manual_command_threads_headless(self): + from reverse_api.cli import manual as manual_cmd + + runner = CliRunner() + with patch("reverse_api.cli.run_manual_capture") as mock_run: + result = runner.invoke(manual_cmd, ["-p", "x", "-u", "https://example.com", "--headless"]) + assert result.exit_code == 0, result.output + assert mock_run.call_args.kwargs["headless"] is True + + def test_manual_default_headless_false(self): + from reverse_api.cli import manual as manual_cmd + + runner = CliRunner() + with patch("reverse_api.cli.run_manual_capture") as mock_run: + result = runner.invoke(manual_cmd, ["-p", "x", "-u", "https://example.com"]) + assert result.exit_code == 0, result.output + assert mock_run.call_args.kwargs["headless"] is False + + def test_agent_command_threads_headless(self): + from reverse_api.cli import agent as agent_cmd + + runner = CliRunner() + with patch("reverse_api.cli.run_agent_capture", return_value={"run_id": "x", "mode": "auto"}) as mock_run: + result = runner.invoke(agent_cmd, ["-p", "x", "--json", "--headless"]) + assert result.exit_code == 0, result.output + assert mock_run.call_args.kwargs["headless"] is True + + def test_agent_default_headless_false(self): + from reverse_api.cli import agent as agent_cmd + + runner = CliRunner() + with patch("reverse_api.cli.run_agent_capture", return_value={"run_id": "x", "mode": "auto"}) as mock_run: + result = runner.invoke(agent_cmd, ["-p", "x", "--json"]) + assert result.exit_code == 0, result.output + assert mock_run.call_args.kwargs["headless"] is False + + +class TestHeadlessMcpConfig: + """ClaudeAutoEngineer._get_mcp_config arg construction for headed vs headless.""" + + def _make(self, agent_provider: str, headless: bool, tmp_path): + """Build a ClaudeAutoEngineer without invoking heavy init paths.""" + from reverse_api.auto_engineer import ClaudeAutoEngineer + + with patch("reverse_api.base_engineer.get_scripts_dir", return_value=tmp_path): + with patch("reverse_api.base_engineer.MessageStore"): + with patch("reverse_api.base_engineer.SessionManager"): + with patch("reverse_api.auto_engineer.get_har_dir", return_value=tmp_path): + eng = ClaudeAutoEngineer( + run_id="r", + prompt="x", + model="claude-sonnet-4-6", + agent_provider=agent_provider, + headless=headless, + ) + return eng + + def test_chrome_mcp_headed_uses_autoconnect(self, tmp_path): + eng = self._make("chrome-mcp", headless=False, tmp_path=tmp_path) + _, cfg = eng._get_mcp_config() + assert "--autoConnect" in cfg["args"] + assert "--headless" not in cfg["args"] + + def test_chrome_mcp_headless_drops_autoconnect_adds_headless(self, tmp_path): + eng = self._make("chrome-mcp", headless=True, tmp_path=tmp_path) + _, cfg = eng._get_mcp_config() + assert "--autoConnect" not in cfg["args"], "auto-connect cannot work without a real headed Chrome" + assert "--headless" in cfg["args"] + + def test_playwright_headless_adds_flag(self, tmp_path): + eng = self._make("auto", headless=True, tmp_path=tmp_path) + _, cfg = eng._get_mcp_config() + assert "--headless" in cfg["args"] + + def test_playwright_headed_no_headless_flag(self, tmp_path): + eng = self._make("auto", headless=False, tmp_path=tmp_path) + _, cfg = eng._get_mcp_config() + assert "--headless" not in cfg["args"] From 0e5ef06db77aa36d86245173948624202694a214 Mon Sep 17 00:00:00 2001 From: Kalil Bouzigues Date: Tue, 5 May 2026 21:59:53 +0000 Subject: [PATCH 8/8] =?UTF-8?q?revert(manual):=20drop=20--headless=20flag?= =?UTF-8?q?=20=E2=80=94=20manual=20mode=20requires=20a=20human?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per design intent, `manual` is interactive-only: a human navigates a real, visible browser, clicks through flows, submits forms, etc. A headless variant defeats the purpose; agents and CI should use `agent --json --headless` instead. - Remove --headless from the manual click command (passing it now fails with `No such option`) - Remove headless parameter from ManualBrowser; restore the original Playwright launch (always headed, always strips --no-sandbox for fingerprint realism) - Restore run_manual_capture signature - Update the manual command docstring to make it explicit that the mode requires a human and to redirect agents to `agent --headless` Tests: replace the two `manual --headless` wiring tests with checks that (a) `manual --headless` exits non-zero with `No such option` and (b) `manual --help` mentions both "human" and "agent" so an agent inspecting --help self-routes correctly. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/reverse_api/browser.py | 13 +++---------- src/reverse_api/cli.py | 26 ++++++++++++++++---------- tests/test_cli_followups.py | 32 ++++++++++++++++++-------------- 3 files changed, 37 insertions(+), 34 deletions(-) diff --git a/src/reverse_api/browser.py b/src/reverse_api/browser.py index eae7771..8986147 100644 --- a/src/reverse_api/browser.py +++ b/src/reverse_api/browser.py @@ -187,13 +187,11 @@ def __init__( prompt: str, output_dir: str | None = None, use_real_chrome: bool = True, # New option to use real Chrome - headless: bool = False, ): self.run_id = run_id self.prompt = prompt self.output_dir = output_dir self.use_real_chrome = use_real_chrome - self.headless = headless self.har_dir = get_har_dir(run_id, output_dir) self.har_path = self.har_dir / "recording.har" @@ -251,7 +249,7 @@ def _start_with_real_chrome(self, start_url: str | None = None) -> Path: self._context = self._playwright.chromium.launch_persistent_context( user_data_dir=str(temp_profile_dir), channel="chrome", # Use real Chrome binary - headless=self.headless, + headless=False, record_har_path=str(self.har_path), record_har_content="embed", no_viewport=True, @@ -328,15 +326,10 @@ def _start_with_stealth_chromium(self, start_url: str | None = None) -> Path: "--use-mock-keychain", ] - # In headless mode (scripted / CI / VPS), keep Playwright's default - # `--no-sandbox` so Chromium can launch on hosts without unprivileged - # user namespaces. In headed mode we still strip it for a more - # realistic browser fingerprint. - ignore = ["--enable-automation"] if self.headless else ["--enable-automation", "--no-sandbox"] self._browser = self._playwright.chromium.launch( - headless=self.headless, + headless=False, args=chrome_args, - ignore_default_args=ignore, + ignore_default_args=["--enable-automation", "--no-sandbox"], ) # Create context with HAR recording and realistic settings diff --git a/src/reverse_api/cli.py b/src/reverse_api/cli.py index 6c73cfd..be94944 100644 --- a/src/reverse_api/cli.py +++ b/src/reverse_api/cli.py @@ -1035,14 +1035,20 @@ def handle_messages(run_id: str, mode_color=THEME_PRIMARY): default=None, ) @click.option("--output-dir", "-o", default=None, help="Custom output directory.") -@click.option( - "--headless", - is_flag=True, - help="Launch the browser in headless mode (no UI). Required on machines without an X server (CI / VPS).", -) -def manual(prompt, url, reverse_engineer, model, output_dir, headless): - """Start a manual browser session.""" - run_manual_capture(prompt, url, reverse_engineer, model, output_dir, headless=headless) +def manual(prompt, url, reverse_engineer, model, output_dir): + """Start a manual browser session. + + \b + Interactive only — this mode requires a human in front of a real, + visible browser to navigate, click, and submit forms. It cannot be + driven by an agent or run on a headless machine. There is no + --headless / --json / --no-interactive flag here on purpose. + + For scripted / agent / CI use cases, use `agent --json --headless` + instead, which runs an autonomous AI-driven capture without needing + a human or an X server. + """ + run_manual_capture(prompt, url, reverse_engineer, model, output_dir) @main.command( @@ -1162,7 +1168,7 @@ def agent(prompt, url, model, output_dir, no_interactive, as_json, headless): sys.exit(0 if payload["status"] == "ok" else 1) -def run_manual_capture(prompt=None, url=None, reverse_engineer=True, model=None, output_dir=None, headless=False): +def run_manual_capture(prompt=None, url=None, reverse_engineer=True, model=None, output_dir=None): """Shared logic for manual capture.""" output_dir = output_dir or config_manager.get("output_dir") @@ -1196,7 +1202,7 @@ def run_manual_capture(prompt=None, url=None, reverse_engineer=True, model=None, paths={"har_dir": str(get_har_dir(run_id, output_dir))}, ) - browser = ManualBrowser(run_id=run_id, prompt=prompt, output_dir=output_dir, headless=headless) + browser = ManualBrowser(run_id=run_id, prompt=prompt, output_dir=output_dir) har_path = browser.start(start_url=url) if reverse_engineer: diff --git a/tests/test_cli_followups.py b/tests/test_cli_followups.py index 1c8b920..c4d99af 100644 --- a/tests/test_cli_followups.py +++ b/tests/test_cli_followups.py @@ -324,27 +324,31 @@ def test_agent_command_threads_interactive_through_run_agent_capture(self): class TestHeadlessFlag: - """`--headless` wires through to ManualBrowser / auto-engineer constructors - and adjusts MCP args (drops --autoConnect for chrome-mcp, adds --headless - for both chrome-mcp and playwright).""" + """`--headless` wires through to auto-engineer constructors (agent only — + manual mode is intentionally human-only and rejects --headless).""" - def test_manual_command_threads_headless(self): + def test_manual_rejects_headless_flag(self): + """`manual --headless` must fail: manual mode requires a human and a + visible browser; agents should use `agent --headless` instead.""" from reverse_api.cli import manual as manual_cmd runner = CliRunner() - with patch("reverse_api.cli.run_manual_capture") as mock_run: - result = runner.invoke(manual_cmd, ["-p", "x", "-u", "https://example.com", "--headless"]) - assert result.exit_code == 0, result.output - assert mock_run.call_args.kwargs["headless"] is True - - def test_manual_default_headless_false(self): + result = runner.invoke(manual_cmd, ["-p", "x", "-u", "https://example.com", "--headless"]) + assert result.exit_code != 0 + assert "no such option" in result.output.lower() or "--headless" in result.output + + def test_manual_help_mentions_human_only(self): + """`manual --help` must make it explicit that the mode requires a + human and is not scriptable (so agents inspecting --help self-route + to `agent` instead).""" from reverse_api.cli import manual as manual_cmd runner = CliRunner() - with patch("reverse_api.cli.run_manual_capture") as mock_run: - result = runner.invoke(manual_cmd, ["-p", "x", "-u", "https://example.com"]) - assert result.exit_code == 0, result.output - assert mock_run.call_args.kwargs["headless"] is False + result = runner.invoke(manual_cmd, ["--help"]) + assert result.exit_code == 0 + out = result.output.lower() + assert "human" in out + assert "agent" in out # tells the agent where to go instead def test_agent_command_threads_headless(self): from reverse_api.cli import agent as agent_cmd