diff --git a/src/bub/agent/core.py b/src/bub/agent/core.py index dce12654..fc5bb55f 100644 --- a/src/bub/agent/core.py +++ b/src/bub/agent/core.py @@ -20,12 +20,12 @@ HUMAN_PREVIEW_MAX_LEN = 240 HUMAN_PREVIEW_TRUNCATE_LEN = 237 VERIFICATION_TOOL_NAMES = { - "fs.read", - "fs.grep", - "fs.glob", - "tape.search", - "tape.info", - "tape.anchors", + "fs_read", + "fs_grep", + "fs_glob", + "tape_search", + "tape_info", + "tape_anchors", "status", } BASH_VERIFICATION_TOKENS = ( diff --git a/src/bub/cli/app.py b/src/bub/cli/app.py index 953f002c..9c027c83 100644 --- a/src/bub/cli/app.py +++ b/src/bub/cli/app.py @@ -78,11 +78,28 @@ def run( model: str | None = None, max_tokens: int | None = None, ) -> None: - """Run a single command with Bub.""" - # Keep signature for CLI compatibility while run mode is intentionally disabled. - _ = (command, workspace, model, max_tokens) - renderer.error("bub run is not supported in async mode. Use bub chat.") - raise typer.Exit(1) + """Run a single request-response turn with Bub.""" + try: + workspace_path = workspace or Path.cwd() + runtime = _build_runtime(workspace_path, model, max_tokens) + _run_once(runtime, command) + except Exception as exc: + renderer.error(f"Failed to run command: {exc!s}") + raise typer.Exit(1) from exc + + +def _run_once(runtime: Runtime, command: str) -> None: + route = runtime.session.handle_input(command, origin="human") + if route.exit_requested or route.done_requested or not route.enter_agent: + return + + response = runtime.session.agent_respond( + on_event=lambda event: runtime.tape.record_tool_event(event.kind, event.payload) + ) + assistant_result = runtime.session.interpret_assistant(response) + if assistant_result.visible_text: + runtime.tape.record_assistant_message(assistant_result.visible_text) + renderer.info(assistant_result.visible_text) if __name__ == "__main__": diff --git a/src/bub/config.py b/src/bub/config.py index a512f515..fb427e97 100644 --- a/src/bub/config.py +++ b/src/bub/config.py @@ -31,9 +31,9 @@ class Settings(BaseSettings): default=( "You are Bub, a concise coding assistant.\n" "Use tools when they help you answer or modify the project.\n" - "Available tools: fs.read, fs.write, fs.edit, fs.glob, fs.grep, bash, tape.search, tape.anchors, " - "tape.info, tape.reset, handoff, status, help, tools.\n" - "Use exact tool names as listed above; do not invent aliases.\n" + "Available tools: fs_read, fs_write, fs_edit, fs_glob, fs_grep, bash, tape_search, tape_anchors, " + "tape_info, tape_reset, handoff, status, help, tools.\n" + "Use exact tool names as listed above for tool calling.\n" "Tool observations are returned as JSON with keys: tool, signature, category, status, repeat, " "machine_readable, human_preview.\n" "If a tool observation status is stagnant, stop repeating that call and provide a final answer.\n" diff --git a/src/bub/tools/__init__.py b/src/bub/tools/__init__.py index 8a344854..a9828751 100644 --- a/src/bub/tools/__init__.py +++ b/src/bub/tools/__init__.py @@ -1,5 +1,7 @@ """Tools package for Bub.""" +from dataclasses import replace + from republic import Tool from ..agent.context import Context @@ -9,7 +11,8 @@ def build_agent_tools(context: Context, catalog: ToolCatalog | None = None) -> list[Tool]: """Build the tool set for the agent runtime.""" catalog = catalog or build_tool_catalog() - return catalog.build_tools(context, audience="agent") + tools = catalog.build_tools(context, audience="agent") + return [replace(tool, name=tool.name.replace(".", "_")) for tool in tools] def build_cli_tools(context: Context, catalog: ToolCatalog | None = None) -> list[Tool]: diff --git a/src/bub/tools/catalog.py b/src/bub/tools/catalog.py index 9e3fab7d..8ecd898e 100644 --- a/src/bub/tools/catalog.py +++ b/src/bub/tools/catalog.py @@ -108,10 +108,11 @@ def render_help(self) -> str: return "\n".join(lines).strip() def render_tools(self) -> str: - specs = sorted(self.agent_specs(), key=lambda spec: spec.name) + specs = self.agent_specs() if not specs: return "(no tools)" - return "\n".join(spec.name for spec in specs) + tool_names = sorted(spec.name.replace(".", "_") for spec in specs) + return "\n".join(tool_names) def render_bub_notice(self, args: list[str]) -> str: if args and args[0] == "chat": diff --git a/tests/test_agent_contract.py b/tests/test_agent_contract.py index a4f59af0..376d821d 100644 --- a/tests/test_agent_contract.py +++ b/tests/test_agent_contract.py @@ -81,13 +81,13 @@ def test_tool_result_payload_is_structured_json(tmp_path, monkeypatch) -> None: monkeypatch.setenv("BUB_MODEL", "openai:gpt-4o-mini") monkeypatch.setattr("bub.agent.core.LLM", _FakeLLM) _FakeLLM.queued_responses = [ - _tool_call_response(name="fs.read", arguments='{"path":"calc.py"}'), + _tool_call_response(name="fs_read", arguments='{"path":"calc.py"}'), _text_response("done"), ] _FakeLLM.queued_outputs = ["file-content"] _FakeLLM.forced_text_when_no_tools = "forced final answer" - agent = Agent(context=Context(tmp_path), tools=[SimpleNamespace(name="fs.read")]) + agent = Agent(context=Context(tmp_path), tools=[SimpleNamespace(name="fs_read")]) events: list[ToolEvent] = [] result = agent.respond([{"role": "user", "content": "check file"}], on_event=_capture_events(events)) @@ -98,7 +98,7 @@ def test_tool_result_payload_is_structured_json(tmp_path, monkeypatch) -> None: payload_text = tool_result_events[0].payload["result"][0]["content"] payload = json.loads(payload_text) - assert payload["tool"] == "fs.read" + assert payload["tool"] == "fs_read" assert payload["category"] == "verification" assert payload["status"] == "ok" assert payload["repeat"] is False @@ -106,20 +106,20 @@ def test_tool_result_payload_is_structured_json(tmp_path, monkeypatch) -> None: assert machine["format"] == "text" assert machine["value"] == "file-content" assert payload["human_preview"] == "file-content" - assert payload["signature"] == 'fs.read:{"path":"calc.py"}' + assert payload["signature"] == 'fs_read:{"path":"calc.py"}' def test_agent_recovers_when_observations_are_stagnant(tmp_path, monkeypatch) -> None: monkeypatch.setenv("BUB_MODEL", "openai:gpt-4o-mini") monkeypatch.setattr("bub.agent.core.LLM", _FakeLLM) _FakeLLM.queued_responses = [ - _tool_call_response(name="fs.read", arguments='{"path":"calc.py"}'), - _tool_call_response(name="fs.read", arguments='{"path":"calc.py"}'), + _tool_call_response(name="fs_read", arguments='{"path":"calc.py"}'), + _tool_call_response(name="fs_read", arguments='{"path":"calc.py"}'), ] _FakeLLM.queued_outputs = ["same-output", "same-output"] _FakeLLM.forced_text_when_no_tools = "final answer without more tools" - agent = Agent(context=Context(tmp_path), tools=[SimpleNamespace(name="fs.read")]) + agent = Agent(context=Context(tmp_path), tools=[SimpleNamespace(name="fs_read")]) events: list[ToolEvent] = [] result = agent.respond([{"role": "user", "content": "fix and verify"}], on_event=_capture_events(events)) @@ -141,7 +141,7 @@ def test_agent_does_not_hard_block_completion_without_verification_evidence(tmp_ _FakeLLM.queued_outputs = [] _FakeLLM.forced_text_when_no_tools = "forced final answer" - agent = Agent(context=Context(tmp_path), tools=[SimpleNamespace(name="fs.read")]) + agent = Agent(context=Context(tmp_path), tools=[SimpleNamespace(name="fs_read")]) result = agent.respond([{"role": "user", "content": "请验证完成结果后再回复完成"}]) @@ -152,13 +152,13 @@ def test_agent_allows_completion_with_verification_evidence(tmp_path, monkeypatc monkeypatch.setenv("BUB_MODEL", "openai:gpt-4o-mini") monkeypatch.setattr("bub.agent.core.LLM", _FakeLLM) _FakeLLM.queued_responses = [ - _tool_call_response(name="fs.read", arguments='{"path":"out.txt"}'), + _tool_call_response(name="fs_read", arguments='{"path":"out.txt"}'), _text_response("verified completion"), ] _FakeLLM.queued_outputs = ["ok-content"] _FakeLLM.forced_text_when_no_tools = "forced final answer" - agent = Agent(context=Context(tmp_path), tools=[SimpleNamespace(name="fs.read")]) + agent = Agent(context=Context(tmp_path), tools=[SimpleNamespace(name="fs_read")]) result = agent.respond([{"role": "user", "content": "please verify completion and then conclude"}]) diff --git a/tests/test_bub.py b/tests/test_bub.py index ff9f94a0..91515e52 100644 --- a/tests/test_bub.py +++ b/tests/test_bub.py @@ -38,45 +38,45 @@ def test_default_tool_names(self, tmp_path, monkeypatch): tool_map = self._tool_map(tmp_path, monkeypatch) assert set(tool_map.keys()) == { "bash", - "fs.edit", - "fs.glob", - "fs.grep", - "fs.read", - "fs.write", + "fs_edit", + "fs_glob", + "fs_grep", + "fs_read", + "fs_write", "handoff", "help", "status", - "tape.anchors", - "tape.info", - "tape.reset", - "tape.search", + "tape_anchors", + "tape_info", + "tape_reset", + "tape_search", "tools", } def test_write_and_read(self, tmp_path, monkeypatch): """Test write then read tool.""" tool_map = self._tool_map(tmp_path, monkeypatch) - result = tool_map["fs.write"].run(path="test.txt", content="line1\nline2\nline3\n") + result = tool_map["fs_write"].run(path="test.txt", content="line1\nline2\nline3\n") assert result == "ok" - read_result = tool_map["fs.read"].run(path="test.txt", offset=1, limit=1) + read_result = tool_map["fs_read"].run(path="test.txt", offset=1, limit=1) assert "2| line2" in read_result def test_edit_tool(self, tmp_path, monkeypatch): """Test edit tool replacement.""" tool_map = self._tool_map(tmp_path, monkeypatch) - tool_map["fs.write"].run(path="edit.txt", content="hello world") + tool_map["fs_write"].run(path="edit.txt", content="hello world") - result = tool_map["fs.edit"].run(path="edit.txt", old="world", new="bub") + result = tool_map["fs_edit"].run(path="edit.txt", old="world", new="bub") assert result == "ok" assert (tmp_path / "edit.txt").read_text() == "hello bub" def test_edit_requires_unique(self, tmp_path, monkeypatch): """Test edit tool requires unique match unless all=true.""" tool_map = self._tool_map(tmp_path, monkeypatch) - tool_map["fs.write"].run(path="dup.txt", content="a a a") + tool_map["fs_write"].run(path="dup.txt", content="a a a") - result = tool_map["fs.edit"].run(path="dup.txt", old="a", new="b") + result = tool_map["fs_edit"].run(path="dup.txt", old="a", new="b") assert result.startswith("error: old_string appears") def test_glob_tool(self, tmp_path, monkeypatch): @@ -85,7 +85,7 @@ def test_glob_tool(self, tmp_path, monkeypatch): (tmp_path / "a.txt").write_text("one") (tmp_path / "b.md").write_text("two") - result = tool_map["fs.glob"].run(path=".", pattern="*.txt") + result = tool_map["fs_glob"].run(path=".", pattern="*.txt") assert "a.txt" in result def test_grep_tool(self, tmp_path, monkeypatch): @@ -93,7 +93,7 @@ def test_grep_tool(self, tmp_path, monkeypatch): tool_map = self._tool_map(tmp_path, monkeypatch) (tmp_path / "hello.txt").write_text("hello\nworld\n") - result = tool_map["fs.grep"].run(pattern="hello", path=".") + result = tool_map["fs_grep"].run(pattern="hello", path=".") assert "hello.txt:1:hello" in result def test_bash_tool(self, tmp_path, monkeypatch): diff --git a/tests/test_cli_app.py b/tests/test_cli_app.py new file mode 100644 index 00000000..5496a1cc --- /dev/null +++ b/tests/test_cli_app.py @@ -0,0 +1,166 @@ +"""Tests for CLI app commands.""" + +from __future__ import annotations + +import importlib +from dataclasses import dataclass, field +from pathlib import Path + +from bub.runtime.router import AssistantResult, RouteResult + +cli_app = importlib.import_module("bub.cli.app") + + +@dataclass +class _FakeTape: + tool_events: list[tuple[str, dict]] = field(default_factory=list) + assistant_messages: list[str] = field(default_factory=list) + + def record_tool_event(self, kind: str, payload: dict) -> None: + self.tool_events.append((kind, payload)) + + def record_assistant_message(self, content: str) -> None: + self.assistant_messages.append(content) + + +@dataclass +class _FakeSession: + route_result: RouteResult + assistant_result: AssistantResult + + handle_calls: int = 0 + respond_calls: int = 0 + interpret_calls: int = 0 + + def handle_input(self, raw: str, *, origin: str = "human") -> RouteResult: + _ = (raw, origin) + self.handle_calls += 1 + return self.route_result + + def agent_respond(self, on_event=None) -> str: + self.respond_calls += 1 + if on_event is not None: + on_event(type("Event", (), {"kind": "tool_call", "payload": {"name": "fs_read"}})()) + return "raw assistant" + + def interpret_assistant(self, raw: str) -> AssistantResult: + _ = raw + self.interpret_calls += 1 + return self.assistant_result + + +@dataclass +class _FakeRuntime: + session: _FakeSession + tape: _FakeTape + + +@dataclass +class _FakeRenderer: + infos: list[str] = field(default_factory=list) + errors: list[str] = field(default_factory=list) + + def info(self, message: str) -> None: + self.infos.append(message) + + def error(self, message: str) -> None: + self.errors.append(message) + + +def test_run_performs_single_request_response(monkeypatch, tmp_path: Path) -> None: + session = _FakeSession( + route_result=RouteResult(agent_input="request", enter_agent=True, exit_requested=False, done_requested=False), + assistant_result=AssistantResult( + followup_input='\\n...\\n', + exit_requested=False, + done_requested=False, + visible_text="final response", + ), + ) + tape = _FakeTape() + runtime = _FakeRuntime(session=session, tape=tape) + renderer = _FakeRenderer() + + monkeypatch.setattr(cli_app, "renderer", renderer) + monkeypatch.setattr(cli_app, "_build_runtime", lambda workspace_path, model, max_tokens: runtime) + + cli_app.run("say hi", workspace=tmp_path) + + assert session.handle_calls == 1 + assert session.respond_calls == 1 + assert session.interpret_calls == 1 + assert renderer.infos == ["final response"] + assert tape.assistant_messages == ["final response"] + assert tape.tool_events == [("tool_call", {"name": "fs_read"})] + + +def test_run_skips_agent_when_route_does_not_enter_agent(monkeypatch, tmp_path: Path) -> None: + session = _FakeSession( + route_result=RouteResult(agent_input="", enter_agent=False, exit_requested=False, done_requested=False), + assistant_result=AssistantResult( + followup_input="", + exit_requested=False, + done_requested=False, + visible_text="should not appear", + ), + ) + runtime = _FakeRuntime(session=session, tape=_FakeTape()) + renderer = _FakeRenderer() + + monkeypatch.setattr(cli_app, "renderer", renderer) + monkeypatch.setattr(cli_app, "_build_runtime", lambda workspace_path, model, max_tokens: runtime) + + cli_app.run("$tape.info", workspace=tmp_path) + + assert session.handle_calls == 1 + assert session.respond_calls == 0 + assert session.interpret_calls == 0 + assert renderer.infos == [] + + +def test_run_skips_agent_when_done_requested(monkeypatch, tmp_path: Path) -> None: + session = _FakeSession( + route_result=RouteResult(agent_input="", enter_agent=True, exit_requested=False, done_requested=True), + assistant_result=AssistantResult( + followup_input="", + exit_requested=False, + done_requested=False, + visible_text="should not appear", + ), + ) + runtime = _FakeRuntime(session=session, tape=_FakeTape()) + renderer = _FakeRenderer() + + monkeypatch.setattr(cli_app, "renderer", renderer) + monkeypatch.setattr(cli_app, "_build_runtime", lambda workspace_path, model, max_tokens: runtime) + + cli_app.run("$done", workspace=tmp_path) + + assert session.handle_calls == 1 + assert session.respond_calls == 0 + assert session.interpret_calls == 0 + assert renderer.infos == [] + + +def test_run_skips_agent_when_exit_requested(monkeypatch, tmp_path: Path) -> None: + session = _FakeSession( + route_result=RouteResult(agent_input="", enter_agent=True, exit_requested=True, done_requested=False), + assistant_result=AssistantResult( + followup_input="", + exit_requested=False, + done_requested=False, + visible_text="should not appear", + ), + ) + runtime = _FakeRuntime(session=session, tape=_FakeTape()) + renderer = _FakeRenderer() + + monkeypatch.setattr(cli_app, "renderer", renderer) + monkeypatch.setattr(cli_app, "_build_runtime", lambda workspace_path, model, max_tokens: runtime) + + cli_app.run("$quit", workspace=tmp_path) + + assert session.handle_calls == 1 + assert session.respond_calls == 0 + assert session.interpret_calls == 0 + assert renderer.infos == []