diff --git a/anton/chat.py b/anton/chat.py
index 94e65ad6..21a04c28 100644
--- a/anton/chat.py
+++ b/anton/chat.py
@@ -92,6 +92,7 @@
 from prompt_toolkit.styles import Style as PTStyle
 from rich.prompt import Prompt
 from anton.memory.manage import MemoryManage, MEMORY_COMMANDS
+from anton.commands.goal import parse_goal_args, run_goal_loop
 
 if TYPE_CHECKING:
     from rich.console import Console
@@ -1528,6 +1529,17 @@ def _bottom_toolbar():
                 elif cmd == "/explain":
                     handle_explain(console, settings.workspace_path)
                     continue
+                elif cmd == "/goal":
+                    # parse_goal_args returns an empty objective for blank or
+                    # flag-only input, so one check covers every usage error.
+                    _raw_goal_arg = parts[1] if len(parts) > 1 else ""
+                    goal_objective, goal_max_turns = parse_goal_args(_raw_goal_arg)
+                    if not goal_objective:
+                        console.print("[anton.warning]Usage: /goal \"objective\" [--turns N][/]")
+                        console.print()
+                        continue
+                    await run_goal_loop(console, session, display, goal_objective, goal_max_turns)
+                    continue
                 elif cmd == "/help":
                     print_slash_help(console)
                     continue
diff --git a/anton/commands/goal.py b/anton/commands/goal.py
new file mode 100644
index 00000000..73d34213
--- /dev/null
+++ b/anton/commands/goal.py
@@ -0,0 +1,224 @@
+"""Handler for the /goal autonomous execution command."""
+
+from __future__ import annotations
+
+import re
+from dataclasses import dataclass
+from typing import TYPE_CHECKING
+
+from anton.core.llm.provider import (
+    StreamContextCompacted,
+    StreamTaskProgress,
+    StreamTextDelta,
+    StreamToolResult,
+    StreamToolUseEnd,
+    StreamToolUseDelta,
+    StreamToolUseStart,
+)
+from anton.core.tools.tool_defs import ToolDef
+from anton.prompts import GOAL_CONTINUATION_PROMPT
+
+if TYPE_CHECKING:
+    from rich.console import Console
+    from anton.chat_ui import StreamDisplay
+    from anton.core.session import ChatSession
+
+# Optional "--turns N" flag accepted by /goal.
+_TURNS_FLAG_RE = re.compile(r"--turns\s+(\d+)")
+
+# Abort the goal after this many turns fail back to back.
+_MAX_CONSECUTIVE_FAILURES = 3
+
+
+def parse_goal_args(raw: str, default_turns: int = 50) -> tuple[str, int]:
+    """Parse '/goal' argument string into (objective, max_turns).
+
+    Normalises embedded newlines (terminal line-wrap artefacts) before
+    extracting the optional --turns flag, so inputs like
+    ``"my goal" --tur\\nns 20`` are handled correctly.
+
+    Args:
+        raw: Everything the user typed after ``/goal``.
+        default_turns: Turn budget used when no ``--turns N`` flag is present.
+
+    Returns:
+        ``(objective, max_turns)``. ``objective`` is empty when only the
+        flag, or nothing at all, was supplied. An explicit ``--turns 0`` is
+        raised to 1 so the loop always gets at least one turn.
+    """
+    arg = raw.replace("\r", "").replace("\n", "").strip()
+    turns_match = _TURNS_FLAG_RE.search(arg)
+    # Clamp to >= 1: a zero budget would end the session before any work.
+    max_turns = max(1, int(turns_match.group(1))) if turns_match else default_turns
+    objective = _TURNS_FLAG_RE.sub("", arg).strip().strip('"').strip("'").strip()
+    return objective, max_turns
+
+
+async def run_goal_loop(
+    console: "Console",
+    session: "ChatSession",
+    display: "StreamDisplay",
+    objective: str,
+    max_turns: int,
+) -> None:
+    """Run autonomous goal-directed turns until complete, exhausted, or interrupted.
+
+    Registers a temporary ``mark_goal_complete`` tool, then sends one
+    continuation prompt per turn until the model calls that tool, the turn
+    budget runs out, too many consecutive turns fail, or the user
+    interrupts. On every exit path the tool is unregistered and synthetic
+    messages are appended to the history to mark the end of the session.
+
+    Args:
+        console: Console used for status output.
+        session: Chat session whose turns, tool registry and history are used.
+        display: Streaming display that renders each turn's events.
+        objective: Goal text substituted into the continuation prompt.
+        max_turns: Maximum number of turns to attempt.
+    """
+    from anton.chat_ui import EscapeWatcher
+
+    @dataclass
+    class _GoalState:
+        completed: bool = False
+        completion_reason: str = ""
+
+    goal_state = _GoalState()
+
+    async def _handle_mark_goal_complete(_session, tc_input: dict) -> str:
+        reason = tc_input.get("reason", "Goal completed.")
+        goal_state.completed = True
+        goal_state.completion_reason = reason
+        return f"Goal marked as complete: {reason}"
+
+    mark_goal_complete_tool = ToolDef(
+        name="mark_goal_complete",
+        description=(
+            "Signal that the goal has been fully achieved. Call this ONLY when you have "
+            "concrete proof that every requirement implied by the goal is satisfied. "
+            "Do not call this speculatively — treat uncertain evidence as 'not yet done'."
+        ),
+        input_schema={
+            "type": "object",
+            "properties": {
+                "reason": {
+                    "type": "string",
+                    "description": "One-sentence summary of what was accomplished and why the goal is complete.",
+                },
+            },
+            "required": ["reason"],
+        },
+        handler=_handle_mark_goal_complete,
+    )
+
+    # Ensure the core tools are built, then register the goal tool on top.
+    session._build_tools()
+    session.tool_registry.register_tool(mark_goal_complete_tool)
+
+    console.print()
+    console.print(f"[anton.cyan]Goal:[/] {objective}")
+    console.print(f"[anton.muted]Running up to {max_turns} turns autonomously. Ctrl+C to stop.[/]")
+    console.print()
+
+    consecutive_failures = 0
+    completed_turn = 0
+    try:
+        for turn in range(1, max_turns + 1):
+            if goal_state.completed:
+                break
+            completed_turn = turn
+
+            continuation_msg = GOAL_CONTINUATION_PROMPT.format(
+                objective=objective,
+                turn=turn,
+                max_turns=max_turns,
+            )
+
+            console.print(f"[anton.muted][goal {turn}/{max_turns}] working...[/]")
+            display.start()
+            session._cancel_event.clear()
+
+            try:
+                async with EscapeWatcher(on_cancel=display.show_cancelling) as esc:
+                    session._escape_watcher = esc
+                    async for event in session.turn_stream(continuation_msg):
+                        if esc.cancelled.is_set():
+                            session._cancel_event.set()
+                            raise KeyboardInterrupt
+                        if isinstance(event, StreamTextDelta):
+                            display.append_text(event.text)
+                        elif isinstance(event, StreamToolResult):
+                            if event.name == "scratchpad" and event.action == "dump":
+                                display.show_tool_result(event.content)
+                        elif isinstance(event, StreamToolUseStart):
+                            display.on_tool_use_start(event.id, event.name)
+                        elif isinstance(event, StreamToolUseDelta):
+                            display.on_tool_use_delta(event.id, event.json_delta)
+                        elif isinstance(event, StreamToolUseEnd):
+                            display.on_tool_use_end(event.id)
+                        elif isinstance(event, StreamTaskProgress):
+                            display.update_progress(event.phase, event.message, event.eta_seconds)
+                        elif isinstance(event, StreamContextCompacted):
+                            display.show_context_compacted(event.message)
+
+                display.finish()
+                consecutive_failures = 0
+                if goal_state.completed:
+                    break
+
+            except KeyboardInterrupt:
+                display.abort()
+                raise
+            except Exception as exc:
+                display.abort()
+                consecutive_failures += 1
+                console.print(f"\n[anton.warning][goal {turn}/{max_turns}] Turn failed: {exc}[/]")
+                # Repair before deciding whether to abort: the ``finally``
+                # block appends to the history on every exit path, so it
+                # must not be left mid-turn when the loop gives up.
+                session.repair_history()
+                if consecutive_failures >= _MAX_CONSECUTIVE_FAILURES:
+                    console.print(f"[anton.error]{_MAX_CONSECUTIVE_FAILURES} consecutive failures. Aborting goal.[/]")
+                    break
+                continue
+
+        console.print()
+        if goal_state.completed:
+            console.print(f"[anton.cyan]Goal complete[/] after {completed_turn} turn(s): {goal_state.completion_reason}")
+        else:
+            console.print(f"[anton.warning]Goal not completed after {completed_turn} turn(s).[/]")
+        console.print()
+
+    except KeyboardInterrupt:
+        session.repair_history()
+        console.print()
+        console.print(f"[anton.muted]Goal interrupted after {completed_turn} turn(s).[/]")
+        console.print()
+
+    finally:
+        session.tool_registry.unregister_tool("mark_goal_complete")
+        # Anchor the model back to normal chat mode. Up to three synthetic messages:
+        #
+        # 1. If repair_history() ran, history ends with user:[tool_results].
+        #    Merging a text block into that is a malformed mixed-type
+        #    message Anthropic rejects — close it with an assistant ack first.
+        # 2. A SYSTEM user message declaring goal end.
+        # 3. A synthetic assistant acknowledgment so the user's NEXT message
+        #    arrives after an explicit context break. Without (3), ambiguous
+        #    replies like "ok" / "oj" are interpreted as task continuations.
+        if session._history and session._history[-1].get("role") == "user":
+            session._append_history({
+                "role": "assistant",
+                "content": "[Goal session interrupted.]",
+            })
+        session._append_history({
+            "role": "user",
+            "content": (
+                "SYSTEM: The autonomous goal session has ended. "
+                "Do NOT continue any prior task unless the user explicitly asks."
+            ),
+        })
+        session._append_history({
+            "role": "assistant",
+            "content": "Understood — the goal session has ended. What would you like to do?",
+        })
diff --git a/anton/commands/ui.py b/anton/commands/ui.py
index 652568d4..1bdc718a 100644
--- a/anton/commands/ui.py
+++ b/anton/commands/ui.py
@@ -39,6 +39,7 @@ class Command:
     Command("/skill", "Manage skills"),
     None,
     "Chat Tools",
+    Command("/goal", "Run a goal autonomously until complete (/goal \"objective\" [--turns N])"),
     Command("/paste", "Attach an image from your clipboard"),
     Command("/resume", "Continue a previous session"),
     Command("/remote", "Set up or manage remote scratchpad"),
diff --git a/anton/core/tools/registry.py b/anton/core/tools/registry.py
index d8b1c605..06f49250 100644
--- a/anton/core/tools/registry.py
+++ b/anton/core/tools/registry.py
@@ -37,6 +37,10 @@ async def dispatch_tool(
             raise ValueError(f"Tool {tool_name} not found")
         return await tool_def.handler(session, tc_input)
 
+    def unregister_tool(self, name: str) -> None:
+        """Remove a tool by name. No-op if not found."""
+        self._tools = [t for t in self._tools if t.name != name]
+
     def dump(self) -> list[dict]:
         """
         Dump the registry as a list of LLM-facing tool schemas.
diff --git a/anton/prompts.py b/anton/prompts.py
index 1a036dde..03c846c1 100644
--- a/anton/prompts.py
+++ b/anton/prompts.py
@@ -1,5 +1,26 @@
 """Extra prompts for the open source terminal agent."""
 
+GOAL_CONTINUATION_PROMPT = """\
+You are working autonomously on the following goal:
+
+
+{objective}
+
+
+Progress: turn {turn} of {max_turns}.
+
+Continue working toward the goal. When you believe you may be done, conduct a \
+rigorous self-audit before calling `mark_goal_complete`:
+
+1. Derive every concrete requirement implied by the goal.
+2. For each requirement, identify specific, authoritative evidence it is \
+satisfied (e.g. tests passing, files written, output verified).
+3. Treat indirect, assumed, or unverified evidence as "not yet satisfied."
+4. Only call `mark_goal_complete(reason)` when every requirement has ironclad proof.
+
+If any requirement is unmet, continue working without calling `mark_goal_complete`.\
+"""
+
 FILE_ATTACHMENTS_PROMPT = """
 FILE ATTACHMENTS:
 - Users can drag files or paste clipboard images. These appear as tags.
diff --git a/tests/test_goal.py b/tests/test_goal.py
new file mode 100644
index 00000000..b95f5939
--- /dev/null
+++ b/tests/test_goal.py
@@ -0,0 +1,108 @@
+"""Tests for /goal argument parsing and ToolRegistry.unregister_tool."""
+
+from __future__ import annotations
+
+from anton.commands.goal import parse_goal_args as _parse_goal_args
+from anton.core.tools.registry import ToolRegistry
+from anton.core.tools.tool_defs import ToolDef
+
+
+class TestParseGoalArgs:
+    def test_objective_only(self):
+        obj, turns = _parse_goal_args('"write hello.txt"')
+        assert obj == "write hello.txt"
+        assert turns == 50
+
+    def test_objective_with_turns(self):
+        obj, turns = _parse_goal_args('"write hello.txt" --turns 10')
+        assert obj == "write hello.txt"
+        assert turns == 10
+
+    def test_newline_splits_turns_flag(self):
+        # Terminal line-wrap can split '--turns 20' into '--tur\nns 20'.
+        # Without normalisation this would default to 50 and leave the
+        # fragment in the objective — the bug seen in manual testing.
+        obj, turns = _parse_goal_args('"write test suite" --tur\nns 20')
+        assert turns == 20
+        assert "tur" not in obj
+        assert "ns 20" not in obj
+
+    def test_carriage_return_normalised(self):
+        # \r\n within a word (Windows-style terminal wrap artefact).
+        obj, turns = _parse_goal_args('"my goal" --tur\r\nns 20')
+        assert turns == 20
+        assert obj == "my goal"
+
+    def test_unquoted_objective(self):
+        obj, turns = _parse_goal_args('do something useful --turns 3')
+        assert obj == "do something useful"
+        assert turns == 3
+
+    def test_single_quoted_objective(self):
+        obj, turns = _parse_goal_args("'run the linter'")
+        assert obj == "run the linter"
+        assert turns == 50
+
+    def test_empty_string_returns_empty_objective(self):
+        obj, turns = _parse_goal_args("")
+        assert obj == ""
+        assert turns == 50
+
+    def test_only_turns_flag_returns_empty_objective(self):
+        obj, turns = _parse_goal_args("--turns 5")
+        assert obj == ""
+        assert turns == 5
+
+    def test_zero_turns_clamped_to_one(self):
+        obj, turns = _parse_goal_args('"my goal" --turns 0')
+        assert obj == "my goal"
+        assert turns == 1
+
+
+def _make_tool(name: str) -> ToolDef:
+    async def _noop(_session, _input):
+        return ""
+
+    return ToolDef(
+        name=name,
+        description=f"tool {name}",
+        input_schema={"type": "object", "properties": {}},
+        handler=_noop,
+    )
+
+
+class TestUnregisterTool:
+    def test_removes_named_tool(self):
+        reg = ToolRegistry()
+        reg.register_tool(_make_tool("alpha"))
+        reg.register_tool(_make_tool("beta"))
+        reg.unregister_tool("alpha")
+        names = [t.name for t in reg.get_tool_defs()]
+        assert "alpha" not in names
+        assert "beta" in names
+
+    def test_noop_when_tool_not_found(self):
+        reg = ToolRegistry()
+        reg.register_tool(_make_tool("alpha"))
+        reg.unregister_tool("nonexistent")  # must not raise
+        assert len(reg.get_tool_defs()) == 1
+
+    def test_removes_only_matching_tool(self):
+        reg = ToolRegistry()
+        for name in ("a", "b", "c"):
+            reg.register_tool(_make_tool(name))
+        reg.unregister_tool("b")
+        names = [t.name for t in reg.get_tool_defs()]
+        assert names == ["a", "c"]
+
+    def test_registry_empty_after_removing_last_tool(self):
+        reg = ToolRegistry()
+        reg.register_tool(_make_tool("only"))
+        reg.unregister_tool("only")
+        assert not reg  # __bool__ returns False when empty
+
+    def test_dump_excludes_unregistered_tool(self):
+        reg = ToolRegistry()
+        reg.register_tool(_make_tool("target"))
+        reg.unregister_tool("target")
+        assert reg.dump() == []