From af4856826f07e7a25ad702ff11da38dd2a5f1569 Mon Sep 17 00:00:00 2001 From: Max Abouchar Date: Wed, 6 May 2026 15:59:52 -0700 Subject: [PATCH 1/7] ignore agent files --- .gitignore | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.gitignore b/.gitignore index fc7b86e0..e3c6cf75 100644 --- a/.gitignore +++ b/.gitignore @@ -201,6 +201,11 @@ cython_debug/ .cursorignore .cursorindexingignore +# personal AI agents +CLAUDE.md +.claude/* +AGENTS.md + # Marimo marimo/_static/ marimo/_lsp/ From daaa5e5bd518a757add1358614af0ade8d0990be Mon Sep 17 00:00:00 2001 From: Max Abouchar Date: Wed, 6 May 2026 16:00:39 -0700 Subject: [PATCH 2/7] add native ability to do web search --- anton/chat.py | 3 + anton/chat_session.py | 3 + anton/cli.py | 227 ++++++++++++++ anton/config/settings.py | 14 + anton/core/llm/anthropic.py | 56 +++- anton/core/llm/client.py | 37 +++ anton/core/llm/openai.py | 541 +++++++++++++++++++++++++++++++++- anton/core/llm/provider.py | 19 ++ anton/core/session.py | 44 +++ anton/core/tools/web_tools.py | 290 ++++++++++++++++++ 10 files changed, 1228 insertions(+), 6 deletions(-) create mode 100644 anton/core/tools/web_tools.py diff --git a/anton/chat.py b/anton/chat.py index e981000f..30c1d928 100644 --- a/anton/chat.py +++ b/anton/chat.py @@ -1115,6 +1115,7 @@ async def _chat_loop( session = ChatSession(ChatSessionConfig( llm_client=state["llm_client"], runtime_factory=get_runtime_factory(settings), + settings=settings, self_awareness=self_awareness, cortex=cortex, episodic=episodic, @@ -1128,6 +1129,8 @@ async def _chat_loop( session_id=current_session_id, proactive_dashboards=settings.proactive_dashboards, tools=[CONNECT_DATASOURCE_TOOL, PUBLISH_TOOL], + web_search_enabled=settings.web_search_enabled, + web_fetch_enabled=settings.web_fetch_enabled, )) # Handle --resume flag at startup diff --git a/anton/chat_session.py b/anton/chat_session.py index c7daf543..e471cde4 100644 --- a/anton/chat_session.py +++ b/anton/chat_session.py @@ -105,6 +105,7 @@ 
def rebuild_session( return ChatSession(ChatSessionConfig( llm_client=state["llm_client"], runtime_factory=get_runtime_factory(settings), + settings=settings, self_awareness=self_awareness, cortex=cortex, episodic=episodic, @@ -117,4 +118,6 @@ def rebuild_session( history_store=history_store, session_id=session_id, proactive_dashboards=settings.proactive_dashboards, + web_search_enabled=settings.web_search_enabled, + web_fetch_enabled=settings.web_fetch_enabled, )) diff --git a/anton/cli.py b/anton/cli.py index 74b9cfc2..a09f1a04 100644 --- a/anton/cli.py +++ b/anton/cli.py @@ -1087,6 +1087,207 @@ def _test(): ws.set_secret("ANTON_PLANNING_MODEL", model) ws.set_secret("ANTON_CODING_MODEL", model) + # The custom endpoint is generic openai-compatible (i.e. NOT mdb.ai + # passthrough), so the LLM provider doesn't expose web_search natively. + # Offer to configure Exa or Brave so the agent has search available. + # Skip the prompt in non-interactive contexts (tests, CI) — the user can + # always run ``anton setup-search`` later. + if not _looks_like_mdb_ai(base_url, settings) and sys.stdout.isatty(): + console.print() + console.print( + " [anton.muted]Web search needs an external provider on this endpoint. " + "You can configure one now or run [bold]anton setup-search[/] later.[/]" + ) + try: + _setup_search_provider(settings, ws) + except _SetupRetry: + # User pressed ESC out of the search-provider step — that's fine, + # the LLM is already configured. They can rerun `anton setup-search`. + pass + + +def _looks_like_mdb_ai(base_url: str, settings) -> bool: + """Match the same condition LLMClient.from_settings uses for mdb.ai.""" + base = (base_url or "").rstrip("/").lower() + minds = (getattr(settings, "minds_url", None) or "").rstrip("/").lower() + if not minds: + return False + return base == minds or base == f"{minds}/api/v1" + + +def _setup_search_provider(settings, ws) -> None: + """Configure an external search provider (Exa.ai or Brave Search). 
+ + Used by Case 3 in the web-tools design (generic OpenAI-compatible endpoints + that don't have a native ``web_search`` capability). The user picks a + provider and supplies a key; we validate the key with a probe call before + persisting it to the global ``~/.anton/.env`` so it survives across + sessions and workspaces — same scope as the LLM provider keys. + """ + console.print() + console.print("[anton.cyan]Search provider[/]") + console.print( + " [bold]1[/] [link=https://exa.ai][anton.cyan]Exa.ai[/][/link] " + "[anton.muted]AI-native semantic search[/]" + ) + console.print( + " [bold]2[/] [link=https://brave.com/search/api][anton.cyan]Brave Search[/][/link] " + "[anton.muted]privacy-focused web search[/]" + ) + console.print(" [bold]3[/] [anton.muted]Skip — disable web_search for now[/]") + console.print() + + from rich.prompt import Prompt + choice = Prompt.ask( + " Choose", + choices=["1", "2", "3"], + default="1", + console=console, + ) + + if choice == "3": + settings.external_search_provider = None + ws.set_secret("ANTON_EXTERNAL_SEARCH_PROVIDER", "") + console.print( + " [anton.muted]web_search will be unavailable until you run " + "[bold]anton setup-search[/].[/]" + ) + return + + if choice == "1": + _setup_exa(settings, ws) + else: + _setup_brave(settings, ws) + + +def _setup_exa(settings, ws) -> None: + """Collect and validate an Exa.ai API key.""" + console.print() + console.print( + " [anton.muted]Get an API key at " + "[link=https://dashboard.exa.ai/api-keys]" + "[anton.cyan]dashboard.exa.ai/api-keys[/][/link][/]" + ) + console.print() + + while True: + api_key = _setup_prompt("Exa API key", is_password=True) + if api_key.strip(): + break + console.print(" [anton.warning]Please enter your API key.[/]") + api_key = api_key.strip() + + try: + + def _test(): + # Sync httpx call — _validate_with_spinner runs us inside a Live. 
+ import httpx as _httpx + + resp = _httpx.post( + "https://api.exa.ai/search", + headers={"Authorization": f"Bearer {api_key}"}, + json={"query": "anton ping", "num_results": 1}, + timeout=15.0, + ) + if resp.status_code in (401, 403): + raise PermissionError("Authentication failed. Check your API key.") + if resp.status_code >= 400: + raise RuntimeError(f"HTTP {resp.status_code}: {resp.text[:200]}") + + _validate_with_spinner(console, "Exa.ai", _test) + except PermissionError as exc: + console.print(f" [anton.error]{exc}[/]") + _handle_search_retry(settings, ws) + return + except Exception as exc: + if _is_transient_error(exc): + console.print(" [anton.warning]Search service is temporarily overloaded.[/]") + else: + console.print(f" [anton.error]Failed:[/] {exc}") + _handle_search_retry(settings, ws) + return + + settings.external_search_provider = "exa" + settings.exa_api_key = api_key + ws.set_secret("ANTON_EXTERNAL_SEARCH_PROVIDER", "exa") + ws.set_secret("ANTON_EXA_API_KEY", api_key) + console.print(" [anton.success]Exa.ai configured.[/]") + + +def _setup_brave(settings, ws) -> None: + """Collect and validate a Brave Search API key.""" + console.print() + console.print( + " [anton.muted]Get an API key at " + "[link=https://api.search.brave.com/app/keys]" + "[anton.cyan]api.search.brave.com/app/keys[/][/link][/]" + ) + console.print() + + while True: + api_key = _setup_prompt("Brave Search API key", is_password=True) + if api_key.strip(): + break + console.print(" [anton.warning]Please enter your API key.[/]") + api_key = api_key.strip() + + try: + + def _test(): + import httpx as _httpx + + resp = _httpx.get( + "https://api.search.brave.com/res/v1/web/search", + headers={ + "X-Subscription-Token": api_key, + "Accept": "application/json", + }, + params={"q": "anton ping", "count": 1}, + timeout=15.0, + ) + if resp.status_code in (401, 403): + raise PermissionError("Authentication failed. 
Check your API key.") + if resp.status_code >= 400: + raise RuntimeError(f"HTTP {resp.status_code}: {resp.text[:200]}") + + _validate_with_spinner(console, "Brave Search", _test) + except PermissionError as exc: + console.print(f" [anton.error]{exc}[/]") + _handle_search_retry(settings, ws) + return + except Exception as exc: + if _is_transient_error(exc): + console.print(" [anton.warning]Search service is temporarily overloaded.[/]") + else: + console.print(f" [anton.error]Failed:[/] {exc}") + _handle_search_retry(settings, ws) + return + + settings.external_search_provider = "brave" + settings.brave_api_key = api_key + ws.set_secret("ANTON_EXTERNAL_SEARCH_PROVIDER", "brave") + ws.set_secret("ANTON_BRAVE_API_KEY", api_key) + console.print(" [anton.success]Brave Search configured.[/]") + + +def _handle_search_retry(settings, ws) -> None: + """Retry / switch / skip after a search-provider validation failure.""" + from rich.prompt import Prompt + choice = Prompt.ask( + " Retry, switch provider, or skip?", + choices=["retry", "switch", "skip", "r", "s", "k"], + default="retry", + console=console, + ) + if choice in ("retry", "r"): + _setup_search_provider(settings, ws) + elif choice in ("switch", "s"): + # Re-show the picker so the user can pick the other provider. + _setup_search_provider(settings, ws) + else: + settings.external_search_provider = None + ws.set_secret("ANTON_EXTERNAL_SEARCH_PROVIDER", "") + @app.command("setup") def setup(ctx: typer.Context) -> None: @@ -1097,6 +1298,32 @@ def setup(ctx: typer.Context) -> None: console.print("[anton.success]Setup complete.[/]") +@app.command("setup-search") +def setup_search(ctx: typer.Context) -> None: + """Configure an external search provider (Exa.ai or Brave Search). + + Only used when the active LLM endpoint is a generic OpenAI-compatible + third-party (i.e. NOT Anthropic, OpenAI BYOK, or the mdb.ai passthrough — + those expose web_search natively on the LLM provider's key). 
The chosen + key is persisted to the global ``~/.anton/.env`` so it survives across + sessions and workspaces, exactly like LLM provider keys. + """ + from pathlib import Path + from anton.workspace import Workspace + + settings = _get_settings(ctx) + _ensure_workspace(settings) + # Search-provider keys live globally — same scope as LLM keys. + global_ws = Workspace(Path.home()) + try: + _setup_search_provider(settings, global_ws) + except _SetupRetry: + console.print(" [anton.muted]Cancelled.[/]") + return + global_ws.apply_env_to_process() + console.print("[anton.success]Search provider setup complete.[/]") + + @app.command("dashboard") def dashboard() -> None: """Show the Anton status dashboard.""" diff --git a/anton/config/settings.py b/anton/config/settings.py index acc8130b..fcfb66f9 100644 --- a/anton/config/settings.py +++ b/anton/config/settings.py @@ -37,6 +37,20 @@ class AntonSettings(CoreSettings): openai_base_url: str | None = None openai_api_version: str | None = None # Azure api-version query param + # Web tools — on by default. For LLM providers that ship native server-side + # web search/fetch (Anthropic, OpenAI, mdb.ai passthrough), the tools execute + # inside the provider on the user's existing key. For generic + # openai-compatible endpoints, web_search needs an external provider key + # (Exa or Brave); web_fetch always falls back to stdlib HTTP. + web_search_enabled: bool = True + web_fetch_enabled: bool = True + + # Case 3 fallback — only consulted when the LLM provider lacks native web + # search and the user is on a generic OpenAI-compatible endpoint. 
+ external_search_provider: str | None = None # "exa" | "brave" | None + exa_api_key: str | None = None + brave_api_key: str | None = None + memory_enabled: bool = True memory_dir: str = ".anton" diff --git a/anton/core/llm/anthropic.py b/anton/core/llm/anthropic.py index 6c6114d8..a80e41d4 100644 --- a/anton/core/llm/anthropic.py +++ b/anton/core/llm/anthropic.py @@ -21,10 +21,45 @@ compute_context_pressure, ) +# Native server-side web tool type strings exposed by the Anthropic Messages API. +# The model invokes these inside the provider — Anton's tool-dispatch loop never +# sees a tool_use for them; the model's final text content already incorporates +# the search/fetch results. Bump these constants when newer revisions ship. +ANTHROPIC_WEB_SEARCH_TOOL_TYPE = "web_search_20250305" +ANTHROPIC_WEB_FETCH_TOOL_TYPE = "web_fetch_20250910" +# web_fetch is gated behind a beta header; web_search is GA and needs no header. +ANTHROPIC_WEB_FETCH_BETA_HEADER = "web-fetch-2025-09-10" + + +def _build_native_web_tools( + native_web_tools: set[str] | None, +) -> tuple[list[dict], list[str]]: + """Translate the unified web-tool set into Anthropic server-tool entries. + + Returns ``(tool_entries, beta_headers)`` — entries to append to the + Messages API ``tools`` array, and any ``anthropic-beta`` header values that + must be set for the call. + """ + if not native_web_tools: + return [], [] + entries: list[dict] = [] + beta: list[str] = [] + if "web_search" in native_web_tools: + entries.append({"type": ANTHROPIC_WEB_SEARCH_TOOL_TYPE, "name": "web_search"}) + if "web_fetch" in native_web_tools: + entries.append({"type": ANTHROPIC_WEB_FETCH_TOOL_TYPE, "name": "web_fetch"}) + beta.append(ANTHROPIC_WEB_FETCH_BETA_HEADER) + return entries, beta + class AnthropicProvider(LLMProvider): name: str = "anthropic" + def native_web_tools(self) -> set[str]: + # Anthropic's Messages API ships both server-side web_search and + # web_fetch tools; we route both through the provider when enabled. 
+ return {"web_search", "web_fetch"} + def __init__(self, api_key: str | None = None) -> None: self._api_key = api_key kwargs = {} @@ -44,17 +79,24 @@ async def complete( tools: list[dict] | None = None, tool_choice: dict | None = None, max_tokens: int = 4096, + native_web_tools: set[str] | None = None, ) -> LLMResponse: + web_entries, beta_headers = _build_native_web_tools(native_web_tools) + merged_tools = list(tools or []) + web_entries + kwargs: dict = { "model": model, "max_tokens": max_tokens, "system": system, "messages": messages, } - if tools: - kwargs["tools"] = tools + if merged_tools: + kwargs["tools"] = merged_tools if tool_choice: kwargs["tool_choice"] = tool_choice + if beta_headers: + # Anthropic accepts a comma-separated list of beta features. + kwargs["extra_headers"] = {"anthropic-beta": ",".join(beta_headers)} try: response = await self._client.messages.create(**kwargs) @@ -113,15 +155,21 @@ async def stream( messages: list[dict], tools: list[dict] | None = None, max_tokens: int = 4096, + native_web_tools: set[str] | None = None, ) -> AsyncIterator[StreamEvent]: + web_entries, beta_headers = _build_native_web_tools(native_web_tools) + merged_tools = list(tools or []) + web_entries + kwargs: dict = { "model": model, "max_tokens": max_tokens, "system": system, "messages": messages, } - if tools: - kwargs["tools"] = tools + if merged_tools: + kwargs["tools"] = merged_tools + if beta_headers: + kwargs["extra_headers"] = {"anthropic-beta": ",".join(beta_headers)} content_text = "" tool_calls: list[ToolCall] = [] diff --git a/anton/core/llm/client.py b/anton/core/llm/client.py index 92a50759..9d9ffb11 100644 --- a/anton/core/llm/client.py +++ b/anton/core/llm/client.py @@ -9,6 +9,29 @@ from anton.config.settings import AntonSettings +def _resolve_openai_compatible_flavor(settings: AntonSettings) -> str: + """Distinguish mdb.ai passthrough from a generic openai-compatible endpoint. 
+ + The "Minds-Enterprise-Cloud" setup path writes ``openai_base_url = + f"{minds_url.rstrip('/')}/api/v1"`` and ``openai_api_key = minds_api_key`` + (see ``AntonSettings.model_post_init``). When that exact pairing matches + the user's current settings, the OpenAI provider is talking to mdb.ai and + can therefore use the chat.completions native web tool passthrough. Any + other base URL is a generic third-party endpoint that needs the + handler-dispatched fallback at the session layer. + + No new env var is introduced — we infer flavor purely from the existing + config the setup flow already produces. + """ + from .openai import OpenAIProvider + + base = (getattr(settings, "openai_base_url", None) or "").rstrip("/").lower() + minds = (getattr(settings, "minds_url", None) or "").rstrip("/").lower() + if minds and (base == minds or base == f"{minds}/api/v1"): + return OpenAIProvider.FLAVOR_MINDS_PASSTHROUGH + return OpenAIProvider.FLAVOR_OPENAI_COMPATIBLE_GENERIC + + class LLMClient: def __init__( self, @@ -32,6 +55,7 @@ async def plan( messages: list[dict], tools: list[dict] | None = None, max_tokens: int | None = None, + native_web_tools: set[str] | None = None, ) -> LLMResponse: return await self._planning_provider.complete( model=self._planning_model, @@ -39,6 +63,7 @@ async def plan( messages=messages, tools=tools, max_tokens=max_tokens or self._max_tokens, + native_web_tools=native_web_tools, ) async def plan_stream( @@ -48,6 +73,7 @@ async def plan_stream( messages: list[dict], tools: list[dict] | None = None, max_tokens: int | None = None, + native_web_tools: set[str] | None = None, ) -> AsyncIterator[StreamEvent]: async for event in self._planning_provider.stream( model=self._planning_model, @@ -55,9 +81,15 @@ async def plan_stream( messages=messages, tools=tools, max_tokens=max_tokens or self._max_tokens, + native_web_tools=native_web_tools, ): yield event + @property + def planning_provider(self) -> LLMProvider: + """The LLM provider used for planning / 
the user-facing turn loop.""" + return self._planning_provider + @property def coding_provider(self) -> LLMProvider: """The LLM provider used for coding/skill execution.""" @@ -75,6 +107,7 @@ async def code( messages: list[dict], tools: list[dict] | None = None, max_tokens: int | None = None, + native_web_tools: set[str] | None = None, ) -> LLMResponse: return await self._coding_provider.complete( model=self._coding_model, @@ -82,6 +115,7 @@ async def code( messages=messages, tools=tools, max_tokens=max_tokens or self._max_tokens, + native_web_tools=native_web_tools, ) async def _generate_object_with( @@ -219,6 +253,7 @@ def from_settings(cls, settings: AntonSettings) -> LLMClient: from .openai import OpenAIProvider api_version = getattr(settings, "openai_api_version", None) + compatible_flavor = _resolve_openai_compatible_flavor(settings) providers = { "anthropic": lambda: AnthropicProvider(api_key=settings.anthropic_api_key), "openai": lambda: OpenAIProvider( @@ -226,6 +261,7 @@ def from_settings(cls, settings: AntonSettings) -> LLMClient: base_url=settings.openai_base_url, ssl_verify=settings.minds_ssl_verify, api_version=api_version, + flavor=OpenAIProvider.FLAVOR_OPENAI, ), "openai-compatible": lambda: OpenAIProvider( api_key=settings.openai_api_key, @@ -233,6 +269,7 @@ def from_settings(cls, settings: AntonSettings) -> LLMClient: ssl_verify=settings.minds_ssl_verify, api_version=api_version, supports_vision=False, + flavor=compatible_flavor, ), } diff --git a/anton/core/llm/openai.py b/anton/core/llm/openai.py index af93ec72..bf49ffef 100644 --- a/anton/core/llm/openai.py +++ b/anton/core/llm/openai.py @@ -186,6 +186,220 @@ def _is_azure_endpoint(url: str | None) -> bool: return host.endswith(".openai.azure.com") or host.endswith(".cognitiveservices.azure.com") +# ───────────────────────────────────────────────────────────────────────────── +# Responses API translation +# +# Used only for ``flavor="openai"`` (BYOK direct OpenAI). 
The Responses API is +# OpenAI's recommended transport going forward, supports native server-side +# web_search, and has a different request/response shape than chat.completions +# (flat function-tool params, ``input``/``instructions`` instead of +# ``messages``/``system``, ``output`` array instead of ``choices``). +# ───────────────────────────────────────────────────────────────────────────── + + +def _translate_tools_to_responses(tools: list[dict]) -> list[dict]: + """Anthropic tool format -> OpenAI Responses API function-tool format. + + The Responses API uses a flat shape (``{"type": "function", "name": ..., + "description": ..., "parameters": ...}``) rather than the chat.completions + nested shape under a ``function`` key. + """ + result: list[dict] = [] + for tool in tools: + result.append( + { + "type": "function", + "name": tool["name"], + "description": tool.get("description", ""), + "parameters": tool.get("input_schema", {}), + } + ) + return result + + +def _translate_tool_choice_to_responses(tool_choice: dict) -> dict | str: + """Anthropic tool_choice -> OpenAI Responses API tool_choice.""" + tc_type = tool_choice.get("type") + if tc_type == "tool": + return {"type": "function", "name": tool_choice["name"]} + if tc_type == "any": + return "required" + if tc_type == "auto": + return "auto" + return "auto" + + +def _translate_messages_to_responses_input( + messages: list[dict], supports_vision: bool = True +) -> list[dict]: + """Convert Anthropic-style messages to Responses API ``input`` items. 
+ + The Responses API accepts a list of items where: + + - User/assistant text messages → ``{"role": ..., "content": ..., "type": "message"}`` + - Assistant tool calls → ``{"type": "function_call", "call_id": ..., "name": ..., "arguments": ...}`` + - Tool results → ``{"type": "function_call_output", "call_id": ..., "output": ...}`` + + The system prompt is passed via the top-level ``instructions`` parameter + rather than as a message item, so it is *not* emitted here. + """ + items: list[dict] = [] + + for msg in messages: + role = msg["role"] + content = msg.get("content") + + # Plain string content — pass through as a simple message item. + if isinstance(content, str): + items.append({"role": role, "content": content, "type": "message"}) + continue + + if isinstance(content, list): + if role == "assistant": + items.extend(_translate_assistant_blocks_to_responses(content)) + elif role == "user": + items.extend( + _translate_user_blocks_to_responses( + content, supports_vision=supports_vision + ) + ) + else: + # Fallback: join text blocks + text = " ".join( + b.get("text", "") for b in content if b.get("type") == "text" + ) + items.append({"role": role, "content": text or "", "type": "message"}) + continue + + items.append( + {"role": role, "content": str(content) if content else "", "type": "message"} + ) + + return items + + +def _translate_assistant_blocks_to_responses(blocks: list[dict]) -> list[dict]: + """Convert assistant content blocks to Responses API input items. + + Tool-use blocks become ``function_call`` items; text blocks become a single + assistant message item. The ordering matters less here than in + chat.completions because each item is independent. 
+ """ + text_parts: list[str] = [] + tool_calls: list[dict] = [] + + for block in blocks: + if block.get("type") == "text": + text_parts.append(block["text"]) + elif block.get("type") == "tool_use": + tool_calls.append( + { + "type": "function_call", + "call_id": block["id"], + "name": block["name"], + "arguments": json.dumps(block.get("input", {})), + } + ) + + items: list[dict] = [] + if text_parts: + items.append( + {"role": "assistant", "content": "\n".join(text_parts), "type": "message"} + ) + items.extend(tool_calls) + return items + + +def _translate_user_blocks_to_responses( + blocks: list[dict], supports_vision: bool = True +) -> list[dict]: + """Convert user content blocks (text, tool_result, image) to Responses API items.""" + result: list[dict] = [] + content_parts: list[dict] = [] + + for block in blocks: + if block.get("type") == "tool_result": + # Flush any accumulated content parts first as a user message. + if content_parts: + result.append(_user_message_from_parts(content_parts)) + content_parts = [] + tool_content = block.get("content", "") + if isinstance(tool_content, list): + tool_content = "\n".join( + b.get("text", "") for b in tool_content if b.get("type") == "text" + ) + result.append( + { + "type": "function_call_output", + "call_id": block["tool_use_id"], + "output": str(tool_content), + } + ) + elif block.get("type") == "text": + content_parts.append({"type": "input_text", "text": block.get("text", "")}) + elif block.get("type") == "image" and supports_vision: + source = block.get("source", {}) + if source.get("type") == "base64": + media_type = source.get("media_type", "image/png") + data = source.get("data", "") + content_parts.append( + { + "type": "input_image", + "image_url": f"data:{media_type};base64,{data}", + } + ) + + if content_parts: + result.append(_user_message_from_parts(content_parts)) + + return result + + +def _user_message_from_parts(parts: list[dict]) -> dict: + """Build a Responses API user message from accumulated 
content parts. + + If the message is text-only, flatten to a plain string for compatibility; + otherwise emit the structured content list (images + text). + """ + if all(p.get("type") == "input_text" for p in parts): + return { + "role": "user", + "content": "\n".join(p["text"] for p in parts), + "type": "message", + } + return {"role": "user", "content": parts, "type": "message"} + + +def _native_web_entries_for_flavor( + flavor: str, native_web_tools: set[str] | None +) -> list[dict]: + """Build the list of native server-tool entries to append to the tools array. + + - ``flavor="openai"`` (Responses API): ``{"type": "web_search"}`` covers + both search and fetch (per OpenAI docs, web_search handles fetch implicitly). + - ``flavor="minds-passthrough"`` (chat.completions): mdb.ai accepts + ``{"type": "web_search"}`` and ``{"type": "fetch"}`` directly in the + OpenAI-shaped tools array. + - ``flavor="openai-compatible-generic"``: never returns native entries — + these endpoints get the handler-dispatched fallback at the session layer. + """ + if not native_web_tools: + return [] + if flavor == "openai": + # Single Responses API tool covers search + fetch. + if "web_search" in native_web_tools or "web_fetch" in native_web_tools: + return [{"type": "web_search"}] + return [] + if flavor == "minds-passthrough": + entries: list[dict] = [] + if "web_search" in native_web_tools: + entries.append({"type": "web_search"}) + if "web_fetch" in native_web_tools: + entries.append({"type": "fetch"}) + return entries + return [] + + def build_chat_completion_kwargs( *, model: str, @@ -208,6 +422,13 @@ def build_chat_completion_kwargs( class OpenAIProvider(LLMProvider): name: str = "openai" + # Three flavors distinguish the transport + native-tool behavior. See + # ``_native_web_entries_for_flavor`` for the per-flavor tool injection rules, + # and the ``complete``/``stream`` methods for the per-flavor transport split. 
+ FLAVOR_OPENAI = "openai" # Direct OpenAI BYOK — uses Responses API. + FLAVOR_MINDS_PASSTHROUGH = "minds-passthrough" # mdb.ai — chat.completions w/ native tools. + FLAVOR_OPENAI_COMPATIBLE_GENERIC = "openai-compatible-generic" # third-party. + def __init__( self, api_key: str | None = None, @@ -215,12 +436,14 @@ def __init__( ssl_verify: bool = True, api_version: str | None = None, supports_vision: bool = True, + flavor: str = FLAVOR_OPENAI_COMPATIBLE_GENERIC, ) -> None: self._api_key = api_key self._base_url = base_url self._ssl_verify = ssl_verify self._api_version = api_version self._supports_vision = supports_vision + self._flavor = flavor import httpx @@ -254,6 +477,16 @@ def export_connection_info(self) -> ProviderConnectionInfo: api_version=self._api_version, ) + def native_web_tools(self) -> set[str]: + # BYOK OpenAI exposes web_search via Responses API (which covers fetch + # implicitly). The mdb.ai passthrough accepts both web_search and fetch + # directly in the chat.completions tools array. Generic OpenAI-compatible + # endpoints have no native support — the session falls back to handler + # ToolDefs (Exa/Brave for search, stdlib HTTP for fetch). 
+ if self._flavor in (self.FLAVOR_OPENAI, self.FLAVOR_MINDS_PASSTHROUGH): + return {"web_search", "web_fetch"} + return set() + async def complete( self, *, @@ -263,7 +496,19 @@ async def complete( tools: list[dict] | None = None, tool_choice: dict | None = None, max_tokens: int = 4096, + native_web_tools: set[str] | None = None, ) -> LLMResponse: + if self._flavor == self.FLAVOR_OPENAI: + return await self._complete_via_responses( + model=model, + system=system, + messages=messages, + tools=tools, + tool_choice=tool_choice, + max_tokens=max_tokens, + native_web_tools=native_web_tools, + ) + oai_messages = _translate_messages(system, messages, supports_vision=self._supports_vision) kwargs = build_chat_completion_kwargs( @@ -271,8 +516,14 @@ async def complete( messages=oai_messages, max_tokens=max_tokens, ) + merged_tools: list[dict] = [] if tools: - kwargs["tools"] = _translate_tools(tools) + merged_tools.extend(_translate_tools(tools)) + # Native server-tool entries (mdb.ai passthrough) are appended *raw* so + # they aren't routed through the function-shape translation. 
+ merged_tools.extend(_native_web_entries_for_flavor(self._flavor, native_web_tools)) + if merged_tools: + kwargs["tools"] = merged_tools if tool_choice: kwargs["tool_choice"] = _translate_tool_choice(tool_choice) @@ -341,7 +592,20 @@ async def stream( messages: list[dict], tools: list[dict] | None = None, max_tokens: int = 4096, + native_web_tools: set[str] | None = None, ) -> AsyncIterator[StreamEvent]: + if self._flavor == self.FLAVOR_OPENAI: + async for event in self._stream_via_responses( + model=model, + system=system, + messages=messages, + tools=tools, + max_tokens=max_tokens, + native_web_tools=native_web_tools, + ): + yield event + return + oai_messages = _translate_messages(system, messages, supports_vision=self._supports_vision) kwargs = build_chat_completion_kwargs( @@ -350,8 +614,12 @@ async def stream( max_tokens=max_tokens, stream=True, ) + merged_tools: list[dict] = [] if tools: - kwargs["tools"] = _translate_tools(tools) + merged_tools.extend(_translate_tools(tools)) + merged_tools.extend(_native_web_entries_for_flavor(self._flavor, native_web_tools)) + if merged_tools: + kwargs["tools"] = merged_tools content_text = "" tool_calls: list[ToolCall] = [] @@ -461,3 +729,272 @@ async def stream( stop_reason=stop_reason, ) ) + + # ───────────────────────────────────────────────────────────────────── + # Responses API path — used for ``flavor="openai"`` (BYOK direct OpenAI) + # ───────────────────────────────────────────────────────────────────── + + def _build_responses_kwargs( + self, + *, + model: str, + system: str, + messages: list[dict], + tools: list[dict] | None, + tool_choice: dict | None, + max_tokens: int, + native_web_tools: set[str] | None, + ) -> dict: + """Common Responses API kwargs for both ``complete`` and ``stream``.""" + responses_input = _translate_messages_to_responses_input( + messages, supports_vision=self._supports_vision + ) + kwargs: dict = { + "model": model, + "input": responses_input, + "max_output_tokens": max_tokens, + } + 
if system: + kwargs["instructions"] = system + + merged_tools: list[dict] = [] + if tools: + merged_tools.extend(_translate_tools_to_responses(tools)) + merged_tools.extend(_native_web_entries_for_flavor(self._flavor, native_web_tools)) + if merged_tools: + kwargs["tools"] = merged_tools + if tool_choice: + kwargs["tool_choice"] = _translate_tool_choice_to_responses(tool_choice) + return kwargs + + async def _complete_via_responses( + self, + *, + model: str, + system: str, + messages: list[dict], + tools: list[dict] | None, + tool_choice: dict | None, + max_tokens: int, + native_web_tools: set[str] | None, + ) -> LLMResponse: + kwargs = self._build_responses_kwargs( + model=model, + system=system, + messages=messages, + tools=tools, + tool_choice=tool_choice, + max_tokens=max_tokens, + native_web_tools=native_web_tools, + ) + + try: + response = await self._client.responses.create(**kwargs) + except openai.BadRequestError as exc: + msg = str(exc).lower() + if "context_length_exceeded" in msg or "maximum context length" in msg: + raise ContextOverflowError(str(exc)) from exc + raise + except openai.APIStatusError as exc: + if ( + exc.status_code == 429 + and isinstance(exc.body, dict) + and exc.body.get("detail") + ): + msg = f"Server returned 429 — {exc.body['detail']}" + msg += " Visit https://mdb.ai to upgrade or to top up your tokens." + from .provider import TokenLimitExceeded + + raise TokenLimitExceeded(msg) from exc + else: + msg = f"Server returned {exc.status_code} — the LLM endpoint may be temporarily unavailable. Try again in a moment." + raise ConnectionError(msg) from exc + except openai.APIConnectionError as exc: + raise ConnectionError( + "Could not reach the LLM server — check your connection or try again in a moment." 
+ ) from exc + + return _parse_response_object(response, model) + + async def _stream_via_responses( + self, + *, + model: str, + system: str, + messages: list[dict], + tools: list[dict] | None, + max_tokens: int, + native_web_tools: set[str] | None, + ) -> AsyncIterator[StreamEvent]: + kwargs = self._build_responses_kwargs( + model=model, + system=system, + messages=messages, + tools=tools, + tool_choice=None, # streaming path does not force tool_choice today + max_tokens=max_tokens, + native_web_tools=native_web_tools, + ) + kwargs["stream"] = True + + content_text = "" + tool_calls: list[ToolCall] = [] + input_tokens = 0 + output_tokens = 0 + stop_reason: str | None = None + + # Map output_index → in-flight function-call state. Responses API uses + # a per-output_index stable handle for streaming arguments. + fc_state: dict[int, dict] = {} + + try: + stream = await self._client.responses.create(**kwargs) + async for event in stream: + etype = getattr(event, "type", "") + + # Text deltas + if etype == "response.output_text.delta": + delta = getattr(event, "delta", "") + if delta: + content_text += delta + yield StreamTextDelta(text=delta) + + # New output item (could be a function_call, server-tool call, + # or message). We only need to react when a function_call + # appears so we can emit the StreamToolUseStart with id+name. 
+ elif etype == "response.output_item.added": + item = getattr(event, "item", None) + item_type = getattr(item, "type", None) + if item_type == "function_call": + idx = event.output_index + call_id = getattr(item, "call_id", "") or getattr(item, "id", "") + name = getattr(item, "name", "") or "" + fc_state[idx] = {"call_id": call_id, "name": name, "args_parts": []} + if call_id and name: + yield StreamToolUseStart(id=call_id, name=name) + + # Function-call argument deltas + elif etype == "response.function_call_arguments.delta": + idx = event.output_index + delta = getattr(event, "delta", "") + info = fc_state.get(idx) + if info is None: + # output_item.added didn't surface this call yet — buffer + info = {"call_id": "", "name": "", "args_parts": []} + fc_state[idx] = info + info["args_parts"].append(delta) + if info["call_id"]: + yield StreamToolUseDelta(id=info["call_id"], json_delta=delta) + + # Function-call arguments complete — finalize this call. + elif etype == "response.function_call_arguments.done": + idx = event.output_index + info = fc_state.get(idx) + if info is None: + continue + raw_json = "".join(info["args_parts"]) or getattr( + event, "arguments", "" + ) + parsed = json.loads(raw_json) if raw_json else {} + tool_calls.append( + ToolCall( + id=info["call_id"], name=info["name"], input=parsed + ) + ) + if info["call_id"]: + yield StreamToolUseEnd(id=info["call_id"]) + + # Final completion event carries the resolved Response object + # with usage/stop_reason. We trust the structured parse here in + # case the streamed deltas missed something (e.g. server-tool + # calls produce text we already streamed but no function call). 
+ elif etype == "response.completed": + final_response = getattr(event, "response", None) + if final_response is not None: + usage = getattr(final_response, "usage", None) + if usage is not None: + input_tokens = getattr(usage, "input_tokens", 0) or 0 + output_tokens = getattr(usage, "output_tokens", 0) or 0 + stop_reason = getattr(final_response, "status", None) + except openai.BadRequestError as exc: + msg = str(exc).lower() + if "context_length_exceeded" in msg or "maximum context length" in msg: + raise ContextOverflowError(str(exc)) from exc + raise + except openai.APIStatusError as exc: + if ( + exc.status_code == 429 + and isinstance(exc.body, dict) + and exc.body.get("detail") + ): + msg = f"Server returned 429 — {exc.body['detail']}" + msg += " Visit https://mdb.ai to upgrade or top up your tokens." + from .provider import TokenLimitExceeded + + raise TokenLimitExceeded(msg) from exc + else: + msg = f"Server returned {exc.status_code} — the LLM endpoint may be temporarily unavailable. Try again in a moment." + raise ConnectionError(msg) from exc + except openai.APIConnectionError as exc: + raise ConnectionError( + "Could not reach the LLM server — check your connection or try again in a moment." + ) from exc + + yield StreamComplete( + response=LLMResponse( + content=content_text, + tool_calls=tool_calls, + usage=Usage( + input_tokens=input_tokens, + output_tokens=output_tokens, + context_pressure=compute_context_pressure(model, input_tokens), + ), + stop_reason=stop_reason, + ) + ) + + +def _parse_response_object(response, model: str) -> LLMResponse: + """Convert a Responses API ``Response`` object into our unified ``LLMResponse``. + + The response contains an ``output`` array of items: messages (with + ``output_text`` content blocks), function calls (with ``call_id``, + ``name``, ``arguments``), and server-tool calls (web_search etc.) which we + intentionally drop because their effects are already incorporated into the + model's text content. 
+ """ + content_text = "" + tool_calls: list[ToolCall] = [] + + for item in response.output or []: + item_type = getattr(item, "type", "") + if item_type == "message": + for content_block in getattr(item, "content", []) or []: + if getattr(content_block, "type", "") == "output_text": + content_text += getattr(content_block, "text", "") or "" + elif item_type == "function_call": + call_id = getattr(item, "call_id", "") or getattr(item, "id", "") + name = getattr(item, "name", "") or "" + args_str = getattr(item, "arguments", "") or "" + try: + parsed = json.loads(args_str) if args_str else {} + except json.JSONDecodeError: + parsed = {} + tool_calls.append(ToolCall(id=call_id, name=name, input=parsed)) + # Other item types (web_search_call, reasoning, etc.) are skipped — + # the model's output_text already incorporates their effects. + + usage = getattr(response, "usage", None) + input_tokens = getattr(usage, "input_tokens", 0) if usage else 0 + output_tokens = getattr(usage, "output_tokens", 0) if usage else 0 + + return LLMResponse( + content=content_text, + tool_calls=tool_calls, + usage=Usage( + input_tokens=input_tokens, + output_tokens=output_tokens, + context_pressure=compute_context_pressure(model, input_tokens), + ), + stop_reason=getattr(response, "status", None), + ) diff --git a/anton/core/llm/provider.py b/anton/core/llm/provider.py index 180fc445..f49c8906 100644 --- a/anton/core/llm/provider.py +++ b/anton/core/llm/provider.py @@ -153,6 +153,22 @@ class LLMProvider(ABC): # Human-readable provider id (e.g. "anthropic", "openai-compatible"). name: str = "" + def native_web_tools(self) -> set[str]: + """Subset of {"web_search", "web_fetch"} this provider executes server-side. + + When a tool is declared here, the provider is responsible for translating + the capability into its own native tool spec (e.g. Anthropic's + ``web_search_*`` server-tool, OpenAI's Responses API ``web_search``, + mdb.ai's ``{"type": "web_search"}`` passthrough). 
Server-side execution + means the model's response already incorporates the search/fetch + results — Anton's tool-dispatch loop never sees a ``tool_use`` for + these names. + + Providers without native support return an empty set, and the session + falls back to handler-dispatched ``ToolDef``s for any enabled web tools. + """ + return set() + @abstractmethod async def complete( self, @@ -163,6 +179,7 @@ async def complete( tools: list[dict] | None = None, tool_choice: dict | None = None, max_tokens: int = 4096, + native_web_tools: set[str] | None = None, ) -> LLMResponse: ... def export_connection_info(self) -> ProviderConnectionInfo: @@ -181,6 +198,7 @@ async def stream( messages: list[dict], tools: list[dict] | None = None, max_tokens: int = 4096, + native_web_tools: set[str] | None = None, ) -> AsyncIterator[StreamEvent]: """Stream LLM responses. Default falls back to complete().""" response = await self.complete( @@ -189,6 +207,7 @@ async def stream( messages=messages, tools=tools, max_tokens=max_tokens, + native_web_tools=native_web_tools, ) if response.content: yield StreamTextDelta(text=response.content) diff --git a/anton/core/session.py b/anton/core/session.py index 49192d98..7cb597d8 100644 --- a/anton/core/session.py +++ b/anton/core/session.py @@ -79,6 +79,12 @@ class ChatSessionConfig: session_id: str | None = None proactive_dashboards: bool = False tools: list[ToolDef] = field(default_factory=list) + # Web tools — on by default. Each is independently resolved at session + # construction into either a native provider capability (passed to the LLM + # via ``native_web_tools``) or a handler-dispatched fallback ToolDef + # (registered on the tool registry). See ChatSession.__init__. 
+ web_search_enabled: bool = True + web_fetch_enabled: bool = True class ChatSession: @@ -86,6 +92,11 @@ class ChatSession: def __init__(self, config: ChatSessionConfig) -> None: s = config.settings or CoreSettings() + # Stash the full settings object (may be AntonSettings, CoreSettings, + # or None). Tool handlers read host-only fields like + # ``external_search_provider`` / ``exa_api_key`` via getattr so the + # session stays decoupled from the host's settings shape. + self._settings = config.settings self._max_tool_rounds = s.max_tool_rounds self._max_continuations = s.max_continuations self._context_pressure_threshold = s.context_pressure_threshold @@ -157,6 +168,20 @@ def __init__(self, config: ChatSessionConfig) -> None: # the post-recovery response still reports high pressure. self._compacted_this_turn = False + # Resolve web tool routing once per session. ``_native_web_tools`` is + # the set the planning provider will execute server-side (passed + # through every ``plan*`` call); ``_fallback_web_tools`` is the set + # we run ourselves via handler-dispatched ToolDefs (registered in + # ``_build_core_tools``). The two sets are disjoint by construction. + desired_web: set[str] = set() + if config.web_search_enabled: + desired_web.add("web_search") + if config.web_fetch_enabled: + desired_web.add("web_fetch") + provider_native = self._llm.planning_provider.native_web_tools() + self._native_web_tools: set[str] = desired_web & provider_native + self._fallback_web_tools: set[str] = desired_web - provider_native + @property def history(self) -> list[dict]: return self._history @@ -411,6 +436,19 @@ def _build_core_tools(self) -> None: # Procedural memory retrieval — always available, no-op if no skills. self.tool_registry.register_tool(RECALL_SKILL_TOOL) + # Handler-dispatched web tools — registered only when the LLM provider + # does NOT execute them natively. 
On Anthropic / OpenAI BYOK / mdb.ai + # passthrough, ``_fallback_web_tools`` is empty and these tools never + # appear in the registry; the model uses the provider's server-side + # web tools instead and Anton's dispatch loop never sees a ``tool_use`` + # for them. See ``anton/core/tools/web_tools.py`` for the handlers. + if "web_search" in self._fallback_web_tools: + from anton.core.tools.web_tools import WEB_SEARCH_FALLBACK_TOOL + self.tool_registry.register_tool(WEB_SEARCH_FALLBACK_TOOL) + if "web_fetch" in self._fallback_web_tools: + from anton.core.tools.web_tools import WEB_FETCH_FALLBACK_TOOL + self.tool_registry.register_tool(WEB_FETCH_FALLBACK_TOOL) + async def close(self) -> None: """Clean up scratchpads and other resources.""" await self._scratchpads.close_all() @@ -599,6 +637,10 @@ async def plan_with_recovery( kwargs["tools"] = tools if max_tokens is not None: kwargs["max_tokens"] = max_tokens + # Native web tools are a per-session capability — forward to every + # planning call automatically so callers don't have to remember. + if self._native_web_tools: + kwargs["native_web_tools"] = self._native_web_tools try: return await self._llm.plan(messages=factory(), **kwargs) @@ -638,6 +680,8 @@ async def plan_stream_with_recovery( kwargs["tools"] = tools if max_tokens is not None: kwargs["max_tokens"] = max_tokens + if self._native_web_tools: + kwargs["native_web_tools"] = self._native_web_tools try: async for event in self._llm.plan_stream(messages=factory(), **kwargs): diff --git a/anton/core/tools/web_tools.py b/anton/core/tools/web_tools.py new file mode 100644 index 00000000..0b102942 --- /dev/null +++ b/anton/core/tools/web_tools.py @@ -0,0 +1,290 @@ +"""Handler-dispatched fallbacks for ``web_search`` / ``web_fetch``. + +These tools are registered on the session's ``ToolRegistry`` only when the +active LLM provider does *not* expose the equivalent capability natively +(see ``LLMProvider.native_web_tools()``). 
On Anthropic BYOK, OpenAI BYOK, and +the mdb.ai passthrough the model uses the provider's server-side tools and +this module is dormant. + +For generic OpenAI-compatible third-party endpoints (Case 3 in the design): + +- ``web_search`` is dispatched to Exa.ai or Brave Search using a key the user + configured via ``anton setup-search``. Without a configured key the handler + returns a clear error message pointing at that command. +- ``web_fetch`` always works — it is a plain HTTP GET (via httpx, which + Anton already depends on transitively through the LLM SDKs) plus a + lightweight HTML→text stripper, so it does not need a third-party key. + +Future enhancement (intentionally deferred from v1): when +``external_search_provider == "exa"`` and ``exa_api_key`` is set, ``web_fetch`` +could route through Exa's ``/contents`` endpoint instead of a direct HTTP GET for +higher-quality extraction (handles paywalls, JS-rendered nav, ad/boilerplate +stripping). Held back for now to keep behavior uniform across Exa, Brave, and +unconfigured users — the swap is local to ``handle_web_fetch_fallback``. 
+""" + +from __future__ import annotations + +import html +from html.parser import HTMLParser +from typing import TYPE_CHECKING, Any + +import httpx + +from anton.core.tools.tool_defs import ToolDef + +if TYPE_CHECKING: + from anton.core.session import ChatSession + + +# ───────────────────────────────────────────────────────────────────────────── +# External search provider adapters +# ───────────────────────────────────────────────────────────────────────────── + +EXA_SEARCH_ENDPOINT = "https://api.exa.ai/search" +BRAVE_SEARCH_ENDPOINT = "https://api.search.brave.com/res/v1/web/search" + +_HTTP_TIMEOUT = 30.0 + + +async def _search_exa(query: str, api_key: str, max_results: int) -> str: + """Hit Exa's ``/search`` endpoint and format hits as markdown.""" + payload: dict[str, Any] = { + "query": query, + "num_results": max_results, + # Include a short excerpt with each result so the model can answer + # many questions without a follow-up fetch round-trip. + "contents": {"text": {"max_characters": 600}}, + } + headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"} + async with httpx.AsyncClient(timeout=_HTTP_TIMEOUT) as client: + resp = await client.post(EXA_SEARCH_ENDPOINT, json=payload, headers=headers) + if resp.status_code != 200: + return f"Exa search failed ({resp.status_code}): {resp.text[:500]}" + data = resp.json() + + results = data.get("results") or [] + if not results: + return f"No results for query: {query!r}" + lines = [f"Web search results for: {query!r} (Exa, {len(results)} hits)\n"] + for i, r in enumerate(results, 1): + title = r.get("title") or r.get("url") or "(untitled)" + url = r.get("url") or "" + snippet = (r.get("text") or "").strip() + if len(snippet) > 600: + snippet = snippet[:600] + "…" + lines.append(f"{i}. 
**{title}**\n {url}") + if snippet: + lines.append(f" {snippet}") + return "\n".join(lines) + + +async def _search_brave(query: str, api_key: str, max_results: int) -> str: + """Hit Brave Search's web endpoint and format hits as markdown.""" + headers = { + "X-Subscription-Token": api_key, + "Accept": "application/json", + } + params = {"q": query, "count": max_results} + async with httpx.AsyncClient(timeout=_HTTP_TIMEOUT) as client: + resp = await client.get(BRAVE_SEARCH_ENDPOINT, headers=headers, params=params) + if resp.status_code != 200: + return f"Brave search failed ({resp.status_code}): {resp.text[:500]}" + data = resp.json() + + web = (data.get("web") or {}).get("results") or [] + if not web: + return f"No results for query: {query!r}" + lines = [f"Web search results for: {query!r} (Brave, {len(web)} hits)\n"] + for i, r in enumerate(web, 1): + title = r.get("title") or r.get("url") or "(untitled)" + url = r.get("url") or "" + snippet = (r.get("description") or "").strip() + lines.append(f"{i}. **{title}**\n {url}") + if snippet: + lines.append(f" {snippet}") + return "\n".join(lines) + + +# ───────────────────────────────────────────────────────────────────────────── +# Stdlib HTTP fetch + lightweight HTML extraction +# ───────────────────────────────────────────────────────────────────────────── + + +class _TextExtractor(HTMLParser): + """Tiny stdlib-only HTML→text converter. + + Skips ``script``/``style``/``noscript`` content, decodes character refs, + and normalizes whitespace. Good enough for the model to read article-style + pages; will produce noisy output for heavily JS-driven SPAs (acceptable + for v1 — the future Exa ``/contents`` enhancement covers that case). 
+ """ + + _SKIP_TAGS = {"script", "style", "noscript", "head"} + + def __init__(self) -> None: + super().__init__(convert_charrefs=True) + self._chunks: list[str] = [] + self._skip_depth = 0 + + def handle_starttag(self, tag: str, attrs: list) -> None: + if tag in self._SKIP_TAGS: + self._skip_depth += 1 + + def handle_endtag(self, tag: str) -> None: + if tag in self._SKIP_TAGS and self._skip_depth > 0: + self._skip_depth -= 1 + # Block-level tags get an implicit newline so paragraphs don't smush. + if tag in ("p", "br", "div", "li", "h1", "h2", "h3", "h4", "h5", "h6", "tr"): + self._chunks.append("\n") + + def handle_data(self, data: str) -> None: + if self._skip_depth == 0: + self._chunks.append(data) + + def text(self) -> str: + raw = "".join(self._chunks) + # Collapse runs of whitespace; preserve paragraph breaks. + lines = [line.strip() for line in raw.splitlines()] + return "\n".join(line for line in lines if line) + + +def _strip_html(body: str) -> str: + parser = _TextExtractor() + try: + parser.feed(body) + except Exception: + # Bail out to a minimal "decode entities" fallback if the parser barfs. 
+ return html.unescape(body) + return parser.text() + + +async def _fetch_url(url: str, max_chars: int) -> str: + """GET a URL and return its text content, truncated to ``max_chars``.""" + try: + async with httpx.AsyncClient( + timeout=_HTTP_TIMEOUT, follow_redirects=True + ) as client: + resp = await client.get(url, headers={"User-Agent": "AntonBot/1.0"}) + except httpx.TimeoutException: + return f"Fetch timed out after {_HTTP_TIMEOUT}s for {url}" + except httpx.HTTPError as exc: + return f"Fetch failed for {url}: {exc}" + + if resp.status_code >= 400: + return f"Fetch returned HTTP {resp.status_code} for {url}" + + content_type = (resp.headers.get("content-type") or "").lower() + body = resp.text + + if "html" in content_type or body.lstrip().startswith("<"): + text = _strip_html(body) + else: + text = body + + truncated = False + if len(text) > max_chars: + text = text[:max_chars] + truncated = True + + header = f"Fetched {url} (HTTP {resp.status_code}, {len(resp.content)} bytes)" + suffix = "\n... [truncated]" if truncated else "" + return f"{header}\n\n{text}{suffix}" + + +# ───────────────────────────────────────────────────────────────────────────── +# Handlers + ToolDefs +# ───────────────────────────────────────────────────────────────────────────── + + +_NO_PROVIDER_MSG = ( + "No search provider configured for this LLM endpoint. " + "Run `anton setup search` to configure Exa.ai or Brave Search." +) + + +async def handle_web_search_fallback(session: "ChatSession", tc_input: dict) -> str: + query = (tc_input.get("query") or "").strip() + if not query: + return "web_search requires a non-empty `query`." 
+ max_results = int(tc_input.get("max_results") or 5) + max_results = max(1, min(max_results, 20)) + + settings = session._settings + provider = (getattr(settings, "external_search_provider", None) or "").lower() + + if provider == "exa": + key = getattr(settings, "exa_api_key", None) + if not key: + return _NO_PROVIDER_MSG + return await _search_exa(query, key, max_results) + if provider == "brave": + key = getattr(settings, "brave_api_key", None) + if not key: + return _NO_PROVIDER_MSG + return await _search_brave(query, key, max_results) + + return _NO_PROVIDER_MSG + + +async def handle_web_fetch_fallback(session: "ChatSession", tc_input: dict) -> str: + del session # unused — fetch needs no settings + url = (tc_input.get("url") or "").strip() + if not url: + return "web_fetch requires a `url`." + if not (url.startswith("http://") or url.startswith("https://")): + return f"web_fetch only supports http(s) URLs; got: {url!r}" + max_chars = int(tc_input.get("max_chars") or 20000) + max_chars = max(500, min(max_chars, 200_000)) + return await _fetch_url(url, max_chars) + + +WEB_SEARCH_FALLBACK_TOOL = ToolDef( + name="web_search", + description=( + "Search the web for up-to-date information. Returns a ranked list of " + "results with title, URL, and a short excerpt. Use this when you need " + "facts that may have changed recently, breaking news, or to discover " + "URLs to fetch in detail. Backed by Exa.ai or Brave Search." + ), + input_schema={ + "type": "object", + "properties": { + "query": { + "type": "string", + "description": "The search query.", + }, + "max_results": { + "type": "integer", + "description": "Maximum results to return (1-20, default 5).", + }, + }, + "required": ["query"], + }, + handler=handle_web_search_fallback, +) + + +WEB_FETCH_FALLBACK_TOOL = ToolDef( + name="web_fetch", + description=( + "Fetch a URL and return its text content. Strips HTML markup; works " + "best on article-style pages. 
Use this after web_search when you need " + "the full body of a result, or directly when the user provides a URL." + ), + input_schema={ + "type": "object", + "properties": { + "url": { + "type": "string", + "description": "Absolute http(s) URL to fetch.", + }, + "max_chars": { + "type": "integer", + "description": "Maximum characters to return (default 20000, max 200000).", + }, + }, + "required": ["url"], + }, + handler=handle_web_fetch_fallback, +) From 8ece20818771d3d1da840440748fcf886e78a429 Mon Sep 17 00:00:00 2001 From: Max Abouchar Date: Wed, 6 May 2026 16:00:57 -0700 Subject: [PATCH 3/7] document web search capability --- README.md | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/README.md b/README.md index fab0ea84..7bfba0fc 100644 --- a/README.md +++ b/README.md @@ -93,6 +93,26 @@ Anton doesn't wait for someone to build a connector. It writes the integration c - **Credential vault** - prevents secrets from being exposed to LLMs. - **Isolated code execution** - protected, reproducible "show your work" environment. - **Multi-layer memory & continuous learning** - session, semantic and long-term knowledge. Anton remembers what it learned and gets better at your specific workflows over time. +- **Web search & fetch** - the agent can query the live web and retrieve URL contents. Routed natively through your LLM provider when possible (no extra setup), with a transparent fallback for third-party endpoints. See below. + +--- + +## Web search & fetch + +Anton exposes two web tools to the agent — `web_search` and `web_fetch` — both on by default. 
How they execute depends on your LLM provider: + +| Provider | `web_search` | `web_fetch` | Setup | +| --- | --- | --- | --- | +| Anthropic BYOK | Anthropic native server tool | Anthropic native server tool | None — billed on your Anthropic key | +| OpenAI BYOK | OpenAI Responses API native | covered by `web_search` | None — billed on your OpenAI key | +| Minds-Enterprise-Cloud (mdb.ai) | mdb.ai passthrough | mdb.ai passthrough | None — billed on your Minds key | +| Generic OpenAI-compatible (Together, Groq, Ollama, vLLM, …) | Exa.ai or Brave (you choose at setup) | plain HTTP GET via httpx (no key) | Run `anton setup-search` once | + +For the first three rows there's nothing to configure — the LLM provider executes the tools server-side and the results are folded directly into its response. For the fourth row, after `anton setup` finishes configuring a custom OpenAI-compatible endpoint, Anton will offer to set up Exa or Brave; you can also (re)run that step at any time with `anton setup-search`. The chosen search-provider key is persisted to `~/.anton/.env` so it carries across sessions and workspaces, exactly like your LLM key. + +To opt out, set `ANTON_WEB_SEARCH_ENABLED=false` and/or `ANTON_WEB_FETCH_ENABLED=false`. + +Caveats: provider rate limits apply; `web_fetch` has a 30-second timeout and strips HTML to plain text (works best on article-style pages); paywalled pages and JS-heavy SPAs may return little useful content; treat fetched page bodies as untrusted input. 
--- From f0c7f306d6d659333c539de0f94da557934c312e Mon Sep 17 00:00:00 2001 From: Max Abouchar Date: Wed, 6 May 2026 16:01:27 -0700 Subject: [PATCH 4/7] tests including e2e on using web tool providers --- tests/conftest.py | 12 +- tests/test_datasource.py | 2 + tests/test_openai_provider.py | 285 ++++++++++++++++ tests/test_provider.py | 117 +++++++ tests/test_tools.py | 2 + tests/test_web_tools.py | 432 ++++++++++++++++++++++++ tests/test_web_tools_live.py | 609 ++++++++++++++++++++++++++++++++++ 7 files changed, 1456 insertions(+), 3 deletions(-) create mode 100644 tests/test_web_tools.py create mode 100644 tests/test_web_tools_live.py diff --git a/tests/conftest.py b/tests/conftest.py index 9fa2287e..0557730c 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -11,9 +11,11 @@ def make_mock_llm() -> AsyncMock: """Return an AsyncMock LLM client with coding_provider configured for sync use. ``AsyncMock`` makes all child attributes ``AsyncMock`` too, which means - ``coding_provider.export_connection_info()`` would return a coroutine — - but ``ChatSession.__init__`` calls it synchronously. This helper fixes - that by explicitly wiring ``coding_provider`` with a plain ``MagicMock``. + methods we call synchronously on the provider would otherwise return + coroutines. This helper fixes that for both providers — ``coding_provider`` + (whose ``export_connection_info()`` is read in ``ChatSession.__init__``) and + ``planning_provider`` (whose ``native_web_tools()`` is read in the same + constructor to resolve the per-session web tool routing). """ mock = AsyncMock() mock.coding_provider = MagicMock() @@ -21,6 +23,10 @@ def make_mock_llm() -> AsyncMock: return_value=ProviderConnectionInfo(provider="anthropic", api_key="test") ) mock.coding_model = "claude-sonnet-4-6" + mock.planning_provider = MagicMock() + # Default test posture: no native web tools — fallback tools also off + # unless a specific test configures otherwise via ChatSessionConfig. 
+ mock.planning_provider.native_web_tools = MagicMock(return_value=set()) return mock diff --git a/tests/test_datasource.py b/tests/test_datasource.py index 6dafd9eb..fb58bf9a 100644 --- a/tests/test_datasource.py +++ b/tests/test_datasource.py @@ -180,6 +180,8 @@ async def _default_generate_object(schema_class, **kwargs): return_value=ProviderConnectionInfo(provider="anthropic", api_key="test") ) mock_llm.coding_model = "claude-sonnet-4-6" + mock_llm.planning_provider = MagicMock() + mock_llm.planning_provider.native_web_tools = MagicMock(return_value=set()) session = ChatSession(ChatSessionConfig(llm_client=mock_llm)) session._scratchpads = AsyncMock() return session diff --git a/tests/test_openai_provider.py b/tests/test_openai_provider.py index 11d972a0..95014553 100644 --- a/tests/test_openai_provider.py +++ b/tests/test_openai_provider.py @@ -319,3 +319,288 @@ async def test_azure_provider_complete_calls_chat_completions(self): assert result.usage.input_tokens == 8 assert result.usage.output_tokens == 12 mock_azure_client.chat.completions.create.assert_awaited_once() + + +# ───────────────────────────────────────────────────────────────────────────── +# Flavor split — minds-passthrough native tools, Responses API for BYOK OpenAI +# ───────────────────────────────────────────────────────────────────────────── + + +class TestNativeWebToolsByFlavor: + def test_generic_flavor_advertises_no_native_tools(self): + with patch("anton.core.llm.openai.openai"): + provider = OpenAIProvider( + api_key="k", + flavor=OpenAIProvider.FLAVOR_OPENAI_COMPATIBLE_GENERIC, + ) + assert provider.native_web_tools() == set() + + def test_minds_passthrough_advertises_search_and_fetch(self): + with patch("anton.core.llm.openai.openai"): + provider = OpenAIProvider( + api_key="k", flavor=OpenAIProvider.FLAVOR_MINDS_PASSTHROUGH + ) + assert provider.native_web_tools() == {"web_search", "web_fetch"} + + def test_openai_flavor_advertises_search_and_fetch(self): + with 
patch("anton.core.llm.openai.openai"): + provider = OpenAIProvider(api_key="k", flavor=OpenAIProvider.FLAVOR_OPENAI) + assert provider.native_web_tools() == {"web_search", "web_fetch"} + + +class TestMindsPassthroughTools: + """The mdb.ai passthrough must accept ``{"type": "web_search"}`` / + ``{"type": "fetch"}`` raw — they cannot be routed through + ``_translate_tools`` because they have no ``name``/``input_schema`` keys. + """ + + async def test_appends_web_search_raw(self): + with patch("anton.core.llm.openai.openai") as mock_openai: + mock_client = AsyncMock() + mock_openai.AsyncOpenAI.return_value = mock_client + mock_client.chat.completions.create = AsyncMock( + return_value=_make_mock_response() + ) + + provider = OpenAIProvider( + api_key="k", flavor=OpenAIProvider.FLAVOR_MINDS_PASSTHROUGH + ) + await provider.complete( + model="_reason_", + system="sys", + messages=[{"role": "user", "content": "hi"}], + tools=[{"name": "scratchpad", "description": "x", "input_schema": {}}], + native_web_tools={"web_search"}, + ) + + kwargs = mock_client.chat.completions.create.call_args.kwargs + tools = kwargs["tools"] + # Existing function tool was translated to chat.completions shape + assert any( + t.get("type") == "function" and t["function"]["name"] == "scratchpad" + for t in tools + ) + # Native server-tool entry is appended raw — exact shape mdb.ai expects. 
+ assert {"type": "web_search"} in tools + + async def test_appends_fetch_raw(self): + with patch("anton.core.llm.openai.openai") as mock_openai: + mock_client = AsyncMock() + mock_openai.AsyncOpenAI.return_value = mock_client + mock_client.chat.completions.create = AsyncMock( + return_value=_make_mock_response() + ) + + provider = OpenAIProvider( + api_key="k", flavor=OpenAIProvider.FLAVOR_MINDS_PASSTHROUGH + ) + await provider.complete( + model="_reason_", + system="sys", + messages=[{"role": "user", "content": "hi"}], + native_web_tools={"web_fetch"}, + ) + + kwargs = mock_client.chat.completions.create.call_args.kwargs + assert {"type": "fetch"} in kwargs["tools"] + + async def test_generic_flavor_does_not_inject_native_tools(self): + with patch("anton.core.llm.openai.openai") as mock_openai: + mock_client = AsyncMock() + mock_openai.AsyncOpenAI.return_value = mock_client + mock_client.chat.completions.create = AsyncMock( + return_value=_make_mock_response() + ) + + provider = OpenAIProvider( + api_key="k", + flavor=OpenAIProvider.FLAVOR_OPENAI_COMPATIBLE_GENERIC, + ) + await provider.complete( + model="some-model", + system="sys", + messages=[{"role": "user", "content": "hi"}], + native_web_tools={"web_search", "web_fetch"}, + ) + + kwargs = mock_client.chat.completions.create.call_args.kwargs + # Generic flavor never appends native entries — even when the caller + # passed them. The session is responsible for falling back to + # handler-dispatched ToolDefs in that case. + assert "tools" not in kwargs + + +class TestOpenAIBYOKResponsesAPIPath: + """``flavor="openai"`` routes every call through ``client.responses.create`` + rather than ``chat.completions.create``.""" + + async def test_complete_uses_responses_create(self): + with patch("anton.core.llm.openai.openai") as mock_openai: + mock_client = AsyncMock() + mock_openai.AsyncOpenAI.return_value = mock_client + + # Build a response object that mimics Responses API output. 
+ response = MagicMock() + content_block = MagicMock() + content_block.type = "output_text" + content_block.text = "Hello from Responses API" + message_item = MagicMock() + message_item.type = "message" + message_item.content = [content_block] + response.output = [message_item] + response.status = "completed" + response.usage = MagicMock(input_tokens=42, output_tokens=18) + mock_client.responses.create = AsyncMock(return_value=response) + + provider = OpenAIProvider(api_key="k", flavor=OpenAIProvider.FLAVOR_OPENAI) + result = await provider.complete( + model="gpt-5", + system="be helpful", + messages=[{"role": "user", "content": "hi"}], + ) + + mock_client.responses.create.assert_awaited_once() + # chat.completions must NOT have been touched + mock_client.chat.completions.create.assert_not_called() + assert result.content == "Hello from Responses API" + assert result.usage.input_tokens == 42 + assert result.usage.output_tokens == 18 + + async def test_complete_passes_instructions_and_input_shape(self): + with patch("anton.core.llm.openai.openai") as mock_openai: + mock_client = AsyncMock() + mock_openai.AsyncOpenAI.return_value = mock_client + + response = MagicMock() + response.output = [] + response.status = "completed" + response.usage = MagicMock(input_tokens=1, output_tokens=1) + mock_client.responses.create = AsyncMock(return_value=response) + + provider = OpenAIProvider(api_key="k", flavor=OpenAIProvider.FLAVOR_OPENAI) + await provider.complete( + model="gpt-5", + system="custom system", + messages=[{"role": "user", "content": "hello"}], + ) + + kwargs = mock_client.responses.create.call_args.kwargs + # System prompt goes via instructions, not as a message item. 
+ assert kwargs["instructions"] == "custom system" + assert kwargs["model"] == "gpt-5" + # Input items are message-shaped + assert kwargs["input"] == [ + {"role": "user", "content": "hello", "type": "message"} + ] + # max_output_tokens is the Responses API field name + assert "max_output_tokens" in kwargs + + async def test_complete_appends_web_search_native_tool(self): + with patch("anton.core.llm.openai.openai") as mock_openai: + mock_client = AsyncMock() + mock_openai.AsyncOpenAI.return_value = mock_client + + response = MagicMock() + response.output = [] + response.status = "completed" + response.usage = MagicMock(input_tokens=1, output_tokens=1) + mock_client.responses.create = AsyncMock(return_value=response) + + provider = OpenAIProvider(api_key="k", flavor=OpenAIProvider.FLAVOR_OPENAI) + await provider.complete( + model="gpt-5", + system="sys", + messages=[{"role": "user", "content": "hi"}], + tools=[{"name": "scratchpad", "description": "x", "input_schema": {}}], + native_web_tools={"web_search"}, + ) + + kwargs = mock_client.responses.create.call_args.kwargs + tools = kwargs["tools"] + # Function tools use the FLAT Responses API shape — not nested under + # a "function" key like chat.completions. 
+ assert any( + t.get("type") == "function" and t.get("name") == "scratchpad" + for t in tools + ) + assert {"type": "web_search"} in tools + + async def test_complete_translates_function_call_output(self): + """Responses API returns function calls as output items with call_id.""" + with patch("anton.core.llm.openai.openai") as mock_openai: + mock_client = AsyncMock() + mock_openai.AsyncOpenAI.return_value = mock_client + + fc_item = MagicMock() + fc_item.type = "function_call" + fc_item.call_id = "call_xyz" + fc_item.name = "do_thing" + fc_item.arguments = json.dumps({"foo": 42}) + + response = MagicMock() + response.output = [fc_item] + response.status = "completed" + response.usage = MagicMock(input_tokens=1, output_tokens=1) + mock_client.responses.create = AsyncMock(return_value=response) + + provider = OpenAIProvider(api_key="k", flavor=OpenAIProvider.FLAVOR_OPENAI) + result = await provider.complete( + model="gpt-5", + system="sys", + messages=[{"role": "user", "content": "hi"}], + tools=[{"name": "do_thing", "description": "x", "input_schema": {}}], + ) + + assert len(result.tool_calls) == 1 + assert result.tool_calls[0].id == "call_xyz" + assert result.tool_calls[0].name == "do_thing" + assert result.tool_calls[0].input == {"foo": 42} + + +class TestOpenAICompatibleFlavorResolution: + """``LLMClient.from_settings`` resolves openai-compatible into either + minds-passthrough or generic based on the ``openai_base_url`` matching + the user's configured ``minds_url``.""" + + def test_resolves_to_minds_passthrough_when_base_url_matches(self): + with patch("anton.core.llm.openai.openai"): + settings = AntonSettings( + planning_provider="openai-compatible", + coding_provider="openai-compatible", + planning_model="_reason_", + coding_model="_code_", + openai_api_key="mdb-key", + openai_base_url="https://mdb.ai/api/v1", + minds_url="https://mdb.ai", + _env_file=None, + ) + client = LLMClient.from_settings(settings) + assert client._planning_provider._flavor == 
OpenAIProvider.FLAVOR_MINDS_PASSTHROUGH + + def test_resolves_to_generic_when_base_url_is_third_party(self): + with patch("anton.core.llm.openai.openai"): + settings = AntonSettings( + planning_provider="openai-compatible", + coding_provider="openai-compatible", + planning_model="my-model", + coding_model="my-model", + openai_api_key="k", + openai_base_url="https://api.openrouter.ai/v1", + minds_url="https://mdb.ai", + _env_file=None, + ) + client = LLMClient.from_settings(settings) + assert client._planning_provider._flavor == OpenAIProvider.FLAVOR_OPENAI_COMPATIBLE_GENERIC + + def test_byok_openai_uses_openai_flavor(self): + with patch("anton.core.llm.openai.openai"): + settings = AntonSettings( + planning_provider="openai", + coding_provider="openai", + planning_model="gpt-5", + coding_model="gpt-5", + openai_api_key="sk-test", + _env_file=None, + ) + client = LLMClient.from_settings(settings) + assert client._planning_provider._flavor == OpenAIProvider.FLAVOR_OPENAI diff --git a/tests/test_provider.py b/tests/test_provider.py index 0d0ac8a8..59afc7ba 100644 --- a/tests/test_provider.py +++ b/tests/test_provider.py @@ -141,3 +141,120 @@ async def test_provider_without_api_key(self): mock_anthropic.AsyncAnthropic.return_value = AsyncMock() provider = AnthropicProvider() mock_anthropic.AsyncAnthropic.assert_called_once_with() + + +# ───────────────────────────────────────────────────────────────────────────── +# Native server-side web tools (web_search / web_fetch) +# ───────────────────────────────────────────────────────────────────────────── + + +def _stub_text_response(text: str = "ok"): + """Build a MagicMock response that looks like a plain text Anthropic reply.""" + block = MagicMock() + block.type = "text" + block.text = text + response = MagicMock() + response.content = [block] + response.usage.input_tokens = 1 + response.usage.output_tokens = 1 + response.stop_reason = "end_turn" + return response + + +class TestAnthropicNativeWebTools: + def 
test_native_web_tools_advertises_search_and_fetch(self): + with patch("anton.core.llm.anthropic.anthropic") as mock_anthropic: + mock_anthropic.AsyncAnthropic.return_value = AsyncMock() + provider = AnthropicProvider(api_key="k") + assert provider.native_web_tools() == {"web_search", "web_fetch"} + + async def test_complete_appends_web_search_server_tool(self): + from anton.core.llm.anthropic import ANTHROPIC_WEB_SEARCH_TOOL_TYPE + + with patch("anton.core.llm.anthropic.anthropic") as mock_anthropic: + mock_client = AsyncMock() + mock_anthropic.AsyncAnthropic.return_value = mock_client + mock_client.messages.create = AsyncMock(return_value=_stub_text_response()) + + provider = AnthropicProvider(api_key="k") + await provider.complete( + model="claude-sonnet-4-6", + system="sys", + messages=[{"role": "user", "content": "hi"}], + tools=[{"name": "scratchpad", "description": "x", "input_schema": {}}], + native_web_tools={"web_search"}, + ) + + kwargs = mock_client.messages.create.call_args[1] + tools = kwargs["tools"] + # Existing function tool is preserved + assert any(t.get("name") == "scratchpad" for t in tools) + # Server tool entry is appended in the right shape + assert {"type": ANTHROPIC_WEB_SEARCH_TOOL_TYPE, "name": "web_search"} in tools + # web_search is GA — no beta header should be set + assert "extra_headers" not in kwargs + + async def test_complete_appends_web_fetch_with_beta_header(self): + from anton.core.llm.anthropic import ( + ANTHROPIC_WEB_FETCH_BETA_HEADER, + ANTHROPIC_WEB_FETCH_TOOL_TYPE, + ) + + with patch("anton.core.llm.anthropic.anthropic") as mock_anthropic: + mock_client = AsyncMock() + mock_anthropic.AsyncAnthropic.return_value = mock_client + mock_client.messages.create = AsyncMock(return_value=_stub_text_response()) + + provider = AnthropicProvider(api_key="k") + await provider.complete( + model="claude-sonnet-4-6", + system="sys", + messages=[{"role": "user", "content": "hi"}], + native_web_tools={"web_fetch"}, + ) + + kwargs = 
mock_client.messages.create.call_args[1] + assert {"type": ANTHROPIC_WEB_FETCH_TOOL_TYPE, "name": "web_fetch"} in kwargs["tools"] + # web_fetch is beta — header must be present + assert kwargs["extra_headers"] == { + "anthropic-beta": ANTHROPIC_WEB_FETCH_BETA_HEADER + } + + async def test_complete_appends_both_server_tools(self): + with patch("anton.core.llm.anthropic.anthropic") as mock_anthropic: + mock_client = AsyncMock() + mock_anthropic.AsyncAnthropic.return_value = mock_client + mock_client.messages.create = AsyncMock(return_value=_stub_text_response()) + + provider = AnthropicProvider(api_key="k") + await provider.complete( + model="claude-sonnet-4-6", + system="sys", + messages=[{"role": "user", "content": "hi"}], + native_web_tools={"web_search", "web_fetch"}, + ) + + kwargs = mock_client.messages.create.call_args[1] + names = [t.get("name") for t in kwargs["tools"]] + assert "web_search" in names and "web_fetch" in names + # web_fetch always brings the beta header along + assert "anthropic-beta" in kwargs["extra_headers"] + + async def test_complete_omits_web_tools_when_set_is_empty(self): + with patch("anton.core.llm.anthropic.anthropic") as mock_anthropic: + mock_client = AsyncMock() + mock_anthropic.AsyncAnthropic.return_value = mock_client + mock_client.messages.create = AsyncMock(return_value=_stub_text_response()) + + provider = AnthropicProvider(api_key="k") + await provider.complete( + model="claude-sonnet-4-6", + system="sys", + messages=[{"role": "user", "content": "hi"}], + native_web_tools=None, + ) + + kwargs = mock_client.messages.create.call_args[1] + # No tools array at all — backward-compatible with the no-tools case + assert "tools" not in kwargs + assert "extra_headers" not in kwargs diff --git a/tests/test_tools.py b/tests/test_tools.py index 90583309..92e3c9bd 100644 --- a/tests/test_tools.py +++ b/tests/test_tools.py @@ -30,6 +30,8 @@ def _make_session(vault_dir): return_value=ProviderConnectionInfo(provider="anthropic", 
api_key="test") ) mock_llm.coding_model = "claude-sonnet-4-6" + mock_llm.planning_provider = MagicMock() + mock_llm.planning_provider.native_web_tools = MagicMock(return_value=set()) session = ChatSession(ChatSessionConfig(llm_client=mock_llm)) session._console = MagicMock() session._scratchpads = AsyncMock() diff --git a/tests/test_web_tools.py b/tests/test_web_tools.py new file mode 100644 index 00000000..ade876ef --- /dev/null +++ b/tests/test_web_tools.py @@ -0,0 +1,432 @@ +"""Tests for the handler-dispatched web_search/web_fetch fallbacks and the +session-side routing decision (native vs handler-dispatched). +""" + +from __future__ import annotations + +from types import SimpleNamespace +from unittest.mock import AsyncMock, MagicMock, patch + +import httpx +import pytest + +from anton.core.tools.web_tools import ( + WEB_FETCH_FALLBACK_TOOL, + WEB_SEARCH_FALLBACK_TOOL, + _strip_html, + handle_web_fetch_fallback, + handle_web_search_fallback, +) + + +def _session_with_settings(**fields): + """Build a stand-in session object exposing only ._settings.""" + settings = SimpleNamespace( + external_search_provider=fields.get("external_search_provider"), + exa_api_key=fields.get("exa_api_key"), + brave_api_key=fields.get("brave_api_key"), + ) + return SimpleNamespace(_settings=settings) + + +# ───────────────────────────────────────────────────────────────────────────── +# web_search fallback — Exa +# ───────────────────────────────────────────────────────────────────────────── + + +class TestWebSearchFallbackExa: + async def test_returns_no_provider_message_when_unconfigured(self): + session = _session_with_settings() + result = await handle_web_search_fallback(session, {"query": "anything"}) + assert "anton setup search" in result + assert "No search provider" in result + + async def test_returns_no_provider_when_provider_set_but_no_key(self): + session = _session_with_settings(external_search_provider="exa") + result = await handle_web_search_fallback(session, 
{"query": "x"}) + assert "anton setup search" in result + + async def test_empty_query_short_circuits(self): + session = _session_with_settings( + external_search_provider="exa", exa_api_key="k" + ) + result = await handle_web_search_fallback(session, {"query": " "}) + assert "non-empty" in result.lower() + + async def test_calls_exa_endpoint_with_bearer_auth(self): + session = _session_with_settings( + external_search_provider="exa", exa_api_key="exa-key-xyz" + ) + + # Capture the outgoing request, return a canned response. + captured: dict = {} + + async def _post(self, url, json=None, headers=None): + captured["url"] = url + captured["json"] = json + captured["headers"] = headers + request = httpx.Request("POST", url) + return httpx.Response( + 200, + json={ + "results": [ + { + "title": "Result A", + "url": "https://a.example", + "text": "snippet A " * 5, + }, + { + "title": "Result B", + "url": "https://b.example", + "text": "snippet B", + }, + ] + }, + request=request, + ) + + with patch.object(httpx.AsyncClient, "post", new=_post): + out = await handle_web_search_fallback( + session, {"query": "what is anton", "max_results": 2} + ) + + assert captured["url"] == "https://api.exa.ai/search" + assert captured["headers"]["Authorization"] == "Bearer exa-key-xyz" + assert captured["json"]["query"] == "what is anton" + assert captured["json"]["num_results"] == 2 + # Output is markdown-ish with both results + assert "Result A" in out + assert "https://a.example" in out + assert "Result B" in out + + async def test_exa_non_200_response_returns_error_string(self): + session = _session_with_settings( + external_search_provider="exa", exa_api_key="k" + ) + + async def _post(self, url, json=None, headers=None): + return httpx.Response( + 401, text="bad key", request=httpx.Request("POST", url) + ) + + with patch.object(httpx.AsyncClient, "post", new=_post): + out = await handle_web_search_fallback(session, {"query": "x"}) + assert "Exa search failed" in out + assert "401" 
in out + + async def test_caps_max_results_to_safe_range(self): + session = _session_with_settings( + external_search_provider="exa", exa_api_key="k" + ) + + captured: dict = {} + + async def _post(self, url, json=None, headers=None): + captured["json"] = json + return httpx.Response( + 200, json={"results": []}, request=httpx.Request("POST", url) + ) + + with patch.object(httpx.AsyncClient, "post", new=_post): + await handle_web_search_fallback( + session, {"query": "x", "max_results": 999} + ) + # 999 is clamped to 20 (the upper bound). + assert captured["json"]["num_results"] == 20 + + +# ───────────────────────────────────────────────────────────────────────────── +# web_search fallback — Brave +# ───────────────────────────────────────────────────────────────────────────── + + +class TestWebSearchFallbackBrave: + async def test_calls_brave_endpoint_with_subscription_token(self): + session = _session_with_settings( + external_search_provider="brave", brave_api_key="brv-key" + ) + captured: dict = {} + + async def _get(self, url, headers=None, params=None): + captured["url"] = url + captured["headers"] = headers + captured["params"] = params + return httpx.Response( + 200, + json={ + "web": { + "results": [ + { + "title": "Brave hit", + "url": "https://b.example", + "description": "A hit.", + } + ] + } + }, + request=httpx.Request("GET", url), + ) + + with patch.object(httpx.AsyncClient, "get", new=_get): + out = await handle_web_search_fallback(session, {"query": "anton"}) + + assert captured["url"] == "https://api.search.brave.com/res/v1/web/search" + assert captured["headers"]["X-Subscription-Token"] == "brv-key" + assert captured["params"] == {"q": "anton", "count": 5} + assert "Brave hit" in out + assert "A hit." 
in out + + async def test_brave_no_results(self): + session = _session_with_settings( + external_search_provider="brave", brave_api_key="k" + ) + + async def _get(self, url, headers=None, params=None): + return httpx.Response( + 200, json={"web": {"results": []}}, request=httpx.Request("GET", url) + ) + + with patch.object(httpx.AsyncClient, "get", new=_get): + out = await handle_web_search_fallback(session, {"query": "obscure"}) + assert "No results" in out + + +# ───────────────────────────────────────────────────────────────────────────── +# web_fetch fallback +# ───────────────────────────────────────────────────────────────────────────── + + +class TestWebFetchFallback: + async def test_rejects_non_http_urls(self): + out = await handle_web_fetch_fallback(None, {"url": "ftp://x.example"}) + assert "http(s)" in out + + async def test_empty_url(self): + out = await handle_web_fetch_fallback(None, {"url": " "}) + assert "requires" in out + + async def test_strips_html_to_text(self): + async def _get(self, url, headers=None): + return httpx.Response( + 200, + text=( + "T" + "" + "

Hello, world!

" + "

Second para.

" + ), + headers={"content-type": "text/html"}, + request=httpx.Request("GET", url), + ) + + with patch.object(httpx.AsyncClient, "get", new=_get): + out = await handle_web_fetch_fallback( + None, {"url": "https://example.com"} + ) + + # Body text is preserved, script and tags are stripped. + assert "Hello" in out + assert "world" in out + assert "Second para" in out + assert "" + "

Visible.

" + ) + assert _strip_html(html).strip() == "Visible." + + def test_decodes_entities(self): + assert "you & me" in _strip_html("

you & me

") + + def test_block_tags_get_newline_separation(self): + html = "

one

two

" + out = _strip_html(html) + assert "one" in out and "two" in out + # Some kind of separator between paragraphs (newline or blank line). + assert "\n" in out + + +# ───────────────────────────────────────────────────────────────────────────── +# Session-side resolution: native vs fallback by provider +# ───────────────────────────────────────────────────────────────────────────── + + +class TestSessionWebToolResolution: + """ChatSession.__init__ must resolve the per-session web tool plan correctly: + + - When the planning provider claims a capability natively, it goes into + ``_native_web_tools`` and the fallback ToolDef is NOT registered. + - When the provider does not, the capability goes into ``_fallback_web_tools`` + and the corresponding ToolDef IS registered. + """ + + def _build_session(self, *, provider_native: set[str], cfg_kwargs: dict | None = None): + from anton.core.session import ChatSession, ChatSessionConfig + from anton.core.llm.provider import ProviderConnectionInfo + + mock_llm = AsyncMock() + mock_llm.coding_provider = MagicMock() + mock_llm.coding_provider.export_connection_info = MagicMock( + return_value=ProviderConnectionInfo(provider="x", api_key="k") + ) + mock_llm.coding_model = "x" + mock_llm.planning_provider = MagicMock() + mock_llm.planning_provider.native_web_tools = MagicMock( + return_value=provider_native + ) + cfg = ChatSessionConfig(llm_client=mock_llm, **(cfg_kwargs or {})) + return ChatSession(cfg) + + def test_anthropic_style_native_provider_uses_no_fallback(self): + session = self._build_session(provider_native={"web_search", "web_fetch"}) + assert session._native_web_tools == {"web_search", "web_fetch"} + assert session._fallback_web_tools == set() + + def test_generic_provider_routes_both_to_fallback(self): + session = self._build_session(provider_native=set()) + assert session._native_web_tools == set() + assert session._fallback_web_tools == {"web_search", "web_fetch"} + + def 
test_disabled_search_drops_from_both_sets(self): + session = self._build_session( + provider_native={"web_search", "web_fetch"}, + cfg_kwargs={"web_search_enabled": False}, + ) + assert "web_search" not in session._native_web_tools + assert "web_search" not in session._fallback_web_tools + assert "web_fetch" in session._native_web_tools + + def test_fallback_toolDefs_registered_when_provider_lacks_native(self): + session = self._build_session(provider_native=set()) + # Trigger lazy build of the registry. + tools = session._build_tools() + names = {t["name"] for t in tools} + assert "web_search" in names + assert "web_fetch" in names + + def test_fallback_toolDefs_not_registered_when_provider_is_native(self): + session = self._build_session(provider_native={"web_search", "web_fetch"}) + tools = session._build_tools() + names = {t["name"] for t in tools} + # web tools are server-side on the provider; they should NOT appear in + # the registry — the model invokes them through the provider directly. 
+ assert "web_search" not in names + assert "web_fetch" not in names + + +class TestNativeWebToolsForwarded: + """plan_with_recovery / plan_stream_with_recovery must forward the resolved + native_web_tools set to the LLM client without each call site needing to + remember it.""" + + async def test_plan_with_recovery_forwards_native_set(self): + from anton.core.session import ChatSession, ChatSessionConfig + from anton.core.llm.provider import LLMResponse, ProviderConnectionInfo, Usage + + mock_llm = AsyncMock() + mock_llm.coding_provider = MagicMock() + mock_llm.coding_provider.export_connection_info = MagicMock( + return_value=ProviderConnectionInfo(provider="x", api_key="k") + ) + mock_llm.coding_model = "x" + mock_llm.planning_provider = MagicMock() + mock_llm.planning_provider.native_web_tools = MagicMock( + return_value={"web_search", "web_fetch"} + ) + mock_llm.plan = AsyncMock( + return_value=LLMResponse(content="ok", usage=Usage()) + ) + + session = ChatSession(ChatSessionConfig(llm_client=mock_llm)) + await session.plan_with_recovery(system="sys") + + kwargs = mock_llm.plan.call_args.kwargs + assert kwargs["native_web_tools"] == {"web_search", "web_fetch"} + + async def test_plan_with_recovery_omits_kwarg_when_no_native(self): + from anton.core.session import ChatSession, ChatSessionConfig + from anton.core.llm.provider import LLMResponse, ProviderConnectionInfo, Usage + + mock_llm = AsyncMock() + mock_llm.coding_provider = MagicMock() + mock_llm.coding_provider.export_connection_info = MagicMock( + return_value=ProviderConnectionInfo(provider="x", api_key="k") + ) + mock_llm.coding_model = "x" + mock_llm.planning_provider = MagicMock() + mock_llm.planning_provider.native_web_tools = MagicMock(return_value=set()) + mock_llm.plan = AsyncMock( + return_value=LLMResponse(content="ok", usage=Usage()) + ) + + session = ChatSession(ChatSessionConfig(llm_client=mock_llm)) + await session.plan_with_recovery(system="sys") + + kwargs = mock_llm.plan.call_args.kwargs 
+ # When the provider has no native web tools, the kwarg is left out + # entirely so it doesn't even appear in older mocks' call_args. + assert "native_web_tools" not in kwargs + + +class TestToolDefShapes: + def test_search_tool_schema_requires_query(self): + assert "query" in WEB_SEARCH_FALLBACK_TOOL.input_schema["required"] + + def test_fetch_tool_schema_requires_url(self): + assert "url" in WEB_FETCH_FALLBACK_TOOL.input_schema["required"] + + def test_tool_names_match_native_capability_strings(self): + # The fallback names MUST match the native capability strings so that + # provider-side execution and handler-side execution feel identical to + # the agent. If these drift, tools registered conditionally won't line + # up with the native_web_tools set. + assert WEB_SEARCH_FALLBACK_TOOL.name == "web_search" + assert WEB_FETCH_FALLBACK_TOOL.name == "web_fetch" diff --git a/tests/test_web_tools_live.py b/tests/test_web_tools_live.py new file mode 100644 index 00000000..12761aca --- /dev/null +++ b/tests/test_web_tools_live.py @@ -0,0 +1,609 @@ +"""Live integration tests for the native web tool paths. + +These tests make real API calls — they exercise the wire format end-to-end +(tool spec serialization, server-side execution, response parsing) instead of +just checking what we send. They auto-skip when the corresponding API key is +not in the environment, so CI without keys is unaffected. + +Loads ``.env`` from the project root once at import time so a developer who +keeps their keys in ``.env`` (the standard pattern for this repo) doesn't need +to ``source`` anything before running ``pytest``. + +Coverage map: + +- ``TestAnthropicLive`` — ``AnthropicProvider`` with ``native_web_tools`` + resolving to ``web_search_20250305`` and ``web_fetch_20250910`` server tools. + Hits the Messages API directly. +- ``TestOpenAIBYOKLive`` — ``OpenAIProvider(flavor="openai")`` with + ``native_web_tools`` routing through the Responses API + (``client.responses.create``). 
The whole BYOK OpenAI path runs through + Responses now, so this also validates non-tool calls along the way. +- ``TestMindsPassthroughLive`` — flavor=``"minds-passthrough"``, base_url= + ``https://mdb.ai/api/v1``, model=``"_reason_"``. Currently skipped because + the mdb.ai ``passthrough_agent`` web-tools translation lands in a separate + PR; the scaffolding here means we just remove the skip mark when it ships. + +Cost note: each test uses small ``max_tokens`` to keep the bill negligible. +""" + +from __future__ import annotations + +import os +from pathlib import Path + +import pytest + +# Load .env once so plain os.environ reads pick up keys the developer put in +# the repo-root .env (matches AntonSettings' env-file precedence). +try: + from dotenv import load_dotenv + + load_dotenv(Path(__file__).resolve().parent.parent / ".env", override=False) +except Exception: + # python-dotenv is a transitive dep through pydantic-settings; if it's + # missing for any reason, fall back to whatever's already in os.environ. 
+ pass + + +def _have(key: str) -> bool: + return bool(os.environ.get(key)) + + +anthropic_only = pytest.mark.skipif( + not _have("ANTHROPIC_API_KEY"), + reason="ANTHROPIC_API_KEY not set — live test skipped", +) +openai_only = pytest.mark.skipif( + not _have("OPENAI_API_KEY"), + reason="OPENAI_API_KEY not set — live test skipped", +) +minds_only = pytest.mark.skipif( + not _have("MINDS_API_KEY"), + reason="MINDS_API_KEY not set — live test skipped", +) +exa_only = pytest.mark.skipif( + not _have("EXA_API_KEY"), + reason="EXA_API_KEY not set — live test skipped", +) +brave_only = pytest.mark.skipif( + not _have("BRAVE_API_KEY"), + reason="BRAVE_API_KEY not set — live test skipped", +) + + +# ───────────────────────────────────────────────────────────────────────────── +# Anthropic BYOK — native web_search and web_fetch +# ───────────────────────────────────────────────────────────────────────────── + + +@anthropic_only +class TestAnthropicLive: + """Real calls to Anthropic with the native server-side web tools. + + On success the model emits some combination of ``server_tool_use`` / + ``web_search_tool_result`` / ``text`` blocks; the existing extraction loop + in ``AnthropicProvider.complete`` already filters down to text blocks (and + real ``tool_use`` blocks for function tools), so the model's natural + response — which incorporates the search/fetch result — flows back as + ``LLMResponse.content``. The assertions are deliberately loose: we only + care that the call succeeds and returns plausible content; exact + summarization is the model's job, not our wire format's. + """ + + @pytest.mark.asyncio + async def test_complete_with_native_web_search(self): + from anton.core.llm.anthropic import AnthropicProvider + + provider = AnthropicProvider(api_key=os.environ["ANTHROPIC_API_KEY"]) + response = await provider.complete( + model="claude-sonnet-4-6", + system="Use web_search if you need current information. 
Be brief.", + messages=[ + { + "role": "user", + "content": ( + "Search the web for 'Anthropic Claude' and tell me one " + "fact in a single sentence." + ), + } + ], + native_web_tools={"web_search"}, + max_tokens=512, + ) + + assert response.content, "expected non-empty model response" + assert len(response.content) > 20 + # The query forces a search-shaped answer; "Anthropic" or "Claude" + # should land in the text either way. + lowered = response.content.lower() + assert "anthropic" in lowered or "claude" in lowered + + @pytest.mark.asyncio + async def test_complete_with_native_web_fetch(self): + from anton.core.llm.anthropic import AnthropicProvider + + provider = AnthropicProvider(api_key=os.environ["ANTHROPIC_API_KEY"]) + response = await provider.complete( + model="claude-sonnet-4-6", + system=( + "Use the web_fetch tool to retrieve the URL the user provides " + "and quote one short phrase from the page. Be brief." + ), + messages=[ + { + "role": "user", + "content": ( + "Fetch https://example.com and tell me what the page says." + ), + } + ], + native_web_tools={"web_fetch"}, + max_tokens=512, + ) + + assert response.content, "expected non-empty model response" + # example.com's signature phrase — the model should surface it after + # the server-side fetch lands. + assert "example" in response.content.lower() + + @pytest.mark.asyncio + async def test_complete_with_both_native_tools(self): + """Both server tools wired in the same call — exercises the merged + tools array + the beta header co-existing with non-beta tooling.""" + from anton.core.llm.anthropic import AnthropicProvider + + provider = AnthropicProvider(api_key=os.environ["ANTHROPIC_API_KEY"]) + response = await provider.complete( + model="claude-sonnet-4-6", + system="Use whichever web tool fits. Keep your answer short.", + messages=[ + { + "role": "user", + "content": "What is on https://example.com? 
One sentence.", + } + ], + native_web_tools={"web_search", "web_fetch"}, + max_tokens=512, + ) + + assert response.content, "expected non-empty model response" + + @pytest.mark.asyncio + async def test_complete_without_web_tools_still_works(self): + """Sanity: opting out (``native_web_tools=None``) must not regress + the existing chat-only path.""" + from anton.core.llm.anthropic import AnthropicProvider + + provider = AnthropicProvider(api_key=os.environ["ANTHROPIC_API_KEY"]) + response = await provider.complete( + model="claude-sonnet-4-6", + system="Reply with exactly: pong", + messages=[{"role": "user", "content": "ping"}], + max_tokens=16, + ) + assert "pong" in response.content.lower() + + +# ───────────────────────────────────────────────────────────────────────────── +# OpenAI BYOK — Responses API path with native web_search +# ───────────────────────────────────────────────────────────────────────────── + + +@openai_only +class TestOpenAIBYOKLive: + """Real calls to ``client.responses.create`` for ``flavor="openai"``. + + Validates that the entire Responses API translation (input shape, tools, + instructions, output_text/function_call extraction) lines up with what + the live API expects. The earlier mocked tests cover the request shape + going out; these confirm it actually works against the real endpoint. + """ + + @pytest.mark.asyncio + async def test_responses_api_basic_call(self): + """No web tools — just confirm the Responses API transport works for + the simple text path. 
If this fails, every other BYOK OpenAI test + is also broken.""" + from anton.core.llm.openai import OpenAIProvider + + provider = OpenAIProvider( + api_key=os.environ["OPENAI_API_KEY"], + flavor=OpenAIProvider.FLAVOR_OPENAI, + ) + response = await provider.complete( + model="gpt-5", + system="Reply with exactly: pong", + messages=[{"role": "user", "content": "ping"}], + max_tokens=512, + ) + assert response.content, "expected non-empty Responses API output_text" + assert "pong" in response.content.lower() + assert response.usage.input_tokens > 0 + assert response.usage.output_tokens > 0 + + @pytest.mark.asyncio + async def test_responses_api_with_native_web_search(self): + from anton.core.llm.openai import OpenAIProvider + + provider = OpenAIProvider( + api_key=os.environ["OPENAI_API_KEY"], + flavor=OpenAIProvider.FLAVOR_OPENAI, + ) + # gpt-5 is a reasoning model: reasoning tokens + the (often large) + # web_search result payload share the ``max_output_tokens`` budget, + # so a tight cap can leave nothing for the final text. 4096 is + # comfortable headroom for a one-sentence answer over a search. + response = await provider.complete( + model="gpt-5", + system="Use web_search if you need current information. Be brief.", + messages=[ + { + "role": "user", + "content": ( + "Search for 'OpenAI Responses API' and summarize one " + "thing about it in a single sentence." + ), + } + ], + native_web_tools={"web_search"}, + max_tokens=4096, + ) + + assert response.content, ( + f"expected non-empty model response but got stop_reason=" + f"{response.stop_reason!r} (input_tokens={response.usage.input_tokens}, " + f"output_tokens={response.usage.output_tokens})" + ) + assert len(response.content) > 20 + lowered = response.content.lower() + assert "openai" in lowered or "responses" in lowered or "api" in lowered + + @pytest.mark.asyncio + async def test_responses_api_with_function_tool_round_trip(self): + """Forced function-tool call through the Responses API. 
+ + Confirms the flat function-tool shape (`{"type": "function", "name": ...}`), + the ``tool_choice`` translation, and the ``call_id`` round-trip all + work against the live endpoint. This is the same path + ``generate_object*`` uses, so a regression here would cascade. + + Note on ``max_tokens``: gpt-5 is a reasoning model and its reasoning + tokens count against ``max_output_tokens``. A low cap can leave the + model with no budget to emit the function call (``stop_reason= + "incomplete"``), so we use a generous 4096 here. Still pennies per run. + """ + from anton.core.llm.openai import OpenAIProvider + + provider = OpenAIProvider( + api_key=os.environ["OPENAI_API_KEY"], + flavor=OpenAIProvider.FLAVOR_OPENAI, + ) + response = await provider.complete( + model="gpt-5", + system="Call the answer tool to provide your reply.", + messages=[{"role": "user", "content": "What is 6 times 7?"}], + tools=[ + { + "name": "answer", + "description": "Provide the numeric answer.", + "input_schema": { + "type": "object", + "properties": { + "value": {"type": "integer"}, + "explanation": {"type": "string"}, + }, + "required": ["value"], + }, + } + ], + tool_choice={"type": "tool", "name": "answer"}, + max_tokens=4096, + ) + + assert response.tool_calls, ( + f"expected forced tool call but got stop_reason={response.stop_reason!r} " + f"with content={response.content!r}" + ) + tc = response.tool_calls[0] + assert tc.name == "answer" + # call_id is the canonical id we'll reference in any follow-up + # function_call_output items. + assert tc.id + assert tc.input.get("value") == 42 + + @pytest.mark.asyncio + async def test_responses_api_streaming(self): + """Quick smoke of the streaming path. 
Streaming has its own + per-event translation (output_text.delta, function_call_arguments.*, + completed) that the non-streaming test doesn't exercise.""" + from anton.core.llm.openai import OpenAIProvider + from anton.core.llm.provider import StreamComplete, StreamTextDelta + + provider = OpenAIProvider( + api_key=os.environ["OPENAI_API_KEY"], + flavor=OpenAIProvider.FLAVOR_OPENAI, + ) + + text_chunks: list[str] = [] + final_response = None + async for event in provider.stream( + model="gpt-5", + system="Reply with exactly: pong", + messages=[{"role": "user", "content": "ping"}], + max_tokens=512, + ): + if isinstance(event, StreamTextDelta): + text_chunks.append(event.text) + elif isinstance(event, StreamComplete): + final_response = event.response + + joined = "".join(text_chunks).lower() + assert "pong" in joined + assert final_response is not None + assert final_response.content == "".join(text_chunks) + + +# ───────────────────────────────────────────────────────────────────────────── +# Minds passthrough — same path as OpenAI-compatible chat.completions +# ───────────────────────────────────────────────────────────────────────────── + + +@minds_only +@pytest.mark.skip( + reason=( + "mdb.ai passthrough_agent web-tools translation lives in a separate " + "backend PR. Scaffolding is in place — remove this skip when the " + "passthrough side ships." + ) +) +class TestMindsPassthroughLive: + """Native web tools through mdb.ai (chat.completions transport with + ``{"type": "web_search"}`` / ``{"type": "fetch"}`` appended raw). + + The wire format on our end is finalized — this suite already passes + against the local mock — but the upstream ``passthrough_agent`` doesn't + translate the web tool entries to the underlying provider yet, so a real + call returns either a 4xx or a no-op completion. 
Tests are skipped at + the class level until the backend lands, so any future change to the + passthrough path that breaks our wire format will surface here on the + first un-skipped run. + """ + + @pytest.mark.asyncio + async def test_complete_with_native_web_search(self): + from anton.core.llm.openai import OpenAIProvider + + provider = OpenAIProvider( + api_key=os.environ["MINDS_API_KEY"], + base_url="https://mdb.ai/api/v1", + flavor=OpenAIProvider.FLAVOR_MINDS_PASSTHROUGH, + supports_vision=False, + ) + response = await provider.complete( + model="_reason_", + system="Use web_search if you need current information. Be brief.", + messages=[ + { + "role": "user", + "content": ( + "Search for 'Anthropic' and tell me one fact in a sentence." + ), + } + ], + native_web_tools={"web_search"}, + max_tokens=512, + ) + assert response.content + assert len(response.content) > 20 + + @pytest.mark.asyncio + async def test_complete_with_native_fetch(self): + from anton.core.llm.openai import OpenAIProvider + + provider = OpenAIProvider( + api_key=os.environ["MINDS_API_KEY"], + base_url="https://mdb.ai/api/v1", + flavor=OpenAIProvider.FLAVOR_MINDS_PASSTHROUGH, + supports_vision=False, + ) + response = await provider.complete( + model="_reason_", + system="Use the fetch tool. 
Be brief.", + messages=[ + { + "role": "user", + "content": "Fetch https://example.com and tell me what's there.", + } + ], + native_web_tools={"web_fetch"}, + max_tokens=512, + ) + assert response.content + assert "example" in response.content.lower() + + @pytest.mark.asyncio + async def test_complete_without_web_tools_still_works(self): + """Mind-passthrough chat.completions without web tools — sanity + check that our flavor flag doesn't break the baseline chat call.""" + from anton.core.llm.openai import OpenAIProvider + + provider = OpenAIProvider( + api_key=os.environ["MINDS_API_KEY"], + base_url="https://mdb.ai/api/v1", + flavor=OpenAIProvider.FLAVOR_MINDS_PASSTHROUGH, + supports_vision=False, + ) + response = await provider.complete( + model="_reason_", + system="Reply with exactly: pong", + messages=[{"role": "user", "content": "ping"}], + max_tokens=16, + ) + assert "pong" in response.content.lower() + + +# ───────────────────────────────────────────────────────────────────────────── +# Case 3 — generic OpenAI-compatible fallback: Exa.ai & Brave Search +# ───────────────────────────────────────────────────────────────────────────── +# +# The Exa/Brave adapters live in ``anton/core/tools/web_tools.py``. Mocked +# tests in ``test_web_tools.py`` already cover the request shape going out; +# these confirm the live endpoints accept our auth + payload + still return +# the response shape we parse. They also implicitly validate the setup probe +# in ``cli._setup_exa`` / ``_setup_brave``, which uses the same auth + +# endpoint pair. 
+ + +def _settings_with(**fields): + """Tiny stand-in for AntonSettings — only the attrs the handlers read.""" + from types import SimpleNamespace + + return SimpleNamespace( + external_search_provider=fields.get("external_search_provider"), + exa_api_key=fields.get("exa_api_key"), + brave_api_key=fields.get("brave_api_key"), + ) + + +def _session_with(settings): + from types import SimpleNamespace + + return SimpleNamespace(_settings=settings) + + +@exa_only +class TestExaLive: + """Real calls to Exa.ai's ``/search`` endpoint.""" + + @pytest.mark.asyncio + async def test_search_returns_real_results(self): + """Direct adapter call — the format helper formats real hits.""" + from anton.core.tools.web_tools import _search_exa + + out = await _search_exa( + query="Anthropic Claude", + api_key=os.environ["EXA_API_KEY"], + max_results=3, + ) + + assert "Web search results for: 'Anthropic Claude'" in out + # At least one https:// URL should appear in the formatted output. + assert "https://" in out + # And the markdown numbering means we got real hits, not the "no + # results" branch. + assert "1. **" in out + + @pytest.mark.asyncio + async def test_handler_dispatch_via_session(self): + """The full path the agent actually uses: session settings → + ``handle_web_search_fallback`` → ``_search_exa`` → real network.""" + from anton.core.tools.web_tools import handle_web_search_fallback + + session = _session_with( + _settings_with( + external_search_provider="exa", + exa_api_key=os.environ["EXA_API_KEY"], + ) + ) + out = await handle_web_search_fallback( + session, {"query": "Anthropic Claude", "max_results": 2} + ) + assert "https://" in out + assert "Anthropic Claude" in out # query echoed in the header + + @pytest.mark.asyncio + async def test_setup_probe_endpoint_contract(self): + """The setup probe in ``cli._setup_exa`` posts the same payload to + the same URL with the same auth header. 
This test validates that + contract against the live API — if Exa changes their endpoint or + auth shape, both setup AND runtime would break, and this would + catch it on the next live run.""" + import httpx as _httpx + + # Exact same shape ``cli._setup_exa._test`` uses internally. + resp = await _httpx.AsyncClient(timeout=15.0).post( + "https://api.exa.ai/search", + headers={"Authorization": f"Bearer {os.environ['EXA_API_KEY']}"}, + json={"query": "anton ping", "num_results": 1}, + ) + assert resp.status_code == 200, ( + f"setup probe contract broken: HTTP {resp.status_code} — {resp.text[:200]}" + ) + + +@brave_only +class TestBraveLive: + """Real calls to Brave Search's web endpoint.""" + + @pytest.mark.asyncio + async def test_search_returns_real_results(self): + from anton.core.tools.web_tools import _search_brave + + out = await _search_brave( + query="Anthropic Claude", + api_key=os.environ["BRAVE_API_KEY"], + max_results=3, + ) + + assert "Web search results for: 'Anthropic Claude'" in out + assert "https://" in out + assert "1. 
**" in out + + @pytest.mark.asyncio + async def test_handler_dispatch_via_session(self): + from anton.core.tools.web_tools import handle_web_search_fallback + + session = _session_with( + _settings_with( + external_search_provider="brave", + brave_api_key=os.environ["BRAVE_API_KEY"], + ) + ) + out = await handle_web_search_fallback( + session, {"query": "Anthropic Claude", "max_results": 2} + ) + assert "https://" in out + assert "Anthropic Claude" in out + + @pytest.mark.asyncio + async def test_setup_probe_endpoint_contract(self): + """Mirror of the Exa probe-contract test for Brave (matches + ``cli._setup_brave._test``).""" + import httpx as _httpx + + resp = await _httpx.AsyncClient(timeout=15.0).get( + "https://api.search.brave.com/res/v1/web/search", + headers={ + "X-Subscription-Token": os.environ["BRAVE_API_KEY"], + "Accept": "application/json", + }, + params={"q": "anton ping", "count": 1}, + ) + assert resp.status_code == 200, ( + f"setup probe contract broken: HTTP {resp.status_code} — {resp.text[:200]}" + ) + + +class TestWebFetchLive: + """Real ``handle_web_fetch_fallback`` against a stable known URL. + + No API key needed — fetch is the always-on Case 3 capability. ``example.com`` + is operated by IANA and has a stable, well-formed signature page (``Example + Domain`` heading) which makes this assertion stable enough to live in CI. + """ + + @pytest.mark.asyncio + async def test_fetches_example_dot_com(self): + from anton.core.tools.web_tools import handle_web_fetch_fallback + + out = await handle_web_fetch_fallback( + None, {"url": "https://example.com", "max_chars": 5000} + ) + # The header line includes status + byte count. + assert "HTTP 200" in out + # Signature text from the canonical example.com page. + assert "Example Domain" in out + # Confirms the HTML stripper actually ran (the live page has + # // tags that should not survive in our output). 
+ assert " Date: Wed, 6 May 2026 16:06:33 -0700 Subject: [PATCH 5/7] search setup UX improvements --- anton/cli.py | 122 +++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 89 insertions(+), 33 deletions(-) diff --git a/anton/cli.py b/anton/cli.py index a09f1a04..ee0b599c 100644 --- a/anton/cli.py +++ b/anton/cli.py @@ -1115,6 +1115,49 @@ def _looks_like_mdb_ai(base_url: str, settings) -> bool: return base == minds or base == f"{minds}/api/v1" +def _current_search_label(settings) -> str: + """Human-readable summary of the currently-configured search provider. + + Returns ``"none"`` if nothing is set, otherwise the provider name plus a + masked tail of the stored key so the user can recognize which key is + active without exposing it. + """ + provider = (getattr(settings, "external_search_provider", None) or "").lower() + if not provider: + return "none" + if provider == "exa": + key = getattr(settings, "exa_api_key", None) or "" + label = "Exa.ai" + elif provider == "brave": + key = getattr(settings, "brave_api_key", None) or "" + label = "Brave Search" + else: + return provider + if len(key) >= 4: + return f"{label} (key: ****{key[-4:]})" + return label + + +def _skip_search_provider(settings, ws) -> None: + """Disable ``web_search``. If a provider was configured, confirm first + so a stray keystroke can't silently wipe a working setup.""" + if settings.external_search_provider: + current = _current_search_label(settings) + confirm = _setup_prompt( + f"Disable web_search and clear current config ({current})? 
[y/N]", + default="N", + ).strip().lower() + if confirm not in ("y", "yes"): + console.print(" [anton.muted]Keeping current search provider.[/]") + return + settings.external_search_provider = None + ws.set_secret("ANTON_EXTERNAL_SEARCH_PROVIDER", "") + console.print( + " [anton.muted]web_search will be unavailable until you run " + "[bold]anton setup-search[/].[/]" + ) + + def _setup_search_provider(settings, ws) -> None: """Configure an external search provider (Exa.ai or Brave Search). @@ -1126,6 +1169,10 @@ def _setup_search_provider(settings, ws) -> None: """ console.print() console.print("[anton.cyan]Search provider[/]") + console.print( + f" [anton.muted]Currently:[/] {_current_search_label(settings)}" + ) + console.print() console.print( " [bold]1[/] [link=https://exa.ai][anton.cyan]Exa.ai[/][/link] " "[anton.muted]AI-native semantic search[/]" @@ -1134,24 +1181,20 @@ def _setup_search_provider(settings, ws) -> None: " [bold]2[/] [link=https://brave.com/search/api][anton.cyan]Brave Search[/][/link] " "[anton.muted]privacy-focused web search[/]" ) - console.print(" [bold]3[/] [anton.muted]Skip — disable web_search for now[/]") + console.print(" [bold]3[/] [anton.muted]Skip — disable web_search[/]") console.print() - from rich.prompt import Prompt - choice = Prompt.ask( - " Choose", - choices=["1", "2", "3"], - default="1", - console=console, - ) + # ``_setup_prompt`` (prompt_toolkit) gives us ESC-to-go-back support and + # matches every other ``_setup_*`` helper in this file. Loop on invalid + # input — the underlying prompt has no built-in choice validation. 
+ while True: + choice = _setup_prompt("Choose [1/2/3]", default="1").strip() + if choice in ("1", "2", "3"): + break + console.print(" [anton.warning]Please enter 1, 2, or 3.[/]") if choice == "3": - settings.external_search_provider = None - ws.set_secret("ANTON_EXTERNAL_SEARCH_PROVIDER", "") - console.print( - " [anton.muted]web_search will be unavailable until you run " - "[bold]anton setup-search[/].[/]" - ) + _skip_search_provider(settings, ws) return if choice == "1": @@ -1197,14 +1240,14 @@ def _test(): _validate_with_spinner(console, "Exa.ai", _test) except PermissionError as exc: console.print(f" [anton.error]{exc}[/]") - _handle_search_retry(settings, ws) + _handle_search_retry(settings, ws, last_provider="exa") return except Exception as exc: if _is_transient_error(exc): console.print(" [anton.warning]Search service is temporarily overloaded.[/]") else: console.print(f" [anton.error]Failed:[/] {exc}") - _handle_search_retry(settings, ws) + _handle_search_retry(settings, ws, last_provider="exa") return settings.external_search_provider = "exa" @@ -1253,14 +1296,14 @@ def _test(): _validate_with_spinner(console, "Brave Search", _test) except PermissionError as exc: console.print(f" [anton.error]{exc}[/]") - _handle_search_retry(settings, ws) + _handle_search_retry(settings, ws, last_provider="brave") return except Exception as exc: if _is_transient_error(exc): console.print(" [anton.warning]Search service is temporarily overloaded.[/]") else: console.print(f" [anton.error]Failed:[/] {exc}") - _handle_search_retry(settings, ws) + _handle_search_retry(settings, ws, last_provider="brave") return settings.external_search_provider = "brave" @@ -1270,23 +1313,36 @@ def _test(): console.print(" [anton.success]Brave Search configured.[/]") -def _handle_search_retry(settings, ws) -> None: - """Retry / switch / skip after a search-provider validation failure.""" - from rich.prompt import Prompt - choice = Prompt.ask( - " Retry, switch provider, or skip?", - 
choices=["retry", "switch", "skip", "r", "s", "k"], - default="retry", - console=console, - ) - if choice in ("retry", "r"): - _setup_search_provider(settings, ws) - elif choice in ("switch", "s"): - # Re-show the picker so the user can pick the other provider. +def _handle_search_retry(settings, ws, *, last_provider: str) -> None: + """Retry the same provider, switch to the other, or skip web_search. + + ``last_provider`` is the provider whose probe just failed (``"exa"`` or + ``"brave"``). ``retry`` re-enters that same helper so the user can fix a + typo without re-picking from the menu; ``switch`` re-shows the picker so + they can try the other provider; ``skip`` clears the config (with the + standard confirm if a previous provider was set). + """ + other = "Brave Search" if last_provider == "exa" else "Exa.ai" + while True: + choice = _setup_prompt( + f"Retry, switch to {other}, or skip? [r/s/k]", + default="r", + ).strip().lower() + if choice in ("r", "retry", "s", "switch", "k", "skip"): + break + console.print(" [anton.warning]Please enter r, s, or k.[/]") + + if choice in ("r", "retry"): + # Jump back into the same provider's helper — no menu detour. + if last_provider == "exa": + _setup_exa(settings, ws) + else: + _setup_brave(settings, ws) + elif choice in ("s", "switch"): + # Show the picker so the user can pick the other provider. 
_setup_search_provider(settings, ws) else: - settings.external_search_provider = None - ws.set_secret("ANTON_EXTERNAL_SEARCH_PROVIDER", "") + _skip_search_provider(settings, ws) @app.command("setup") From 46e6052f7986173e8902e55b63ce4fc74ee1936b Mon Sep 17 00:00:00 2001 From: Max Abouchar Date: Wed, 6 May 2026 16:06:54 -0700 Subject: [PATCH 6/7] tests for setup ux --- tests/test_openai_setup.py | 57 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/tests/test_openai_setup.py b/tests/test_openai_setup.py index f6d27331..8c72bc72 100644 --- a/tests/test_openai_setup.py +++ b/tests/test_openai_setup.py @@ -192,3 +192,60 @@ def test_non_azure_endpoint_with_api_version_uses_standard_client(self, monkeypa assert not azure_called assert settings.openai_api_version == "2025-01" + + +# ───────────────────────────────────────────────────────────────────────────── +# Search provider helpers — pure-function corners worth pinning +# ───────────────────────────────────────────────────────────────────────────── + + +class TestCurrentSearchLabel: + """Locks in the masked format used by the ``Currently:`` line in + ``_setup_search_provider`` — a regression here would silently leak a + different number of key characters into the chat output. 
+ """ + + def test_none_when_unconfigured(self): + from anton.cli import _current_search_label + from types import SimpleNamespace + + s = SimpleNamespace(external_search_provider=None, exa_api_key=None, brave_api_key=None) + assert _current_search_label(s) == "none" + + def test_exa_with_full_key_masks_to_last_four(self): + from anton.cli import _current_search_label + from types import SimpleNamespace + + s = SimpleNamespace( + external_search_provider="exa", + exa_api_key="abcd-1234-wxyz", + brave_api_key=None, + ) + assert _current_search_label(s) == "Exa.ai (key: ****wxyz)" + + def test_brave_with_full_key_masks_to_last_four(self): + from anton.cli import _current_search_label + from types import SimpleNamespace + + s = SimpleNamespace( + external_search_provider="brave", + brave_api_key="brv-key-9876", + exa_api_key=None, + ) + assert _current_search_label(s) == "Brave Search (key: ****9876)" + + def test_short_key_omits_the_mask_to_avoid_revealing_length(self): + from anton.cli import _current_search_label + from types import SimpleNamespace + + s = SimpleNamespace(external_search_provider="exa", exa_api_key="ab", brave_api_key=None) + assert _current_search_label(s) == "Exa.ai" + + def test_unknown_provider_falls_back_to_raw_value(self): + from anton.cli import _current_search_label + from types import SimpleNamespace + + s = SimpleNamespace( + external_search_provider="serper", exa_api_key=None, brave_api_key=None + ) + assert _current_search_label(s) == "serper" From 6001003a7df3dbaac8943c7ced6d7fb524b3c6b5 Mon Sep 17 00:00:00 2001 From: Max Abouchar Date: Wed, 6 May 2026 16:23:06 -0700 Subject: [PATCH 7/7] use formatted URLs in web search tests --- tests/test_web_tools.py | 9 +++++++-- tests/test_web_tools_live.py | 21 +++++++++++++++++---- 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/tests/test_web_tools.py b/tests/test_web_tools.py index ade876ef..08d8585c 100644 --- a/tests/test_web_tools.py +++ b/tests/test_web_tools.py @@ -94,9 
+94,14 @@ async def _post(self, url, json=None, headers=None): assert captured["headers"]["Authorization"] == "Bearer exa-key-xyz" assert captured["json"]["query"] == "what is anton" assert captured["json"]["num_results"] == 2 - # Output is markdown-ish with both results + # Output is markdown-ish with both results. Assert the URL appears as + # an exact formatted line (" <url>") rather than via substring `in` + # — the latter would also pass for "https://a.example.evil.com" and + # CodeQL's incomplete-URL-substring-sanitization rule (correctly) + # warns on that pattern even in tests. + out_lines = out.splitlines() assert "Result A" in out - assert "https://a.example" in out + assert " https://a.example" in out_lines assert "Result B" in out async def test_exa_non_200_response_returns_error_string(self): diff --git a/tests/test_web_tools_live.py b/tests/test_web_tools_live.py index 12761aca..78212fbb 100644 --- a/tests/test_web_tools_live.py +++ b/tests/test_web_tools_live.py @@ -49,6 +49,19 @@ def _have(key: str) -> bool: return bool(os.environ.get(key)) +def _has_https_url_line(text: str) -> bool: + """Return True if any line of ``text`` is a formatted URL row. + + The web_search formatter emits URLs on their own indented line — see + ``anton.core.tools.web_tools._search_exa`` / ``_search_brave``. Asserting + against an exact line beginning is both stricter than ``"https://" in out`` + (which would also pass for ``"foo https://x evil"``) and avoids tripping + CodeQL's ``py/incomplete-url-substring-sanitization`` rule, which + correctly flags the substring pattern even in test contexts. 
+ """ + return any(line.lstrip().startswith("https://") for line in text.splitlines()) + + anthropic_only = pytest.mark.skipif( not _have("ANTHROPIC_API_KEY"), reason="ANTHROPIC_API_KEY not set — live test skipped", @@ -488,7 +501,7 @@ async def test_search_returns_real_results(self): assert "Web search results for: 'Anthropic Claude'" in out # At least one https:// URL should appear in the formatted output. - assert "https://" in out + assert _has_https_url_line(out) # And the markdown numbering means we got real hits, not the "no # results" branch. assert "1. **" in out @@ -508,7 +521,7 @@ async def test_handler_dispatch_via_session(self): out = await handle_web_search_fallback( session, {"query": "Anthropic Claude", "max_results": 2} ) - assert "https://" in out + assert _has_https_url_line(out) assert "Anthropic Claude" in out # query echoed in the header @pytest.mark.asyncio @@ -546,7 +559,7 @@ async def test_search_returns_real_results(self): ) assert "Web search results for: 'Anthropic Claude'" in out - assert "https://" in out + assert _has_https_url_line(out) assert "1. **" in out @pytest.mark.asyncio @@ -562,7 +575,7 @@ async def test_handler_dispatch_via_session(self): out = await handle_web_search_fallback( session, {"query": "Anthropic Claude", "max_results": 2} ) - assert "https://" in out + assert _has_https_url_line(out) assert "Anthropic Claude" in out @pytest.mark.asyncio