From af4856826f07e7a25ad702ff11da38dd2a5f1569 Mon Sep 17 00:00:00 2001
From: Max Abouchar <maxabouchar@gmail.com>
Date: Wed, 6 May 2026 15:59:52 -0700
Subject: [PATCH 1/7] ignore agent files

---
 .gitignore | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/.gitignore b/.gitignore
index fc7b86e0..e3c6cf75 100644
--- a/.gitignore
+++ b/.gitignore
@@ -201,6 +201,11 @@ cython_debug/
 .cursorignore
 .cursorindexingignore
 
+# personal AI agents
+CLAUDE.md
+.claude/*
+AGENTS.md
+
 # Marimo
 marimo/_static/
 marimo/_lsp/

From daaa5e5bd518a757add1358614af0ade8d0990be Mon Sep 17 00:00:00 2001
From: Max Abouchar <maxabouchar@gmail.com>
Date: Wed, 6 May 2026 16:00:39 -0700
Subject: [PATCH 2/7] add native ability to do web search

---
 anton/chat.py                 |   3 +
 anton/chat_session.py         |   3 +
 anton/cli.py                  | 227 ++++++++++++++
 anton/config/settings.py      |  14 +
 anton/core/llm/anthropic.py   |  56 +++-
 anton/core/llm/client.py      |  37 +++
 anton/core/llm/openai.py      | 541 +++++++++++++++++++++++++++++++++-
 anton/core/llm/provider.py    |  19 ++
 anton/core/session.py         |  44 +++
 anton/core/tools/web_tools.py | 290 ++++++++++++++++++
 10 files changed, 1228 insertions(+), 6 deletions(-)
 create mode 100644 anton/core/tools/web_tools.py

diff --git a/anton/chat.py b/anton/chat.py
index e981000f..30c1d928 100644
--- a/anton/chat.py
+++ b/anton/chat.py
@@ -1115,6 +1115,7 @@ async def _chat_loop(
     session = ChatSession(ChatSessionConfig(
         llm_client=state["llm_client"],
         runtime_factory=get_runtime_factory(settings),
+        settings=settings,
         self_awareness=self_awareness,
         cortex=cortex,
         episodic=episodic,
@@ -1128,6 +1129,8 @@ async def _chat_loop(
         session_id=current_session_id,
         proactive_dashboards=settings.proactive_dashboards,
         tools=[CONNECT_DATASOURCE_TOOL, PUBLISH_TOOL],
+        web_search_enabled=settings.web_search_enabled,
+        web_fetch_enabled=settings.web_fetch_enabled,
     ))
 
     # Handle --resume flag at startup
diff --git a/anton/chat_session.py b/anton/chat_session.py
index c7daf543..e471cde4 100644
--- a/anton/chat_session.py
+++ b/anton/chat_session.py
@@ -105,6 +105,7 @@ def rebuild_session(
     return ChatSession(ChatSessionConfig(
         llm_client=state["llm_client"],
         runtime_factory=get_runtime_factory(settings),
+        settings=settings,
         self_awareness=self_awareness,
         cortex=cortex,
         episodic=episodic,
@@ -117,4 +118,6 @@ def rebuild_session(
         history_store=history_store,
         session_id=session_id,
         proactive_dashboards=settings.proactive_dashboards,
+        web_search_enabled=settings.web_search_enabled,
+        web_fetch_enabled=settings.web_fetch_enabled,
     ))
diff --git a/anton/cli.py b/anton/cli.py
index 74b9cfc2..a09f1a04 100644
--- a/anton/cli.py
+++ b/anton/cli.py
@@ -1087,6 +1087,207 @@ def _test():
     ws.set_secret("ANTON_PLANNING_MODEL", model)
     ws.set_secret("ANTON_CODING_MODEL", model)
 
+    # The custom endpoint is generic openai-compatible (i.e. NOT mdb.ai
+    # passthrough), so the LLM provider doesn't expose web_search natively.
+    # Offer to configure Exa or Brave so the agent has search available.
+    # Skip the prompt in non-interactive contexts (tests, CI) — the user can
+    # always run ``anton setup-search`` later.
+    if not _looks_like_mdb_ai(base_url, settings) and sys.stdout.isatty():
+        console.print()
+        console.print(
+            "  [anton.muted]Web search needs an external provider on this endpoint. "
+            "You can configure one now or run [bold]anton setup-search[/] later.[/]"
+        )
+        try:
+            _setup_search_provider(settings, ws)
+        except _SetupRetry:
+            # User pressed ESC out of the search-provider step — that's fine,
+            # the LLM is already configured. They can rerun `anton setup-search`.
+            pass
+
+
+def _looks_like_mdb_ai(base_url: str, settings) -> bool:
+    """Return True if ``base_url`` is ``settings.minds_url`` or its ``/api/v1`` path."""
+    base = (base_url or "").rstrip("/").lower()  # None-safe; ignore case and trailing "/"
+    minds = (getattr(settings, "minds_url", None) or "").rstrip("/").lower()
+    if not minds:  # no minds_url configured, so nothing can match
+        return False
+    return base == minds or base == f"{minds}/api/v1"
+
+
+def _setup_search_provider(settings, ws) -> None:
+    """Prompt for an external search provider (Exa.ai, Brave Search, or skip).
+
+    Used by Case 3 in the web-tools design (generic OpenAI-compatible endpoints
+    that don't have a native ``web_search`` capability). Option 3 clears the
+    provider on ``settings`` and stores an empty ANTON_EXTERNAL_SEARCH_PROVIDER
+    secret via ``ws``; options 1/2 delegate to ``_setup_exa``/``_setup_brave``,
+    which probe the key before saving it with ``ws.set_secret``.
+    """
+    console.print()
+    console.print("[anton.cyan]Search provider[/]")
+    console.print(
+        "  [bold]1[/]  [link=https://exa.ai][anton.cyan]Exa.ai[/][/link] "
+        "[anton.muted]AI-native semantic search[/]"
+    )
+    console.print(
+        "  [bold]2[/]  [link=https://brave.com/search/api][anton.cyan]Brave Search[/][/link] "
+        "[anton.muted]privacy-focused web search[/]"
+    )
+    console.print("  [bold]3[/]  [anton.muted]Skip — disable web_search for now[/]")
+    console.print()
+
+    from rich.prompt import Prompt
+    choice = Prompt.ask(
+        "  Choose",
+        choices=["1", "2", "3"],
+        default="1",
+        console=console,
+    )
+
+    if choice == "3":  # skip: record "no provider" both in memory and in the secret store
+        settings.external_search_provider = None
+        ws.set_secret("ANTON_EXTERNAL_SEARCH_PROVIDER", "")
+        console.print(
+            "  [anton.muted]web_search will be unavailable until you run "
+            "[bold]anton setup-search[/].[/]"
+        )
+        return
+
+    if choice == "1":
+        _setup_exa(settings, ws)
+    else:  # only "2" can reach here; Prompt.ask is restricted to the three choices
+        _setup_brave(settings, ws)
+
+
+def _setup_exa(settings, ws) -> None:
+    """Prompt for an Exa.ai API key, probe it, and store it via ``ws`` on success."""
+    console.print()
+    console.print(
+        "  [anton.muted]Get an API key at "
+        "[link=https://dashboard.exa.ai/api-keys]"
+        "[anton.cyan]dashboard.exa.ai/api-keys[/][/link][/]"
+    )
+    console.print()
+
+    while True:  # re-prompt until a non-blank key is entered
+        api_key = _setup_prompt("Exa API key", is_password=True)
+        if api_key.strip():
+            break
+        console.print("  [anton.warning]Please enter your API key.[/]")
+    api_key = api_key.strip()
+
+    try:
+
+        def _test():
+            # Sync httpx call — _validate_with_spinner runs us inside a Live.
+            import httpx as _httpx
+
+            resp = _httpx.post(
+                "https://api.exa.ai/search",
+                headers={"Authorization": f"Bearer {api_key}"},
+                json={"query": "anton ping", "numResults": 1},  # REST field is camelCase
+                timeout=15.0,
+            )
+            if resp.status_code in (401, 403):
+                raise PermissionError("Authentication failed. Check your API key.")
+            if resp.status_code >= 400:
+                raise RuntimeError(f"HTTP {resp.status_code}: {resp.text[:200]}")
+
+        _validate_with_spinner(console, "Exa.ai", _test)
+    except PermissionError as exc:  # bad key: show the message, then offer retry/switch/skip
+        console.print(f"  [anton.error]{exc}[/]")
+        _handle_search_retry(settings, ws)
+        return
+    except Exception as exc:  # any other probe failure (HTTP >= 400, network, ...)
+        if _is_transient_error(exc):
+            console.print("  [anton.warning]Search service is temporarily overloaded.[/]")
+        else:
+            console.print(f"  [anton.error]Failed:[/] {exc}")
+        _handle_search_retry(settings, ws)
+        return
+
+    settings.external_search_provider = "exa"  # reached only when the probe did not raise
+    settings.exa_api_key = api_key
+    ws.set_secret("ANTON_EXTERNAL_SEARCH_PROVIDER", "exa")
+    ws.set_secret("ANTON_EXA_API_KEY", api_key)
+    console.print("  [anton.success]Exa.ai configured.[/]")
+
+
+def _setup_brave(settings, ws) -> None:
+    """Prompt for a Brave Search API key, probe it, and store it via ``ws`` on success."""
+    console.print()
+    console.print(
+        "  [anton.muted]Get an API key at "
+        "[link=https://api.search.brave.com/app/keys]"
+        "[anton.cyan]api.search.brave.com/app/keys[/][/link][/]"
+    )
+    console.print()
+
+    while True:  # re-prompt until a non-blank key is entered
+        api_key = _setup_prompt("Brave Search API key", is_password=True)
+        if api_key.strip():
+            break
+        console.print("  [anton.warning]Please enter your API key.[/]")
+    api_key = api_key.strip()
+
+    try:
+
+        def _test():
+            import httpx as _httpx
+
+            resp = _httpx.get(
+                "https://api.search.brave.com/res/v1/web/search",
+                headers={
+                    "X-Subscription-Token": api_key,
+                    "Accept": "application/json",
+                },
+                params={"q": "anton ping", "count": 1},
+                timeout=15.0,
+            )
+            if resp.status_code in (401, 403):
+                raise PermissionError("Authentication failed. Check your API key.")
+            if resp.status_code >= 400:
+                raise RuntimeError(f"HTTP {resp.status_code}: {resp.text[:200]}")
+
+        _validate_with_spinner(console, "Brave Search", _test)
+    except PermissionError as exc:  # bad key: show the message, then offer retry/switch/skip
+        console.print(f"  [anton.error]{exc}[/]")
+        _handle_search_retry(settings, ws)
+        return
+    except Exception as exc:  # any other probe failure (HTTP >= 400, network, ...)
+        if _is_transient_error(exc):
+            console.print("  [anton.warning]Search service is temporarily overloaded.[/]")
+        else:
+            console.print(f"  [anton.error]Failed:[/] {exc}")
+        _handle_search_retry(settings, ws)
+        return
+
+    settings.external_search_provider = "brave"  # reached only when the probe did not raise
+    settings.brave_api_key = api_key
+    ws.set_secret("ANTON_EXTERNAL_SEARCH_PROVIDER", "brave")
+    ws.set_secret("ANTON_BRAVE_API_KEY", api_key)
+    console.print("  [anton.success]Brave Search configured.[/]")
+
+
+def _handle_search_retry(settings, ws) -> None:
+    """After a failed probe: re-show the provider picker (retry/switch) or disable search (skip)."""
+    from rich.prompt import Prompt
+    choice = Prompt.ask(
+        "  Retry, switch provider, or skip?",
+        choices=["retry", "switch", "skip", "r", "s", "k"],
+        default="retry",
+        console=console,
+    )
+    if choice in ("retry", "r"):  # same action as "switch": the picker is shown again
+        _setup_search_provider(settings, ws)
+    elif choice in ("switch", "s"):
+        # Re-show the picker so the user can pick the other provider.
+        _setup_search_provider(settings, ws)
+    else:  # "skip" / "k": record that no external provider is configured
+        settings.external_search_provider = None
+        ws.set_secret("ANTON_EXTERNAL_SEARCH_PROVIDER", "")
+
 
 @app.command("setup")
 def setup(ctx: typer.Context) -> None:
@@ -1097,6 +1298,32 @@ def setup(ctx: typer.Context) -> None:
     console.print("[anton.success]Setup complete.[/]")
 
 
+@app.command("setup-search")
+def setup_search(ctx: typer.Context) -> None:
+    """CLI command: configure an external search provider (Exa.ai or Brave).
+
+    Only needed when the active LLM endpoint is a generic OpenAI-compatible
+    third-party (i.e. NOT Anthropic, OpenAI BYOK, or the mdb.ai passthrough —
+    those expose web_search natively on the LLM provider's key). Secrets are
+    written through a ``Workspace`` rooted at the home directory (presumably
+    ``~/.anton/.env`` — confirm in ``Workspace``), then applied to the process.
+    """
+    from pathlib import Path
+    from anton.workspace import Workspace
+
+    settings = _get_settings(ctx)
+    _ensure_workspace(settings)
+    # Keys go to a home-directory Workspace rather than the project workspace.
+    global_ws = Workspace(Path.home())
+    try:
+        _setup_search_provider(settings, global_ws)
+    except _SetupRetry:  # raised when the user backs out of a prompt (see ESC note in setup flow)
+        console.print("  [anton.muted]Cancelled.[/]")
+        return
+    global_ws.apply_env_to_process()
+    console.print("[anton.success]Search provider setup complete.[/]")
+
+
 @app.command("dashboard")
 def dashboard() -> None:
     """Show the Anton status dashboard."""
diff --git a/anton/config/settings.py b/anton/config/settings.py
index acc8130b..fcfb66f9 100644
--- a/anton/config/settings.py
+++ b/anton/config/settings.py
@@ -37,6 +37,20 @@ class AntonSettings(CoreSettings):
     openai_base_url: str | None = None
     openai_api_version: str | None = None  # Azure api-version query param
 
+    # Web tools — on by default. For LLM providers that ship native server-side
+    # web search/fetch (Anthropic, OpenAI, mdb.ai passthrough), the tools execute
+    # inside the provider on the user's existing key. For generic
+    # openai-compatible endpoints, web_search needs an external provider key
+    # (Exa or Brave); web_fetch always falls back to stdlib HTTP.
+    web_search_enabled: bool = True
+    web_fetch_enabled: bool = True
+
+    # Case 3 fallback — only consulted when the LLM provider lacks native web
+    # search and the user is on a generic OpenAI-compatible endpoint.
+    external_search_provider: str | None = None  # "exa" | "brave" | None
+    exa_api_key: str | None = None
+    brave_api_key: str | None = None
+
     memory_enabled: bool = True
     memory_dir: str = ".anton"
 
diff --git a/anton/core/llm/anthropic.py b/anton/core/llm/anthropic.py
index 6c6114d8..a80e41d4 100644
--- a/anton/core/llm/anthropic.py
+++ b/anton/core/llm/anthropic.py
@@ -21,10 +21,45 @@
     compute_context_pressure,
 )
 
+# Native server-side web tool type strings exposed by the Anthropic Messages API.
+# The model invokes these inside the provider — Anton's tool-dispatch loop never
+# sees a tool_use for them; the model's final text content already incorporates
+# the search/fetch results. Bump these constants when newer revisions ship.
+ANTHROPIC_WEB_SEARCH_TOOL_TYPE = "web_search_20250305"
+ANTHROPIC_WEB_FETCH_TOOL_TYPE = "web_fetch_20250910"
+# web_fetch is gated behind a beta header; web_search is GA and needs no header.
+ANTHROPIC_WEB_FETCH_BETA_HEADER = "web-fetch-2025-09-10"
+
+
+def _build_native_web_tools(
+    native_web_tools: set[str] | None,
+) -> tuple[list[dict], list[str]]:
+    """Map requested web-tool names to Anthropic server-tool entries.
+
+    Returns ``(tool_entries, beta_headers)``: entries for the Messages API
+    ``tools`` array plus the ``anthropic-beta`` values they require. ``None``
+    or an empty set yields ``([], [])``; names other than the two are ignored.
+    """
+    if not native_web_tools:
+        return [], []
+    entries: list[dict] = []
+    beta: list[str] = []
+    if "web_search" in native_web_tools:  # no beta header is added for search
+        entries.append({"type": ANTHROPIC_WEB_SEARCH_TOOL_TYPE, "name": "web_search"})
+    if "web_fetch" in native_web_tools:  # fetch also needs its beta header
+        entries.append({"type": ANTHROPIC_WEB_FETCH_TOOL_TYPE, "name": "web_fetch"})
+        beta.append(ANTHROPIC_WEB_FETCH_BETA_HEADER)
+    return entries, beta
+
 
 class AnthropicProvider(LLMProvider):
     name: str = "anthropic"
 
+    def native_web_tools(self) -> set[str]:
+        # Names of the web tools this provider can run server-side. Always both:
+        # the Messages API ships web_search and web_fetch as server tools.
+        return {"web_search", "web_fetch"}
+
     def __init__(self, api_key: str | None = None) -> None:
         self._api_key = api_key
         kwargs = {}
@@ -44,17 +79,24 @@ async def complete(
         tools: list[dict] | None = None,
         tool_choice: dict | None = None,
         max_tokens: int = 4096,
+        native_web_tools: set[str] | None = None,
     ) -> LLMResponse:
+        web_entries, beta_headers = _build_native_web_tools(native_web_tools)
+        merged_tools = list(tools or []) + web_entries
+
         kwargs: dict = {
             "model": model,
             "max_tokens": max_tokens,
             "system": system,
             "messages": messages,
         }
-        if tools:
-            kwargs["tools"] = tools
+        if merged_tools:
+            kwargs["tools"] = merged_tools
         if tool_choice:
             kwargs["tool_choice"] = tool_choice
+        if beta_headers:
+            # Anthropic accepts a comma-separated list of beta features.
+            kwargs["extra_headers"] = {"anthropic-beta": ",".join(beta_headers)}
 
         try:
             response = await self._client.messages.create(**kwargs)
@@ -113,15 +155,21 @@ async def stream(
         messages: list[dict],
         tools: list[dict] | None = None,
         max_tokens: int = 4096,
+        native_web_tools: set[str] | None = None,
     ) -> AsyncIterator[StreamEvent]:
+        web_entries, beta_headers = _build_native_web_tools(native_web_tools)
+        merged_tools = list(tools or []) + web_entries
+
         kwargs: dict = {
             "model": model,
             "max_tokens": max_tokens,
             "system": system,
             "messages": messages,
         }
-        if tools:
-            kwargs["tools"] = tools
+        if merged_tools:
+            kwargs["tools"] = merged_tools
+        if beta_headers:
+            kwargs["extra_headers"] = {"anthropic-beta": ",".join(beta_headers)}
 
         content_text = ""
         tool_calls: list[ToolCall] = []
diff --git a/anton/core/llm/client.py b/anton/core/llm/client.py
index 92a50759..9d9ffb11 100644
--- a/anton/core/llm/client.py
+++ b/anton/core/llm/client.py
@@ -9,6 +9,29 @@
     from anton.config.settings import AntonSettings
 
 
+def _resolve_openai_compatible_flavor(settings: AntonSettings) -> str:
+    """Distinguish mdb.ai passthrough from a generic openai-compatible endpoint.
+
+    The "Minds-Enterprise-Cloud" setup path writes ``openai_base_url =
+    f"{minds_url.rstrip('/')}/api/v1"`` (see ``AntonSettings.model_post_init``
+    — not visible here, verify). Only URLs are compared, ignoring case and
+    trailing slashes: a base URL equal to ``minds_url`` or its ``/api/v1``
+    path yields ``FLAVOR_MINDS_PASSTHROUGH``. Any other base URL, or an unset
+    ``minds_url``, yields ``FLAVOR_OPENAI_COMPATIBLE_GENERIC``, which needs
+    the handler-dispatched fallback at the session layer.
+
+    No new env var is introduced — we infer flavor purely from the existing
+    config the setup flow already produces.
+    """
+    from .openai import OpenAIProvider
+
+    base = (getattr(settings, "openai_base_url", None) or "").rstrip("/").lower()
+    minds = (getattr(settings, "minds_url", None) or "").rstrip("/").lower()
+    if minds and (base == minds or base == f"{minds}/api/v1"):
+        return OpenAIProvider.FLAVOR_MINDS_PASSTHROUGH
+    return OpenAIProvider.FLAVOR_OPENAI_COMPATIBLE_GENERIC
+
+
 class LLMClient:
     def __init__(
         self,
@@ -32,6 +55,7 @@ async def plan(
         messages: list[dict],
         tools: list[dict] | None = None,
         max_tokens: int | None = None,
+        native_web_tools: set[str] | None = None,
     ) -> LLMResponse:
         return await self._planning_provider.complete(
             model=self._planning_model,
@@ -39,6 +63,7 @@ async def plan(
             messages=messages,
             tools=tools,
             max_tokens=max_tokens or self._max_tokens,
+            native_web_tools=native_web_tools,
         )
 
     async def plan_stream(
@@ -48,6 +73,7 @@ async def plan_stream(
         messages: list[dict],
         tools: list[dict] | None = None,
         max_tokens: int | None = None,
+        native_web_tools: set[str] | None = None,
     ) -> AsyncIterator[StreamEvent]:
         async for event in self._planning_provider.stream(
             model=self._planning_model,
@@ -55,9 +81,15 @@ async def plan_stream(
             messages=messages,
             tools=tools,
             max_tokens=max_tokens or self._max_tokens,
+            native_web_tools=native_web_tools,
         ):
             yield event
 
+    @property
+    def planning_provider(self) -> LLMProvider:
+        """The LLM provider that ``plan`` and ``plan_stream`` delegate to."""
+        return self._planning_provider
+
     @property
     def coding_provider(self) -> LLMProvider:
         """The LLM provider used for coding/skill execution."""
@@ -75,6 +107,7 @@ async def code(
         messages: list[dict],
         tools: list[dict] | None = None,
         max_tokens: int | None = None,
+        native_web_tools: set[str] | None = None,
     ) -> LLMResponse:
         return await self._coding_provider.complete(
             model=self._coding_model,
@@ -82,6 +115,7 @@ async def code(
             messages=messages,
             tools=tools,
             max_tokens=max_tokens or self._max_tokens,
+            native_web_tools=native_web_tools,
         )
 
     async def _generate_object_with(
@@ -219,6 +253,7 @@ def from_settings(cls, settings: AntonSettings) -> LLMClient:
         from .openai import OpenAIProvider
 
         api_version = getattr(settings, "openai_api_version", None)
+        compatible_flavor = _resolve_openai_compatible_flavor(settings)
         providers = {
             "anthropic": lambda: AnthropicProvider(api_key=settings.anthropic_api_key),
             "openai": lambda: OpenAIProvider(
@@ -226,6 +261,7 @@ def from_settings(cls, settings: AntonSettings) -> LLMClient:
                 base_url=settings.openai_base_url,
                 ssl_verify=settings.minds_ssl_verify,
                 api_version=api_version,
+                flavor=OpenAIProvider.FLAVOR_OPENAI,
             ),
             "openai-compatible": lambda: OpenAIProvider(
                 api_key=settings.openai_api_key,
@@ -233,6 +269,7 @@ def from_settings(cls, settings: AntonSettings) -> LLMClient:
                 ssl_verify=settings.minds_ssl_verify,
                 api_version=api_version,
                 supports_vision=False,
+                flavor=compatible_flavor,
             ),
         }
 
diff --git a/anton/core/llm/openai.py b/anton/core/llm/openai.py
index af93ec72..bf49ffef 100644
--- a/anton/core/llm/openai.py
+++ b/anton/core/llm/openai.py
@@ -186,6 +186,220 @@ def _is_azure_endpoint(url: str | None) -> bool:
     return host.endswith(".openai.azure.com") or host.endswith(".cognitiveservices.azure.com")
 
 
+# ─────────────────────────────────────────────────────────────────────────────
+# Responses API translation
+#
+# Used only for ``flavor="openai"`` (BYOK direct OpenAI). The Responses API is
+# OpenAI's recommended transport going forward, supports native server-side
+# web_search, and has a different request/response shape than chat.completions
+# (flat function-tool params, ``input``/``instructions`` instead of
+# ``messages``/``system``, ``output`` array instead of ``choices``).
+# ─────────────────────────────────────────────────────────────────────────────
+
+
+def _translate_tools_to_responses(tools: list[dict]) -> list[dict]:
+    """Anthropic tool format -> OpenAI Responses API function-tool format.
+
+    Each tool becomes a flat ``{"type": "function", "name": ..., "description":
+    ..., "parameters": ...}`` dict. ``name`` is required; a missing
+    ``description`` becomes ``""`` and a missing ``input_schema`` becomes ``{}``.
+    """
+    result: list[dict] = []
+    for tool in tools:
+        result.append(
+            {
+                "type": "function",
+                "name": tool["name"],
+                "description": tool.get("description", ""),
+                "parameters": tool.get("input_schema", {}),
+            }
+        )
+    return result
+
+
+def _translate_tool_choice_to_responses(tool_choice: dict) -> dict | str:
+    """Anthropic tool_choice -> Responses API tool_choice ("auto" if unrecognized)."""
+    tc_type = tool_choice.get("type")
+    if tc_type == "tool":  # force one specific function by name
+        return {"type": "function", "name": tool_choice["name"]}
+    if tc_type == "any":  # must call some tool
+        return "required"
+    if tc_type == "none":  # caller forbade tool use; do not degrade this to "auto"
+        return "none"
+    return "auto"
+
+
+def _translate_messages_to_responses_input(
+    messages: list[dict], supports_vision: bool = True
+) -> list[dict]:
+    """Convert Anthropic-style messages to Responses API ``input`` items.
+
+    The Responses API accepts a list of items where:
+
+    - User/assistant text messages → ``{"role": ..., "content": ..., "type": "message"}``
+    - Assistant tool calls → ``{"type": "function_call", "call_id": ..., "name": ..., "arguments": ...}``
+    - Tool results → ``{"type": "function_call_output", "call_id": ..., "output": ...}``
+
+    No system item is emitted here: this function receives only ``messages``,
+    and the system prompt travels in the top-level ``instructions`` parameter.
+    """
+    items: list[dict] = []
+
+    for msg in messages:
+        role = msg["role"]
+        content = msg.get("content")
+
+        # String content maps one-to-one onto a single message item.
+        if isinstance(content, str):
+            items.append({"role": role, "content": content, "type": "message"})
+            continue
+
+        if isinstance(content, list):
+            if role == "assistant":
+                items.extend(_translate_assistant_blocks_to_responses(content))
+            elif role == "user":
+                items.extend(
+                    _translate_user_blocks_to_responses(
+                        content, supports_vision=supports_vision
+                    )
+                )
+            else:
+                # Other roles: keep only text blocks, joined with single spaces.
+                text = " ".join(
+                    b.get("text", "") for b in content if b.get("type") == "text"
+                )
+                items.append({"role": role, "content": text or "", "type": "message"})
+            continue
+
+        items.append(  # neither str nor list: falsy content (e.g. None) becomes ""
+            {"role": role, "content": str(content) if content else "", "type": "message"}
+        )
+
+    return items
+
+
+def _translate_assistant_blocks_to_responses(blocks: list[dict]) -> list[dict]:
+    """Convert assistant content blocks to Responses API input items.
+
+    Text blocks are newline-joined into one assistant message item, which is
+    emitted first; every ``tool_use`` block then becomes a ``function_call``
+    item, whatever the original interleaving. Other block types are ignored.
+    """
+    text_parts: list[str] = []
+    tool_calls: list[dict] = []
+
+    for block in blocks:
+        if block.get("type") == "text":
+            text_parts.append(block["text"])
+        elif block.get("type") == "tool_use":
+            tool_calls.append(
+                {
+                    "type": "function_call",
+                    "call_id": block["id"],
+                    "name": block["name"],
+                    "arguments": json.dumps(block.get("input", {})),
+                }
+            )
+
+    items: list[dict] = []
+    if text_parts:  # no message item at all when the turn had no text
+        items.append(
+            {"role": "assistant", "content": "\n".join(text_parts), "type": "message"}
+        )
+    items.extend(tool_calls)
+    return items
+
+
+def _translate_user_blocks_to_responses(
+    blocks: list[dict], supports_vision: bool = True
+) -> list[dict]:
+    """Convert user blocks: tool_result -> function_call_output; text/image -> user message."""
+    result: list[dict] = []
+    content_parts: list[dict] = []
+
+    for block in blocks:
+        if block.get("type") == "tool_result":
+            # Emit pending text/image parts first so block order is preserved.
+            if content_parts:
+                result.append(_user_message_from_parts(content_parts))
+                content_parts = []
+            tool_content = block.get("content", "")
+            if isinstance(tool_content, list):  # keep only the text sub-blocks
+                tool_content = "\n".join(
+                    b.get("text", "") for b in tool_content if b.get("type") == "text"
+                )
+            result.append(
+                {
+                    "type": "function_call_output",
+                    "call_id": block["tool_use_id"],
+                    "output": str(tool_content),
+                }
+            )
+        elif block.get("type") == "text":
+            content_parts.append({"type": "input_text", "text": block.get("text", "")})
+        elif block.get("type") == "image" and supports_vision:  # images dropped otherwise
+            source = block.get("source", {})
+            if source.get("type") == "base64":  # other source types are skipped
+                media_type = source.get("media_type", "image/png")
+                data = source.get("data", "")
+                content_parts.append(
+                    {
+                        "type": "input_image",
+                        "image_url": f"data:{media_type};base64,{data}",
+                    }
+                )
+
+    if content_parts:  # trailing parts after the last tool_result
+        result.append(_user_message_from_parts(content_parts))
+
+    return result
+
+
+def _user_message_from_parts(parts: list[dict]) -> dict:
+    """Build a Responses API user message from accumulated content parts.
+
+    When every part is ``input_text`` the texts are newline-joined into a plain
+    string; otherwise the ``parts`` list itself is used as the content.
+    """
+    if all(p.get("type") == "input_text" for p in parts):
+        return {
+            "role": "user",
+            "content": "\n".join(p["text"] for p in parts),
+            "type": "message",
+        }
+    return {"role": "user", "content": parts, "type": "message"}
+
+
+def _native_web_entries_for_flavor(
+    flavor: str, native_web_tools: set[str] | None
+) -> list[dict]:
+    """Build the list of native server-tool entries to append to the tools array.
+
+    - ``flavor="openai"`` (Responses API): one ``{"type": "web_search"}`` entry
+      when either search or fetch is requested (web_search covers fetch too).
+    - ``flavor="minds-passthrough"`` (chat.completions): mdb.ai accepts
+      ``{"type": "web_search"}`` and ``{"type": "fetch"}`` directly in the
+      OpenAI-shaped tools array.
+    - Any other flavor (e.g. ``"openai-compatible-generic"``), or an empty or
+      ``None`` ``native_web_tools``: returns ``[]``.
+    """
+    if not native_web_tools:
+        return []
+    if flavor == "openai":
+        # One entry at most, even when both names are requested.
+        if "web_search" in native_web_tools or "web_fetch" in native_web_tools:
+            return [{"type": "web_search"}]
+        return []
+    if flavor == "minds-passthrough":
+        entries: list[dict] = []
+        if "web_search" in native_web_tools:
+            entries.append({"type": "web_search"})
+        if "web_fetch" in native_web_tools:  # note the entry type is "fetch", not "web_fetch"
+            entries.append({"type": "fetch"})
+        return entries
+    return []
+
+
 def build_chat_completion_kwargs(
     *,
     model: str,
@@ -208,6 +422,13 @@ def build_chat_completion_kwargs(
 class OpenAIProvider(LLMProvider):
     name: str = "openai"
 
+    # Three flavors distinguish the transport + native-tool behavior. See
+    # ``_native_web_entries_for_flavor`` for the per-flavor tool injection rules,
+    # and the ``complete``/``stream`` methods for the per-flavor transport split.
+    FLAVOR_OPENAI = "openai"  # Direct OpenAI BYOK — uses Responses API.
+    FLAVOR_MINDS_PASSTHROUGH = "minds-passthrough"  # mdb.ai — chat.completions w/ native tools.
+    FLAVOR_OPENAI_COMPATIBLE_GENERIC = "openai-compatible-generic"  # third-party.
+
     def __init__(
         self,
         api_key: str | None = None,
@@ -215,12 +436,14 @@ def __init__(
         ssl_verify: bool = True,
         api_version: str | None = None,
         supports_vision: bool = True,
+        flavor: str = FLAVOR_OPENAI_COMPATIBLE_GENERIC,
     ) -> None:
         self._api_key = api_key
         self._base_url = base_url
         self._ssl_verify = ssl_verify
         self._api_version = api_version
         self._supports_vision = supports_vision
+        self._flavor = flavor
 
         import httpx
 
@@ -254,6 +477,16 @@ def export_connection_info(self) -> ProviderConnectionInfo:
             api_version=self._api_version,
         )
 
+    def native_web_tools(self) -> set[str]:
+        # BYOK OpenAI exposes web_search via Responses API (which covers fetch
+        # implicitly). The mdb.ai passthrough accepts both web_search and fetch
+        # directly in the chat.completions tools array. Generic OpenAI-compatible
+        # endpoints have no native support — the session falls back to handler
+        # ToolDefs (Exa/Brave for search, direct HTTP GET via httpx for fetch).
+        if self._flavor in (self.FLAVOR_OPENAI, self.FLAVOR_MINDS_PASSTHROUGH):
+            return {"web_search", "web_fetch"}
+        return set()
+
     async def complete(
         self,
         *,
@@ -263,7 +496,19 @@ async def complete(
         tools: list[dict] | None = None,
         tool_choice: dict | None = None,
         max_tokens: int = 4096,
+        native_web_tools: set[str] | None = None,
     ) -> LLMResponse:
+        if self._flavor == self.FLAVOR_OPENAI:
+            return await self._complete_via_responses(
+                model=model,
+                system=system,
+                messages=messages,
+                tools=tools,
+                tool_choice=tool_choice,
+                max_tokens=max_tokens,
+                native_web_tools=native_web_tools,
+            )
+
         oai_messages = _translate_messages(system, messages, supports_vision=self._supports_vision)
 
         kwargs = build_chat_completion_kwargs(
@@ -271,8 +516,14 @@ async def complete(
             messages=oai_messages,
             max_tokens=max_tokens,
         )
+        merged_tools: list[dict] = []
         if tools:
-            kwargs["tools"] = _translate_tools(tools)
+            merged_tools.extend(_translate_tools(tools))
+        # Native server-tool entries (mdb.ai passthrough) are appended *raw* so
+        # they aren't routed through the function-shape translation.
+        merged_tools.extend(_native_web_entries_for_flavor(self._flavor, native_web_tools))
+        if merged_tools:
+            kwargs["tools"] = merged_tools
         if tool_choice:
             kwargs["tool_choice"] = _translate_tool_choice(tool_choice)
 
@@ -341,7 +592,20 @@ async def stream(
         messages: list[dict],
         tools: list[dict] | None = None,
         max_tokens: int = 4096,
+        native_web_tools: set[str] | None = None,
     ) -> AsyncIterator[StreamEvent]:
+        if self._flavor == self.FLAVOR_OPENAI:
+            async for event in self._stream_via_responses(
+                model=model,
+                system=system,
+                messages=messages,
+                tools=tools,
+                max_tokens=max_tokens,
+                native_web_tools=native_web_tools,
+            ):
+                yield event
+            return
+
         oai_messages = _translate_messages(system, messages, supports_vision=self._supports_vision)
 
         kwargs = build_chat_completion_kwargs(
@@ -350,8 +614,12 @@ async def stream(
             max_tokens=max_tokens,
             stream=True,
         )
+        merged_tools: list[dict] = []
         if tools:
-            kwargs["tools"] = _translate_tools(tools)
+            merged_tools.extend(_translate_tools(tools))
+        merged_tools.extend(_native_web_entries_for_flavor(self._flavor, native_web_tools))
+        if merged_tools:
+            kwargs["tools"] = merged_tools
 
         content_text = ""
         tool_calls: list[ToolCall] = []
@@ -461,3 +729,272 @@ async def stream(
                 stop_reason=stop_reason,
             )
         )
+
+    # ─────────────────────────────────────────────────────────────────────
+    # Responses API path — used for ``flavor="openai"`` (BYOK direct OpenAI)
+    # ─────────────────────────────────────────────────────────────────────
+
+    def _build_responses_kwargs(
+        self,
+        *,
+        model: str,
+        system: str,
+        messages: list[dict],
+        tools: list[dict] | None,
+        tool_choice: dict | None,
+        max_tokens: int,
+        native_web_tools: set[str] | None,
+    ) -> dict:
+        """Common Responses API kwargs for both ``complete`` and ``stream``."""
+        responses_input = _translate_messages_to_responses_input(
+            messages, supports_vision=self._supports_vision
+        )
+        kwargs: dict = {
+            "model": model,
+            "input": responses_input,
+            "max_output_tokens": max_tokens,
+        }
+        if system:
+            kwargs["instructions"] = system
+
+        merged_tools: list[dict] = []
+        if tools:
+            merged_tools.extend(_translate_tools_to_responses(tools))
+        merged_tools.extend(_native_web_entries_for_flavor(self._flavor, native_web_tools))
+        if merged_tools:
+            kwargs["tools"] = merged_tools
+        if tool_choice:
+            kwargs["tool_choice"] = _translate_tool_choice_to_responses(tool_choice)
+        return kwargs
+
+    async def _complete_via_responses(
+        self,
+        *,
+        model: str,
+        system: str,
+        messages: list[dict],
+        tools: list[dict] | None,
+        tool_choice: dict | None,
+        max_tokens: int,
+        native_web_tools: set[str] | None,
+    ) -> LLMResponse:
+        kwargs = self._build_responses_kwargs(
+            model=model,
+            system=system,
+            messages=messages,
+            tools=tools,
+            tool_choice=tool_choice,
+            max_tokens=max_tokens,
+            native_web_tools=native_web_tools,
+        )
+
+        try:
+            response = await self._client.responses.create(**kwargs)
+        except openai.BadRequestError as exc:
+            msg = str(exc).lower()
+            if "context_length_exceeded" in msg or "maximum context length" in msg:
+                raise ContextOverflowError(str(exc)) from exc
+            raise
+        except openai.APIStatusError as exc:
+            if (
+                exc.status_code == 429
+                and isinstance(exc.body, dict)
+                and exc.body.get("detail")
+            ):
+                msg = f"Server returned 429 — {exc.body['detail']}"
+                msg += " Visit https://mdb.ai to upgrade or to top up your tokens."
+                from .provider import TokenLimitExceeded
+
+                raise TokenLimitExceeded(msg) from exc
+            else:
+                msg = f"Server returned {exc.status_code} — the LLM endpoint may be temporarily unavailable. Try again in a moment."
+            raise ConnectionError(msg) from exc
+        except openai.APIConnectionError as exc:
+            raise ConnectionError(
+                "Could not reach the LLM server — check your connection or try again in a moment."
+            ) from exc
+
+        return _parse_response_object(response, model)
+
+    async def _stream_via_responses(
+        self,
+        *,
+        model: str,
+        system: str,
+        messages: list[dict],
+        tools: list[dict] | None,
+        max_tokens: int,
+        native_web_tools: set[str] | None,
+    ) -> AsyncIterator[StreamEvent]:
+        kwargs = self._build_responses_kwargs(
+            model=model,
+            system=system,
+            messages=messages,
+            tools=tools,
+            tool_choice=None,  # streaming path does not force tool_choice today
+            max_tokens=max_tokens,
+            native_web_tools=native_web_tools,
+        )
+        kwargs["stream"] = True
+
+        content_text = ""
+        tool_calls: list[ToolCall] = []
+        input_tokens = 0
+        output_tokens = 0
+        stop_reason: str | None = None
+
+        # Map output_index → in-flight function-call state. Responses API uses
+        # a per-output_index stable handle for streaming arguments.
+        fc_state: dict[int, dict] = {}
+
+        try:
+            stream = await self._client.responses.create(**kwargs)
+            async for event in stream:
+                etype = getattr(event, "type", "")
+
+                # Text deltas
+                if etype == "response.output_text.delta":
+                    delta = getattr(event, "delta", "")
+                    if delta:
+                        content_text += delta
+                        yield StreamTextDelta(text=delta)
+
+                # New output item (could be a function_call, server-tool call,
+                # or message). We only need to react when a function_call
+                # appears so we can emit the StreamToolUseStart with id+name.
+                elif etype == "response.output_item.added":
+                    item = getattr(event, "item", None)
+                    item_type = getattr(item, "type", None)
+                    if item_type == "function_call":
+                        idx = event.output_index
+                        call_id = getattr(item, "call_id", "") or getattr(item, "id", "")
+                        name = getattr(item, "name", "") or ""
+                        fc_state[idx] = {"call_id": call_id, "name": name, "args_parts": []}
+                        if call_id and name:
+                            yield StreamToolUseStart(id=call_id, name=name)
+
+                # Function-call argument deltas
+                elif etype == "response.function_call_arguments.delta":
+                    idx = event.output_index
+                    delta = getattr(event, "delta", "")
+                    info = fc_state.get(idx)
+                    if info is None:
+                        # output_item.added didn't surface this call yet — buffer
+                        info = {"call_id": "", "name": "", "args_parts": []}
+                        fc_state[idx] = info
+                    info["args_parts"].append(delta)
+                    if info["call_id"]:
+                        yield StreamToolUseDelta(id=info["call_id"], json_delta=delta)
+
+                # Function-call arguments complete — finalize this call.
+                elif etype == "response.function_call_arguments.done":
+                    idx = event.output_index
+                    info = fc_state.get(idx)
+                    if info is None:
+                        continue
+                    raw_json = "".join(info["args_parts"]) or getattr(
+                        event, "arguments", ""
+                    )
+                    parsed = json.loads(raw_json) if raw_json else {}
+                    tool_calls.append(
+                        ToolCall(
+                            id=info["call_id"], name=info["name"], input=parsed
+                        )
+                    )
+                    if info["call_id"]:
+                        yield StreamToolUseEnd(id=info["call_id"])
+
+                # Final completion event carries the resolved Response object.
+                # Only usage and status are read from it; text and function
+                # calls were already accumulated from the streamed deltas
+                # (server-tool calls surface as streamed text, not as calls).
+                elif etype == "response.completed":
+                    final_response = getattr(event, "response", None)
+                    if final_response is not None:
+                        usage = getattr(final_response, "usage", None)
+                        if usage is not None:
+                            input_tokens = getattr(usage, "input_tokens", 0) or 0
+                            output_tokens = getattr(usage, "output_tokens", 0) or 0
+                        stop_reason = getattr(final_response, "status", None)
+        except openai.BadRequestError as exc:
+            msg = str(exc).lower()
+            if "context_length_exceeded" in msg or "maximum context length" in msg:
+                raise ContextOverflowError(str(exc)) from exc
+            raise
+        except openai.APIStatusError as exc:
+            if (
+                exc.status_code == 429
+                and isinstance(exc.body, dict)
+                and exc.body.get("detail")
+            ):
+                msg = f"Server returned 429 — {exc.body['detail']}"
+                msg += " Visit https://mdb.ai to upgrade or top up your tokens."
+                from .provider import TokenLimitExceeded
+
+                raise TokenLimitExceeded(msg) from exc
+            else:
+                msg = f"Server returned {exc.status_code} — the LLM endpoint may be temporarily unavailable. Try again in a moment."
+            raise ConnectionError(msg) from exc
+        except openai.APIConnectionError as exc:
+            raise ConnectionError(
+                "Could not reach the LLM server — check your connection or try again in a moment."
+            ) from exc
+
+        yield StreamComplete(
+            response=LLMResponse(
+                content=content_text,
+                tool_calls=tool_calls,
+                usage=Usage(
+                    input_tokens=input_tokens,
+                    output_tokens=output_tokens,
+                    context_pressure=compute_context_pressure(model, input_tokens),
+                ),
+                stop_reason=stop_reason,
+            )
+        )
+
+
+def _parse_response_object(response, model: str) -> LLMResponse:
+    """Convert a Responses API ``Response`` object into our unified ``LLMResponse``.
+
+    The response contains an ``output`` array of items: messages (with
+    ``output_text`` content blocks), function calls (with ``call_id``,
+    ``name``, ``arguments``), and server-tool calls (web_search etc.) which we
+    intentionally drop because their effects are already incorporated into the
+    model's text content.
+    """
+    content_text = ""
+    tool_calls: list[ToolCall] = []
+
+    for item in response.output or []:
+        item_type = getattr(item, "type", "")
+        if item_type == "message":
+            for content_block in getattr(item, "content", []) or []:
+                if getattr(content_block, "type", "") == "output_text":
+                    content_text += getattr(content_block, "text", "") or ""
+        elif item_type == "function_call":
+            call_id = getattr(item, "call_id", "") or getattr(item, "id", "")
+            name = getattr(item, "name", "") or ""
+            args_str = getattr(item, "arguments", "") or ""
+            try:
+                parsed = json.loads(args_str) if args_str else {}
+            except json.JSONDecodeError:
+                parsed = {}
+            tool_calls.append(ToolCall(id=call_id, name=name, input=parsed))
+        # Other item types (web_search_call, reasoning, etc.) are skipped —
+        # the model's output_text already incorporates their effects.
+
+    usage = getattr(response, "usage", None)
+    input_tokens = getattr(usage, "input_tokens", 0) if usage else 0
+    output_tokens = getattr(usage, "output_tokens", 0) if usage else 0
+
+    return LLMResponse(
+        content=content_text,
+        tool_calls=tool_calls,
+        usage=Usage(
+            input_tokens=input_tokens,
+            output_tokens=output_tokens,
+            context_pressure=compute_context_pressure(model, input_tokens),
+        ),
+        stop_reason=getattr(response, "status", None),
+    )
diff --git a/anton/core/llm/provider.py b/anton/core/llm/provider.py
index 180fc445..f49c8906 100644
--- a/anton/core/llm/provider.py
+++ b/anton/core/llm/provider.py
@@ -153,6 +153,22 @@ class LLMProvider(ABC):
     # Human-readable provider id (e.g. "anthropic", "openai-compatible").
     name: str = ""
 
+    def native_web_tools(self) -> set[str]:
+        """Subset of {"web_search", "web_fetch"} this provider executes server-side.
+
+        When a tool is declared here, the provider is responsible for translating
+        the capability into its own native tool spec (e.g. Anthropic's
+        ``web_search_*`` server-tool, OpenAI's Responses API ``web_search``,
+        mdb.ai's ``{"type": "web_search"}`` passthrough). Server-side execution
+        means the model's response already incorporates the search/fetch
+        results — Anton's tool-dispatch loop never sees a ``tool_use`` for
+        these names.
+
+        Providers without native support return an empty set, and the session
+        falls back to handler-dispatched ``ToolDef``s for any enabled web tools.
+        """
+        return set()
+
     @abstractmethod
     async def complete(
         self,
@@ -163,6 +179,7 @@ async def complete(
         tools: list[dict] | None = None,
         tool_choice: dict | None = None,
         max_tokens: int = 4096,
+        native_web_tools: set[str] | None = None,
     ) -> LLMResponse: ...
 
     def export_connection_info(self) -> ProviderConnectionInfo:
@@ -181,6 +198,7 @@ async def stream(
         messages: list[dict],
         tools: list[dict] | None = None,
         max_tokens: int = 4096,
+        native_web_tools: set[str] | None = None,
     ) -> AsyncIterator[StreamEvent]:
         """Stream LLM responses. Default falls back to complete()."""
         response = await self.complete(
@@ -189,6 +207,7 @@ async def stream(
             messages=messages,
             tools=tools,
             max_tokens=max_tokens,
+            native_web_tools=native_web_tools,
         )
         if response.content:
             yield StreamTextDelta(text=response.content)
diff --git a/anton/core/session.py b/anton/core/session.py
index 49192d98..7cb597d8 100644
--- a/anton/core/session.py
+++ b/anton/core/session.py
@@ -79,6 +79,12 @@ class ChatSessionConfig:
     session_id: str | None = None
     proactive_dashboards: bool = False
     tools: list[ToolDef] = field(default_factory=list)
+    # Web tools — on by default. Each is independently resolved at session
+    # construction into either a native provider capability (passed to the LLM
+    # via ``native_web_tools``) or a handler-dispatched fallback ToolDef
+    # (registered on the tool registry). See ChatSession.__init__.
+    web_search_enabled: bool = True
+    web_fetch_enabled: bool = True
 
 
 class ChatSession:
@@ -86,6 +92,11 @@ class ChatSession:
 
     def __init__(self, config: ChatSessionConfig) -> None:
         s = config.settings or CoreSettings()
+        # Stash the full settings object (may be AntonSettings, CoreSettings,
+        # or None). Tool handlers read host-only fields like
+        # ``external_search_provider`` / ``exa_api_key`` via getattr so the
+        # session stays decoupled from the host's settings shape.
+        self._settings = config.settings
         self._max_tool_rounds = s.max_tool_rounds
         self._max_continuations = s.max_continuations
         self._context_pressure_threshold = s.context_pressure_threshold
@@ -157,6 +168,20 @@ def __init__(self, config: ChatSessionConfig) -> None:
         # the post-recovery response still reports high pressure.
         self._compacted_this_turn = False
 
+        # Resolve web tool routing once per session. ``_native_web_tools`` is
+        # the set the planning provider will execute server-side (passed
+        # through every ``plan*`` call); ``_fallback_web_tools`` is the set
+        # we run ourselves via handler-dispatched ToolDefs (registered in
+        # ``_build_core_tools``). The two sets are disjoint by construction.
+        desired_web: set[str] = set()
+        if config.web_search_enabled:
+            desired_web.add("web_search")
+        if config.web_fetch_enabled:
+            desired_web.add("web_fetch")
+        provider_native = self._llm.planning_provider.native_web_tools()
+        self._native_web_tools: set[str] = desired_web & provider_native
+        self._fallback_web_tools: set[str] = desired_web - provider_native
+
     @property
     def history(self) -> list[dict]:
         return self._history
@@ -411,6 +436,19 @@ def _build_core_tools(self) -> None:
         # Procedural memory retrieval — always available, no-op if no skills.
         self.tool_registry.register_tool(RECALL_SKILL_TOOL)
 
+        # Handler-dispatched web tools — registered only when the LLM provider
+        # does NOT execute them natively. On Anthropic / OpenAI BYOK / mdb.ai
+        # passthrough, ``_fallback_web_tools`` is empty and these tools never
+        # appear in the registry; the model uses the provider's server-side
+        # web tools instead and Anton's dispatch loop never sees a ``tool_use``
+        # for them. See ``anton/core/tools/web_tools.py`` for the handlers.
+        if "web_search" in self._fallback_web_tools:
+            from anton.core.tools.web_tools import WEB_SEARCH_FALLBACK_TOOL
+            self.tool_registry.register_tool(WEB_SEARCH_FALLBACK_TOOL)
+        if "web_fetch" in self._fallback_web_tools:
+            from anton.core.tools.web_tools import WEB_FETCH_FALLBACK_TOOL
+            self.tool_registry.register_tool(WEB_FETCH_FALLBACK_TOOL)
+
     async def close(self) -> None:
         """Clean up scratchpads and other resources."""
         await self._scratchpads.close_all()
@@ -599,6 +637,10 @@ async def plan_with_recovery(
             kwargs["tools"] = tools
         if max_tokens is not None:
             kwargs["max_tokens"] = max_tokens
+        # Native web tools are a per-session capability — forward to every
+        # planning call automatically so callers don't have to remember.
+        if self._native_web_tools:
+            kwargs["native_web_tools"] = self._native_web_tools
 
         try:
             return await self._llm.plan(messages=factory(), **kwargs)
@@ -638,6 +680,8 @@ async def plan_stream_with_recovery(
             kwargs["tools"] = tools
         if max_tokens is not None:
             kwargs["max_tokens"] = max_tokens
+        if self._native_web_tools:
+            kwargs["native_web_tools"] = self._native_web_tools
 
         try:
             async for event in self._llm.plan_stream(messages=factory(), **kwargs):
diff --git a/anton/core/tools/web_tools.py b/anton/core/tools/web_tools.py
new file mode 100644
index 00000000..0b102942
--- /dev/null
+++ b/anton/core/tools/web_tools.py
@@ -0,0 +1,290 @@
+"""Handler-dispatched fallbacks for ``web_search`` / ``web_fetch``.
+
+These tools are registered on the session's ``ToolRegistry`` only when the
+active LLM provider does *not* expose the equivalent capability natively
+(see ``LLMProvider.native_web_tools()``). On Anthropic BYOK, OpenAI BYOK, and
+the mdb.ai passthrough the model uses the provider's server-side tools and
+this module is dormant.
+
+For generic OpenAI-compatible third-party endpoints (Case 3 in the design):
+
+- ``web_search`` is dispatched to Exa.ai or Brave Search using a key the user
+  configured via ``anton setup search``. Without a configured key the handler
+  returns a clear error message pointing at that command.
+- ``web_fetch`` always works — it is a plain HTTP GET (via httpx, which
+  Anton already depends on transitively through the LLM SDKs) plus a
+  lightweight HTML→text stripper, so it does not need a third-party key.
+
+Future enhancement (intentionally deferred from v1): when
+``external_search_provider == "exa"`` and ``exa_api_key`` is set, ``web_fetch``
+could route through Exa's ``/contents`` endpoint instead of a direct GET for
+higher-quality extraction (handles paywalls, JS-rendered nav, ad/boilerplate
+stripping). Held back for now to keep behavior uniform across Exa, Brave, and
+unconfigured users — the swap is local to ``handle_web_fetch_fallback``.
+"""
+
+from __future__ import annotations
+
+import html
+from html.parser import HTMLParser
+from typing import TYPE_CHECKING, Any
+
+import httpx
+
+from anton.core.tools.tool_defs import ToolDef
+
+if TYPE_CHECKING:
+    from anton.core.session import ChatSession
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# External search provider adapters
+# ─────────────────────────────────────────────────────────────────────────────
+
+EXA_SEARCH_ENDPOINT = "https://api.exa.ai/search"
+BRAVE_SEARCH_ENDPOINT = "https://api.search.brave.com/res/v1/web/search"
+
+_HTTP_TIMEOUT = 30.0
+
+
+async def _search_exa(query: str, api_key: str, max_results: int) -> str:
+    """Hit Exa's ``/search`` endpoint and format hits as markdown."""
+    payload: dict[str, Any] = {
+        "query": query,
+        "num_results": max_results,
+        # Include a short excerpt with each result so the model can answer
+        # many questions without a follow-up fetch round-trip.
+        "contents": {"text": {"max_characters": 600}},
+    }
+    headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
+    async with httpx.AsyncClient(timeout=_HTTP_TIMEOUT) as client:
+        resp = await client.post(EXA_SEARCH_ENDPOINT, json=payload, headers=headers)
+        if resp.status_code != 200:
+            return f"Exa search failed ({resp.status_code}): {resp.text[:500]}"
+        data = resp.json()
+
+    results = data.get("results") or []
+    if not results:
+        return f"No results for query: {query!r}"
+    lines = [f"Web search results for: {query!r} (Exa, {len(results)} hits)\n"]
+    for i, r in enumerate(results, 1):
+        title = r.get("title") or r.get("url") or "(untitled)"
+        url = r.get("url") or ""
+        snippet = (r.get("text") or "").strip()
+        if len(snippet) > 600:
+            snippet = snippet[:600] + "…"
+        lines.append(f"{i}. **{title}**\n   {url}")
+        if snippet:
+            lines.append(f"   {snippet}")
+    return "\n".join(lines)
+
+
+async def _search_brave(query: str, api_key: str, max_results: int) -> str:
+    """Hit Brave Search's web endpoint and format hits as markdown."""
+    headers = {
+        "X-Subscription-Token": api_key,
+        "Accept": "application/json",
+    }
+    params = {"q": query, "count": max_results}
+    async with httpx.AsyncClient(timeout=_HTTP_TIMEOUT) as client:
+        resp = await client.get(BRAVE_SEARCH_ENDPOINT, headers=headers, params=params)
+        if resp.status_code != 200:
+            return f"Brave search failed ({resp.status_code}): {resp.text[:500]}"
+        data = resp.json()
+
+    web = (data.get("web") or {}).get("results") or []
+    if not web:
+        return f"No results for query: {query!r}"
+    lines = [f"Web search results for: {query!r} (Brave, {len(web)} hits)\n"]
+    for i, r in enumerate(web, 1):
+        title = r.get("title") or r.get("url") or "(untitled)"
+        url = r.get("url") or ""
+        snippet = (r.get("description") or "").strip()
+        lines.append(f"{i}. **{title}**\n   {url}")
+        if snippet:
+            lines.append(f"   {snippet}")
+    return "\n".join(lines)
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Direct HTTP fetch (httpx) + lightweight stdlib HTML extraction
+# ─────────────────────────────────────────────────────────────────────────────
+
+
+class _TextExtractor(HTMLParser):
+    """Tiny stdlib-only HTML→text converter.
+
+    Skips ``head``/``script``/``style``/``noscript`` content, decodes character
+    refs, and trims whitespace. Good enough for the model to read article-style
+    pages; will produce noisy output for heavily JS-driven SPAs (acceptable
+    for v1 — the future Exa ``/contents`` enhancement covers that case).
+    """
+
+    _SKIP_TAGS = {"script", "style", "noscript", "head"}
+
+    def __init__(self) -> None:
+        super().__init__(convert_charrefs=True)
+        self._chunks: list[str] = []
+        self._skip_depth = 0
+
+    def handle_starttag(self, tag: str, attrs: list) -> None:
+        if tag in self._SKIP_TAGS:
+            self._skip_depth += 1
+
+    def handle_endtag(self, tag: str) -> None:
+        if tag in self._SKIP_TAGS and self._skip_depth > 0:
+            self._skip_depth -= 1
+        # Block-level tags get an implicit newline so paragraphs don't smush.
+        if tag in ("p", "br", "div", "li", "h1", "h2", "h3", "h4", "h5", "h6", "tr"):
+            self._chunks.append("\n")
+
+    def handle_data(self, data: str) -> None:
+        if self._skip_depth == 0:
+            self._chunks.append(data)
+
+    def text(self) -> str:
+        raw = "".join(self._chunks)
+        # Strip each line and drop blank ones (paragraph gaps are not kept).
+        lines = [line.strip() for line in raw.splitlines()]
+        return "\n".join(line for line in lines if line)
+
+
+def _strip_html(body: str) -> str:
+    parser = _TextExtractor()
+    try:
+        parser.feed(body)
+    except Exception:
+        # Bail out to a minimal "decode entities" fallback if the parser barfs.
+        return html.unescape(body)
+    return parser.text()
+
+
+async def _fetch_url(url: str, max_chars: int) -> str:
+    """GET a URL and return its text content, truncated to ``max_chars``."""
+    try:
+        async with httpx.AsyncClient(
+            timeout=_HTTP_TIMEOUT, follow_redirects=True
+        ) as client:
+            resp = await client.get(url, headers={"User-Agent": "AntonBot/1.0"})
+    except httpx.TimeoutException:
+        return f"Fetch timed out after {_HTTP_TIMEOUT}s for {url}"
+    except httpx.HTTPError as exc:
+        return f"Fetch failed for {url}: {exc}"
+
+    if resp.status_code >= 400:
+        return f"Fetch returned HTTP {resp.status_code} for {url}"
+
+    content_type = (resp.headers.get("content-type") or "").lower()
+    body = resp.text
+
+    if "html" in content_type or body.lstrip().startswith("<"):
+        text = _strip_html(body)
+    else:
+        text = body
+
+    truncated = False
+    if len(text) > max_chars:
+        text = text[:max_chars]
+        truncated = True
+
+    header = f"Fetched {url} (HTTP {resp.status_code}, {len(resp.content)} bytes)"
+    suffix = "\n... [truncated]" if truncated else ""
+    return f"{header}\n\n{text}{suffix}"
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Handlers + ToolDefs
+# ─────────────────────────────────────────────────────────────────────────────
+
+
+_NO_PROVIDER_MSG = (
+    "No search provider configured for this LLM endpoint. "
+    "Run `anton setup search` to configure Exa.ai or Brave Search."
+)
+
+
+async def handle_web_search_fallback(session: "ChatSession", tc_input: dict) -> str:
+    query = (tc_input.get("query") or "").strip()
+    if not query:
+        return "web_search requires a non-empty `query`."
+    max_results = int(tc_input.get("max_results") or 5)
+    max_results = max(1, min(max_results, 20))
+
+    settings = session._settings
+    provider = (getattr(settings, "external_search_provider", None) or "").lower()
+
+    if provider == "exa":
+        key = getattr(settings, "exa_api_key", None)
+        if not key:
+            return _NO_PROVIDER_MSG
+        return await _search_exa(query, key, max_results)
+    if provider == "brave":
+        key = getattr(settings, "brave_api_key", None)
+        if not key:
+            return _NO_PROVIDER_MSG
+        return await _search_brave(query, key, max_results)
+
+    return _NO_PROVIDER_MSG
+
+
+async def handle_web_fetch_fallback(session: "ChatSession", tc_input: dict) -> str:
+    del session  # unused — fetch needs no settings
+    url = (tc_input.get("url") or "").strip()
+    if not url:
+        return "web_fetch requires a `url`."
+    if not (url.startswith("http://") or url.startswith("https://")):
+        return f"web_fetch only supports http(s) URLs; got: {url!r}"
+    max_chars = int(tc_input.get("max_chars") or 20000)
+    max_chars = max(500, min(max_chars, 200_000))
+    return await _fetch_url(url, max_chars)
+
+
+WEB_SEARCH_FALLBACK_TOOL = ToolDef(
+    name="web_search",
+    description=(
+        "Search the web for up-to-date information. Returns a ranked list of "
+        "results with title, URL, and a short excerpt. Use this when you need "
+        "facts that may have changed recently, breaking news, or to discover "
+        "URLs to fetch in detail. Backed by Exa.ai or Brave Search."
+    ),
+    input_schema={
+        "type": "object",
+        "properties": {
+            "query": {
+                "type": "string",
+                "description": "The search query.",
+            },
+            "max_results": {
+                "type": "integer",
+                "description": "Maximum results to return (1-20, default 5).",
+            },
+        },
+        "required": ["query"],
+    },
+    handler=handle_web_search_fallback,
+)
+
+
+WEB_FETCH_FALLBACK_TOOL = ToolDef(
+    name="web_fetch",
+    description=(
+        "Fetch a URL and return its text content. Strips HTML markup; works "
+        "best on article-style pages. Use this after web_search when you need "
+        "the full body of a result, or directly when the user provides a URL."
+    ),
+    input_schema={
+        "type": "object",
+        "properties": {
+            "url": {
+                "type": "string",
+                "description": "Absolute http(s) URL to fetch.",
+            },
+            "max_chars": {
+                "type": "integer",
+                "description": "Maximum characters to return (default 20000, max 200000).",
+            },
+        },
+        "required": ["url"],
+    },
+    handler=handle_web_fetch_fallback,
+)

From 8ece20818771d3d1da840440748fcf886e78a429 Mon Sep 17 00:00:00 2001
From: Max Abouchar <maxabouchar@gmail.com>
Date: Wed, 6 May 2026 16:00:57 -0700
Subject: [PATCH 3/7] document web search capability

---
 README.md | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/README.md b/README.md
index fab0ea84..7bfba0fc 100644
--- a/README.md
+++ b/README.md
@@ -93,6 +93,26 @@ Anton doesn't wait for someone to build a connector. It writes the integration c
 - **Credential vault** - prevents secrets from being exposed to LLMs.
 - **Isolated code execution** - protected, reproducible "show your work" environment.
 - **Multi-layer memory & continuous learning** - session, semantic and long-term knowledge. Anton remembers what it learned and gets better at your specific workflows over time.
+- **Web search & fetch** - the agent can query the live web and retrieve URL contents. Routed natively through your LLM provider when possible (no extra setup), with a transparent fallback for third-party endpoints. See below.
+
+---
+
+## Web search & fetch
+
+Anton exposes two web tools to the agent — `web_search` and `web_fetch` — both on by default. How they execute depends on your LLM provider:
+
+| Provider | `web_search` | `web_fetch` | Setup |
+| --- | --- | --- | --- |
+| Anthropic BYOK | Anthropic native server tool | Anthropic native server tool | None — billed on your Anthropic key |
+| OpenAI BYOK | OpenAI Responses API native | covered by `web_search` | None — billed on your OpenAI key |
+| Minds-Enterprise-Cloud (mdb.ai) | mdb.ai passthrough | mdb.ai passthrough | None — billed on your Minds key |
+| Generic OpenAI-compatible (Together, Groq, Ollama, vLLM, …) | Exa.ai or Brave (you choose at setup) | direct HTTP GET via httpx (no key) | Run `anton setup-search` once |
+
+For the first three rows there's nothing to configure — the LLM provider executes the tools server-side and the results are folded directly into its response. For the fourth row, after `anton setup` finishes configuring a custom OpenAI-compatible endpoint, Anton will offer to set up Exa or Brave; you can also (re)run that step at any time with `anton setup-search`. The chosen search-provider key is persisted to `~/.anton/.env` so it carries across sessions and workspaces, exactly like your LLM key.
+
+To opt out, set `ANTON_WEB_SEARCH_ENABLED=false` and/or `ANTON_WEB_FETCH_ENABLED=false`.
+
+Caveats: provider rate limits apply; `web_fetch` has a 30-second timeout and strips HTML to plain text (works best on article-style pages); paywalled pages and JS-heavy SPAs may return little useful content; treat fetched page bodies as untrusted input.
 
 ---
 

From f0c7f306d6d659333c539de0f94da557934c312e Mon Sep 17 00:00:00 2001
From: Max Abouchar <maxabouchar@gmail.com>
Date: Wed, 6 May 2026 16:01:27 -0700
Subject: [PATCH 4/7] tests including e2e on using web tool providers

---
 tests/conftest.py             |  12 +-
 tests/test_datasource.py      |   2 +
 tests/test_openai_provider.py | 285 ++++++++++++++++
 tests/test_provider.py        | 117 +++++++
 tests/test_tools.py           |   2 +
 tests/test_web_tools.py       | 432 ++++++++++++++++++++++++
 tests/test_web_tools_live.py  | 609 ++++++++++++++++++++++++++++++++++
 7 files changed, 1456 insertions(+), 3 deletions(-)
 create mode 100644 tests/test_web_tools.py
 create mode 100644 tests/test_web_tools_live.py

diff --git a/tests/conftest.py b/tests/conftest.py
index 9fa2287e..0557730c 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -11,9 +11,11 @@ def make_mock_llm() -> AsyncMock:
     """Return an AsyncMock LLM client with coding_provider configured for sync use.
 
     ``AsyncMock`` makes all child attributes ``AsyncMock`` too, which means
-    ``coding_provider.export_connection_info()`` would return a coroutine —
-    but ``ChatSession.__init__`` calls it synchronously.  This helper fixes
-    that by explicitly wiring ``coding_provider`` with a plain ``MagicMock``.
+    methods we call synchronously on the provider would otherwise return
+    coroutines.  This helper fixes that for both providers — ``coding_provider``
+    (whose ``export_connection_info()`` is read in ``ChatSession.__init__``) and
+    ``planning_provider`` (whose ``native_web_tools()`` is read in the same
+    constructor to resolve the per-session web tool routing).
     """
     mock = AsyncMock()
     mock.coding_provider = MagicMock()
@@ -21,6 +23,10 @@ def make_mock_llm() -> AsyncMock:
         return_value=ProviderConnectionInfo(provider="anthropic", api_key="test")
     )
     mock.coding_model = "claude-sonnet-4-6"
+    mock.planning_provider = MagicMock()
+    # Default test posture: no native web tools — fallback tools also off
+    # unless a specific test configures otherwise via ChatSessionConfig.
+    mock.planning_provider.native_web_tools = MagicMock(return_value=set())
     return mock
 
 
diff --git a/tests/test_datasource.py b/tests/test_datasource.py
index 6dafd9eb..fb58bf9a 100644
--- a/tests/test_datasource.py
+++ b/tests/test_datasource.py
@@ -180,6 +180,8 @@ async def _default_generate_object(schema_class, **kwargs):
             return_value=ProviderConnectionInfo(provider="anthropic", api_key="test")
         )
         mock_llm.coding_model = "claude-sonnet-4-6"
+        mock_llm.planning_provider = MagicMock()
+        mock_llm.planning_provider.native_web_tools = MagicMock(return_value=set())
         session = ChatSession(ChatSessionConfig(llm_client=mock_llm))
         session._scratchpads = AsyncMock()
         return session
diff --git a/tests/test_openai_provider.py b/tests/test_openai_provider.py
index 11d972a0..95014553 100644
--- a/tests/test_openai_provider.py
+++ b/tests/test_openai_provider.py
@@ -319,3 +319,288 @@ async def test_azure_provider_complete_calls_chat_completions(self):
             assert result.usage.input_tokens == 8
             assert result.usage.output_tokens == 12
             mock_azure_client.chat.completions.create.assert_awaited_once()
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Flavor split — minds-passthrough native tools, Responses API for BYOK OpenAI
+# ─────────────────────────────────────────────────────────────────────────────
+
+
+class TestNativeWebToolsByFlavor:
+    def test_generic_flavor_advertises_no_native_tools(self):
+        with patch("anton.core.llm.openai.openai"):
+            provider = OpenAIProvider(
+                api_key="k",
+                flavor=OpenAIProvider.FLAVOR_OPENAI_COMPATIBLE_GENERIC,
+            )
+        assert provider.native_web_tools() == set()
+
+    def test_minds_passthrough_advertises_search_and_fetch(self):
+        with patch("anton.core.llm.openai.openai"):
+            provider = OpenAIProvider(
+                api_key="k", flavor=OpenAIProvider.FLAVOR_MINDS_PASSTHROUGH
+            )
+        assert provider.native_web_tools() == {"web_search", "web_fetch"}
+
+    def test_openai_flavor_advertises_search_and_fetch(self):
+        with patch("anton.core.llm.openai.openai"):
+            provider = OpenAIProvider(api_key="k", flavor=OpenAIProvider.FLAVOR_OPENAI)
+        assert provider.native_web_tools() == {"web_search", "web_fetch"}
+
+
+class TestMindsPassthroughTools:
+    """The mdb.ai passthrough must accept ``{"type": "web_search"}`` /
+    ``{"type": "fetch"}`` raw — they cannot be routed through
+    ``_translate_tools`` because they have no ``name``/``input_schema`` keys.
+    """
+
+    async def test_appends_web_search_raw(self):
+        with patch("anton.core.llm.openai.openai") as mock_openai:
+            mock_client = AsyncMock()
+            mock_openai.AsyncOpenAI.return_value = mock_client
+            mock_client.chat.completions.create = AsyncMock(
+                return_value=_make_mock_response()
+            )
+
+            provider = OpenAIProvider(
+                api_key="k", flavor=OpenAIProvider.FLAVOR_MINDS_PASSTHROUGH
+            )
+            await provider.complete(
+                model="_reason_",
+                system="sys",
+                messages=[{"role": "user", "content": "hi"}],
+                tools=[{"name": "scratchpad", "description": "x", "input_schema": {}}],
+                native_web_tools={"web_search"},
+            )
+
+            kwargs = mock_client.chat.completions.create.call_args.kwargs
+            tools = kwargs["tools"]
+            # Existing function tool was translated to chat.completions shape
+            assert any(
+                t.get("type") == "function" and t["function"]["name"] == "scratchpad"
+                for t in tools
+            )
+            # Native server-tool entry is appended raw — exact shape mdb.ai expects.
+            assert {"type": "web_search"} in tools
+
+    async def test_appends_fetch_raw(self):
+        with patch("anton.core.llm.openai.openai") as mock_openai:
+            mock_client = AsyncMock()
+            mock_openai.AsyncOpenAI.return_value = mock_client
+            mock_client.chat.completions.create = AsyncMock(
+                return_value=_make_mock_response()
+            )
+
+            provider = OpenAIProvider(
+                api_key="k", flavor=OpenAIProvider.FLAVOR_MINDS_PASSTHROUGH
+            )
+            await provider.complete(
+                model="_reason_",
+                system="sys",
+                messages=[{"role": "user", "content": "hi"}],
+                native_web_tools={"web_fetch"},
+            )
+
+            kwargs = mock_client.chat.completions.create.call_args.kwargs
+            assert {"type": "fetch"} in kwargs["tools"]
+
+    async def test_generic_flavor_does_not_inject_native_tools(self):
+        with patch("anton.core.llm.openai.openai") as mock_openai:
+            mock_client = AsyncMock()
+            mock_openai.AsyncOpenAI.return_value = mock_client
+            mock_client.chat.completions.create = AsyncMock(
+                return_value=_make_mock_response()
+            )
+
+            provider = OpenAIProvider(
+                api_key="k",
+                flavor=OpenAIProvider.FLAVOR_OPENAI_COMPATIBLE_GENERIC,
+            )
+            await provider.complete(
+                model="some-model",
+                system="sys",
+                messages=[{"role": "user", "content": "hi"}],
+                native_web_tools={"web_search", "web_fetch"},
+            )
+
+            kwargs = mock_client.chat.completions.create.call_args.kwargs
+            # Generic flavor never appends native entries — even when the caller
+            # passed them. The session is responsible for falling back to
+            # handler-dispatched ToolDefs in that case.
+            assert "tools" not in kwargs
+
+
+class TestOpenAIBYOKResponsesAPIPath:
+    """``flavor="openai"`` routes every call through ``client.responses.create``
+    rather than ``chat.completions.create``."""
+
+    async def test_complete_uses_responses_create(self):
+        with patch("anton.core.llm.openai.openai") as mock_openai:
+            mock_client = AsyncMock()
+            mock_openai.AsyncOpenAI.return_value = mock_client
+
+            # Build a response object that mimics Responses API output.
+            response = MagicMock()
+            content_block = MagicMock()
+            content_block.type = "output_text"
+            content_block.text = "Hello from Responses API"
+            message_item = MagicMock()
+            message_item.type = "message"
+            message_item.content = [content_block]
+            response.output = [message_item]
+            response.status = "completed"
+            response.usage = MagicMock(input_tokens=42, output_tokens=18)
+            mock_client.responses.create = AsyncMock(return_value=response)
+
+            provider = OpenAIProvider(api_key="k", flavor=OpenAIProvider.FLAVOR_OPENAI)
+            result = await provider.complete(
+                model="gpt-5",
+                system="be helpful",
+                messages=[{"role": "user", "content": "hi"}],
+            )
+
+            mock_client.responses.create.assert_awaited_once()
+            # chat.completions must NOT have been touched
+            mock_client.chat.completions.create.assert_not_called()
+            assert result.content == "Hello from Responses API"
+            assert result.usage.input_tokens == 42
+            assert result.usage.output_tokens == 18
+
+    async def test_complete_passes_instructions_and_input_shape(self):
+        with patch("anton.core.llm.openai.openai") as mock_openai:
+            mock_client = AsyncMock()
+            mock_openai.AsyncOpenAI.return_value = mock_client
+
+            response = MagicMock()
+            response.output = []
+            response.status = "completed"
+            response.usage = MagicMock(input_tokens=1, output_tokens=1)
+            mock_client.responses.create = AsyncMock(return_value=response)
+
+            provider = OpenAIProvider(api_key="k", flavor=OpenAIProvider.FLAVOR_OPENAI)
+            await provider.complete(
+                model="gpt-5",
+                system="custom system",
+                messages=[{"role": "user", "content": "hello"}],
+            )
+
+            kwargs = mock_client.responses.create.call_args.kwargs
+            # System prompt goes via instructions, not as a message item.
+            assert kwargs["instructions"] == "custom system"
+            assert kwargs["model"] == "gpt-5"
+            # Input items are message-shaped
+            assert kwargs["input"] == [
+                {"role": "user", "content": "hello", "type": "message"}
+            ]
+            # max_output_tokens is the Responses API field name
+            assert "max_output_tokens" in kwargs
+
+    async def test_complete_appends_web_search_native_tool(self):
+        with patch("anton.core.llm.openai.openai") as mock_openai:
+            mock_client = AsyncMock()
+            mock_openai.AsyncOpenAI.return_value = mock_client
+
+            response = MagicMock()
+            response.output = []
+            response.status = "completed"
+            response.usage = MagicMock(input_tokens=1, output_tokens=1)
+            mock_client.responses.create = AsyncMock(return_value=response)
+
+            provider = OpenAIProvider(api_key="k", flavor=OpenAIProvider.FLAVOR_OPENAI)
+            await provider.complete(
+                model="gpt-5",
+                system="sys",
+                messages=[{"role": "user", "content": "hi"}],
+                tools=[{"name": "scratchpad", "description": "x", "input_schema": {}}],
+                native_web_tools={"web_search"},
+            )
+
+            kwargs = mock_client.responses.create.call_args.kwargs
+            tools = kwargs["tools"]
+            # Function tools use the FLAT Responses API shape — not nested under
+            # a "function" key like chat.completions.
+            assert any(
+                t.get("type") == "function" and t.get("name") == "scratchpad"
+                for t in tools
+            )
+            assert {"type": "web_search"} in tools
+
+    async def test_complete_translates_function_call_output(self):
+        """Responses API returns function calls as output items with call_id."""
+        with patch("anton.core.llm.openai.openai") as mock_openai:
+            mock_client = AsyncMock()
+            mock_openai.AsyncOpenAI.return_value = mock_client
+
+            fc_item = MagicMock()
+            fc_item.type = "function_call"
+            fc_item.call_id = "call_xyz"
+            fc_item.name = "do_thing"
+            fc_item.arguments = json.dumps({"foo": 42})
+
+            response = MagicMock()
+            response.output = [fc_item]
+            response.status = "completed"
+            response.usage = MagicMock(input_tokens=1, output_tokens=1)
+            mock_client.responses.create = AsyncMock(return_value=response)
+
+            provider = OpenAIProvider(api_key="k", flavor=OpenAIProvider.FLAVOR_OPENAI)
+            result = await provider.complete(
+                model="gpt-5",
+                system="sys",
+                messages=[{"role": "user", "content": "hi"}],
+                tools=[{"name": "do_thing", "description": "x", "input_schema": {}}],
+            )
+
+            assert len(result.tool_calls) == 1
+            assert result.tool_calls[0].id == "call_xyz"
+            assert result.tool_calls[0].name == "do_thing"
+            assert result.tool_calls[0].input == {"foo": 42}
+
+
+class TestOpenAICompatibleFlavorResolution:
+    """``LLMClient.from_settings`` resolves openai-compatible into either
+    minds-passthrough or generic based on the ``openai_base_url`` matching
+    the user's configured ``minds_url``."""
+
+    def test_resolves_to_minds_passthrough_when_base_url_matches(self):
+        with patch("anton.core.llm.openai.openai"):
+            settings = AntonSettings(
+                planning_provider="openai-compatible",
+                coding_provider="openai-compatible",
+                planning_model="_reason_",
+                coding_model="_code_",
+                openai_api_key="mdb-key",
+                openai_base_url="https://mdb.ai/api/v1",
+                minds_url="https://mdb.ai",
+                _env_file=None,
+            )
+            client = LLMClient.from_settings(settings)
+            assert client._planning_provider._flavor == OpenAIProvider.FLAVOR_MINDS_PASSTHROUGH
+
+    def test_resolves_to_generic_when_base_url_is_third_party(self):
+        with patch("anton.core.llm.openai.openai"):
+            settings = AntonSettings(
+                planning_provider="openai-compatible",
+                coding_provider="openai-compatible",
+                planning_model="my-model",
+                coding_model="my-model",
+                openai_api_key="k",
+                openai_base_url="https://api.openrouter.ai/v1",
+                minds_url="https://mdb.ai",
+                _env_file=None,
+            )
+            client = LLMClient.from_settings(settings)
+            assert client._planning_provider._flavor == OpenAIProvider.FLAVOR_OPENAI_COMPATIBLE_GENERIC
+
+    def test_byok_openai_uses_openai_flavor(self):
+        with patch("anton.core.llm.openai.openai"):
+            settings = AntonSettings(
+                planning_provider="openai",
+                coding_provider="openai",
+                planning_model="gpt-5",
+                coding_model="gpt-5",
+                openai_api_key="sk-test",
+                _env_file=None,
+            )
+            client = LLMClient.from_settings(settings)
+            assert client._planning_provider._flavor == OpenAIProvider.FLAVOR_OPENAI
diff --git a/tests/test_provider.py b/tests/test_provider.py
index 0d0ac8a8..59afc7ba 100644
--- a/tests/test_provider.py
+++ b/tests/test_provider.py
@@ -141,3 +141,120 @@ async def test_provider_without_api_key(self):
             mock_anthropic.AsyncAnthropic.return_value = AsyncMock()
             provider = AnthropicProvider()
             mock_anthropic.AsyncAnthropic.assert_called_once_with()
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Native server-side web tools (web_search / web_fetch)
+# ─────────────────────────────────────────────────────────────────────────────
+
+
+def _stub_text_response(text: str = "ok"):
+    """Build a MagicMock response that looks like a plain text Anthropic reply."""
+    block = MagicMock()
+    block.type = "text"
+    block.text = text
+    response = MagicMock()
+    response.content = [block]
+    response.usage.input_tokens = 1
+    response.usage.output_tokens = 1
+    response.stop_reason = "end_turn"
+    return response
+
+
+class TestAnthropicNativeWebTools:
+    def test_native_web_tools_advertises_search_and_fetch(self):
+        with patch("anton.core.llm.anthropic.anthropic") as mock_anthropic:
+            mock_anthropic.AsyncAnthropic.return_value = AsyncMock()
+            provider = AnthropicProvider(api_key="k")
+        assert provider.native_web_tools() == {"web_search", "web_fetch"}
+
+    async def test_complete_appends_web_search_server_tool(self):
+        from anton.core.llm.anthropic import ANTHROPIC_WEB_SEARCH_TOOL_TYPE
+
+        with patch("anton.core.llm.anthropic.anthropic") as mock_anthropic:
+            mock_client = AsyncMock()
+            mock_anthropic.AsyncAnthropic.return_value = mock_client
+            mock_client.messages.create = AsyncMock(return_value=_stub_text_response())
+
+            provider = AnthropicProvider(api_key="k")
+            await provider.complete(
+                model="claude-sonnet-4-6",
+                system="sys",
+                messages=[{"role": "user", "content": "hi"}],
+                tools=[{"name": "scratchpad", "description": "x", "input_schema": {}}],
+                native_web_tools={"web_search"},
+            )
+
+            kwargs = mock_client.messages.create.call_args[1]
+            tools = kwargs["tools"]
+            # Existing function tool is preserved
+            assert any(t.get("name") == "scratchpad" for t in tools)
+            # Server tool entry is appended in the right shape
+            assert {"type": ANTHROPIC_WEB_SEARCH_TOOL_TYPE, "name": "web_search"} in tools
+            # web_search is GA — no beta header should be set
+            assert "extra_headers" not in kwargs
+
+    async def test_complete_appends_web_fetch_with_beta_header(self):
+        from anton.core.llm.anthropic import (
+            ANTHROPIC_WEB_FETCH_BETA_HEADER,
+            ANTHROPIC_WEB_FETCH_TOOL_TYPE,
+        )
+
+        with patch("anton.core.llm.anthropic.anthropic") as mock_anthropic:
+            mock_client = AsyncMock()
+            mock_anthropic.AsyncAnthropic.return_value = mock_client
+            mock_client.messages.create = AsyncMock(return_value=_stub_text_response())
+
+            provider = AnthropicProvider(api_key="k")
+            await provider.complete(
+                model="claude-sonnet-4-6",
+                system="sys",
+                messages=[{"role": "user", "content": "hi"}],
+                native_web_tools={"web_fetch"},
+            )
+
+            kwargs = mock_client.messages.create.call_args[1]
+            assert {"type": ANTHROPIC_WEB_FETCH_TOOL_TYPE, "name": "web_fetch"} in kwargs["tools"]
+            # web_fetch is beta — header must be present
+            assert kwargs["extra_headers"] == {
+                "anthropic-beta": ANTHROPIC_WEB_FETCH_BETA_HEADER
+            }
+
+    async def test_complete_appends_both_server_tools(self):
+        with patch("anton.core.llm.anthropic.anthropic") as mock_anthropic:
+            mock_client = AsyncMock()
+            mock_anthropic.AsyncAnthropic.return_value = mock_client
+            mock_client.messages.create = AsyncMock(return_value=_stub_text_response())
+
+            provider = AnthropicProvider(api_key="k")
+            await provider.complete(
+                model="claude-sonnet-4-6",
+                system="sys",
+                messages=[{"role": "user", "content": "hi"}],
+                native_web_tools={"web_search", "web_fetch"},
+            )
+
+            kwargs = mock_client.messages.create.call_args[1]
+            names = [t.get("name") for t in kwargs["tools"]]
+            assert "web_search" in names and "web_fetch" in names
+            # web_fetch always brings the beta header along
+            assert "anthropic-beta" in kwargs["extra_headers"]
+
+    async def test_complete_omits_web_tools_when_set_is_empty(self):
+        with patch("anton.core.llm.anthropic.anthropic") as mock_anthropic:
+            mock_client = AsyncMock()
+            mock_anthropic.AsyncAnthropic.return_value = mock_client
+            mock_client.messages.create = AsyncMock(return_value=_stub_text_response())
+
+            provider = AnthropicProvider(api_key="k")
+            await provider.complete(
+                model="claude-sonnet-4-6",
+                system="sys",
+                messages=[{"role": "user", "content": "hi"}],
+                native_web_tools=None,
+            )
+
+            kwargs = mock_client.messages.create.call_args[1]
+            # No tools array at all — backward-compatible with the no-tools case
+            assert "tools" not in kwargs
+            assert "extra_headers" not in kwargs
diff --git a/tests/test_tools.py b/tests/test_tools.py
index 90583309..92e3c9bd 100644
--- a/tests/test_tools.py
+++ b/tests/test_tools.py
@@ -30,6 +30,8 @@ def _make_session(vault_dir):
         return_value=ProviderConnectionInfo(provider="anthropic", api_key="test")
     )
     mock_llm.coding_model = "claude-sonnet-4-6"
+    mock_llm.planning_provider = MagicMock()
+    mock_llm.planning_provider.native_web_tools = MagicMock(return_value=set())
     session = ChatSession(ChatSessionConfig(llm_client=mock_llm))
     session._console = MagicMock()
     session._scratchpads = AsyncMock()
diff --git a/tests/test_web_tools.py b/tests/test_web_tools.py
new file mode 100644
index 00000000..ade876ef
--- /dev/null
+++ b/tests/test_web_tools.py
@@ -0,0 +1,432 @@
+"""Tests for the handler-dispatched web_search/web_fetch fallbacks and the
+session-side routing decision (native vs handler-dispatched).
+"""
+
+from __future__ import annotations
+
+from types import SimpleNamespace
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import httpx
+import pytest
+
+from anton.core.tools.web_tools import (
+    WEB_FETCH_FALLBACK_TOOL,
+    WEB_SEARCH_FALLBACK_TOOL,
+    _strip_html,
+    handle_web_fetch_fallback,
+    handle_web_search_fallback,
+)
+
+
+def _session_with_settings(**fields):
+    """Build a stand-in session object exposing only ._settings."""
+    settings = SimpleNamespace(
+        external_search_provider=fields.get("external_search_provider"),
+        exa_api_key=fields.get("exa_api_key"),
+        brave_api_key=fields.get("brave_api_key"),
+    )
+    return SimpleNamespace(_settings=settings)
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# web_search fallback — Exa
+# ─────────────────────────────────────────────────────────────────────────────
+
+
+class TestWebSearchFallbackExa:
+    async def test_returns_no_provider_message_when_unconfigured(self):
+        session = _session_with_settings()
+        result = await handle_web_search_fallback(session, {"query": "anything"})
+        assert "anton setup search" in result
+        assert "No search provider" in result
+
+    async def test_returns_no_provider_when_provider_set_but_no_key(self):
+        session = _session_with_settings(external_search_provider="exa")
+        result = await handle_web_search_fallback(session, {"query": "x"})
+        assert "anton setup search" in result
+
+    async def test_empty_query_short_circuits(self):
+        session = _session_with_settings(
+            external_search_provider="exa", exa_api_key="k"
+        )
+        result = await handle_web_search_fallback(session, {"query": "  "})
+        assert "non-empty" in result.lower()
+
+    async def test_calls_exa_endpoint_with_bearer_auth(self):
+        session = _session_with_settings(
+            external_search_provider="exa", exa_api_key="exa-key-xyz"
+        )
+
+        # Capture the outgoing request, return a canned response.
+        captured: dict = {}
+
+        async def _post(self, url, json=None, headers=None):
+            captured["url"] = url
+            captured["json"] = json
+            captured["headers"] = headers
+            request = httpx.Request("POST", url)
+            return httpx.Response(
+                200,
+                json={
+                    "results": [
+                        {
+                            "title": "Result A",
+                            "url": "https://a.example",
+                            "text": "snippet A " * 5,
+                        },
+                        {
+                            "title": "Result B",
+                            "url": "https://b.example",
+                            "text": "snippet B",
+                        },
+                    ]
+                },
+                request=request,
+            )
+
+        with patch.object(httpx.AsyncClient, "post", new=_post):
+            out = await handle_web_search_fallback(
+                session, {"query": "what is anton", "max_results": 2}
+            )
+
+        assert captured["url"] == "https://api.exa.ai/search"
+        assert captured["headers"]["Authorization"] == "Bearer exa-key-xyz"
+        assert captured["json"]["query"] == "what is anton"
+        assert captured["json"]["num_results"] == 2
+        # Output is markdown-ish with both results
+        assert "Result A" in out
+        assert "https://a.example" in out
+        assert "Result B" in out
+
+    async def test_exa_non_200_response_returns_error_string(self):
+        session = _session_with_settings(
+            external_search_provider="exa", exa_api_key="k"
+        )
+
+        async def _post(self, url, json=None, headers=None):
+            return httpx.Response(
+                401, text="bad key", request=httpx.Request("POST", url)
+            )
+
+        with patch.object(httpx.AsyncClient, "post", new=_post):
+            out = await handle_web_search_fallback(session, {"query": "x"})
+        assert "Exa search failed" in out
+        assert "401" in out
+
+    async def test_caps_max_results_to_safe_range(self):
+        session = _session_with_settings(
+            external_search_provider="exa", exa_api_key="k"
+        )
+
+        captured: dict = {}
+
+        async def _post(self, url, json=None, headers=None):
+            captured["json"] = json
+            return httpx.Response(
+                200, json={"results": []}, request=httpx.Request("POST", url)
+            )
+
+        with patch.object(httpx.AsyncClient, "post", new=_post):
+            await handle_web_search_fallback(
+                session, {"query": "x", "max_results": 999}
+            )
+        # 999 is clamped to 20 (the upper bound).
+        assert captured["json"]["num_results"] == 20
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# web_search fallback — Brave
+# ─────────────────────────────────────────────────────────────────────────────
+
+
+class TestWebSearchFallbackBrave:
+    async def test_calls_brave_endpoint_with_subscription_token(self):
+        session = _session_with_settings(
+            external_search_provider="brave", brave_api_key="brv-key"
+        )
+        captured: dict = {}
+
+        async def _get(self, url, headers=None, params=None):
+            captured["url"] = url
+            captured["headers"] = headers
+            captured["params"] = params
+            return httpx.Response(
+                200,
+                json={
+                    "web": {
+                        "results": [
+                            {
+                                "title": "Brave hit",
+                                "url": "https://b.example",
+                                "description": "A hit.",
+                            }
+                        ]
+                    }
+                },
+                request=httpx.Request("GET", url),
+            )
+
+        with patch.object(httpx.AsyncClient, "get", new=_get):
+            out = await handle_web_search_fallback(session, {"query": "anton"})
+
+        assert captured["url"] == "https://api.search.brave.com/res/v1/web/search"
+        assert captured["headers"]["X-Subscription-Token"] == "brv-key"
+        assert captured["params"] == {"q": "anton", "count": 5}
+        assert "Brave hit" in out
+        assert "A hit." in out
+
+    async def test_brave_no_results(self):
+        session = _session_with_settings(
+            external_search_provider="brave", brave_api_key="k"
+        )
+
+        async def _get(self, url, headers=None, params=None):
+            return httpx.Response(
+                200, json={"web": {"results": []}}, request=httpx.Request("GET", url)
+            )
+
+        with patch.object(httpx.AsyncClient, "get", new=_get):
+            out = await handle_web_search_fallback(session, {"query": "obscure"})
+        assert "No results" in out
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# web_fetch fallback
+# ─────────────────────────────────────────────────────────────────────────────
+
+
+class TestWebFetchFallback:
+    async def test_rejects_non_http_urls(self):
+        out = await handle_web_fetch_fallback(None, {"url": "ftp://x.example"})
+        assert "http(s)" in out
+
+    async def test_empty_url(self):
+        out = await handle_web_fetch_fallback(None, {"url": "   "})
+        assert "requires" in out
+
+    async def test_strips_html_to_text(self):
+        async def _get(self, url, headers=None):
+            return httpx.Response(
+                200,
+                text=(
+                    "<html><head><title>T</title>"
+                    "<script>var x = 1;</script></head>"
+                    "<body><p>Hello, <b>world</b>!</p>"
+                    "<p>Second para.</p></body></html>"
+                ),
+                headers={"content-type": "text/html"},
+                request=httpx.Request("GET", url),
+            )
+
+        with patch.object(httpx.AsyncClient, "get", new=_get):
+            out = await handle_web_fetch_fallback(
+                None, {"url": "https://example.com"}
+            )
+
+        # Body text is preserved, script and tags are stripped.
+        assert "Hello" in out
+        assert "world" in out
+        assert "Second para" in out
+        assert "<script>" not in out
+        assert "var x = 1" not in out
+
+    async def test_truncates_to_max_chars(self):
+        big = "<html><body><p>" + ("x" * 5000) + "</p></body></html>"
+
+        async def _get(self, url, headers=None):
+            return httpx.Response(
+                200,
+                text=big,
+                headers={"content-type": "text/html"},
+                request=httpx.Request("GET", url),
+            )
+
+        with patch.object(httpx.AsyncClient, "get", new=_get):
+            out = await handle_web_fetch_fallback(
+                None, {"url": "https://example.com", "max_chars": 500}
+            )
+
+        assert "[truncated]" in out
+        # max_chars caps the body text we return; the header line is separate.
+        assert out.count("x") <= 600
+
+    async def test_returns_error_for_4xx(self):
+        async def _get(self, url, headers=None):
+            return httpx.Response(
+                404, text="missing", request=httpx.Request("GET", url)
+            )
+
+        with patch.object(httpx.AsyncClient, "get", new=_get):
+            out = await handle_web_fetch_fallback(
+                None, {"url": "https://example.com/missing"}
+            )
+        assert "404" in out
+
+    async def test_handles_timeout(self):
+        async def _get(self, url, headers=None):
+            raise httpx.TimeoutException("slow")
+
+        with patch.object(httpx.AsyncClient, "get", new=_get):
+            out = await handle_web_fetch_fallback(
+                None, {"url": "https://example.com"}
+            )
+        assert "timed out" in out.lower()
+
+
+class TestStripHtml:
+    def test_drops_script_and_style(self):
+        html = (
+            "<style>p{color:red}</style>"
+            "<script>alert('x')</script>"
+            "<p>Visible.</p>"
+        )
+        assert _strip_html(html).strip() == "Visible."
+
+    def test_decodes_entities(self):
+        assert "you & me" in _strip_html("<p>you &amp; me</p>")
+
+    def test_block_tags_get_newline_separation(self):
+        html = "<p>one</p><p>two</p>"
+        out = _strip_html(html)
+        assert "one" in out and "two" in out
+        # Some kind of separator between paragraphs (newline or blank line).
+        assert "\n" in out
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Session-side resolution: native vs fallback by provider
+# ─────────────────────────────────────────────────────────────────────────────
+
+
+class TestSessionWebToolResolution:
+    """ChatSession.__init__ must resolve the per-session web tool plan correctly:
+
+    - When the planning provider claims a capability natively, it goes into
+      ``_native_web_tools`` and the fallback ToolDef is NOT registered.
+    - When the provider does not, the capability goes into ``_fallback_web_tools``
+      and the corresponding ToolDef IS registered.
+    """
+
+    def _build_session(self, *, provider_native: set[str], cfg_kwargs: dict | None = None):
+        from anton.core.session import ChatSession, ChatSessionConfig
+        from anton.core.llm.provider import ProviderConnectionInfo
+
+        mock_llm = AsyncMock()
+        mock_llm.coding_provider = MagicMock()
+        mock_llm.coding_provider.export_connection_info = MagicMock(
+            return_value=ProviderConnectionInfo(provider="x", api_key="k")
+        )
+        mock_llm.coding_model = "x"
+        mock_llm.planning_provider = MagicMock()
+        mock_llm.planning_provider.native_web_tools = MagicMock(
+            return_value=provider_native
+        )
+        cfg = ChatSessionConfig(llm_client=mock_llm, **(cfg_kwargs or {}))
+        return ChatSession(cfg)
+
+    def test_anthropic_style_native_provider_uses_no_fallback(self):
+        session = self._build_session(provider_native={"web_search", "web_fetch"})
+        assert session._native_web_tools == {"web_search", "web_fetch"}
+        assert session._fallback_web_tools == set()
+
+    def test_generic_provider_routes_both_to_fallback(self):
+        session = self._build_session(provider_native=set())
+        assert session._native_web_tools == set()
+        assert session._fallback_web_tools == {"web_search", "web_fetch"}
+
+    def test_disabled_search_drops_from_both_sets(self):
+        session = self._build_session(
+            provider_native={"web_search", "web_fetch"},
+            cfg_kwargs={"web_search_enabled": False},
+        )
+        assert "web_search" not in session._native_web_tools
+        assert "web_search" not in session._fallback_web_tools
+        assert "web_fetch" in session._native_web_tools
+
+    def test_fallback_toolDefs_registered_when_provider_lacks_native(self):
+        session = self._build_session(provider_native=set())
+        # Trigger lazy build of the registry.
+        tools = session._build_tools()
+        names = {t["name"] for t in tools}
+        assert "web_search" in names
+        assert "web_fetch" in names
+
+    def test_fallback_toolDefs_not_registered_when_provider_is_native(self):
+        session = self._build_session(provider_native={"web_search", "web_fetch"})
+        tools = session._build_tools()
+        names = {t["name"] for t in tools}
+        # web tools are server-side on the provider; they should NOT appear in
+        # the registry — the model invokes them through the provider directly.
+        assert "web_search" not in names
+        assert "web_fetch" not in names
+
+
+class TestNativeWebToolsForwarded:
+    """plan_with_recovery / plan_stream_with_recovery must forward the resolved
+    native_web_tools set to the LLM client without each call site needing to
+    remember it."""
+
+    async def test_plan_with_recovery_forwards_native_set(self):
+        from anton.core.session import ChatSession, ChatSessionConfig
+        from anton.core.llm.provider import LLMResponse, ProviderConnectionInfo, Usage
+
+        mock_llm = AsyncMock()
+        mock_llm.coding_provider = MagicMock()
+        mock_llm.coding_provider.export_connection_info = MagicMock(
+            return_value=ProviderConnectionInfo(provider="x", api_key="k")
+        )
+        mock_llm.coding_model = "x"
+        mock_llm.planning_provider = MagicMock()
+        mock_llm.planning_provider.native_web_tools = MagicMock(
+            return_value={"web_search", "web_fetch"}
+        )
+        mock_llm.plan = AsyncMock(
+            return_value=LLMResponse(content="ok", usage=Usage())
+        )
+
+        session = ChatSession(ChatSessionConfig(llm_client=mock_llm))
+        await session.plan_with_recovery(system="sys")
+
+        kwargs = mock_llm.plan.call_args.kwargs
+        assert kwargs["native_web_tools"] == {"web_search", "web_fetch"}
+
+    async def test_plan_with_recovery_omits_kwarg_when_no_native(self):
+        from anton.core.session import ChatSession, ChatSessionConfig
+        from anton.core.llm.provider import LLMResponse, ProviderConnectionInfo, Usage
+
+        mock_llm = AsyncMock()
+        mock_llm.coding_provider = MagicMock()
+        mock_llm.coding_provider.export_connection_info = MagicMock(
+            return_value=ProviderConnectionInfo(provider="x", api_key="k")
+        )
+        mock_llm.coding_model = "x"
+        mock_llm.planning_provider = MagicMock()
+        mock_llm.planning_provider.native_web_tools = MagicMock(return_value=set())
+        mock_llm.plan = AsyncMock(
+            return_value=LLMResponse(content="ok", usage=Usage())
+        )
+
+        session = ChatSession(ChatSessionConfig(llm_client=mock_llm))
+        await session.plan_with_recovery(system="sys")
+
+        kwargs = mock_llm.plan.call_args.kwargs
+        # When the provider has no native web tools, the kwarg is left out
+        # entirely so it doesn't even appear in older mocks' call_args.
+        assert "native_web_tools" not in kwargs
+
+
+class TestToolDefShapes:
+    def test_search_tool_schema_requires_query(self):
+        assert "query" in WEB_SEARCH_FALLBACK_TOOL.input_schema["required"]
+
+    def test_fetch_tool_schema_requires_url(self):
+        assert "url" in WEB_FETCH_FALLBACK_TOOL.input_schema["required"]
+
+    def test_tool_names_match_native_capability_strings(self):
+        # The fallback names MUST match the native capability strings so that
+        # provider-side execution and handler-side execution feel identical to
+        # the agent. If these drift, tools registered conditionally won't line
+        # up with the native_web_tools set.
+        assert WEB_SEARCH_FALLBACK_TOOL.name == "web_search"
+        assert WEB_FETCH_FALLBACK_TOOL.name == "web_fetch"
diff --git a/tests/test_web_tools_live.py b/tests/test_web_tools_live.py
new file mode 100644
index 00000000..12761aca
--- /dev/null
+++ b/tests/test_web_tools_live.py
@@ -0,0 +1,609 @@
+"""Live integration tests for the native web tool paths.
+
+These tests make real API calls — they exercise the wire format end-to-end
+(tool spec serialization, server-side execution, response parsing) instead of
+just checking what we send. They auto-skip when the corresponding API key is
+not in the environment, so CI without keys is unaffected.
+
+Loads ``.env`` from the project root once at import time so a developer who
+keeps their keys in ``.env`` (the standard pattern for this repo) doesn't need
+to ``source`` anything before running ``pytest``.
+
+Coverage map:
+
+- ``TestAnthropicLive`` — ``AnthropicProvider`` with ``native_web_tools``
+  resolving to ``web_search_20250305`` and ``web_fetch_20250910`` server tools.
+  Hits the Messages API directly.
+- ``TestOpenAIBYOKLive`` — ``OpenAIProvider(flavor="openai")`` with
+  ``native_web_tools`` routing through the Responses API
+  (``client.responses.create``). The whole BYOK OpenAI path runs through
+  Responses now, so this also validates non-tool calls along the way.
+- ``TestMindsPassthroughLive`` — flavor=``"minds-passthrough"``, base_url=
+  ``https://mdb.ai/api/v1``, model=``"_reason_"``. Currently skipped because
+  the mdb.ai ``passthrough_agent`` web-tools translation lands in a separate
+  PR; the scaffolding here means we just remove the skip mark when it ships.
+
+Cost note: each test uses small ``max_tokens`` to keep the bill negligible.
+"""
+
+from __future__ import annotations
+
+import os
+from pathlib import Path
+
+import pytest
+
+# Load .env once so plain os.environ reads pick up keys the developer put in
+# the repo-root .env (matches AntonSettings' env-file precedence).
+try:
+    from dotenv import load_dotenv
+
+    load_dotenv(Path(__file__).resolve().parent.parent / ".env", override=False)
+except (ImportError, OSError):
+    # python-dotenv is a transitive dep through pydantic-settings; if it's
+    # missing or .env is unreadable, fall back to what's already in os.environ.
+    pass
+
+
+def _have(key: str) -> bool:
+    return os.environ.get(key, "") != ""  # unset and empty both count as absent
+
+
+anthropic_only = pytest.mark.skipif(
+    not _have("ANTHROPIC_API_KEY"),
+    reason="ANTHROPIC_API_KEY not set — live test skipped",
+)
+openai_only = pytest.mark.skipif(
+    not _have("OPENAI_API_KEY"),
+    reason="OPENAI_API_KEY not set — live test skipped",
+)
+minds_only = pytest.mark.skipif(
+    not _have("MINDS_API_KEY"),
+    reason="MINDS_API_KEY not set — live test skipped",
+)
+exa_only = pytest.mark.skipif(
+    not _have("EXA_API_KEY"),
+    reason="EXA_API_KEY not set — live test skipped",
+)
+brave_only = pytest.mark.skipif(
+    not _have("BRAVE_API_KEY"),
+    reason="BRAVE_API_KEY not set — live test skipped",
+)
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Anthropic BYOK — native web_search and web_fetch
+# ─────────────────────────────────────────────────────────────────────────────
+
+
+@anthropic_only
+class TestAnthropicLive:
+    """Real calls to Anthropic with the native server-side web tools.
+
+    On success the model emits some combination of ``server_tool_use`` /
+    ``web_search_tool_result`` / ``text`` blocks; the existing extraction loop
+    in ``AnthropicProvider.complete`` already filters down to text blocks (and
+    real ``tool_use`` blocks for function tools), so the model's natural
+    response — which incorporates the search/fetch result — flows back as
+    ``LLMResponse.content``. The assertions are deliberately loose: we only
+    care that the call succeeds and returns plausible content; exact
+    summarization is the model's job, not our wire format's.
+    """
+
+    @pytest.mark.asyncio
+    async def test_complete_with_native_web_search(self):
+        from anton.core.llm.anthropic import AnthropicProvider
+
+        provider = AnthropicProvider(api_key=os.environ["ANTHROPIC_API_KEY"])
+        response = await provider.complete(
+            model="claude-sonnet-4-6",
+            system="Use web_search if you need current information. Be brief.",
+            messages=[
+                {
+                    "role": "user",
+                    "content": (
+                        "Search the web for 'Anthropic Claude' and tell me one "
+                        "fact in a single sentence."
+                    ),
+                }
+            ],
+            native_web_tools={"web_search"},
+            max_tokens=512,
+        )
+
+        assert response.content, "expected non-empty model response"
+        assert len(response.content) > 20
+        # The query forces a search-shaped answer; "Anthropic" or "Claude"
+        # should land in the text either way.
+        lowered = response.content.lower()
+        assert "anthropic" in lowered or "claude" in lowered
+
+    @pytest.mark.asyncio
+    async def test_complete_with_native_web_fetch(self):
+        from anton.core.llm.anthropic import AnthropicProvider
+
+        provider = AnthropicProvider(api_key=os.environ["ANTHROPIC_API_KEY"])
+        response = await provider.complete(
+            model="claude-sonnet-4-6",
+            system=(
+                "Use the web_fetch tool to retrieve the URL the user provides "
+                "and quote one short phrase from the page. Be brief."
+            ),
+            messages=[
+                {
+                    "role": "user",
+                    "content": (
+                        "Fetch https://example.com and tell me what the page says."
+                    ),
+                }
+            ],
+            native_web_tools={"web_fetch"},
+            max_tokens=512,
+        )
+
+        assert response.content, "expected non-empty model response"
+        # example.com's signature phrase — the model should surface it after
+        # the server-side fetch lands.
+        assert "example" in response.content.lower()
+
+    @pytest.mark.asyncio
+    async def test_complete_with_both_native_tools(self):
+        """Both server tools wired in the same call — exercises the merged
+        tools array + the beta header co-existing with non-beta tooling."""
+        from anton.core.llm.anthropic import AnthropicProvider
+
+        provider = AnthropicProvider(api_key=os.environ["ANTHROPIC_API_KEY"])
+        response = await provider.complete(
+            model="claude-sonnet-4-6",
+            system="Use whichever web tool fits. Keep your answer short.",
+            messages=[
+                {
+                    "role": "user",
+                    "content": "What is on https://example.com? One sentence.",
+                }
+            ],
+            native_web_tools={"web_search", "web_fetch"},
+            max_tokens=512,
+        )
+
+        assert response.content, "expected non-empty model response"
+
+    @pytest.mark.asyncio
+    async def test_complete_without_web_tools_still_works(self):
+        """Sanity: opting out (``native_web_tools=None``) must not regress
+        the existing chat-only path."""
+        from anton.core.llm.anthropic import AnthropicProvider
+
+        provider = AnthropicProvider(api_key=os.environ["ANTHROPIC_API_KEY"])
+        response = await provider.complete(
+            model="claude-sonnet-4-6",
+            system="Reply with exactly: pong",
+            messages=[{"role": "user", "content": "ping"}],
+            max_tokens=16,
+        )
+        assert "pong" in response.content.lower()
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# OpenAI BYOK — Responses API path with native web_search
+# ─────────────────────────────────────────────────────────────────────────────
+
+
+@openai_only
+class TestOpenAIBYOKLive:
+    """Real calls to ``client.responses.create`` for ``flavor="openai"``.
+
+    Validates that the entire Responses API translation (input shape, tools,
+    instructions, output_text/function_call extraction) lines up with what
+    the live API expects. The earlier mocked tests cover the request shape
+    going out; these confirm it actually works against the real endpoint.
+    """
+
+    @pytest.mark.asyncio
+    async def test_responses_api_basic_call(self):
+        """No web tools — just confirm the Responses API transport works for
+        the simple text path. If this fails, every other BYOK OpenAI test
+        is also broken."""
+        from anton.core.llm.openai import OpenAIProvider
+
+        provider = OpenAIProvider(
+            api_key=os.environ["OPENAI_API_KEY"],
+            flavor=OpenAIProvider.FLAVOR_OPENAI,
+        )
+        response = await provider.complete(
+            model="gpt-5",
+            system="Reply with exactly: pong",
+            messages=[{"role": "user", "content": "ping"}],
+            max_tokens=512,
+        )
+        assert response.content, "expected non-empty Responses API output_text"
+        assert "pong" in response.content.lower()
+        assert response.usage.input_tokens > 0
+        assert response.usage.output_tokens > 0
+
+    @pytest.mark.asyncio
+    async def test_responses_api_with_native_web_search(self):
+        from anton.core.llm.openai import OpenAIProvider
+
+        provider = OpenAIProvider(
+            api_key=os.environ["OPENAI_API_KEY"],
+            flavor=OpenAIProvider.FLAVOR_OPENAI,
+        )
+        # gpt-5 is a reasoning model: reasoning tokens + the (often large)
+        # web_search result payload share the ``max_output_tokens`` budget,
+        # so a tight cap can leave nothing for the final text. 4096 is
+        # comfortable headroom for a one-sentence answer over a search.
+        response = await provider.complete(
+            model="gpt-5",
+            system="Use web_search if you need current information. Be brief.",
+            messages=[
+                {
+                    "role": "user",
+                    "content": (
+                        "Search for 'OpenAI Responses API' and summarize one "
+                        "thing about it in a single sentence."
+                    ),
+                }
+            ],
+            native_web_tools={"web_search"},
+            max_tokens=4096,
+        )
+
+        assert response.content, (
+            f"expected non-empty model response but got stop_reason="
+            f"{response.stop_reason!r} (input_tokens={response.usage.input_tokens}, "
+            f"output_tokens={response.usage.output_tokens})"
+        )
+        assert len(response.content) > 20
+        lowered = response.content.lower()
+        assert "openai" in lowered or "responses" in lowered or "api" in lowered
+
+    @pytest.mark.asyncio
+    async def test_responses_api_with_function_tool_round_trip(self):
+        """Forced function-tool call through the Responses API.
+
+        Confirms the flat function-tool shape (`{"type": "function", "name": ...}`),
+        the ``tool_choice`` translation, and the ``call_id`` round-trip all
+        work against the live endpoint. This is the same path
+        ``generate_object*`` uses, so a regression here would cascade.
+
+        Note on ``max_tokens``: gpt-5 is a reasoning model and its reasoning
+        tokens count against ``max_output_tokens``. A low cap can leave the
+        model with no budget to emit the function call (``stop_reason=
+        "incomplete"``), so we use a generous 4096 here. Still pennies per run.
+        """
+        from anton.core.llm.openai import OpenAIProvider
+
+        provider = OpenAIProvider(
+            api_key=os.environ["OPENAI_API_KEY"],
+            flavor=OpenAIProvider.FLAVOR_OPENAI,
+        )
+        response = await provider.complete(
+            model="gpt-5",
+            system="Call the answer tool to provide your reply.",
+            messages=[{"role": "user", "content": "What is 6 times 7?"}],
+            tools=[
+                {
+                    "name": "answer",
+                    "description": "Provide the numeric answer.",
+                    "input_schema": {
+                        "type": "object",
+                        "properties": {
+                            "value": {"type": "integer"},
+                            "explanation": {"type": "string"},
+                        },
+                        "required": ["value"],
+                    },
+                }
+            ],
+            tool_choice={"type": "tool", "name": "answer"},
+            max_tokens=4096,
+        )
+
+        assert response.tool_calls, (
+            f"expected forced tool call but got stop_reason={response.stop_reason!r} "
+            f"with content={response.content!r}"
+        )
+        tc = response.tool_calls[0]
+        assert tc.name == "answer"
+        # call_id is the canonical id we'll reference in any follow-up
+        # function_call_output items.
+        assert tc.id
+        assert tc.input.get("value") == 42
+
+    @pytest.mark.asyncio
+    async def test_responses_api_streaming(self):
+        """Quick smoke of the streaming path. Streaming has its own
+        per-event translation (output_text.delta, function_call_arguments.*,
+        completed) that the non-streaming test doesn't exercise."""
+        from anton.core.llm.openai import OpenAIProvider
+        from anton.core.llm.provider import StreamComplete, StreamTextDelta
+
+        provider = OpenAIProvider(
+            api_key=os.environ["OPENAI_API_KEY"],
+            flavor=OpenAIProvider.FLAVOR_OPENAI,
+        )
+
+        text_chunks: list[str] = []
+        final_response = None
+        async for event in provider.stream(
+            model="gpt-5",
+            system="Reply with exactly: pong",
+            messages=[{"role": "user", "content": "ping"}],
+            max_tokens=512,
+        ):
+            if isinstance(event, StreamTextDelta):
+                text_chunks.append(event.text)
+            elif isinstance(event, StreamComplete):
+                final_response = event.response
+
+        joined = "".join(text_chunks).lower()
+        assert "pong" in joined
+        assert final_response is not None
+        assert final_response.content == "".join(text_chunks)
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Minds passthrough — same path as OpenAI-compatible chat.completions
+# ─────────────────────────────────────────────────────────────────────────────
+
+
+@minds_only
+@pytest.mark.skip(
+    reason=(
+        "mdb.ai passthrough_agent web-tools translation lives in a separate "
+        "backend PR. Scaffolding is in place — remove this skip when the "
+        "passthrough side ships."
+    )
+)
+class TestMindsPassthroughLive:
+    """Native web tools through mdb.ai (chat.completions transport with
+    ``{"type": "web_search"}`` / ``{"type": "fetch"}`` appended raw).
+
+    The wire format on our end is finalized — this suite already passes
+    against the local mock — but the upstream ``passthrough_agent`` doesn't
+    translate the web tool entries to the underlying provider yet, so a real
+    call returns either a 4xx or a no-op completion. Tests are skipped at
+    the class level until the backend lands, so any future change to the
+    passthrough path that breaks our wire format will surface here on the
+    first un-skipped run.
+    """
+
+    @pytest.mark.asyncio
+    async def test_complete_with_native_web_search(self):
+        from anton.core.llm.openai import OpenAIProvider
+
+        provider = OpenAIProvider(
+            api_key=os.environ["MINDS_API_KEY"],
+            base_url="https://mdb.ai/api/v1",
+            flavor=OpenAIProvider.FLAVOR_MINDS_PASSTHROUGH,
+            supports_vision=False,
+        )
+        response = await provider.complete(
+            model="_reason_",
+            system="Use web_search if you need current information. Be brief.",
+            messages=[
+                {
+                    "role": "user",
+                    "content": (
+                        "Search for 'Anthropic' and tell me one fact in a sentence."
+                    ),
+                }
+            ],
+            native_web_tools={"web_search"},
+            max_tokens=512,
+        )
+        assert response.content
+        assert len(response.content) > 20
+
+    @pytest.mark.asyncio
+    async def test_complete_with_native_fetch(self):
+        from anton.core.llm.openai import OpenAIProvider
+
+        provider = OpenAIProvider(
+            api_key=os.environ["MINDS_API_KEY"],
+            base_url="https://mdb.ai/api/v1",
+            flavor=OpenAIProvider.FLAVOR_MINDS_PASSTHROUGH,
+            supports_vision=False,
+        )
+        response = await provider.complete(
+            model="_reason_",
+            system="Use the fetch tool. Be brief.",
+            messages=[
+                {
+                    "role": "user",
+                    "content": "Fetch https://example.com and tell me what's there.",
+                }
+            ],
+            native_web_tools={"web_fetch"},
+            max_tokens=512,
+        )
+        assert response.content
+        assert "example" in response.content.lower()
+
+    @pytest.mark.asyncio
+    async def test_complete_without_web_tools_still_works(self):
+        """Minds-passthrough chat.completions without web tools — sanity
+        check that our flavor flag doesn't break the baseline chat call."""
+        from anton.core.llm.openai import OpenAIProvider
+
+        provider = OpenAIProvider(
+            api_key=os.environ["MINDS_API_KEY"],
+            base_url="https://mdb.ai/api/v1",
+            flavor=OpenAIProvider.FLAVOR_MINDS_PASSTHROUGH,
+            supports_vision=False,
+        )
+        response = await provider.complete(
+            model="_reason_",
+            system="Reply with exactly: pong",
+            messages=[{"role": "user", "content": "ping"}],
+            max_tokens=16,
+        )
+        assert "pong" in response.content.lower()
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Case 3 — generic OpenAI-compatible fallback: Exa.ai & Brave Search
+# ─────────────────────────────────────────────────────────────────────────────
+#
+# The Exa/Brave adapters live in ``anton/core/tools/web_tools.py``. Mocked
+# tests in ``test_web_tools.py`` already cover the request shape going out;
+# these confirm the live endpoints accept our auth + payload + still return
+# the response shape we parse. They also implicitly validate the setup probe
+# in ``cli._setup_exa`` / ``_setup_brave``, which uses the same auth +
+# endpoint pair.
+
+
+def _settings_with(**fields):
+    """Build a minimal settings double exposing only the three search attrs.
+
+    Attributes not supplied default to ``None``; unrelated kwargs are dropped.
+    """
+    from types import SimpleNamespace
+
+    attrs = ("external_search_provider", "exa_api_key", "brave_api_key")
+    return SimpleNamespace(**{name: fields.get(name) for name in attrs})
+
+
+def _session_with(settings):
+    """Wrap *settings* in a bare namespace that exposes it as ``_settings``."""
+    from types import SimpleNamespace
+    return SimpleNamespace(_settings=settings)
+
+
+@exa_only
+class TestExaLive:
+    """Real calls to Exa.ai's ``/search`` endpoint."""
+
+    @pytest.mark.asyncio
+    async def test_search_returns_real_results(self):
+        """Direct adapter call — the format helper formats real hits."""
+        from anton.core.tools.web_tools import _search_exa
+
+        out = await _search_exa(
+            query="Anthropic Claude",
+            api_key=os.environ["EXA_API_KEY"],
+            max_results=3,
+        )
+
+        assert "Web search results for: 'Anthropic Claude'" in out
+        # At least one https:// URL should appear in the formatted output.
+        assert "https://" in out
+        # And the markdown numbering means we got real hits, not the "no
+        # results" branch.
+        assert "1. **" in out
+
+    @pytest.mark.asyncio
+    async def test_handler_dispatch_via_session(self):
+        """The full path the agent actually uses: session settings →
+        ``handle_web_search_fallback`` → ``_search_exa`` → real network."""
+        from anton.core.tools.web_tools import handle_web_search_fallback
+
+        session = _session_with(
+            _settings_with(
+                external_search_provider="exa",
+                exa_api_key=os.environ["EXA_API_KEY"],
+            )
+        )
+        out = await handle_web_search_fallback(
+            session, {"query": "Anthropic Claude", "max_results": 2}
+        )
+        assert "https://" in out
+        assert "Anthropic Claude" in out  # query echoed in the header
+
+    @pytest.mark.asyncio
+    async def test_setup_probe_endpoint_contract(self):
+        """The setup probe in ``cli._setup_exa`` posts the same payload to
+        the same URL with the same auth header. If Exa changes its endpoint
+        or auth shape, setup AND runtime both break; this catches it live."""
+        import httpx as _httpx
+
+        # Exact same shape ``cli._setup_exa._test`` uses internally. The
+        # client is a context manager so its connection pool is closed.
+        async with _httpx.AsyncClient(timeout=15.0) as client:
+            resp = await client.post(
+                "https://api.exa.ai/search",
+                headers={"Authorization": f"Bearer {os.environ['EXA_API_KEY']}"},
+                json={"query": "anton ping", "num_results": 1},
+            )
+        assert resp.status_code == 200, (
+            f"setup probe contract broken: HTTP {resp.status_code} — {resp.text[:200]}"
+        )
+
+
+@brave_only
+class TestBraveLive:
+    """Real calls to Brave Search's web endpoint."""
+
+    @pytest.mark.asyncio
+    async def test_search_returns_real_results(self):
+        from anton.core.tools.web_tools import _search_brave
+
+        out = await _search_brave(
+            query="Anthropic Claude",
+            api_key=os.environ["BRAVE_API_KEY"],
+            max_results=3,
+        )
+
+        assert "Web search results for: 'Anthropic Claude'" in out
+        assert "https://" in out
+        assert "1. **" in out
+
+    @pytest.mark.asyncio
+    async def test_handler_dispatch_via_session(self):
+        from anton.core.tools.web_tools import handle_web_search_fallback
+
+        session = _session_with(
+            _settings_with(
+                external_search_provider="brave",
+                brave_api_key=os.environ["BRAVE_API_KEY"],
+            )
+        )
+        out = await handle_web_search_fallback(
+            session, {"query": "Anthropic Claude", "max_results": 2}
+        )
+        assert "https://" in out
+        assert "Anthropic Claude" in out
+
+    @pytest.mark.asyncio
+    async def test_setup_probe_endpoint_contract(self):
+        """Brave mirror of the Exa probe test (``cli._setup_brave._test``)."""
+        import httpx as _httpx
+
+        async with _httpx.AsyncClient(timeout=15.0) as client:  # always closed
+            resp = await client.get(
+                "https://api.search.brave.com/res/v1/web/search",
+                headers={
+                    "X-Subscription-Token": os.environ["BRAVE_API_KEY"],
+                    "Accept": "application/json",
+                },
+                params={"q": "anton ping", "count": 1},
+            )
+        assert resp.status_code == 200, (
+            f"setup probe contract broken: HTTP {resp.status_code} — {resp.text[:200]}"
+        )
+
+
+class TestWebFetchLive:
+    """Real ``handle_web_fetch_fallback`` against a stable known URL.
+
+    No API key needed — fetch is the always-on Case 3 capability. ``example.com``
+    is operated by IANA and has a stable, well-formed signature page (``Example
+    Domain`` heading) which makes this assertion stable enough to live in CI.
+    """
+
+    @pytest.mark.asyncio
+    async def test_fetches_example_dot_com(self):
+        from anton.core.tools.web_tools import handle_web_fetch_fallback
+
+        out = await handle_web_fetch_fallback(
+            None, {"url": "https://example.com", "max_chars": 5000}
+        )
+        # The header line includes status + byte count.
+        assert "HTTP 200" in out
+        # Signature text from the canonical example.com page.
+        assert "Example Domain" in out
+        # Confirms the HTML stripper actually ran (the live page has
+        # <html>/<body>/<a> tags that should not survive in our output).
+        assert "<html" not in out.lower()
+        assert "<body" not in out.lower()

From cbcef15ce953c3b8f7ad09c891b279a4f51feae8 Mon Sep 17 00:00:00 2001
From: Max Abouchar <maxabouchar@gmail.com>
Date: Wed, 6 May 2026 16:06:33 -0700
Subject: [PATCH 5/7] search setup UX improvements

---
 anton/cli.py | 122 +++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 89 insertions(+), 33 deletions(-)

diff --git a/anton/cli.py b/anton/cli.py
index a09f1a04..ee0b599c 100644
--- a/anton/cli.py
+++ b/anton/cli.py
@@ -1115,6 +1115,49 @@ def _looks_like_mdb_ai(base_url: str, settings) -> bool:
     return base == minds or base == f"{minds}/api/v1"
 
 
+def _current_search_label(settings) -> str:
+    """Summarize the configured search provider for display.
+
+    Returns ``"none"`` when no provider is set, the lower-cased raw value for
+    an unrecognized provider, and otherwise the display name plus the last
+    four characters of the stored key. The tail is only shown for keys longer
+    than four characters, so a short key is never printed in full.
+    """
+    provider = (getattr(settings, "external_search_provider", None) or "").lower()
+    if not provider:
+        return "none"
+    known = {"exa": ("Exa.ai", "exa_api_key"), "brave": ("Brave Search", "brave_api_key")}
+    if provider not in known:
+        return provider
+    label, attr = known[provider]
+    key = getattr(settings, attr, None) or ""
+    # ``>`` rather than ``>=``: a four-character key would otherwise be
+    # echoed in its entirety behind the "****" prefix.
+    if len(key) > 4:
+        return f"{label} (key: ****{key[-4:]})"
+    return label
+
+
+def _skip_search_provider(settings, ws) -> None:
+    """Turn ``web_search`` off, asking for confirmation first when a
+    provider is already configured (any reply but y/yes keeps it)."""
+    if settings.external_search_provider:
+        question = (
+            "Disable web_search and clear current config "
+            f"({_current_search_label(settings)})? [y/N]"
+        )
+        answer = _setup_prompt(question, default="N").strip().lower()
+        if answer not in {"y", "yes"}:
+            console.print("  [anton.muted]Keeping current search provider.[/]")
+            return
+    settings.external_search_provider = None
+    ws.set_secret("ANTON_EXTERNAL_SEARCH_PROVIDER", "")
+    console.print(
+        "  [anton.muted]web_search will be unavailable until you run "
+        "[bold]anton setup-search[/].[/]"
+    )
+
+
 def _setup_search_provider(settings, ws) -> None:
     """Configure an external search provider (Exa.ai or Brave Search).
 
@@ -1126,6 +1169,10 @@ def _setup_search_provider(settings, ws) -> None:
     """
     console.print()
     console.print("[anton.cyan]Search provider[/]")
+    console.print(
+        f"  [anton.muted]Currently:[/] {_current_search_label(settings)}"
+    )
+    console.print()
     console.print(
         "  [bold]1[/]  [link=https://exa.ai][anton.cyan]Exa.ai[/][/link] "
         "[anton.muted]AI-native semantic search[/]"
@@ -1134,24 +1181,20 @@ def _setup_search_provider(settings, ws) -> None:
         "  [bold]2[/]  [link=https://brave.com/search/api][anton.cyan]Brave Search[/][/link] "
         "[anton.muted]privacy-focused web search[/]"
     )
-    console.print("  [bold]3[/]  [anton.muted]Skip — disable web_search for now[/]")
+    console.print("  [bold]3[/]  [anton.muted]Skip — disable web_search[/]")
     console.print()
 
-    from rich.prompt import Prompt
-    choice = Prompt.ask(
-        "  Choose",
-        choices=["1", "2", "3"],
-        default="1",
-        console=console,
-    )
+    # ``_setup_prompt`` (prompt_toolkit) gives us ESC-to-go-back support and
+    # matches every other ``_setup_*`` helper in this file. Loop on invalid
+    # input — the underlying prompt has no built-in choice validation.
+    while True:
+        choice = _setup_prompt("Choose [1/2/3]", default="1").strip()
+        if choice in ("1", "2", "3"):
+            break
+        console.print("  [anton.warning]Please enter 1, 2, or 3.[/]")
 
     if choice == "3":
-        settings.external_search_provider = None
-        ws.set_secret("ANTON_EXTERNAL_SEARCH_PROVIDER", "")
-        console.print(
-            "  [anton.muted]web_search will be unavailable until you run "
-            "[bold]anton setup-search[/].[/]"
-        )
+        _skip_search_provider(settings, ws)
         return
 
     if choice == "1":
@@ -1197,14 +1240,14 @@ def _test():
         _validate_with_spinner(console, "Exa.ai", _test)
     except PermissionError as exc:
         console.print(f"  [anton.error]{exc}[/]")
-        _handle_search_retry(settings, ws)
+        _handle_search_retry(settings, ws, last_provider="exa")
         return
     except Exception as exc:
         if _is_transient_error(exc):
             console.print("  [anton.warning]Search service is temporarily overloaded.[/]")
         else:
             console.print(f"  [anton.error]Failed:[/] {exc}")
-        _handle_search_retry(settings, ws)
+        _handle_search_retry(settings, ws, last_provider="exa")
         return
 
     settings.external_search_provider = "exa"
@@ -1253,14 +1296,14 @@ def _test():
         _validate_with_spinner(console, "Brave Search", _test)
     except PermissionError as exc:
         console.print(f"  [anton.error]{exc}[/]")
-        _handle_search_retry(settings, ws)
+        _handle_search_retry(settings, ws, last_provider="brave")
         return
     except Exception as exc:
         if _is_transient_error(exc):
             console.print("  [anton.warning]Search service is temporarily overloaded.[/]")
         else:
             console.print(f"  [anton.error]Failed:[/] {exc}")
-        _handle_search_retry(settings, ws)
+        _handle_search_retry(settings, ws, last_provider="brave")
         return
 
     settings.external_search_provider = "brave"
@@ -1270,23 +1313,36 @@ def _test():
     console.print("  [anton.success]Brave Search configured.[/]")
 
 
-def _handle_search_retry(settings, ws) -> None:
-    """Retry / switch / skip after a search-provider validation failure."""
-    from rich.prompt import Prompt
-    choice = Prompt.ask(
-        "  Retry, switch provider, or skip?",
-        choices=["retry", "switch", "skip", "r", "s", "k"],
-        default="retry",
-        console=console,
-    )
-    if choice in ("retry", "r"):
-        _setup_search_provider(settings, ws)
-    elif choice in ("switch", "s"):
-        # Re-show the picker so the user can pick the other provider.
+def _handle_search_retry(settings, ws, *, last_provider: str) -> None:
+    """Ask what to do after a provider probe failed, then act on the answer.
+
+    ``last_provider`` names the provider that just failed; ``"exa"`` re-runs
+    ``_setup_exa`` on retry and any other value re-runs ``_setup_brave``.
+    The prompt repeats until it gets r/retry, s/switch or k/skip (default
+    ``r``). Switch re-opens the full provider picker rather than jumping
+    straight to the other provider; skip defers to ``_skip_search_provider``.
+    """
+    other = "Brave Search" if last_provider == "exa" else "Exa.ai"
+    while True:
+        choice = _setup_prompt(
+            f"Retry, switch to {other}, or skip? [r/s/k]",
+            default="r",
+        ).strip().lower()
+        if choice in ("r", "retry", "s", "switch", "k", "skip"):
+            break
+        console.print("  [anton.warning]Please enter r, s, or k.[/]")
+
+    if choice in ("r", "retry"):
+        # Re-run the failed provider's own helper; the picker is not shown.
+        if last_provider == "exa":
+            _setup_exa(settings, ws)
+        else:
+            _setup_brave(settings, ws)
+    elif choice in ("s", "switch"):
+        # Re-open the full picker; ``other`` is only named in the prompt text.
         _setup_search_provider(settings, ws)
     else:
-        settings.external_search_provider = None
-        ws.set_secret("ANTON_EXTERNAL_SEARCH_PROVIDER", "")
+        _skip_search_provider(settings, ws)
 
 
 @app.command("setup")

From 46e6052f7986173e8902e55b63ce4fc74ee1936b Mon Sep 17 00:00:00 2001
From: Max Abouchar <maxabouchar@gmail.com>
Date: Wed, 6 May 2026 16:06:54 -0700
Subject: [PATCH 6/7] tests for setup ux

---
 tests/test_openai_setup.py | 57 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 57 insertions(+)

diff --git a/tests/test_openai_setup.py b/tests/test_openai_setup.py
index f6d27331..8c72bc72 100644
--- a/tests/test_openai_setup.py
+++ b/tests/test_openai_setup.py
@@ -192,3 +192,60 @@ def test_non_azure_endpoint_with_api_version_uses_standard_client(self, monkeypa
 
         assert not azure_called
         assert settings.openai_api_version == "2025-01"
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Search provider helpers — pure-function corners worth pinning
+# ─────────────────────────────────────────────────────────────────────────────
+
+
+class TestCurrentSearchLabel:
+    """Pins the exact strings ``_current_search_label`` returns.
+
+    The label is what the ``Currently:`` line of ``_setup_search_provider``
+    prints, so these cases guard the ``****`` + last-four mask format and
+    the fallbacks used when there is no provider, key, or known name.
+    """
+
+    @staticmethod
+    def _label_for(provider, exa_api_key=None, brave_api_key=None):
+        """Return the label for a stand-in settings object.
+
+        Imports are kept local so ``anton.cli`` is only loaded when a test
+        actually runs.
+        """
+        from types import SimpleNamespace
+
+        from anton.cli import _current_search_label
+
+        stand_in = SimpleNamespace(
+            external_search_provider=provider,
+            exa_api_key=exa_api_key,
+            brave_api_key=brave_api_key,
+        )
+        return _current_search_label(stand_in)
+
+    def test_none_when_unconfigured(self):
+        # No provider configured: the key attributes are never consulted.
+        assert self._label_for(None) == "none"
+
+    def test_exa_with_full_key_masks_to_last_four(self):
+        # Only the trailing "wxyz" of the key may show up in the label.
+        label = self._label_for("exa", exa_api_key="abcd-1234-wxyz")
+        assert label == "Exa.ai (key: ****wxyz)"
+
+    def test_brave_with_full_key_masks_to_last_four(self):
+        # Same mask format, with Brave's display name and key attribute.
+        label = self._label_for("brave", brave_api_key="brv-key-9876")
+        assert label == "Brave Search (key: ****9876)"
+
+    def test_short_key_omits_the_mask_to_avoid_revealing_length(self):
+        # A two-character key is shorter than the four-character tail, so
+        # the label is the provider name alone.
+        label = self._label_for("exa", exa_api_key="ab")
+        assert label == "Exa.ai"
+
+    def test_unknown_provider_falls_back_to_raw_value(self):
+        # A provider other than exa/brave comes back as its raw value.
+        label = self._label_for("serper")
+        assert label == "serper"

From 6001003a7df3dbaac8943c7ced6d7fb524b3c6b5 Mon Sep 17 00:00:00 2001
From: Max Abouchar <maxabouchar@gmail.com>
Date: Wed, 6 May 2026 16:23:06 -0700
Subject: [PATCH 7/7] use formatted URLs in web search tests

---
 tests/test_web_tools.py      |  9 +++++++--
 tests/test_web_tools_live.py | 21 +++++++++++++++++----
 2 files changed, 24 insertions(+), 6 deletions(-)

diff --git a/tests/test_web_tools.py b/tests/test_web_tools.py
index ade876ef..08d8585c 100644
--- a/tests/test_web_tools.py
+++ b/tests/test_web_tools.py
@@ -94,9 +94,14 @@ async def _post(self, url, json=None, headers=None):
         assert captured["headers"]["Authorization"] == "Bearer exa-key-xyz"
         assert captured["json"]["query"] == "what is anton"
         assert captured["json"]["num_results"] == 2
-        # Output is markdown-ish with both results
+        # Output is markdown-ish with both results. Assert the URL appears as
+        # an exact formatted line ("   <url>") rather than via substring `in`
+        # — the latter would also pass for "https://a.example.evil.com" and
+        # CodeQL's incomplete-URL-substring-sanitization rule (correctly)
+        # warns on that pattern even in tests.
+        out_lines = out.splitlines()
         assert "Result A" in out
-        assert "https://a.example" in out
+        assert "   https://a.example" in out_lines
         assert "Result B" in out
 
     async def test_exa_non_200_response_returns_error_string(self):
diff --git a/tests/test_web_tools_live.py b/tests/test_web_tools_live.py
index 12761aca..78212fbb 100644
--- a/tests/test_web_tools_live.py
+++ b/tests/test_web_tools_live.py
@@ -49,6 +49,19 @@ def _have(key: str) -> bool:
     return bool(os.environ.get(key))
 
 
+def _has_https_url_line(text: str) -> bool:
+    """Report whether some line of ``text`` starts with an ``https://`` URL.
+
+    Leading whitespace on a line is ignored, so indented URL rows count. A
+    URL that only appears mid-line (``"foo https://x"``) does not, which is
+    what makes this stricter than a plain ``"https://" in text`` check.
+    """
+    for row in text.splitlines():
+        if row.lstrip().startswith("https://"):
+            return True
+    return False
+
+
 anthropic_only = pytest.mark.skipif(
     not _have("ANTHROPIC_API_KEY"),
     reason="ANTHROPIC_API_KEY not set — live test skipped",
@@ -488,7 +501,7 @@ async def test_search_returns_real_results(self):
 
         assert "Web search results for: 'Anthropic Claude'" in out
         # At least one https:// URL should appear in the formatted output.
-        assert "https://" in out
+        assert _has_https_url_line(out)
         # And the markdown numbering means we got real hits, not the "no
         # results" branch.
         assert "1. **" in out
@@ -508,7 +521,7 @@ async def test_handler_dispatch_via_session(self):
         out = await handle_web_search_fallback(
             session, {"query": "Anthropic Claude", "max_results": 2}
         )
-        assert "https://" in out
+        assert _has_https_url_line(out)
         assert "Anthropic Claude" in out  # query echoed in the header
 
     @pytest.mark.asyncio
@@ -546,7 +559,7 @@ async def test_search_returns_real_results(self):
         )
 
         assert "Web search results for: 'Anthropic Claude'" in out
-        assert "https://" in out
+        assert _has_https_url_line(out)
         assert "1. **" in out
 
     @pytest.mark.asyncio
@@ -562,7 +575,7 @@ async def test_handler_dispatch_via_session(self):
         out = await handle_web_search_fallback(
             session, {"query": "Anthropic Claude", "max_results": 2}
         )
-        assert "https://" in out
+        assert _has_https_url_line(out)
         assert "Anthropic Claude" in out
 
     @pytest.mark.asyncio