diff --git a/docs/content/docs/(configuration)/secrets.mdx b/docs/content/docs/(configuration)/secrets.mdx index 52342560c..9c954bdd5 100644 --- a/docs/content/docs/(configuration)/secrets.mdx +++ b/docs/content/docs/(configuration)/secrets.mdx @@ -100,6 +100,23 @@ Worker creates a GitHub bot account The tool accepts an optional `category` parameter to override auto-categorization. If omitted, the same rules as the API apply -- known internal credentials are categorized as system, everything else as tool. +### Browser Tool Integration + +The `browser_type` tool supports a `secret` parameter for typing sensitive values (passwords, tokens, API keys) into web form fields without exposing them in tool arguments, output, or LLM context. + +``` +Task says "sign in with password from GH_PASSWORD" + → Worker calls browser_type(index: 2, secret: "GH_PASSWORD") + → Secret store resolves "GH_PASSWORD" to its stored value + → Value is typed into the password field via CDP + → Tool output: "Typed secret 'GH_PASSWORD' into element at index 2" + → The actual password never appears in tool arguments or results +``` + +This is the only safe way to enter credentials in the browser. The `text` parameter types values in plain text that appears in tool output and LLM context -- never use it for passwords or tokens. Both tool-category and system-category secrets can be resolved via the `secret` parameter, since the value is used internally by the browser tool and never exposed to subprocesses. + +If a task references an environment variable for a credential (e.g. "use password from `$GH_PASSWORD`"), the secret should be stored in the secret store under that name and passed via the `secret` parameter. Do not shell out to `echo` or otherwise resolve credential values into plain text. 
+ ## Encryption at Rest The store has two modes: diff --git a/docs/content/docs/(features)/browser.mdx b/docs/content/docs/(features)/browser.mdx index 4c299022f..bfe2809ba 100644 --- a/docs/content/docs/(features)/browser.mdx +++ b/docs/content/docs/(features)/browser.mdx @@ -79,12 +79,27 @@ Single `browser` tool with an `action` discriminator. All actions share one argu Act kinds: - **`click`** -- Click an element by ref. -- **`type`** -- Click an element to focus it, then type `text` into it. +- **`type`** -- Click an element to focus it, then type `text` into it. For passwords and credentials, use `secret` instead of `text` (see below). - **`press_key`** -- Press a keyboard key (e.g., `Enter`, `Tab`, `Escape`). With `element_ref`, presses on that element. Without, dispatches to the page. - **`hover`** -- Move the mouse over an element. - **`scroll_into_view`** -- Scroll an element into the viewport. - **`focus`** -- Focus an element without clicking. +### Secure Text Entry + +The `browser_type` tool accepts a `secret` parameter as an alternative to `text` for typing sensitive values into form fields. When `secret` is provided, the value is resolved from the [secret store](/docs/secrets) and typed into the element without ever appearing in tool arguments, output, or LLM context. + +``` +browser_type { index: 2, secret: "GH_PASSWORD" } + → resolves GH_PASSWORD from the secret store + → types the value into the element + → returns: "Typed secret 'GH_PASSWORD' into element at index 2" +``` + +The `text` and `secret` parameters are mutually exclusive. Use `text` for non-sensitive input (usernames, search queries, form data) and `secret` for passwords, tokens, API keys, and any other credentials. Both tool-category and system-category secrets can be used. + +If a task references a credential by environment variable name (e.g. "use password from `$GH_PASSWORD`"), the worker should pass that name to the `secret` parameter. 
Never shell out to `echo` or otherwise resolve credential values into plain text for browser entry. + ### JavaScript | Action | Required Args | Description | @@ -96,19 +111,17 @@ Act kinds: ## Typical Workflow ``` -browser { action: "launch" } -browser { action: "navigate", url: "https://example.com" } -browser { action: "snapshot" } - → e0: [textbox] "Email", e1: [textbox] "Password", e2: [button] "Sign In" -browser { action: "act", act_kind: "click", element_ref: "e0" } -browser { action: "act", act_kind: "type", element_ref: "e0", text: "user@example.com" } -browser { action: "act", act_kind: "click", element_ref: "e1" } -browser { action: "act", act_kind: "type", element_ref: "e1", text: "hunter2" } -browser { action: "act", act_kind: "click", element_ref: "e2" } -browser { action: "snapshot" } +browser_launch {} +browser_navigate { url: "https://example.com" } +browser_snapshot {} + → [index=0] textbox "Email", [index=1] textbox "Password", [index=2] button "Sign In" +browser_type { index: 0, text: "user@example.com" } +browser_type { index: 1, secret: "ACCOUNT_PASSWORD" } ← password from secret store +browser_click { index: 2 } +browser_snapshot {} → (new page state after login) -browser { action: "screenshot" } -browser { action: "close" } +browser_screenshot {} +browser_close {} ``` ## Screenshots diff --git a/docs/design-docs/tool-nudging.md b/docs/design-docs/tool-nudging.md index 106d9ed86..c5fb2f635 100644 --- a/docs/design-docs/tool-nudging.md +++ b/docs/design-docs/tool-nudging.md @@ -21,14 +21,20 @@ Workers must explicitly signal a terminal outcome via `set_status(kind: "outcome ``` Worker loop starts → LLM completion - → If response includes tool calls → continue normally - → If text-only response: + → If response includes tool calls → continue normally (reset consecutive nudge counter) + → If response has no tool calls: → Has outcome been signaled via set_status(kind: "outcome")? → allow exit - → No outcome signal? 
→ Terminate with "tool_nudge" reason → retry with nudge prompt - → Max 2 retries per prompt request - → If retries exhausted → worker fails (PromptCancelled) + → No outcome signal? + → Response has text OR is reasoning-only (no text, no tools)? + → Terminate with "tool_nudge" reason → retry with nudge prompt + → Max 2 consecutive text-only responses before failure + → Any successful tool call resets the counter + → If consecutive retries exhausted → worker fails (PromptCancelled) + → After initial task loop: if result text is empty → worker fails (safety net) ``` +The retry budget tracks **consecutive** text-only responses, not total nudges across the worker's lifetime. Whenever a tool call completes successfully, the counter resets to zero. This prevents long-running workers from being killed after a brief narration blip mid-task — a worker that has made 12 tool calls and then narrates once gets the full retry budget again, not a depleted one from earlier nudges. + ### Outcome Signaling The `set_status` tool has a `kind` field: @@ -62,15 +68,24 @@ let hook = SpacebotHook::new(...) - The flag persists for the rest of the prompt request **Nudge decision** (`src/hooks/spacebot.rs:should_nudge_tool_usage`): -- Returns `true` when: policy enabled, nudge active, no outcome signaled, response is text-only +- Returns `true` when: policy enabled, nudge active, no outcome signaled, AND response has no tool calls (either non-empty text or reasoning-only with no text) - Returns `false` when: outcome signaled, response has tool calls, policy disabled +**Empty result guard** (`src/agent/worker.rs`): +- After the initial task loop, if the result text is empty/whitespace-only, the worker fails instead of reaching `Done`. This is a safety net for cases where a reasoning-only response slips past the nudge gate (e.g. after nudge retries are exhausted). 
+ +**Consecutive nudge counter** (`on_tool_result` in `src/hooks/spacebot.rs`): +- When nudge policy is enabled and a tool call completes, `nudge_attempts` is reset to zero +- This means the retry budget is per-consecutive-text-only-streak, not per-prompt-lifetime +- A worker that alternates between tool calls and narration will never exhaust its budget + **Retry flow** (`prompt_with_tool_nudge_retry`): -1. Reset nudge state at start of prompt (clears `outcome_signaled`) +1. Reset nudge state at start of prompt (clears `outcome_signaled` and `nudge_attempts`) 2. On text-only response without outcome: terminate with `TOOL_NUDGE_REASON` -3. Catch termination in retry loop, prune history, retry with nudge prompt +3. Catch termination in retry loop, increment `nudge_attempts`, prune history, retry with nudge prompt 4. On success: prune the nudge prompt from history to keep context clean -5. After `TOOL_NUDGE_MAX_RETRIES` (2) exhausted: `PromptCancelled` propagates to worker → `WorkerState::Failed` +5. After `TOOL_NUDGE_MAX_RETRIES` (2) consecutive text-only responses: `PromptCancelled` propagates to worker → `WorkerState::Failed` +6. 
Any successful tool call (inside the Rig agent loop) resets `nudge_attempts` to zero via `on_tool_result` **History hygiene**: - Synthetic nudge prompts are removed from history on both success and retry @@ -100,6 +115,9 @@ The nudging behavior has comprehensive test coverage: - **Unit tests** (`src/hooks/spacebot.rs`): - `nudges_on_every_text_only_response_without_outcome` — nudge fires on every text-only response - `nudges_after_tool_calls_without_outcome` — the exact bug case (read_skill + progress status + text exit) + - `nudges_on_reasoning_only_response_without_outcome` — nudge fires when response is only Reasoning blocks (no text, no tools) + - `tool_result_resets_consecutive_nudge_counter` — successful tool calls reset the nudge attempt counter + - `nudge_counter_not_reset_when_policy_disabled` — counter untouched when nudging is disabled - `outcome_signal_allows_text_only_completion` — outcome signal unlocks exit - `progress_status_does_not_signal_outcome` — explicit progress kind doesn't unlock - `default_status_kind_does_not_signal_outcome` — omitted kind doesn't unlock diff --git a/interface/src/api/client.ts b/interface/src/api/client.ts index 50725a6c5..092e9ff98 100644 --- a/interface/src/api/client.ts +++ b/interface/src/api/client.ts @@ -146,6 +146,13 @@ export interface OpenCodePartUpdatedEvent { part: OpenCodePart; } +export interface WorkerTextEvent { + type: "worker_text"; + agent_id: string; + worker_id: string; + text: string; +} + export type ApiEvent = | InboundMessageEvent | OutboundMessageEvent @@ -159,7 +166,8 @@ export type ApiEvent = | BranchCompletedEvent | ToolStartedEvent | ToolCompletedEvent - | OpenCodePartUpdatedEvent; + | OpenCodePartUpdatedEvent + | WorkerTextEvent; async function fetchJson(path: string): Promise { const response = await fetch(`${API_BASE}${path}`); diff --git a/interface/src/components/ToolCall.tsx b/interface/src/components/ToolCall.tsx new file mode 100644 index 000000000..d6ff56839 --- /dev/null +++ 
b/interface/src/components/ToolCall.tsx @@ -0,0 +1,1166 @@ +import { useState } from "react"; +import { cx } from "@/ui/utils"; +import type { TranscriptStep, OpenCodePart } from "@/api/client"; + +// --------------------------------------------------------------------------- +// Types +// --------------------------------------------------------------------------- + +export type ToolCallStatus = "running" | "completed" | "error"; + +export interface ToolCallPair { + /** The call_id linking tool_call to tool_result */ + id: string; + /** Tool name (e.g. "browser_navigate", "shell") */ + name: string; + /** Raw JSON args string from the tool_call */ + argsRaw: string; + /** Parsed args object (null if parse failed) */ + args: Record | null; + /** Raw result text from tool_result (null if still running) */ + resultRaw: string | null; + /** Parsed result object (null if not JSON or still running) */ + result: Record | null; + /** Current state */ + status: ToolCallStatus; + /** Human-readable summary provided by live opencode parts */ + title?: string | null; +} + +// --------------------------------------------------------------------------- +// Transcript → ToolCallPair[] pairing +// --------------------------------------------------------------------------- + +/** + * Walk a flat TranscriptStep[] and pair each tool_call with its tool_result + * via call_id matching, plus emit standalone text steps. Returns an ordered + * list of renderable items: text blocks and paired tool calls. 
+ */ +export type TranscriptItem = + | { kind: "text"; text: string } + | { kind: "tool"; pair: ToolCallPair }; + +export function pairTranscriptSteps(steps: TranscriptStep[]): TranscriptItem[] { + const items: TranscriptItem[] = []; + const resultsById = new Map(); + + // First pass: index all tool_result steps by call_id + for (const step of steps) { + if (step.type === "tool_result") { + resultsById.set(step.call_id, { name: step.name, text: step.text }); + } + } + + // Second pass: emit items in order + for (const step of steps) { + if (step.type === "action") { + for (const content of step.content) { + if (content.type === "text") { + items.push({ kind: "text", text: content.text }); + } else if (content.type === "tool_call") { + const result = resultsById.get(content.id); + const parsedArgs = tryParseJson(content.args); + const parsedResult = result ? tryParseJson(result.text) : null; + + // Detect error: result text starts with "Error" or contains error indicators + const isError = result + ? isErrorResult(result.text, parsedResult) + : false; + + items.push({ + kind: "tool", + pair: { + id: content.id, + name: content.name, + argsRaw: content.args, + args: parsedArgs, + resultRaw: result?.text ?? null, + result: parsedResult, + status: result + ? isError + ? "error" + : "completed" + : "running", + }, + }); + } + } + } + // tool_result steps are consumed by the pairing above, not rendered standalone + } + + return items; +} + +/** + * Convert an OpenCode live part (tool type) into a ToolCallPair so it can be + * rendered by the unified ToolCall component. + */ +export function openCodePartToPair( + part: Extract, +): ToolCallPair { + const input = + part.status === "running" || part.status === "completed" + ? (part as any).input + : undefined; + const output = + part.status === "completed" ? (part as any).output : undefined; + const error = + part.status === "error" ? 
(part as any).error : undefined; + const title = + part.status === "running" || part.status === "completed" + ? (part as any).title + : undefined; + + const argsRaw = input ?? ""; + const resultRaw = error ?? output ?? null; + + return { + id: part.id, + name: part.tool, + argsRaw, + args: tryParseJson(argsRaw), + resultRaw, + result: resultRaw ? tryParseJson(resultRaw) : null, + status: + part.status === "error" + ? "error" + : part.status === "completed" + ? "completed" + : "running", + // Carry the title through for renderers that want it + title: title ?? null, + }; +} + +function tryParseJson(text: string): Record | null { + if (!text || text.trim().length === 0) return null; + try { + const parsed = JSON.parse(text); + if (typeof parsed === "object" && parsed !== null && !Array.isArray(parsed)) { + return parsed as Record; + } + return null; + } catch { + return null; + } +} + +function isErrorResult( + text: string, + parsed: Record | null, +): boolean { + if (parsed?.error) return true; + if (parsed?.status === "error") return true; + // Shell/exec structured results: { success: false } or non-zero exit code + if (parsed?.success === false) return true; + if (typeof parsed?.exit_code === "number" && parsed.exit_code !== 0) return true; + const lower = text.toLowerCase(); + return ( + lower.startsWith("error:") || + lower.startsWith("error -") || + lower.startsWith("failed:") || + lower.startsWith("toolset error:") + ); +} + +// --------------------------------------------------------------------------- +// Tool-specific rendering +// --------------------------------------------------------------------------- + +interface ToolRenderer { + /** One-line summary shown in the collapsed header (after tool name) */ + summary(pair: ToolCallPair): string | null; + /** Custom args display (return null to use default JSON) */ + argsView?(pair: ToolCallPair): React.ReactNode | null; + /** Custom result display (return null to use default text) */ + resultView?(pair: 
ToolCallPair): React.ReactNode | null; +} + +const toolRenderers: Record = { + browser_launch: { + summary(pair) { + const headless = pair.args?.headless; + return headless === false ? "Launch browser (visible)" : "Launch browser"; + }, + resultView(pair) { + if (!pair.resultRaw) return null; + return ; + }, + }, + + browser_navigate: { + summary(pair) { + const url = pair.args?.url; + return url ? truncate(String(url), 60) : null; + }, + resultView(pair) { + if (!pair.result) return ; + const title = pair.result.title as string | undefined; + const url = pair.result.url as string | undefined; + return ( +
+ {title && ( +

+ Title: + {title} +

+ )} + {url && ( +

+ {truncate(url, 80)} +

+ )} +
+ ); + }, + }, + + browser_snapshot: { + summary(pair) { + if (!pair.resultRaw) return "Taking snapshot..."; + // Count elements: look for "[N]" patterns in the ARIA snapshot + const matches = pair.resultRaw.match(/\[\d+\]/g); + const count = matches?.length ?? 0; + return count > 0 + ? `${count} interactive element${count !== 1 ? "s" : ""}` + : "Page snapshot"; + }, + resultView(pair) { + if (!pair.resultRaw) return null; + // ARIA tree snapshots are YAML-like text — render in a scrollable pre + // but cap the default view + return ; + }, + }, + + browser_click: { + summary(pair) { + const index = pair.args?.index; + return index !== undefined ? `Click element [${index}]` : "Click"; + }, + resultView(pair) { + if (!pair.resultRaw) return null; + return ; + }, + }, + + browser_type: { + summary(pair) { + const index = pair.args?.index; + const hasSecret = pair.args?.secret !== undefined; + const text = pair.args?.text; + if (hasSecret) { + return index !== undefined + ? `Type secret into [${index}]` + : "Type secret"; + } + if (text) { + const display = truncate(String(text), 30); + return index !== undefined + ? `Type "${display}" into [${index}]` + : `Type "${display}"`; + } + return index !== undefined ? `Type into [${index}]` : "Type"; + }, + resultView(pair) { + if (!pair.resultRaw) return null; + return ; + }, + }, + + browser_press_key: { + summary(pair) { + const key = pair.args?.key; + return key ? `Press ${key}` : "Press key"; + }, + resultView(pair) { + if (!pair.resultRaw) return null; + return ; + }, + }, + + browser_screenshot: { + summary() { + return "Capture screenshot"; + }, + resultView(pair) { + if (!pair.resultRaw) return null; + // Screenshot results may contain base64 image data or a path + if (pair.result?.base64) { + const mimeType = + (pair.result.mime_type as string) ?? "image/png"; + return ( +
+ Browser screenshot +
+ ); + } + return ; + }, + }, + + browser_evaluate: { + summary(pair) { + const expression = pair.args?.expression; + return expression + ? truncate(String(expression), 50) + : "Evaluate JS"; + }, + argsView(pair) { + const expression = pair.args?.expression; + if (!expression) return null; + return ( +
+

+ JavaScript +

+
+						{String(expression)}
+					
+
+ ); + }, + }, + + browser_tab_open: { + summary(pair) { + const url = pair.args?.url; + return url ? `Open tab: ${truncate(String(url), 50)}` : "Open new tab"; + }, + }, + + browser_tab_list: { + summary() { + return "List tabs"; + }, + }, + + browser_tab_close: { + summary(pair) { + const tabId = pair.args?.tab_id; + return tabId !== undefined + ? `Close tab ${tabId}` + : "Close tab"; + }, + }, + + browser_close: { + summary() { + return "Close browser"; + }, + resultView(pair) { + if (!pair.resultRaw) return null; + return ; + }, + }, + + shell: { + summary(pair) { + const command = pair.args?.command; + if (!command) return null; + // If we have a parsed result, append exit code info + if (pair.result && typeof pair.result.exit_code === "number") { + const code = pair.result.exit_code; + const cmdStr = truncate(String(command), 50); + return code === 0 ? cmdStr : `${cmdStr} (exit ${code})`; + } + return truncate(String(command), 60); + }, + argsView(pair) { + const command = pair.args?.command; + if (!command) return null; + return ( +
+
+						$ 
+						{String(command)}
+					
+
+ ); + }, + resultView(pair) { + if (!pair.resultRaw) return null; + return ; + }, + }, + + file_read: { + summary(pair) { + if (pair.title) return pair.title; + const path = pair.args?.path; + return path ? truncate(String(path), 60) : null; + }, + argsView(pair) { + const path = pair.args?.path; + if (!path) return null; + const offset = pair.args?.offset; + const limit = pair.args?.limit; + return ( +
+

+ {String(path)} + {offset ? ` (from line ${offset})` : ""} + {limit ? ` (${limit} lines)` : ""} +

+
+ ); + }, + resultView(pair) { + if (!pair.resultRaw) return null; + return ; + }, + }, + + file_write: { + summary(pair) { + if (pair.title) return pair.title; + const path = pair.args?.path; + return path ? truncate(String(path), 60) : null; + }, + argsView(pair) { + const path = pair.args?.path; + const content = pair.args?.content; + if (!path && !content) return null; + return ( +
+ {!!path && ( +

+ {String(path)} +

+ )} + {!!content && ( +
+							{truncate(String(content), 2000)}
+						
+ )} +
+ ); + }, + resultView(pair) { + if (!pair.resultRaw) return null; + return ; + }, + }, + + file_edit: { + summary(pair) { + if (pair.title) return pair.title; + const path = pair.args?.path; + return path ? truncate(String(path), 60) : null; + }, + argsView(pair) { + const path = pair.args?.path; + const oldStr = pair.args?.old_string; + const newStr = pair.args?.new_string; + if (!path) return null; + return ( +
+

+ {String(path)} +

+ {!!oldStr && ( +
+

+ Old +

+
+								{truncate(String(oldStr), 500)}
+							
+
+ )} + {!!newStr && ( +
+

+ New +

+
+								{truncate(String(newStr), 500)}
+							
+
+ )} +
+ ); + }, + resultView(pair) { + if (!pair.resultRaw) return null; + return ; + }, + }, + + file_list: { + summary(pair) { + if (pair.title) return pair.title; + const path = pair.args?.path; + return path ? truncate(String(path), 60) : null; + }, + resultView(pair) { + if (!pair.resultRaw) return null; + return ; + }, + }, + + exec: { + summary(pair) { + const command = pair.args?.command; + if (!command) return null; + if (pair.result && typeof pair.result.exit_code === "number") { + const code = pair.result.exit_code; + const cmdStr = truncate(String(command), 50); + return code === 0 ? cmdStr : `${cmdStr} (exit ${code})`; + } + return truncate(String(command), 60); + }, + resultView(pair) { + if (!pair.resultRaw) return null; + return ; + }, + }, + + set_status: { + summary(pair) { + const kind = pair.args?.kind; + const message = pair.args?.message; + if (kind === "outcome") { + return message + ? `Outcome: ${truncate(String(message), 50)}` + : "Outcome set"; + } + return message ? truncate(String(message), 60) : null; + }, + resultView() { + // set_status results are not interesting — just "ok" + return null; + }, + }, + + // ----------------------------------------------------------------------- + // OpenCode tools + // ----------------------------------------------------------------------- + + read: { + summary(pair) { + if (pair.title) return pair.title; + const filePath = pair.args?.filePath ?? pair.args?.file_path; + return filePath ? truncate(String(filePath), 60) : null; + }, + argsView(pair) { + const filePath = pair.args?.filePath ?? pair.args?.file_path; + if (!filePath) return null; + const offset = pair.args?.offset; + const limit = pair.args?.limit; + return ( +
+

+ {String(filePath)} + {offset ? ` (from line ${offset})` : ""} + {limit ? ` (${limit} lines)` : ""} +

+
+ ); + }, + resultView(pair) { + if (!pair.resultRaw) return null; + return ; + }, + }, + + write: { + summary(pair) { + if (pair.title) return pair.title; + const filePath = pair.args?.filePath ?? pair.args?.file_path; + return filePath ? truncate(String(filePath), 60) : null; + }, + argsView(pair) { + const filePath = pair.args?.filePath ?? pair.args?.file_path; + const content = pair.args?.content; + if (!filePath && !content) return null; + return ( +
+ {!!filePath && ( +

+ {String(filePath)} +

+ )} + {!!content && ( +
+							{truncate(String(content), 2000)}
+						
+ )} +
+ ); + }, + resultView(pair) { + if (!pair.resultRaw) return null; + return ; + }, + }, + + edit: { + summary(pair) { + if (pair.title) return pair.title; + const filePath = pair.args?.filePath ?? pair.args?.file_path; + return filePath ? truncate(String(filePath), 60) : null; + }, + argsView(pair) { + const filePath = pair.args?.filePath ?? pair.args?.file_path; + const oldStr = pair.args?.oldString ?? pair.args?.old_string; + const newStr = pair.args?.newString ?? pair.args?.new_string; + if (!filePath) return null; + return ( +
+

+ {String(filePath)} +

+ {!!oldStr && ( +
+

+ Old +

+
+								{truncate(String(oldStr), 500)}
+							
+
+ )} + {!!newStr && ( +
+

+ New +

+
+								{truncate(String(newStr), 500)}
+							
+
+ )} +
+ ); + }, + resultView(pair) { + if (!pair.resultRaw) return null; + return ; + }, + }, + + bash: { + summary(pair) { + if (pair.title) return pair.title; + const command = pair.args?.command; + if (!command) return null; + if (pair.result && typeof pair.result.exit_code === "number") { + const code = pair.result.exit_code; + const cmdStr = truncate(String(command), 50); + return code === 0 ? cmdStr : `${cmdStr} (exit ${code})`; + } + return truncate(String(command), 60); + }, + argsView(pair) { + const command = pair.args?.command; + if (!command) return null; + return ( +
+
+						$ 
+						{String(command)}
+					
+
+ ); + }, + resultView(pair) { + if (!pair.resultRaw) return null; + return ; + }, + }, + + glob: { + summary(pair) { + if (pair.title) return pair.title; + const pattern = pair.args?.pattern; + return pattern ? truncate(String(pattern), 60) : null; + }, + resultView(pair) { + if (!pair.resultRaw) return null; + return ; + }, + }, + + grep: { + summary(pair) { + if (pair.title) return pair.title; + const pattern = pair.args?.pattern; + const include = pair.args?.include; + if (pattern && include) { + return `/${pattern}/ in ${include}`; + } + return pattern ? `/${truncate(String(pattern), 40)}/` : null; + }, + resultView(pair) { + if (!pair.resultRaw) return null; + return ; + }, + }, + + webfetch: { + summary(pair) { + if (pair.title) return pair.title; + const url = pair.args?.url; + return url ? truncate(String(url), 60) : null; + }, + resultView(pair) { + if (!pair.resultRaw) return null; + return ; + }, + }, + + read_skill: { + summary(pair) { + if (pair.title) return pair.title; + const name = pair.args?.name; + return name ? String(name) : null; + }, + resultView(pair) { + if (!pair.resultRaw) return null; + return ; + }, + }, + + web_search: { + summary(pair) { + if (pair.title) return pair.title; + const query = pair.args?.query; + const resultCount = pair.result?.result_count ?? (Array.isArray(pair.result?.results) ? (pair.result!.results as unknown[]).length : null); + const queryStr = query ? truncate(String(query), 50) : null; + if (queryStr && resultCount != null) { + return `${queryStr} (${resultCount} result${resultCount !== 1 ? "s" : ""})`; + } + return queryStr; + }, + argsView(pair) { + const query = pair.args?.query; + if (!query) return null; + const count = pair.args?.count; + const freshness = pair.args?.freshness; + return ( +
+

+ Search: + {String(query)} +

+ {!!(count || freshness) && ( +

+ {count ? `${count} results` : ""} + {count && freshness ? " · " : ""} + {freshness ? `${freshness}` : ""} +

+ )} +
+ ); + }, + resultView(pair) { + if (!pair.resultRaw) return null; + return ; + }, + }, + + spacebot_docs: { + summary(pair) { + if (pair.title) return pair.title; + const action = pair.args?.action; + const docId = pair.args?.doc_id; + if (action === "read" && docId) { + return truncate(String(docId), 50); + } + return action ? String(action) : "list"; + }, + argsView(pair) { + const action = pair.args?.action; + const docId = pair.args?.doc_id; + const query = pair.args?.query; + if (!action && !docId) return null; + return ( +
+ {!!docId && ( +

+ {String(docId)} +

+ )} + {!!query && ( +

+ filter: {String(query)} +

+ )} +
+ ); + }, + resultView(pair) { + if (!pair.resultRaw) return null; + return ; + }, + }, + + todowrite: { + summary(pair) { + if (pair.title) return pair.title; + return "Update tasks"; + }, + resultView() { + return null; + }, + }, + + task: { + summary(pair) { + if (pair.title) return pair.title; + const description = pair.args?.description; + return description ? truncate(String(description), 60) : null; + }, + resultView(pair) { + if (!pair.resultRaw) return null; + return ; + }, + }, +}; + +// Default renderer for tools without a specific renderer +const defaultRenderer: ToolRenderer = { + summary(pair) { + if (!pair.argsRaw || pair.argsRaw === "{}") return null; + return truncate(pair.argsRaw, 60); + }, +}; + +function getRenderer(name: string): ToolRenderer { + return toolRenderers[name] ?? defaultRenderer; +} + +// --------------------------------------------------------------------------- +// Shared sub-components +// --------------------------------------------------------------------------- + +function ResultLine({ text }: { text: string }) { + return ( +

{text}

+ ); +} + +function ResultText({ text }: { text: string | null }) { + if (!text) return null; + return ( +
+			{text}
+		
+ ); +} + +function CollapsiblePre({ + text, + maxLines = 20, +}: { + text: string; + maxLines?: number; +}) { + const [expanded, setExpanded] = useState(false); + const lines = text.split("\n"); + const needsCollapse = lines.length > maxLines; + const displayText = + needsCollapse && !expanded + ? lines.slice(0, maxLines).join("\n") + "\n..." + : text; + + return ( +
+
+				{displayText}
+			
+ {needsCollapse && ( + + )} +
+ ); +} + +// --------------------------------------------------------------------------- +// Shell result rendering +// --------------------------------------------------------------------------- + +function ShellResultView({ pair }: { pair: ToolCallPair }) { + const r = pair.result; + + // If we can't parse structured output, fall back to raw text + if (!r || typeof r.exit_code !== "number") { + return ; + } + + const exitCode = r.exit_code as number; + const stdout = typeof r.stdout === "string" ? r.stdout : ""; + const stderr = typeof r.stderr === "string" ? r.stderr : ""; + const hasStdout = stdout.trim().length > 0; + const hasStderr = stderr.trim().length > 0; + const isError = exitCode !== 0; + + // Nothing to show + if (!hasStdout && !hasStderr && exitCode === 0) { + return ; + } + + return ( +
+ {/* Exit code badge for non-zero */} + {isError && ( +
+ + exit {exitCode} + +
+ )} + + {/* stdout */} + {hasStdout && ( +
+ +
+ )} + + {/* stderr */} + {hasStderr && ( +
+
+ + stderr + +
+
+						{stderr.replace(/\n$/, "")}
+					
+
+ )} +
+ ); +} + +// --------------------------------------------------------------------------- +// Status display helpers +// --------------------------------------------------------------------------- + +const STATUS_ICONS: Record = { + running: "\u25B6", // ▶ + completed: "\u2713", // ✓ + error: "\u2717", // ✗ +}; + +const STATUS_COLORS: Record = { + running: "text-accent", + completed: "text-emerald-500", + error: "text-red-400", +}; + +/** Human-readable tool name: browser_navigate → Navigate */ +function formatToolName(name: string): string { + // Handle specific tool name overrides + const overrides: Record = { + webfetch: "Web Fetch", + todowrite: "Todo", + read_skill: "Read Skill", + web_search: "Web Search", + spacebot_docs: "Docs", + }; + if (overrides[name]) return overrides[name]; + + // Strip common prefixes for cleaner display + const stripped = name + .replace(/^browser_/, "") + .replace(/^file_/, "") + .replace(/^tab_/, "Tab "); + + return stripped + .split("_") + .map((word) => word.charAt(0).toUpperCase() + word.slice(1)) + .join(" "); +} + +/** Tool category label shown as a faint prefix */ +function toolCategory(name: string): string | null { + if (name.startsWith("browser_")) return "Browser"; + if (name.startsWith("file_")) return "File"; + return null; +} + +// --------------------------------------------------------------------------- +// Main component +// --------------------------------------------------------------------------- + +export function ToolCall({ pair }: { pair: ToolCallPair }) { + const [expanded, setExpanded] = useState(false); + const renderer = getRenderer(pair.name); + const summary = renderer.summary(pair); + const category = toolCategory(pair.name); + const displayName = formatToolName(pair.name); + + return ( +
+ {/* Header — always visible */} + + + {/* Expanded body */} + {expanded && ( +
+ {/* Args section */} + {renderArgs(pair, renderer)} + {/* Result section */} + {renderResult(pair, renderer)} +
+ )} +
+ ); +} + +function renderArgs(pair: ToolCallPair, renderer: ToolRenderer): React.ReactNode { + // Try custom args view first + if (renderer.argsView) { + const custom = renderer.argsView(pair); + if (custom) return custom; + } + + // Default: show parsed args as key-value pairs, or raw JSON + if (pair.args && Object.keys(pair.args).length > 0) { + return ( +
+
+ {Object.entries(pair.args).map(([key, value]) => ( +

+ {key}: + + {formatArgValue(key, value)} + +

+ ))} +
+
+ ); + } + + if (pair.argsRaw && pair.argsRaw !== "{}" && pair.argsRaw.trim().length > 0) { + return ( +
+
+					{pair.argsRaw}
+				
+
+ ); + } + + return null; +} + +function renderResult(pair: ToolCallPair, renderer: ToolRenderer): React.ReactNode { + if (pair.status === "running") { + return ( +
+ + Running... +
+ ); + } + + // Try custom result view first + if (renderer.resultView) { + const custom = renderer.resultView(pair); + if (custom !== undefined) return custom; + } + + // Default result rendering + if (!pair.resultRaw) return null; + + // If we have parsed JSON result, show key-value pairs + if (pair.result && Object.keys(pair.result).length > 0) { + return ( +
+
+ {Object.entries(pair.result).map(([key, value]) => ( +

+ {key}: + + {typeof value === "string" + ? truncate(value, 200) + : JSON.stringify(value)} + +

+ ))} +
+
+ ); + } + + // Plain text result + return ; +} + +function formatArgValue(key: string, value: unknown): string { + // Redact secret references + if (key === "secret" && typeof value === "string") { + return "***"; + } + if (typeof value === "string") { + return truncate(value, 100); + } + if (typeof value === "boolean" || typeof value === "number") { + return String(value); + } + return JSON.stringify(value); +} + +function truncate(text: string, maxLen: number): string { + if (text.length <= maxLen) return text; + return text.slice(0, maxLen) + "..."; +} diff --git a/interface/src/components/WebChatPanel.tsx b/interface/src/components/WebChatPanel.tsx index e473ddc4f..d46c7bd66 100644 --- a/interface/src/components/WebChatPanel.tsx +++ b/interface/src/components/WebChatPanel.tsx @@ -4,8 +4,6 @@ import { useWebChat } from "@/hooks/useWebChat"; import { isOpenCodeWorker, type ActiveWorker } from "@/hooks/useChannelLiveState"; import { useLiveContext } from "@/hooks/useLiveContext"; import { Markdown } from "@/components/Markdown"; -import { LiveDuration } from "@/components/LiveDuration"; -import type { TimelineWorkerRun } from "@/api/client"; interface WebChatPanelProps { agentId: string; @@ -60,88 +58,6 @@ function ActiveWorkersPanel({ workers, agentId }: { workers: ActiveWorker[]; age ); } -function ChatWorkerRunItem({ item, live, agentId }: { item: TimelineWorkerRun; live?: ActiveWorker; agentId: string }) { - const [expanded, setExpanded] = useState(!!live); - const wasLiveRef = useRef(!!live); - - // Auto-expand when a worker becomes live after initial mount - useEffect(() => { - if (live && !wasLiveRef.current) { - setExpanded(true); - } - wasLiveRef.current = !!live; - }, [live]); - - const oc = isOpenCodeWorker(live ?? { task: item.task }); - const isLive = !!live; - - return ( -
-
- - - Open - -
- {expanded && isLive && live && ( -
- - {live.status} - {live.currentTool && ( - {live.currentTool} - )} - {live.toolCalls > 0 && ( - {live.toolCalls} tool calls - )} -
- )} - {expanded && !isLive && item.result && ( -
-
- {item.result} -
-
- )} -
- ); -} - function ThinkingIndicator() { return (
@@ -285,17 +201,6 @@ export function WebChatPanel({ agentId }: WebChatPanelProps) { )} {timeline.map((item) => { - if (item.type === "worker_run") { - const live = liveState?.workers[item.id]; - return ( - - ); - } if (item.type !== "message") return null; return (
diff --git a/interface/src/hooks/useLiveContext.tsx b/interface/src/hooks/useLiveContext.tsx index 0fc58d4c8..584413e87 100644 --- a/interface/src/hooks/useLiveContext.tsx +++ b/interface/src/hooks/useLiveContext.tsx @@ -1,6 +1,6 @@ import { createContext, useContext, useCallback, useRef, useState, useMemo, type ReactNode } from "react"; import { useQuery, useQueryClient } from "@tanstack/react-query"; -import { api, type AgentMessageEvent, type ChannelInfo, type ToolStartedEvent, type ToolCompletedEvent, type WorkerStatusEvent, type TranscriptStep, type OpenCodePart, type OpenCodePartUpdatedEvent } from "@/api/client"; +import { api, type AgentMessageEvent, type ChannelInfo, type ToolStartedEvent, type ToolCompletedEvent, type TranscriptStep, type OpenCodePart, type OpenCodePartUpdatedEvent, type WorkerTextEvent } from "@/api/client"; import { generateId } from "@/lib/id"; import { useEventSource, type ConnectionState } from "@/hooks/useEventSource"; import { useChannelLiveState, type ChannelLiveState, type ActiveWorker } from "@/hooks/useChannelLiveState"; @@ -147,18 +147,9 @@ export function LiveContextProvider({ children }: { children: ReactNode }) { const wrappedWorkerStatus = useCallback((data: unknown) => { channelHandlers.worker_status(data); - const event = data as WorkerStatusEvent; - // Push status text as an action step in the live transcript - if (event.status && event.status !== "starting" && event.status !== "running") { - setLiveTranscripts((prev) => { - const steps = prev[event.worker_id] ?? []; - const step: TranscriptStep = { - type: "action", - content: [{ type: "text", text: event.status }], - }; - return { ...prev, [event.worker_id]: [...steps, step] }; - }); - } + // Status text comes from set_status tool calls which already appear as + // paired tool_started/tool_completed events in the transcript. No need + // to duplicate them as standalone text steps. 
bumpWorkerVersion(); }, [channelHandlers, bumpWorkerVersion]); @@ -249,6 +240,20 @@ export function LiveContextProvider({ children }: { children: ReactNode }) { bumpWorkerVersion(); }, [bumpWorkerVersion]); + // Handle worker text — model reasoning text emitted between tool calls + const handleWorkerText = useCallback((data: unknown) => { + const event = data as WorkerTextEvent; + setLiveTranscripts((prev) => { + const steps = prev[event.worker_id] ?? []; + const step: TranscriptStep = { + type: "action", + content: [{ type: "text", text: event.text }], + }; + return { ...prev, [event.worker_id]: [...steps, step] }; + }); + bumpWorkerVersion(); + }, [bumpWorkerVersion]); + // Merge channel handlers with agent message + task handlers const handlers = useMemo( () => ({ @@ -260,11 +265,12 @@ export function LiveContextProvider({ children }: { children: ReactNode }) { tool_started: wrappedToolStarted, tool_completed: wrappedToolCompleted, opencode_part_updated: handleOpenCodePartUpdated, + worker_text: handleWorkerText, agent_message_sent: handleAgentMessage, agent_message_received: handleAgentMessage, task_updated: bumpTaskVersion, }), - [channelHandlers, wrappedWorkerStarted, wrappedWorkerStatus, wrappedWorkerIdle, wrappedWorkerCompleted, wrappedToolStarted, wrappedToolCompleted, handleOpenCodePartUpdated, handleAgentMessage, bumpTaskVersion], + [channelHandlers, wrappedWorkerStarted, wrappedWorkerStatus, wrappedWorkerIdle, wrappedWorkerCompleted, wrappedToolStarted, wrappedToolCompleted, handleOpenCodePartUpdated, handleWorkerText, handleAgentMessage, bumpTaskVersion], ); const onReconnect = useCallback(() => { diff --git a/interface/src/routes/AgentWorkers.tsx b/interface/src/routes/AgentWorkers.tsx index 184bb5f87..93217cc07 100644 --- a/interface/src/routes/AgentWorkers.tsx +++ b/interface/src/routes/AgentWorkers.tsx @@ -8,9 +8,9 @@ import { type WorkerRunInfo, type WorkerDetailResponse, type TranscriptStep, - type ActionContent, type OpenCodePart, } from 
"@/api/client"; +import {ToolCall, pairTranscriptSteps, openCodePartToPair} from "@/components/ToolCall"; import {Badge} from "@/ui/Badge"; import {formatTimeAgo, formatDuration} from "@/lib/format"; import {LiveDuration} from "@/components/LiveDuration"; @@ -408,9 +408,13 @@ function WorkerDetail({ setActiveTab(hasOpenCodeEmbed ? "opencode" : "transcript"); }, [detail.id, hasOpenCodeEmbed]); - // Use persisted transcript if available, otherwise fall back to live SSE transcript. - // Strip the final action step if it duplicates the result text shown above. - const rawTranscript = detail.transcript ?? (isLive ? liveTranscript : null); + // For live workers, prefer the SSE-accumulated transcript when it has content + // (it's the most up-to-date). Fall back to detail.transcript which may come + // from the DB or the server-side live cache (survives page refreshes). + // For completed workers, always use the persisted DB transcript. + const rawTranscript = isLive + ? (liveTranscript && liveTranscript.length > 0 ? liveTranscript : detail.transcript ?? null) + : detail.transcript ?? null; const transcript = useMemo(() => { if (!rawTranscript || !detail.result) return rawTranscript; const last = rawTranscript[rawTranscript.length - 1]; @@ -575,22 +579,22 @@ function WorkerDetail({ {isLive && !isIdle ? "Live Transcript" : "Transcript"}
- {transcript.map((step, index) => ( + {pairTranscriptSteps(transcript).map((item, index) => ( - + {item.kind === "text" ? ( +
+ {item.text} +
+ ) : ( + + )}
))} - {isRunning && currentTool && ( -
- - Running {currentTool}... -
- )} {isIdle && (
Waiting for follow-up input... @@ -1026,20 +1030,6 @@ function TaskText({text}: {text: string}) { ); } -function TranscriptStepView({step}: {step: TranscriptStep}) { - if (step.type === "action") { - return ( -
- {step.content.map((content, index) => ( - - ))} -
- ); - } - - return ; -} - function CancelWorkerButton({ channelId, workerId, @@ -1066,82 +1056,7 @@ function CancelWorkerButton({ ); } -function ActionContentView({content}: {content: ActionContent}) { - if (content.type === "text") { - return ( -
- {content.text} -
- ); - } - - return ; -} - -function ToolCallView({ - content, -}: { - content: Extract; -}) { - const [expanded, setExpanded] = useState(false); - return ( -
- - {expanded && ( -
-					{content.args}
-				
- )} -
- ); -} - -function ToolResultView({ - step, -}: { - step: Extract; -}) { - const [expanded, setExpanded] = useState(false); - const isLong = step.text.length > 300; - const displayText = - isLong && !expanded ? step.text.slice(0, 300) + "..." : step.text; - - return ( -
-
- - {step.name && ( - - {step.name} - - )} -
-
-				{displayText}
-			
- {isLong && ( - - )} -
- ); -} // -- OpenCode-native part renderers -- @@ -1154,7 +1069,7 @@ function OpenCodePartView({part}: {part: OpenCodePart}) {
); case "tool": - return ; + return ; case "step_start": return (
@@ -1178,93 +1093,4 @@ function OpenCodePartView({part}: {part: OpenCodePart}) { } } -function OpenCodeToolPartView({ - part, -}: { - part: Extract; -}) { - const [expanded, setExpanded] = useState(false); - const isRunning = part.status === "running"; - const isCompleted = part.status === "completed"; - const isError = part.status === "error"; - - const statusIcon = isCompleted - ? "\u2713" - : isError - ? "\u2717" - : isRunning - ? "\u25B6" - : "\u25CB"; - - const statusColor = isCompleted - ? "text-emerald-500" - : isError - ? "text-red-400" - : isRunning - ? "text-accent" - : "text-ink-faint"; - - const title = - (part.status === "running" || part.status === "completed") - ? (part as any).title - : undefined; - - const input = - (part.status === "running" || part.status === "completed") - ? (part as any).input - : undefined; - - const output = part.status === "completed" ? (part as any).output : undefined; - const error = part.status === "error" ? (part as any).error : undefined; - return ( -
- - {expanded && ( -
- {input && ( -
-

Input

-
-								{input}
-							
-
- )} - {output && ( -
-

Output

-
-								{output.length > 500 ? output.slice(0, 500) + "..." : output}
-							
-
- )} - {error && ( -
-

Error

-
{error}
-
- )} -
- )} -
- ); -} diff --git a/prompts/en/fragments/worker_capabilities.md.j2 b/prompts/en/fragments/worker_capabilities.md.j2 index dfab94c2e..ccef7726d 100644 --- a/prompts/en/fragments/worker_capabilities.md.j2 +++ b/prompts/en/fragments/worker_capabilities.md.j2 @@ -18,7 +18,7 @@ When you spawn a worker, it runs independently with a task description and retur - **exec** — run subprocesses with environment control - **set_status** — update worker status visible in your status block {%- if browser_enabled %} -- **browser** — browse web pages, take screenshots, click elements, fill forms +- **browser_*** — suite of browser tools: navigate, snapshot, click, type, screenshot, press_key, evaluate, tab management {%- endif %} {%- if web_search_enabled %} - **web_search** — search the web via Brave Search API diff --git a/prompts/en/tools/browser_description.md.j2 b/prompts/en/tools/browser_description.md.j2 index 26500c354..7c3da4c5b 100644 --- a/prompts/en/tools/browser_description.md.j2 +++ b/prompts/en/tools/browser_description.md.j2 @@ -1 +1 @@ -Browser automation tool. Launch a headless Chrome browser, navigate pages, interact with elements, take screenshots, and extract page content. Workflow: launch → navigate → snapshot (get element refs) → act (click/type by ref) → screenshot. Element refs like "e1", "e2" are assigned during snapshot and used in act calls. \ No newline at end of file +Browser automation tools. Navigate pages, get ARIA snapshots with numbered element indices, click/type by index, take screenshots. Workflow: browser_navigate → browser_snapshot → browser_click/browser_type by index → browser_screenshot. For passwords and credentials, use browser_type with `secret` (not `text`) to type values from the secret store without exposing them. 
diff --git a/prompts/en/tools/file_description.md.j2 b/prompts/en/tools/file_description.md.j2 deleted file mode 100644 index c019f0c2a..000000000 --- a/prompts/en/tools/file_description.md.j2 +++ /dev/null @@ -1 +0,0 @@ -Perform file operations: read, write, or list files. Use this to examine code, read documentation, write files, or explore directory structures. Protected paths (prompts/, identity/, data/, SOUL.md, IDENTITY.md, USER.md) cannot be accessed - use memory_save instead for that content. \ No newline at end of file diff --git a/prompts/en/tools/file_edit_description.md.j2 b/prompts/en/tools/file_edit_description.md.j2 new file mode 100644 index 000000000..869c758ff --- /dev/null +++ b/prompts/en/tools/file_edit_description.md.j2 @@ -0,0 +1 @@ +Edit a file by replacing exact text matches. Finds old_string in the file and replaces it with new_string. The match must be exact (including whitespace and indentation). By default, requires a unique match — if multiple matches exist, provide more surrounding context or set replace_all to true. Use file_read first to verify the exact text you want to replace. \ No newline at end of file diff --git a/prompts/en/tools/file_list_description.md.j2 b/prompts/en/tools/file_list_description.md.j2 new file mode 100644 index 000000000..b6a474143 --- /dev/null +++ b/prompts/en/tools/file_list_description.md.j2 @@ -0,0 +1 @@ +List directory contents. Returns file and directory entries with names, types, and sizes. Directories are listed first, then files, both sorted alphabetically. Use this to explore the filesystem structure. \ No newline at end of file diff --git a/prompts/en/tools/file_read_description.md.j2 b/prompts/en/tools/file_read_description.md.j2 new file mode 100644 index 000000000..173e72ae0 --- /dev/null +++ b/prompts/en/tools/file_read_description.md.j2 @@ -0,0 +1 @@ +Read a file's contents. Supports line-based offset and limit for reading specific sections of large files. 
Returns the full file content (up to 50KB), or a windowed view when offset/limit are provided. Use this to examine source code, read documentation, or inspect file content. \ No newline at end of file diff --git a/prompts/en/tools/file_write_description.md.j2 b/prompts/en/tools/file_write_description.md.j2 new file mode 100644 index 000000000..fe02efa78 --- /dev/null +++ b/prompts/en/tools/file_write_description.md.j2 @@ -0,0 +1 @@ +Write content to a file. Creates the file if it doesn't exist, or overwrites it entirely if it does. Parent directories are created automatically. Use this for creating new files or replacing an entire file's content. For targeted edits to existing files, prefer file_edit instead. Protected paths (SOUL.md, IDENTITY.md, USER.md) cannot be written — use the identity management API for those. \ No newline at end of file diff --git a/prompts/en/tools/set_status_description.md.j2 b/prompts/en/tools/set_status_description.md.j2 index c9eaf1281..c0b609a4c 100644 --- a/prompts/en/tools/set_status_description.md.j2 +++ b/prompts/en/tools/set_status_description.md.j2 @@ -1 +1 @@ -Report the current status of your work. The status appears in the channel's status block. Keep statuses concise (1-2 sentences) and informative. Use kind "progress" (default) for intermediate updates. Use kind "outcome" when the task has reached a terminal result — you must signal an outcome before finishing. \ No newline at end of file +Report the current status of your work. The status appears in the channel's status block. Keep statuses concise (1-2 sentences) and informative. Use kind "progress" (default) for intermediate updates. Use kind "outcome" ONLY when ALL parts of the task have reached a terminal result — you must signal an outcome before finishing, but signaling prematurely (before every step is complete) is a critical error that causes the task to be reported as done when it isn't. 
\ No newline at end of file diff --git a/prompts/en/worker.md.j2 b/prompts/en/worker.md.j2 index 111f0b18f..366bbe81b 100644 --- a/prompts/en/worker.md.j2 +++ b/prompts/en/worker.md.j2 @@ -36,14 +36,14 @@ Sandbox mode is **disabled**. - `shell`/`exec` subprocesses run without OS-level filesystem containment. - Host filesystem access follows the OS permissions of the Spacebot process. - Environment sanitization still applies (clean env + explicit passthrough/tool vars). -- The `file` tool can access any path readable by the process (no workspace restriction). +- The file tools (`file_read`, `file_write`, `file_edit`, `file_list`) can access any path readable by the process (no workspace restriction). {%- endif %} ## Your Role Execute the task you were given. Use your tools. Report your status as you make progress. -**When the task is complete** (or you've reached a definitive result), call `set_status` with `kind: "outcome"` and a summary of what happened. Only then provide your final text response. If you try to finish without signaling an outcome, the system will send you back to keep working. +**When the task is fully complete** (or you've reached a definitive result for every part of the task), call `set_status` with `kind: "outcome"` and a summary of what happened. Only then provide your final text response. If you try to finish without signaling an outcome, the system will send you back to keep working. Do not signal an outcome prematurely — completing part of a multi-step task is not completion. ## Task @@ -69,7 +69,11 @@ Bad progress updates: - "starting" - "reading file" -**Outcome** (`kind: "outcome"`): Signal that you have reached a terminal result. You **must** call `set_status` with `kind: "outcome"` before finishing your task. Include a concise summary of the result. Examples: +**Outcome** (`kind: "outcome"`): Signal that you have reached a terminal result. You **must** call `set_status` with `kind: "outcome"` before finishing your task. 
Include a concise summary of the result. + +**CRITICAL: Do not signal an outcome until every step of the task is actually complete.** If the task has multiple steps, you must finish ALL of them before calling `set_status(kind: "outcome")`. Signaling an outcome after completing only some steps is a failure — it tells the channel the work is done when it isn't. Review your original task description and verify every requirement has been met before signaling. + +Examples: - `set_status(status: "Email sent to jamie@spacedrive.com", kind: "outcome")` - `set_status(status: "Build failed: 3 type errors in auth module", kind: "outcome")` @@ -79,9 +83,14 @@ Bad progress updates: Execute shell commands. Use this for running builds, tests, git operations, package management, and any system commands. -### file +### File tools (file_read, file_write, file_edit, file_list) + +Four separate tools for file operations: -Read, write, and list files. Use this for viewing source code, writing changes, and navigating the filesystem. +- `file_read` — Read a file's contents. Supports `offset` (1-indexed line number) and `limit` (line count) for reading specific sections. +- `file_write` — Write content to a file (create or overwrite). Both `path` and `content` are required. +- `file_edit` — Edit a file by replacing exact text: provide `path`, `old_string`, and `new_string`. Use `file_read` first to see the exact text. Set `replace_all: true` for multiple occurrences. +- `file_list` — List directory contents with names, types, and sizes. Path restrictions apply: you cannot write to identity files (SOUL.md, IDENTITY.md, USER.md) or memory storage paths. Use the appropriate system tools for those. @@ -89,28 +98,33 @@ Path restrictions apply: you cannot write to identity files (SOUL.md, IDENTITY.m Run a subprocess with specific arguments. Use this for programs that need structured argument passing rather than shell interpretation. 
-### browser +### Browser tools (browser_*) -Automate a headless Chrome browser. Use this for web scraping, testing web interfaces, filling out forms, or any task requiring browser interaction. +Automate a headless Chrome browser. Each action is a separate tool. **Workflow:** -1. `launch` — Start the browser -2. `navigate` — Go to a URL -3. `snapshot` — Get the page's accessibility tree with element refs (e1, e2, e3...) -4. `act` — Interact with elements by ref: `click`, `type`, `press_key`, `hover`, `scroll_into_view`, `focus` -5. `screenshot` — Capture the page or a specific element +1. `browser_launch` — Start the browser (or auto-launched by `browser_navigate`) +2. `browser_navigate` — Go to a URL +3. `browser_snapshot` — Get the page's ARIA accessibility tree with numbered element indices +4. `browser_click` — Click an element: `{"index": 5}` or `{"selector": "#my-button"}` +5. `browser_type` — Type into an input: `{"index": 3, "text": "hello"}` or `{"selector": "#login_field", "text": "hello"}` +6. `browser_screenshot` — Capture the page {%- if browser_persist_session %} -6. `close` — Detach from the browser when done (tabs and session are preserved for the next worker) +7. `browser_close` — Detach from the browser when done (tabs and session preserved) {%- else %} -6. `close` — Shut down the browser when done +7. `browser_close` — Shut down the browser when done {%- endif %} -**Multi-tab support:** Use `open` to create new tabs, `tabs` to list them, `focus` to switch between them, `close_tab` to close one. +**Targeting elements:** `browser_click` and `browser_type` accept either `index` (from `browser_snapshot`) or `selector` (a CSS selector like `"#login_field"` or `"button.submit"`). Use `index` when you have a fresh snapshot; use `selector` when you know the element's ID or a stable CSS path — it's more reliable on dynamic pages. + +**Element indices** are assigned during `browser_snapshot` and look like `[index=0]`, `[index=1]`. 
Always snapshot before using index-based interaction — indices reset on each snapshot. + +**Passwords and credentials:** `browser_type` accepts a `secret` parameter for secure text entry. When a task requires entering a password, token, or API key, use `secret` instead of `text`: `{"index": 2, "secret": "GH_PASSWORD"}`. The secret is resolved from the secret store and typed into the field — the actual value never appears in tool arguments or output. NEVER put credentials in the `text` parameter or resolve them via `shell` (`echo $PASSWORD`). If the task references an environment variable like `$GH_PASSWORD`, pass that name directly to the `secret` parameter. -**Element refs** are assigned during `snapshot` and look like "e1", "e2". Always snapshot before interacting — refs reset on each snapshot or navigation. +**Other tools:** `browser_press_key` (Enter, Tab, Escape), `browser_evaluate` (run JS on the page), `browser_tab_open`, `browser_tab_list`, `browser_tab_close`. -**Additional actions:** `content` (get page HTML), `evaluate` (run JavaScript, if enabled in config). +**Prefer index or selector over evaluate:** Use `browser_snapshot` + `browser_click`/`browser_type` to interact with elements. Fall back to `browser_evaluate` for reading page data or when the other tools don't work for a specific page. ### secret_set @@ -135,7 +149,7 @@ Do not log or echo the secret value after storing it. 1. Do the work. Don't describe what you would do — use the tools and do it. 2. Update your status at meaningful checkpoints. The channel is using your status to keep the user informed. 3. If a tool call fails, try to recover. Read the error, adjust, and retry. Don't give up on the first failure. -4. **Signal your outcome.** When the task is done, call `set_status(kind: "outcome")` with a summary of the result before providing your final text. You cannot finish without this — the system will reject premature exits. +4. 
**Signal your outcome — only when truly done.** When ALL steps of the task are complete, call `set_status(kind: "outcome")` with a summary of the result before providing your final text. You cannot finish without this — the system will reject premature exits. Conversely, do NOT signal an outcome if there are remaining steps. Completing 2 out of 7 steps is not done. Clicking a button and not waiting for the result is not done. If the task says "do X, Y, and Z", you must do X AND Y AND Z before signaling. 5. Stay focused on the task. Don't explore tangential work unless it's necessary to complete what you were asked to do. 6. If you receive follow-up messages (interactive mode), treat them as additional instructions building on your existing context. {% if tool_secret_names %} diff --git a/src/agent/channel_dispatch.rs b/src/agent/channel_dispatch.rs index cfd682aec..f6097098b 100644 --- a/src/agent/channel_dispatch.rs +++ b/src/agent/channel_dispatch.rs @@ -419,7 +419,6 @@ pub async fn spawn_worker_from_state( "worker.run", worker_id = %worker_id, channel_id = %state.channel_id, - task = %task, ); let secrets_store = state.deps.runtime_config.secrets.load().as_ref().clone(); let handle = spawn_worker_task( @@ -550,7 +549,6 @@ pub async fn spawn_opencode_worker_from_state( "worker.run", worker_id = %worker_id, channel_id = %state.channel_id, - task = %task, worker_type = "opencode", ); let sqlite_pool = state.deps.sqlite_pool.clone(); @@ -802,7 +800,6 @@ pub async fn resume_idle_worker_into_state( "worker.resume", worker_id = %worker_id, channel_id = %state.channel_id, - task = %idle_worker.task, worker_type = "opencode", ); let sqlite_pool = state.deps.sqlite_pool.clone(); @@ -926,7 +923,6 @@ pub async fn resume_idle_worker_into_state( "worker.resume", worker_id = %worker_id, channel_id = %state.channel_id, - task = %idle_worker.task, ); let secrets_store = state.deps.runtime_config.secrets.load().as_ref().clone(); let handle = spawn_worker_task( diff --git 
a/src/agent/channel_history.rs b/src/agent/channel_history.rs index 27c455449..f56a9624f 100644 --- a/src/agent/channel_history.rs +++ b/src/agent/channel_history.rs @@ -422,7 +422,8 @@ pub(crate) fn event_is_for_channel(event: &ProcessEvent, channel_id: &ChannelId) } => event_channel.as_ref() == Some(channel_id), ProcessEvent::OpenCodePartUpdated { .. } | ProcessEvent::StatusUpdate { .. } - | ProcessEvent::TaskUpdated { .. } => false, + | ProcessEvent::TaskUpdated { .. } + | ProcessEvent::WorkerText { .. } => false, } } diff --git a/src/agent/cortex.rs b/src/agent/cortex.rs index 6d7342361..7e44d6fa6 100644 --- a/src/agent/cortex.rs +++ b/src/agent/cortex.rs @@ -1257,7 +1257,8 @@ fn signal_from_event(event: ProcessEvent) -> Option { // UI-only events — no cortex signal needed. ProcessEvent::OpenCodeSessionCreated { .. } | ProcessEvent::OpenCodePartUpdated { .. } - | ProcessEvent::WorkerInitialResult { .. } => return None, + | ProcessEvent::WorkerInitialResult { .. } + | ProcessEvent::WorkerText { .. } => return None, }) } diff --git a/src/agent/worker.rs b/src/agent/worker.rs index b0f40706d..acf4af1de 100644 --- a/src/agent/worker.rs +++ b/src/agent/worker.rs @@ -303,7 +303,7 @@ impl Worker { let mut segments_run = 0; let mut overflow_retries = 0; - let result = if resuming { + let mut result = if resuming { // For resumed workers, synthesize a "result" from the task // since the original initial result was already relayed. String::new() @@ -336,6 +336,7 @@ impl Worker { }); } + self.persist_transcript(&compacted_history, &history).await; self.maybe_compact_history(&mut compacted_history, &mut history) .await; prompt = @@ -395,6 +396,34 @@ impl Worker { } }; + // Safety net: if the worker produced an empty result (e.g. reasoning-only + // response that slipped past the nudge gate), treat it as a failure — unless + // the worker already signaled a meaningful outcome via set_status. 
A worker + // that signaled outcome but ran out of turns still completed its task; the + // outcome status text is the result. + if !resuming && result.trim().is_empty() { + if self.hook.outcome_signaled() { + tracing::info!( + worker_id = %self.id, + "worker produced empty text but outcome was signaled, treating as success" + ); + // Use a synthetic result — the channel already received the + // outcome status via the event stream, so this is just for the + // worker result record. + result = "Task completed (outcome signaled via set_status).".to_string(); + } else { + self.state = WorkerState::Failed; + self.hook.send_status("failed (empty result)"); + self.write_failure_log(&history, "worker produced empty result — likely a reasoning-only exit that bypassed the outcome gate"); + self.persist_transcript(&compacted_history, &history).await; + tracing::error!(worker_id = %self.id, "worker produced empty result, treating as failure"); + return Err(crate::error::AgentError::Other(anyhow::anyhow!( + "worker produced empty result without signaling a meaningful outcome" + )) + .into()); + } + } + // For interactive workers, enter a follow-up loop let mut follow_up_failure: Option = None; if let Some(mut input_rx) = self.input_rx.take() { diff --git a/src/api/state.rs b/src/api/state.rs index d8236c03d..77ec725b1 100644 --- a/src/api/state.rs +++ b/src/api/state.rs @@ -4,6 +4,7 @@ use crate::agent::channel::ChannelState; use crate::agent::cortex_chat::CortexChatSession; use crate::agent::status::StatusBlock; use crate::config::{Binding, DefaultsConfig, DiscordPermissions, RuntimeConfig, SlackPermissions}; +use crate::conversation::worker_transcript::{ActionContent, TranscriptStep}; use crate::cron::{CronStore, Scheduler}; use crate::llm::LlmManager; use crate::mcp::McpManager; @@ -118,6 +119,11 @@ pub struct ApiState { pub agent_groups: ArcSwap>, /// Org-level humans for the topology UI. pub agent_humans: ArcSwap>, + /// Live transcript cache for running workers. 
Accumulates `TranscriptStep`s + /// from `ToolStarted`/`ToolCompleted` events so that page refreshes can + /// recover the transcript without waiting for the worker to complete. + /// Keyed by worker_id, cleared on worker completion. + pub live_worker_transcripts: Arc>>>, } /// Events sent to SSE clients. Wraps ProcessEvents with agent context. @@ -243,6 +249,12 @@ pub enum ApiEvent { worker_id: String, part: crate::opencode::types::OpenCodePart, }, + /// A worker emitted text content (model reasoning between tool calls). + WorkerText { + agent_id: String, + worker_id: String, + text: String, + }, } impl ApiState { @@ -295,6 +307,7 @@ impl ApiState { agent_links: ArcSwap::from_pointee(Vec::new()), agent_groups: ArcSwap::from_pointee(Vec::new()), agent_humans: ArcSwap::from_pointee(Vec::new()), + live_worker_transcripts: Arc::new(RwLock::new(HashMap::new())), } } @@ -325,6 +338,16 @@ impl ApiState { self.channel_states.write().await.remove(channel_id); } + /// Retrieve the live transcript cache for a running worker. + /// + /// Returns `Some` with the accumulated transcript steps if the worker is + /// currently running and has emitted tool calls. Returns `None` if no + /// cached transcript exists (worker completed or never started). + pub async fn get_live_transcript(&self, worker_id: &str) -> Option> { + let guard = self.live_worker_transcripts.read().await; + guard.get(worker_id).cloned() + } + /// Register an agent's event stream. Spawns a task that forwards /// ProcessEvents into the aggregated API event stream. pub fn register_agent_events( @@ -333,6 +356,7 @@ impl ApiState { mut agent_event_rx: broadcast::Receiver, ) { let api_tx = self.event_tx.clone(); + let live_transcripts = self.live_worker_transcripts.clone(); tokio::spawn(async move { loop { match agent_event_rx.recv().await { @@ -347,6 +371,10 @@ impl ApiState { interactive, .. 
} => { + live_transcripts + .write() + .await + .insert(worker_id.to_string(), Vec::new()); api_tx .send(ApiEvent::WorkerStarted { agent_id: agent_id.clone(), @@ -408,6 +436,10 @@ impl ApiState { success, .. } => { + live_transcripts + .write() + .await + .remove(&worker_id.to_string()); api_tx .send(ApiEvent::WorkerCompleted { agent_id: agent_id.clone(), @@ -441,6 +473,21 @@ impl ApiState { .. } => { let (process_type, id_str) = process_id_info(process_id); + // Accumulate tool call into live transcript for workers. + if let ProcessId::Worker(worker_id) = process_id { + let call_id = format!("live_{}", uuid::Uuid::new_v4()); + let step = TranscriptStep::Action { + content: vec![ActionContent::ToolCall { + id: call_id, + name: tool_name.clone(), + args: args.clone(), + }], + }; + let mut guard = live_transcripts.write().await; + if let Some(steps) = guard.get_mut(&worker_id.to_string()) { + steps.push(step); + } + } api_tx .send(ApiEvent::ToolStarted { agent_id: agent_id.clone(), @@ -460,6 +507,18 @@ impl ApiState { .. } => { let (process_type, id_str) = process_id_info(process_id); + // Accumulate tool result into live transcript for workers. + if let ProcessId::Worker(worker_id) = process_id { + let step = TranscriptStep::ToolResult { + call_id: String::new(), + name: tool_name.clone(), + text: result.clone(), + }; + let mut guard = live_transcripts.write().await; + if let Some(steps) = guard.get_mut(&worker_id.to_string()) { + steps.push(step); + } + } api_tx .send(ApiEvent::ToolCompleted { agent_id: agent_id.clone(), @@ -544,6 +603,27 @@ impl ApiState { }) .ok(); } + ProcessEvent::WorkerText { + worker_id, text, .. 
+ } => { + // Accumulate into live transcript cache + let step = TranscriptStep::Action { + content: vec![ActionContent::Text { text: text.clone() }], + }; + let mut guard = live_transcripts.write().await; + if let Some(steps) = guard.get_mut(&worker_id.to_string()) { + steps.push(step); + } + drop(guard); + + api_tx + .send(ApiEvent::WorkerText { + agent_id: agent_id.clone(), + worker_id: worker_id.to_string(), + text: text.clone(), + }) + .ok(); + } _ => {} } } diff --git a/src/api/system.rs b/src/api/system.rs index 2891caa9e..4fe9d27f6 100644 --- a/src/api/system.rs +++ b/src/api/system.rs @@ -99,6 +99,7 @@ pub(super) async fn events_sse( ApiEvent::AgentMessageReceived { .. } => "agent_message_received", ApiEvent::TaskUpdated { .. } => "task_updated", ApiEvent::OpenCodePartUpdated { .. } => "opencode_part_updated", + ApiEvent::WorkerText { .. } => "worker_text", }; yield Ok(axum::response::sse::Event::default() .event(event_type) diff --git a/src/api/workers.rs b/src/api/workers.rs index dff7a4f09..aecd65df9 100644 --- a/src/api/workers.rs +++ b/src/api/workers.rs @@ -170,13 +170,18 @@ pub(super) async fn worker_detail( })? .ok_or(StatusCode::NOT_FOUND)?; - let transcript = detail.transcript_blob.as_deref().and_then(|blob| { - worker_transcript::deserialize_transcript(blob) + let transcript = match detail.transcript_blob.as_deref() { + Some(blob) => worker_transcript::deserialize_transcript(blob) .map_err(|error| { tracing::warn!(%error, worker_id = %query.worker_id, "failed to decompress transcript"); }) - .ok() - }); + .ok(), + None => { + // No persisted transcript yet — check the live transcript cache + // so page refreshes can recover in-progress worker transcripts. + state.get_live_transcript(&query.worker_id).await + } + }; Ok(Json(WorkerDetailResponse { id: detail.id, diff --git a/src/hooks.rs b/src/hooks.rs index 6ae37cccb..c1ef0c45f 100644 --- a/src/hooks.rs +++ b/src/hooks.rs @@ -1,7 +1,9 @@ //! 
Prompt hooks for observing and controlling agent behavior. pub mod cortex; +pub mod loop_guard; pub mod spacebot; pub use cortex::CortexHook; +pub use loop_guard::{LoopGuard, LoopGuardConfig, LoopGuardVerdict}; pub use spacebot::{SpacebotHook, ToolNudgePolicy}; diff --git a/src/hooks/loop_guard.rs b/src/hooks/loop_guard.rs new file mode 100644 index 000000000..7d37e4294 --- /dev/null +++ b/src/hooks/loop_guard.rs @@ -0,0 +1,645 @@ +//! Tool loop detection for the agent execution loop. + +use crate::ProcessType; + +use sha2::{Digest, Sha256}; +use std::collections::{HashMap, HashSet, VecDeque}; + +const POLL_TOOLS: &[&str] = &["shell"]; +const HISTORY_CAPACITY: usize = 30; + +/// Avoids hashing multi-MB tool outputs while still catching identical short +/// results. Large outputs that differ only in the tail (growing log files, +/// etc.) won't hash-match, which is correct — the tool is returning new data. +const RESULT_HASH_TRUNCATION: usize = 1_000; + +#[derive(Debug, Clone)] +pub struct LoopGuardConfig { + pub warn_threshold: u32, + pub block_threshold: u32, + pub global_circuit_breaker: u32, + pub poll_multiplier: u32, + pub outcome_warn_threshold: u32, + pub outcome_block_threshold: u32, + pub ping_pong_min_repeats: u32, + pub max_warnings_per_call: u32, +} + +impl LoopGuardConfig { + pub fn for_process(process_type: ProcessType) -> Self { + match process_type { + // Workers do repetitive tool work (build, test, fix cycles). + // Generous thresholds to avoid interfering with legitimate iteration. + ProcessType::Worker => Self { + warn_threshold: 4, + block_threshold: 7, + global_circuit_breaker: 80, + poll_multiplier: 3, + outcome_warn_threshold: 3, + outcome_block_threshold: 4, + ping_pong_min_repeats: 4, + max_warnings_per_call: 3, + }, + // Branches think and recall, moderate iteration expected. 
+ ProcessType::Branch => Self { + warn_threshold: 3, + block_threshold: 5, + global_circuit_breaker: 40, + poll_multiplier: 2, + outcome_warn_threshold: 2, + outcome_block_threshold: 3, + ping_pong_min_repeats: 3, + max_warnings_per_call: 2, + }, + // Channels, compactors, cortex: minimal tool loops expected. + ProcessType::Channel | ProcessType::Compactor | ProcessType::Cortex => Self { + warn_threshold: 2, + block_threshold: 4, + global_circuit_breaker: 20, + poll_multiplier: 2, + outcome_warn_threshold: 2, + outcome_block_threshold: 3, + ping_pong_min_repeats: 3, + max_warnings_per_call: 2, + }, + } + } +} + +/// Maps to Rig hook actions: `Allow` -> `Continue`, `Block` -> `Skip` (message +/// becomes the tool result the LLM sees), `CircuitBreak` -> `Terminate`. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum LoopGuardVerdict { + Allow, + Block(String), + CircuitBreak(String), +} + +/// Held behind `Arc>` on `SpacebotHook`. Persists across tool calls +/// within one Rig `agent.prompt()` invocation, reset at prompt boundaries. +pub struct LoopGuard { + config: LoopGuardConfig, + call_counts: HashMap, + total_calls: u32, + outcome_counts: HashMap, + // Call hashes poisoned by outcome detection — next check() auto-blocks. 
+ blocked_outcomes: HashSet, + recent_calls: VecDeque, + warnings_emitted: HashMap, + hash_to_tool: HashMap, +} + +impl LoopGuard { + pub fn new(config: LoopGuardConfig) -> Self { + Self { + config, + call_counts: HashMap::new(), + total_calls: 0, + outcome_counts: HashMap::new(), + blocked_outcomes: HashSet::new(), + recent_calls: VecDeque::with_capacity(HISTORY_CAPACITY), + warnings_emitted: HashMap::new(), + hash_to_tool: HashMap::new(), + } + } + + pub fn reset(&mut self) { + self.call_counts.clear(); + self.total_calls = 0; + self.outcome_counts.clear(); + self.blocked_outcomes.clear(); + self.recent_calls.clear(); + self.warnings_emitted.clear(); + self.hash_to_tool.clear(); + } + + pub fn check(&mut self, tool_name: &str, args: &str) -> LoopGuardVerdict { + self.total_calls += 1; + + if self.total_calls > self.config.global_circuit_breaker { + return LoopGuardVerdict::CircuitBreak(format!( + "Circuit breaker: exceeded {} total tool calls in this loop. \ + The agent appears to be stuck.", + self.config.global_circuit_breaker + )); + } + + let call_hash = Self::compute_call_hash(tool_name, args); + self.hash_to_tool + .entry(call_hash.clone()) + .or_insert_with(|| tool_name.to_string()); + + if self.recent_calls.len() >= HISTORY_CAPACITY { + self.recent_calls.pop_front(); + } + self.recent_calls.push_back(call_hash.clone()); + + if self.blocked_outcomes.contains(&call_hash) { + return LoopGuardVerdict::Block(format!( + "Blocked: tool '{}' is returning identical results repeatedly. 
\ + The current approach is not working — try something different.", + tool_name + )); + } + + let count = self.call_counts.entry(call_hash.clone()).or_insert(0); + *count += 1; + let count_value = *count; + + let is_poll = Self::is_poll_call(tool_name, args); + let multiplier = if is_poll { + self.config.poll_multiplier + } else { + 1 + }; + let effective_warn = self.config.warn_threshold * multiplier; + let effective_block = self.config.block_threshold * multiplier; + + if count_value >= effective_block { + return LoopGuardVerdict::Block(format!( + "Blocked: tool '{}' called {} times with identical parameters. \ + Try a different approach or different parameters.", + tool_name, count_value + )); + } + + // Warn threshold — escalates to block after max_warnings_per_call. + if count_value >= effective_warn { + let warning_count = self.warnings_emitted.entry(call_hash.clone()).or_insert(0); + *warning_count += 1; + if *warning_count > self.config.max_warnings_per_call { + return LoopGuardVerdict::Block(format!( + "Blocked: tool '{}' called {} times with identical parameters \ + (warnings exhausted). Try a different approach.", + tool_name, count_value + )); + } + return LoopGuardVerdict::Block(format!( + "Warning: tool '{}' has been called {} times with identical parameters. \ + Consider trying a different approach or different parameters.", + tool_name, count_value + )); + } + + // Ping-pong detection runs even if individual hash counts are low. + if let Some(ping_pong_message) = self.detect_ping_pong() { + let repeats = self.count_ping_pong_repeats(); + if repeats >= self.config.ping_pong_min_repeats { + return LoopGuardVerdict::Block(ping_pong_message); + } + // Below min_repeats, send a warning via skip. 
+ let warning_count = self + .warnings_emitted + .entry(format!("pingpong_{call_hash}")) + .or_insert(0); + *warning_count += 1; + if *warning_count <= self.config.max_warnings_per_call { + return LoopGuardVerdict::Block(ping_pong_message); + } + } + + LoopGuardVerdict::Allow + } + + /// If the same `(tool_name, args, result)` triple is seen enough times, + /// the call hash is poisoned so the next `check()` blocks before execution. + pub fn record_outcome(&mut self, tool_name: &str, args: &str, result: &str) { + let outcome_hash = Self::compute_outcome_hash(tool_name, args, result); + let call_hash = Self::compute_call_hash(tool_name, args); + + let count = self.outcome_counts.entry(outcome_hash).or_insert(0); + *count += 1; + let count_value = *count; + + if count_value >= self.config.outcome_block_threshold { + // Poison the call hash so the next check() auto-blocks. + self.blocked_outcomes.insert(call_hash); + tracing::debug!( + tool_name, + outcome_count = count_value, + "loop guard: outcome block threshold reached, poisoning call hash" + ); + } else if count_value >= self.config.outcome_warn_threshold { + tracing::debug!( + tool_name, + outcome_count = count_value, + "loop guard: identical outcome detected" + ); + } + } + + fn detect_ping_pong(&self) -> Option { + let history: Vec<&String> = self.recent_calls.iter().collect(); + let length = history.len(); + + // Check for pattern of length 2 (A-B-A-B-A-B) — need 6 entries. + if length >= 6 { + let tail = &history[length - 6..]; + let a = tail[0]; + let b = tail[1]; + if a != b && tail[2] == a && tail[3] == b && tail[4] == a && tail[5] == b { + let tool_a = self.resolve_tool_name(a); + let tool_b = self.resolve_tool_name(b); + return Some(format!( + "Ping-pong detected: tools '{}' and '{}' are alternating \ + repeatedly. Break the cycle by trying a different approach.", + tool_a, tool_b + )); + } + } + + // Check for pattern of length 3 (A-B-C-A-B-C-A-B-C) — need 9 entries. 
+ if length >= 9 { + let tail = &history[length - 9..]; + let a = tail[0]; + let b = tail[1]; + let c = tail[2]; + // Ensure they're not all the same (that's just repetition). + if !(a == b && b == c) + && tail[3] == a + && tail[4] == b + && tail[5] == c + && tail[6] == a + && tail[7] == b + && tail[8] == c + { + let tool_a = self.resolve_tool_name(a); + let tool_b = self.resolve_tool_name(b); + let tool_c = self.resolve_tool_name(c); + return Some(format!( + "Ping-pong detected: tools '{}', '{}', '{}' are cycling \ + repeatedly. Break the cycle by trying a different approach.", + tool_a, tool_b, tool_c + )); + } + } + + None + } + + fn count_ping_pong_repeats(&self) -> u32 { + let history: Vec<&String> = self.recent_calls.iter().collect(); + let length = history.len(); + + if length >= 4 { + let a = history[length - 2]; + let b = history[length - 1]; + if a != b { + let mut repeats: u32 = 0; + let mut index = length; + while index >= 2 { + index -= 2; + if history[index] == a && history[index + 1] == b { + repeats += 1; + } else { + break; + } + } + if repeats >= 2 { + return repeats; + } + } + } + + if length >= 6 { + let a = history[length - 3]; + let b = history[length - 2]; + let c = history[length - 1]; + if !(a == b && b == c) { + let mut repeats: u32 = 0; + let mut index = length; + while index >= 3 { + index -= 3; + if history[index] == a && history[index + 1] == b && history[index + 2] == c { + repeats += 1; + } else { + break; + } + } + if repeats >= 2 { + return repeats; + } + } + } + + 0 + } + + fn resolve_tool_name(&self, hash: &str) -> String { + self.hash_to_tool + .get(hash) + .cloned() + .unwrap_or_else(|| "unknown".to_string()) + } + + // Poll tools get relaxed thresholds because they're expected to be + // called repeatedly (checking build output, watching deployment status). 
+ fn is_poll_call(tool_name: &str, args: &str) -> bool { + if POLL_TOOLS.contains(&tool_name) { + let args_lower = args.to_lowercase(); + if args.len() < 200 + && (args_lower.contains("status") + || args_lower.contains("poll") + || args_lower.contains("wait") + || args_lower.contains("watch") + || args_lower.contains("tail") + || args_lower.contains("\"ps ") + || args_lower.contains("jobs") + || args_lower.contains("pgrep") + || args_lower.contains("docker ps") + || args_lower.contains("kubectl get")) + { + return true; + } + } + false + } + + fn compute_call_hash(tool_name: &str, args: &str) -> String { + let mut hasher = Sha256::new(); + hasher.update(tool_name.as_bytes()); + hasher.update(b"|"); + hasher.update(args.as_bytes()); + hex::encode(hasher.finalize()) + } + + fn compute_outcome_hash(tool_name: &str, args: &str, result: &str) -> String { + let mut hasher = Sha256::new(); + hasher.update(tool_name.as_bytes()); + hasher.update(b"|"); + hasher.update(args.as_bytes()); + hasher.update(b"|"); + let truncated = if result.len() > RESULT_HASH_TRUNCATION { + &result[..RESULT_HASH_TRUNCATION] + } else { + result + }; + hasher.update(truncated.as_bytes()); + hex::encode(hasher.finalize()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn worker_config() -> LoopGuardConfig { + LoopGuardConfig::for_process(ProcessType::Worker) + } + + fn channel_config() -> LoopGuardConfig { + LoopGuardConfig::for_process(ProcessType::Channel) + } + + #[test] + fn allow_below_threshold() { + let mut guard = LoopGuard::new(worker_config()); + let verdict = guard.check("shell", r#"{"command":"ls"}"#); + assert_eq!(verdict, LoopGuardVerdict::Allow); + let verdict = guard.check("shell", r#"{"command":"ls"}"#); + assert_eq!(verdict, LoopGuardVerdict::Allow); + } + + #[test] + fn block_at_warn_threshold() { + let mut guard = LoopGuard::new(worker_config()); + let args = r#"{"command":"cargo build"}"#; + // Worker warn_threshold = 4, so calls 1-3 are Allow. 
+ for _ in 0..3 { + assert_eq!(guard.check("shell", args), LoopGuardVerdict::Allow); + } + // Call 4 hits warn threshold — delivered as a Block/Skip with warning. + let verdict = guard.check("shell", args); + assert!( + matches!(verdict, LoopGuardVerdict::Block(ref message) if message.contains("Warning")) + ); + } + + #[test] + fn block_at_block_threshold() { + let mut guard = LoopGuard::new(worker_config()); + let args = r#"{"command":"cargo build"}"#; + // Worker block_threshold = 7. + for _ in 0..6 { + guard.check("shell", args); + } + let verdict = guard.check("shell", args); + assert!( + matches!(verdict, LoopGuardVerdict::Block(ref message) if message.contains("Blocked")) + ); + } + + #[test] + fn different_params_no_collision() { + let mut guard = LoopGuard::new(worker_config()); + for i in 0..20 { + let args = format!(r#"{{"query":"query_{i}"}}"#); + assert_eq!(guard.check("web_search", &args), LoopGuardVerdict::Allow); + } + } + + #[test] + fn global_circuit_breaker() { + let config = LoopGuardConfig { + global_circuit_breaker: 5, + warn_threshold: 100, + block_threshold: 100, + ..worker_config() + }; + let mut guard = LoopGuard::new(config); + for i in 0..5 { + let args = format!(r#"{{"n":{i}}}"#); + assert_eq!(guard.check("tool", &args), LoopGuardVerdict::Allow); + } + // Call 6 triggers circuit breaker (> 5). 
+ let verdict = guard.check("tool", r#"{"n":5}"#); + assert!(matches!(verdict, LoopGuardVerdict::CircuitBreak(_))); + } + + #[test] + fn channel_has_stricter_thresholds_than_worker() { + let channel = channel_config(); + let worker = worker_config(); + assert!(channel.warn_threshold < worker.warn_threshold); + assert!(channel.block_threshold < worker.block_threshold); + assert!(channel.global_circuit_breaker < worker.global_circuit_breaker); + } + + #[test] + fn outcome_repetition_poisons_call_hash() { + let mut guard = LoopGuard::new(worker_config()); + let args = r#"{"query":"weather"}"#; + let result = "sunny 72F"; + + // Worker outcome_block_threshold = 4. Record 4 identical outcomes. + for _ in 0..4 { + guard.record_outcome("web_search", args, result); + } + + // The next check() for this call hash should auto-block. + let verdict = guard.check("web_search", args); + assert!( + matches!(verdict, LoopGuardVerdict::Block(ref message) if message.contains("identical results")) + ); + } + + #[test] + fn different_results_do_not_poison() { + let mut guard = LoopGuard::new(worker_config()); + let args = r#"{"command":"cargo build"}"#; + + // Each call produces a different result — the agent is making progress. + for i in 0..10 { + guard.record_outcome("shell", args, &format!("error on line {i}")); + } + + // Call hash should not be poisoned. + let verdict = guard.check("shell", args); + assert_eq!(verdict, LoopGuardVerdict::Allow); + } + + #[test] + fn ping_pong_ab_detection() { + let config = LoopGuardConfig { + warn_threshold: 100, + block_threshold: 100, + ping_pong_min_repeats: 3, + ..worker_config() + }; + let mut guard = LoopGuard::new(config); + let args_a = r#"{"file":"a.txt"}"#; + let args_b = r#"{"file":"b.txt"}"#; + + // A-B-A-B-A-B = 3 repeats of (A,B). + for _ in 0..3 { + guard.check("file", args_a); + guard.check("file", args_b); + } + + // On the 7th call, pattern should be detected. 
+ let verdict = guard.check("file", args_a); + assert!( + matches!(verdict, LoopGuardVerdict::Block(ref message) if message.contains("Ping-pong")) + || matches!(verdict, LoopGuardVerdict::Allow), + "Expected ping-pong detection or allow (pattern detected on prior call), got: {verdict:?}" + ); + } + + #[test] + fn ping_pong_abc_detection() { + let config = LoopGuardConfig { + warn_threshold: 100, + block_threshold: 100, + ping_pong_min_repeats: 3, + ..worker_config() + }; + let mut guard = LoopGuard::new(config); + + // A-B-C-A-B-C-A-B-C = 3 repeats of (A,B,C). + for _ in 0..3 { + guard.check("tool_a", r#"{"a":1}"#); + guard.check("tool_b", r#"{"b":2}"#); + guard.check("tool_c", r#"{"c":3}"#); + } + + // The 10th call should detect the pattern. + let verdict = guard.check("tool_a", r#"{"a":1}"#); + assert!( + matches!(verdict, LoopGuardVerdict::Block(ref message) if message.contains("Ping-pong")), + "Expected ping-pong detection, got: {verdict:?}" + ); + } + + #[test] + fn no_false_ping_pong() { + let mut guard = LoopGuard::new(worker_config()); + for i in 0..10 { + let args = format!(r#"{{"n":{i}}}"#); + let verdict = guard.check("tool", &args); + assert_eq!(verdict, LoopGuardVerdict::Allow); + } + } + + #[test] + fn poll_tool_relaxed_thresholds() { + let mut guard = LoopGuard::new(worker_config()); + // Worker: warn=4, poll_multiplier=3, so effective warn=12. + let args = r#"{"command":"docker ps --status running"}"#; + + // Calls 1-11 should all be Allow (below effective warn=12). + for i in 1..=11 { + let verdict = guard.check("shell", args); + assert_eq!( + verdict, + LoopGuardVerdict::Allow, + "Call {i} should be allowed for poll tool" + ); + } + + // Call 12 should trigger a warning. 
+ let verdict = guard.check("shell", args); + assert!( + matches!(verdict, LoopGuardVerdict::Block(ref message) if message.contains("Warning")), + "Expected warning at poll threshold, got: {verdict:?}" + ); + } + + #[test] + fn non_poll_shell_not_relaxed() { + let mut guard = LoopGuard::new(worker_config()); + let args = r#"{"command":"echo hello"}"#; + + // Worker: warn=4, no poll multiplier for non-poll commands. + for _ in 0..3 { + assert_eq!(guard.check("shell", args), LoopGuardVerdict::Allow); + } + let verdict = guard.check("shell", args); + assert!(matches!(verdict, LoopGuardVerdict::Block(_))); + } + + #[test] + fn reset_clears_all_state() { + let mut guard = LoopGuard::new(worker_config()); + let args = r#"{"command":"ls"}"#; + + for _ in 0..3 { + guard.check("shell", args); + } + guard.record_outcome("shell", args, "file1 file2"); + + guard.reset(); + + // After reset, the first call should be allowed again. + assert_eq!(guard.check("shell", args), LoopGuardVerdict::Allow); + assert_eq!(guard.total_calls, 1); + } + + #[test] + fn warning_bucket_escalates_to_block() { + let config = LoopGuardConfig { + warn_threshold: 2, + block_threshold: 100, + max_warnings_per_call: 2, + ..worker_config() + }; + let mut guard = LoopGuard::new(config); + let args = r#"{"x":1}"#; + + // Call 1: Allow. + assert_eq!(guard.check("tool", args), LoopGuardVerdict::Allow); + + // Call 2: Block with warning (hits warn_threshold=2). + let verdict = guard.check("tool", args); + assert!(matches!(verdict, LoopGuardVerdict::Block(ref m) if m.contains("Warning"))); + + // Call 3: Block with warning again. + let verdict = guard.check("tool", args); + assert!(matches!(verdict, LoopGuardVerdict::Block(ref m) if m.contains("Warning"))); + + // Call 4: Block with "warnings exhausted" (max_warnings_per_call=2 exceeded). 
+ let verdict = guard.check("tool", args); + assert!( + matches!(verdict, LoopGuardVerdict::Block(ref m) if m.contains("warnings exhausted")) + ); + } +} diff --git a/src/hooks/spacebot.rs b/src/hooks/spacebot.rs index 9cc7bace9..9232bb99e 100644 --- a/src/hooks/spacebot.rs +++ b/src/hooks/spacebot.rs @@ -1,5 +1,6 @@ //! SpacebotHook: Prompt hook for channels, branches, and workers. +use crate::hooks::loop_guard::{LoopGuard, LoopGuardConfig, LoopGuardVerdict}; use crate::{AgentId, ChannelId, ProcessEvent, ProcessId, ProcessType}; use rig::agent::{HookAction, PromptHook, ToolCallHookAction}; use rig::completion::{CompletionModel, CompletionResponse, Message, Prompt, PromptError}; @@ -41,6 +42,13 @@ pub struct SpacebotHook { /// Once signaled, the nudge system allows text-only responses to pass /// through as legitimate completions. outcome_signaled: std::sync::Arc, + /// Counts consecutive text-only nudge attempts. Reset to zero whenever a + /// tool call completes successfully, so the budget tracks *consecutive* + /// text-only responses rather than total across the worker's lifetime. + nudge_attempts: std::sync::Arc, + /// Detects repetitive tool calling patterns (identical calls, identical + /// outcomes, ping-pong cycles) and blocks them before execution. 
+ loop_guard: std::sync::Arc>, } impl SpacebotHook { @@ -61,6 +69,7 @@ impl SpacebotHook { channel_id: Option, event_tx: broadcast::Sender, ) -> Self { + let loop_guard_config = LoopGuardConfig::for_process(process_type); Self { agent_id, process_id, @@ -71,6 +80,10 @@ impl SpacebotHook { completion_calls: std::sync::Arc::new(std::sync::atomic::AtomicUsize::new(0)), nudge_request_active: std::sync::Arc::new(std::sync::atomic::AtomicBool::new(false)), outcome_signaled: std::sync::Arc::new(std::sync::atomic::AtomicBool::new(false)), + nudge_attempts: std::sync::Arc::new(std::sync::atomic::AtomicUsize::new(0)), + loop_guard: std::sync::Arc::new(std::sync::Mutex::new(LoopGuard::new( + loop_guard_config, + ))), } } @@ -85,12 +98,23 @@ impl SpacebotHook { self.tool_nudge_policy } - /// Reset per-prompt state (tool nudging and outcome tracking). + /// Returns `true` if the worker has called `set_status` with `kind: "outcome"`. + pub fn outcome_signaled(&self) -> bool { + self.outcome_signaled + .load(std::sync::atomic::Ordering::Relaxed) + } + + /// Reset per-prompt state (tool nudging, outcome tracking, and loop guard). pub fn reset_tool_nudge_state(&self) { self.completion_calls .store(0, std::sync::atomic::Ordering::Relaxed); self.outcome_signaled .store(false, std::sync::atomic::Ordering::Relaxed); + self.nudge_attempts + .store(0, std::sync::atomic::Ordering::Relaxed); + if let Ok(mut guard) = self.loop_guard.lock() { + guard.reset(); + } } fn set_tool_nudge_request_active(&self, active: bool) { @@ -119,7 +143,6 @@ impl SpacebotHook { self.reset_tool_nudge_state(); self.set_tool_nudge_request_active(true); - let mut nudge_attempts = 0usize; let mut current_prompt = std::borrow::Cow::Borrowed(prompt); let mut using_tool_nudge_prompt = false; @@ -133,19 +156,29 @@ impl SpacebotHook { match &result { Err(PromptError::PromptCancelled { reason, .. 
}) - if Self::is_tool_nudge_reason(reason) - && nudge_attempts < Self::TOOL_NUDGE_MAX_RETRIES => + if Self::is_tool_nudge_reason(reason) => { + // Read the current attempt count. on_tool_result resets + // this to zero whenever a tool call completes, so this + // tracks *consecutive* text-only responses rather than + // total nudges across the worker's lifetime. + let attempts = self + .nudge_attempts + .fetch_add(1, std::sync::atomic::Ordering::Relaxed); + if attempts >= Self::TOOL_NUDGE_MAX_RETRIES { + // Retries exhausted — propagate the cancellation. + self.set_tool_nudge_request_active(false); + return result; + } Self::prune_tool_nudge_retry_history( history, history_len_before_attempt, using_tool_nudge_prompt, ); - nudge_attempts += 1; tracing::warn!( process_id = %self.process_id, process_type = %self.process_type, - attempt = nudge_attempts, + attempt = attempts + 1, "text-only response without outcome signal, nudging tool usage" ); current_prompt = std::borrow::Cow::Borrowed(Self::TOOL_NUDGE_PROMPT); @@ -402,14 +435,29 @@ impl SpacebotHook { return false; } - // Text-only response without a prior outcome signal — nudge. - response.choice.iter().any(|content| { + // Response without tool calls and without a prior outcome signal. + // Nudge if the response contains any non-empty text, OR if it + // contains no text at all (e.g. reasoning-only). A response that + // is purely reasoning/image with no text and no tool calls means + // the worker hasn't actually done anything — send it back. + let has_any_text = response.choice.iter().any(|content| { if let rig::message::AssistantContent::Text(text) = content { !text.text.trim().is_empty() } else { false } - }) + }); + + // Nudge on non-empty text (worker tried to narrate instead of + // working) or on no text at all (reasoning-only exit attempt). 
+ has_any_text + || !response.choice.iter().any(|content| { + matches!( + content, + rig::message::AssistantContent::Text(_) + | rig::message::AssistantContent::ToolCall(_) + ) + }) } } @@ -458,6 +506,40 @@ where }; } + // Emit text content from worker completion responses so the live + // transcript can show the model's reasoning between tool calls. + if self.process_type == ProcessType::Worker { + let text: String = response + .choice + .iter() + .filter_map(|content| { + if let rig::message::AssistantContent::Text(text) = content { + let trimmed = text.text.trim(); + if trimmed.is_empty() { + None + } else { + Some(trimmed) + } + } else { + None + } + }) + .collect::>() + .join("\n\n"); + + if !text.is_empty() + && let ProcessId::Worker(worker_id) = &self.process_id + { + let event = ProcessEvent::WorkerText { + agent_id: self.agent_id.clone(), + worker_id: *worker_id, + channel_id: self.channel_id.clone(), + text, + }; + self.event_tx.send(event).ok(); + } + } + HookAction::Continue } @@ -485,6 +567,31 @@ where _internal_call_id: &str, args: &str, ) -> ToolCallHookAction { + // Loop guard: check for repetitive tool calling before execution. + // Runs for all process types. Block → Skip (message becomes tool + // result), CircuitBreak → Terminate. + if let Ok(mut guard) = self.loop_guard.lock() { + match guard.check(tool_name, args) { + LoopGuardVerdict::Allow => {} + LoopGuardVerdict::Block(reason) => { + tracing::warn!( + process_id = %self.process_id, + tool_name = %tool_name, + "loop guard blocked tool call" + ); + return ToolCallHookAction::Skip { reason }; + } + LoopGuardVerdict::CircuitBreak(reason) => { + tracing::warn!( + process_id = %self.process_id, + tool_name = %tool_name, + "loop guard circuit-breaking agent loop" + ); + return ToolCallHookAction::Terminate { reason }; + } + } + } + // Leak blocking is enforced at channel egress (`reply`). Worker and // branch tool calls may legitimately handle secrets internally. 
if self.process_type == ProcessType::Channel @@ -583,6 +690,24 @@ where self.record_tool_result_metrics(tool_name, internal_call_id); + // Record outcome for loop guard (outcome-aware repetition detection). + // The guard uses the (tool_name, args, result) triple to detect when + // the same call produces the same result repeatedly, and poisons the + // call hash so the next check() in on_tool_call auto-blocks. + if let Ok(mut guard) = self.loop_guard.lock() { + guard.record_outcome(tool_name, _args, result); + } + + // A successful tool call proves the worker is still productive. + // Reset the consecutive nudge counter so a brief narration blip + // after many tool calls doesn't exhaust the retry budget. + // Tool errors (from Rig's error path) don't count as productive. + let is_tool_error = result.starts_with("Toolset error:"); + if self.tool_nudge_policy.is_enabled() && !is_tool_error { + self.nudge_attempts + .store(0, std::sync::atomic::Ordering::Relaxed); + } + // Detect terminal outcome signal from successful set_status results. // We check the *result* (not the args) so the flag is only set after the // tool actually executed successfully. A failed set_status call won't @@ -1134,4 +1259,104 @@ mod tests { } if text_delta == "hi" && aggregated_text == "hi" )); } + + #[tokio::test] + async fn tool_result_resets_consecutive_nudge_counter() { + // The exact scenario from the Railway browser worker failure: + // worker makes many tool calls, then narrates, gets nudged, but the + // nudge counter should have been reset by the prior tool calls so + // the budget isn't exhausted from earlier narration blips. 
+ let hook = make_hook().with_tool_nudge_policy(ToolNudgePolicy::Enabled); + hook.reset_tool_nudge_state(); + hook.set_tool_nudge_request_active(true); + + // Simulate 2 nudge attempts (would exhaust budget under old behavior) + hook.nudge_attempts + .store(2, std::sync::atomic::Ordering::Relaxed); + + // A successful tool call should reset the counter + let _ = >::on_tool_result( + &hook, + "browser_click", + None, + "internal_1", + "{\"index\": 13}", + "{\"success\":true,\"message\":\"Clicked element at index 13\"}", + ) + .await; + + assert_eq!( + hook.nudge_attempts + .load(std::sync::atomic::Ordering::Relaxed), + 0, + "Tool result should reset consecutive nudge counter to zero" + ); + } + + #[tokio::test] + async fn nudge_counter_not_reset_when_policy_disabled() { + // When nudge policy is disabled, on_tool_result should not touch + // the counter (it's irrelevant but ensures no side effects). + let hook = make_hook().with_tool_nudge_policy(ToolNudgePolicy::Disabled); + hook.reset_tool_nudge_state(); + + hook.nudge_attempts + .store(2, std::sync::atomic::Ordering::Relaxed); + + let _ = >::on_tool_result( + &hook, + "shell", + None, + "internal_1", + "{\"command\":\"ls\"}", + "file1.txt\nfile2.txt", + ) + .await; + + assert_eq!( + hook.nudge_attempts + .load(std::sync::atomic::Ordering::Relaxed), + 2, + "Nudge counter should not be reset when policy is disabled" + ); + } + + #[tokio::test] + async fn nudges_on_reasoning_only_response_without_outcome() { + // A response with only Reasoning content (no Text, no ToolCall) should + // trigger a nudge. This is the exact bug case: models with extended + // thinking produce a Reasoning block and exit the agent loop without + // doing any work. 
+ let hook = make_hook().with_tool_nudge_policy(ToolNudgePolicy::Enabled); + let prompt = prompt_message(); + hook.reset_tool_nudge_state(); + hook.set_tool_nudge_request_active(true); + + let reasoning_response = CompletionResponse { + choice: OneOrMany::one(AssistantContent::reasoning("thinking about the task...")), + message_id: None, + usage: Usage::default(), + raw_response: RawResponse { + body: serde_json::json!({}), + }, + }; + + let _ = + >::on_completion_call(&hook, &prompt, &[]) + .await; + let response = >::on_completion_response( + &hook, + &prompt, + &reasoning_response, + ) + .await; + assert!( + matches!( + response, + HookAction::Terminate { ref reason } + if reason == SpacebotHook::TOOL_NUDGE_REASON + ), + "Expected nudge — reasoning-only response without outcome should be rejected" + ); + } } diff --git a/src/lib.rs b/src/lib.rs index 294c4d023..c13adaf87 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -289,6 +289,14 @@ pub enum ProcessEvent { text_delta: String, aggregated_text: String, }, + /// A worker emitted text content (model reasoning between tool calls). + /// Sent once per completion response, containing the full text for that turn. + WorkerText { + agent_id: AgentId, + worker_id: WorkerId, + channel_id: Option, + text: String, + }, } /// Default broadcast capacity for the per-agent control event bus. 
diff --git a/src/prompts/text.rs b/src/prompts/text.rs index 2af8b0131..46ab0a02e 100644 --- a/src/prompts/text.rs +++ b/src/prompts/text.rs @@ -147,7 +147,18 @@ fn lookup(lang: &str, key: &str) -> &'static str { include_str!("../../prompts/en/tools/set_status_description.md.j2") } ("en", "tools/shell") => include_str!("../../prompts/en/tools/shell_description.md.j2"), - ("en", "tools/file") => include_str!("../../prompts/en/tools/file_description.md.j2"), + ("en", "tools/file_read") => { + include_str!("../../prompts/en/tools/file_read_description.md.j2") + } + ("en", "tools/file_write") => { + include_str!("../../prompts/en/tools/file_write_description.md.j2") + } + ("en", "tools/file_edit") => { + include_str!("../../prompts/en/tools/file_edit_description.md.j2") + } + ("en", "tools/file_list") => { + include_str!("../../prompts/en/tools/file_list_description.md.j2") + } ("en", "tools/exec") => include_str!("../../prompts/en/tools/exec_description.md.j2"), ("en", "tools/browser") => include_str!("../../prompts/en/tools/browser_description.md.j2"), ("en", "tools/web_search") => { diff --git a/src/tools.rs b/src/tools.rs index 8502d2a48..bb88fe5d3 100644 --- a/src/tools.rs +++ b/src/tools.rs @@ -18,7 +18,7 @@ //! - `spawn_worker` is included for channel-originated branches only //! //! **Worker ToolServer** (one per worker, created at spawn time): -//! - `shell`, `file`, `exec` — stateless, registered at creation +//! - `shell`, `file_read`/`file_write`/`file_edit`/`file_list`, `exec` — stateless, registered at creation //! - `task_update` — scoped to the worker's assigned task //! - `set_status` — per-worker instance, registered at creation //! 
@@ -67,8 +67,8 @@ pub use attachment_recall::{ }; pub use branch_tool::{BranchArgs, BranchError, BranchOutput, BranchTool}; pub use browser::{ - ActKind, BrowserAction, BrowserArgs, BrowserError, BrowserOutput, BrowserTool, ElementSummary, - SharedBrowserHandle, TabInfo, + BrowserError, BrowserOutput, SharedBrowserHandle, TabInfo, new_shared_browser_handle, + register_browser_tools, }; pub use cancel::{CancelArgs, CancelError, CancelOutput, CancelTool}; pub use channel_recall::{ @@ -80,7 +80,11 @@ pub use config_inspect::{ pub use cron::{CronArgs, CronError, CronOutput, CronTool}; pub use email_search::{EmailSearchArgs, EmailSearchError, EmailSearchOutput, EmailSearchTool}; pub use exec::{EnvVar, ExecArgs, ExecError, ExecOutput, ExecResult, ExecTool}; -pub use file::{FileArgs, FileEntry, FileEntryOutput, FileError, FileOutput, FileTool, FileType}; +pub use file::{ + FileEditArgs, FileEditTool, FileEntry, FileEntryOutput, FileError, FileListArgs, FileListTool, + FileOutput, FileReadArgs, FileReadTool, FileType, FileWriteArgs, FileWriteTool, + register_file_tools, +}; pub use mcp::{McpToolAdapter, McpToolError, McpToolOutput}; pub use memory_delete::{ MemoryDeleteArgs, MemoryDeleteError, MemoryDeleteOutput, MemoryDeleteTool, @@ -497,8 +501,7 @@ pub fn create_worker_tool_server( ) -> ToolServerHandle { let mut server = ToolServer::new() .tool(ShellTool::new(workspace.clone(), sandbox.clone())) - .tool(FileTool::new(workspace.clone(), sandbox.clone())) - .tool(ExecTool::new(workspace, sandbox)) + .tool(ExecTool::new(workspace.clone(), sandbox.clone())) .tool(TaskUpdateTool::for_worker( task_store, agent_id.clone(), @@ -513,21 +516,14 @@ pub fn create_worker_tool_server( }) .tool(ReadSkillTool::new(runtime_config.clone())); + server = register_file_tools(server, workspace, sandbox); + if let Some(store) = runtime_config.secrets.load().as_ref() { server = server.tool(SecretSetTool::new(store.clone())); } if browser_config.enabled { - let browser_tool = if let 
Some(shared) = runtime_config - .shared_browser - .as_ref() - .filter(|_| browser_config.persist_session) - { - BrowserTool::new_shared(shared.clone(), browser_config, screenshot_dir) - } else { - BrowserTool::new(browser_config, screenshot_dir) - }; - server = server.tool(browser_tool); + server = register_browser_tools(server, browser_config, screenshot_dir, &runtime_config); } if let Some(key) = brave_search_key { @@ -605,20 +601,12 @@ pub fn create_cortex_chat_tool_server( .tool(TaskListTool::new(task_store.clone(), agent_id.to_string())) .tool(TaskUpdateTool::for_branch(task_store, agent_id.clone())) .tool(ShellTool::new(workspace.clone(), sandbox.clone())) - .tool(FileTool::new(workspace.clone(), sandbox.clone())) - .tool(ExecTool::new(workspace, sandbox)); + .tool(ExecTool::new(workspace.clone(), sandbox.clone())); + + server = register_file_tools(server, workspace, sandbox); if browser_config.enabled { - let browser_tool = if let Some(shared) = runtime_config - .shared_browser - .as_ref() - .filter(|_| browser_config.persist_session) - { - BrowserTool::new_shared(shared.clone(), browser_config, screenshot_dir) - } else { - BrowserTool::new(browser_config, screenshot_dir) - }; - server = server.tool(browser_tool); + server = register_browser_tools(server, browser_config, screenshot_dir, &runtime_config); } if let Some(key) = brave_search_key { diff --git a/src/tools/browser.rs b/src/tools/browser.rs index 90186b262..fb231e09e 100644 --- a/src/tools/browser.rs +++ b/src/tools/browser.rs @@ -1,18 +1,32 @@ -//! Browser automation tool for workers. +//! Browser automation tools for workers. //! -//! Provides navigation, element interaction, screenshots, and page observation -//! via headless Chrome using chromiumoxide. Uses an accessibility-tree based -//! ref system for LLM-friendly element addressing. +//! Provides a suite of separate tools (navigate, click, type, snapshot, etc.) +//! backed by a shared browser state. 
Uses Chrome's native CDP Accessibility API +//! (`Accessibility.getFullAXTree`) to extract the ARIA tree directly from the +//! browser's accessibility layer. Interactive elements get sequential indices +//! and their `BackendNodeId` is stored for reliable element resolution. +//! +//! Element resolution: index → `BackendNodeId` (from CDP accessibility tree) → +//! `DOM.getBoxModel` for coordinates → `Input.dispatchMouseEvent` for clicks. +//! This avoids fragile CSS selectors and works reliably on SPAs and complex +//! pages where JS injection fails. use crate::config::BrowserConfig; +use crate::secrets::store::SecretsStore; + use chromiumoxide::browser::{Browser, BrowserConfig as ChromeConfig}; use chromiumoxide::fetcher::{BrowserFetcher, BrowserFetcherOptions}; +use chromiumoxide::handler::viewport::Viewport; use chromiumoxide::page::ScreenshotParams; use chromiumoxide_cdp::cdp::browser_protocol::accessibility::{ - EnableParams as AxEnableParams, GetFullAxTreeParams, + AxNode, AxPropertyName, GetFullAxTreeParams, +}; +use chromiumoxide_cdp::cdp::browser_protocol::dom::{ + BackendNodeId, GetBoxModelParams, ResolveNodeParams, }; use chromiumoxide_cdp::cdp::browser_protocol::input::{ - DispatchKeyEventParams, DispatchKeyEventType, + DispatchKeyEventParams, DispatchKeyEventType, DispatchMouseEventParams, DispatchMouseEventType, + MouseButton, }; use chromiumoxide_cdp::cdp::browser_protocol::page::CaptureScreenshotFormat; use futures::StreamExt as _; @@ -22,12 +36,14 @@ use rig::tool::Tool; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use std::collections::HashMap; -use std::net::{IpAddr, Ipv4Addr, Ipv6Addr}; +use std::net::{IpAddr, Ipv4Addr}; use std::path::{Path, PathBuf}; use std::sync::Arc; use tokio::sync::Mutex; use tokio::task::JoinHandle; +// URL validation (SSRF protection) + /// Validate that a URL is safe for the browser to navigate to. 
/// Blocks private/loopback IPs, link-local addresses, and cloud metadata endpoints /// to prevent server-side request forgery. @@ -48,7 +64,6 @@ fn validate_url(url: &str) -> Result<(), BrowserError> { return Err(BrowserError::new("URL has no host")); }; - // Block cloud metadata endpoints regardless of how the IP resolves if host == "metadata.google.internal" || host == "169.254.169.254" || host == "metadata.aws.internal" @@ -58,7 +73,6 @@ fn validate_url(url: &str) -> Result<(), BrowserError> { )); } - // If the host parses as an IP address, check against blocked ranges if let Ok(ip) = host.parse::() && is_blocked_ip(ip) { @@ -67,7 +81,6 @@ fn validate_url(url: &str) -> Result<(), BrowserError> { ))); } - // IPv6 addresses in brackets (url crate strips them for host_str) if let Some(stripped) = host.strip_prefix('[').and_then(|h| h.strip_suffix(']')) && let Ok(ip) = stripped.parse::() && is_blocked_ip(ip) @@ -80,44 +93,40 @@ fn validate_url(url: &str) -> Result<(), BrowserError> { Ok(()) } -/// Returns true if the IP address belongs to a private, loopback, or -/// link-local range that should not be reachable from the browser tool. 
fn is_blocked_ip(ip: IpAddr) -> bool { match ip { IpAddr::V4(v4) => { - v4.is_loopback() // 127.0.0.0/8 - || v4.is_private() // 10/8, 172.16/12, 192.168/16 - || v4.is_link_local() // 169.254.0.0/16 - || v4.is_broadcast() // 255.255.255.255 - || v4.is_unspecified() // 0.0.0.0 - || is_v4_cgnat(v4) // 100.64.0.0/10 + v4.is_loopback() + || v4.is_private() + || v4.is_link_local() + || v4.is_broadcast() + || v4.is_unspecified() + || is_v4_cgnat(v4) } IpAddr::V6(v6) => { - v6.is_loopback() // ::1 - || v6.is_unspecified() // :: - || is_v6_unique_local(v6) // fd00::/8 (fc00::/7) - || is_v6_link_local(v6) // fe80::/10 - || is_v4_mapped_blocked(v6) + v6.is_loopback() + || v6.is_unspecified() + || is_v6_unique_local(v6) + || is_v6_link_local(v6) + || is_v4_mapped_blocked(v6) } } } fn is_v4_cgnat(ip: Ipv4Addr) -> bool { let octets = ip.octets(); - octets[0] == 100 && (octets[1] & 0xC0) == 64 // 100.64.0.0/10 + octets[0] == 100 && (octets[1] & 0xC0) == 64 } -fn is_v6_unique_local(ip: Ipv6Addr) -> bool { - (ip.segments()[0] & 0xFE00) == 0xFC00 // fc00::/7 +fn is_v6_unique_local(ip: std::net::Ipv6Addr) -> bool { + (ip.segments()[0] & 0xFE00) == 0xFC00 } -fn is_v6_link_local(ip: Ipv6Addr) -> bool { - (ip.segments()[0] & 0xFFC0) == 0xFE80 // fe80::/10 +fn is_v6_link_local(ip: std::net::Ipv6Addr) -> bool { + (ip.segments()[0] & 0xFFC0) == 0xFE80 } -/// Check if an IPv6 address is a v4-mapped address (::ffff:x.x.x.x) -/// pointing to a blocked IPv4 range. -fn is_v4_mapped_blocked(ip: Ipv6Addr) -> bool { +fn is_v4_mapped_blocked(ip: std::net::Ipv6Addr) -> bool { if let Some(v4) = ip.to_ipv4_mapped() { is_blocked_ip(IpAddr::V4(v4)) } else { @@ -125,6 +134,480 @@ fn is_v4_mapped_blocked(ip: Ipv6Addr) -> bool { } } +// Page readiness helper + +/// Wait for the page to be "ready enough" for DOM extraction. +/// +/// Many sites are SPAs that load a shell document and then hydrate with JS. 
+/// `chromiumoxide::Page::goto` returns after the main-frame load event, but +/// interactive content may not exist yet. This helper polls the page's +/// `document.readyState` and body child count to detect when content has +/// rendered, with a generous timeout so we don't block forever on broken pages. +async fn wait_for_page_ready(page: &chromiumoxide::Page) { + const POLL_INTERVAL: std::time::Duration = std::time::Duration::from_millis(150); + const MAX_WAIT: std::time::Duration = std::time::Duration::from_secs(3); + + let start = std::time::Instant::now(); + + loop { + if start.elapsed() >= MAX_WAIT { + break; + } + + // Check if the page has meaningful content: readyState is "complete" + // and the body has at least one child element. + let ready = page + .evaluate( + "(document.readyState === 'complete' && \ + document.body && document.body.children.length > 0)", + ) + .await + .ok() + .and_then(|result| result.value().cloned()) + .and_then(|value| value.as_bool()) + .unwrap_or(false); + + if ready { + // Give JS frameworks one more tick to finish hydration. + tokio::time::sleep(std::time::Duration::from_millis(200)).await; + break; + } + + tokio::time::sleep(POLL_INTERVAL).await; + } +} + +// Accessibility snapshot types (CDP-native) + +/// Roles that represent interactive elements the LLM can target via index. +/// Only nodes with these roles get assigned an index in the snapshot. +const INTERACTIVE_ROLES: &[&str] = &[ + "button", + "checkbox", + "combobox", + "link", + "menuitem", + "menuitemcheckbox", + "menuitemradio", + "option", + "radio", + "searchbox", + "slider", + "spinbutton", + "switch", + "tab", + "textbox", + "treeitem", +]; + +/// Roles that are structural noise and should be skipped when they have no +/// name and no interactive descendants. Keeps the YAML output compact. 
+const SKIP_UNNAMED_ROLES: &[&str] = &[ + "generic", + "none", + "presentation", + "InlineTextBox", + "LineBreak", +]; + +/// A snapshot of the page's accessibility tree, extracted via CDP. +/// +/// Contains a tree of `SnapshotNode`s for LLM display and a parallel map from +/// element indices to `BackendNodeId` for interaction. +#[derive(Debug, Clone)] +pub(crate) struct AxSnapshot { + /// Root nodes of the tree (usually one, but CDP can return multiple roots). + pub roots: Vec, + /// `BackendNodeId` for each indexed interactive element. + /// `node_ids[i]` corresponds to the element with `index == i` in the tree. + pub node_ids: Vec, +} + +/// A node in the processed accessibility tree, ready for rendering. +#[derive(Debug, Clone)] +pub(crate) struct SnapshotNode { + pub role: String, + pub name: String, + /// Sequential index assigned to interactive elements. `None` for structural nodes. + pub index: Option, + pub children: Vec, + // State properties + pub checked: Option, + pub disabled: bool, + pub expanded: Option, + pub selected: bool, + pub level: Option, + pub pressed: Option, + pub value: Option, + pub description: Option, +} + +impl AxSnapshot { + /// Build from a flat CDP `AxNode` list by reconstructing the tree and + /// assigning sequential indices to interactive elements. + fn from_ax_nodes(nodes: Vec) -> Self { + // Build a lookup from AxNodeId → index in the flat list. + let id_to_idx: HashMap = nodes + .iter() + .enumerate() + .map(|(i, node)| (node.node_id.inner().clone(), i)) + .collect(); + + // Track which nodes are roots (no parent) or whose parent_id doesn't + // exist in the tree (happens with frame roots). + let root_indices: Vec = nodes + .iter() + .enumerate() + .filter(|(_, node)| { + node.parent_id + .as_ref() + .is_none_or(|pid| !id_to_idx.contains_key(pid.inner())) + }) + .map(|(i, _)| i) + .collect(); + + let mut next_index: usize = 0; + let mut node_ids: Vec = Vec::new(); + + // Collect non-ignored descendants of an ignored node. 
The CDP AX + // tree marks many structural nodes as ignored, but their children may + // be interactive (button inside an ignored generic div). We must walk + // through ignored nodes to reach them. + fn collect_children_of_ignored( + flat: &[AxNode], + idx: usize, + id_to_idx: &HashMap, + next_index: &mut usize, + node_ids: &mut Vec, + ) -> Vec { + let ax = &flat[idx]; + let mut result = Vec::new(); + if let Some(child_ids) = &ax.child_ids { + for child_id in child_ids { + if let Some(&child_idx) = id_to_idx.get(child_id.inner()) { + if flat[child_idx].ignored { + // Recursively collect through chains of ignored nodes. + result.extend(collect_children_of_ignored( + flat, child_idx, id_to_idx, next_index, node_ids, + )); + } else if let Some(child_node) = + build_node(flat, child_idx, id_to_idx, next_index, node_ids) + { + result.push(child_node); + } + } + } + } + result + } + + // Recursive tree builder + fn build_node( + flat: &[AxNode], + idx: usize, + id_to_idx: &HashMap, + next_index: &mut usize, + node_ids: &mut Vec, + ) -> Option { + let ax = &flat[idx]; + + // Ignored nodes aren't rendered, but their children may contain + // non-ignored interactive elements. Handled by the caller via + // `collect_children_of_ignored`. + if ax.ignored { + return None; + } + + let role = ax + .role + .as_ref() + .and_then(|v| v.value.as_ref()) + .and_then(|v| v.as_str()) + .unwrap_or("unknown") + .to_string(); + + let name = ax + .name + .as_ref() + .and_then(|v| v.value.as_ref()) + .and_then(|v| v.as_str()) + .unwrap_or("") + .to_string(); + + let description = ax + .description + .as_ref() + .and_then(|v| v.value.as_ref()) + .and_then(|v| v.as_str()) + .map(|s| s.to_string()) + .filter(|s| !s.is_empty()); + + let value_text = ax + .value + .as_ref() + .and_then(|v| v.value.as_ref()) + .and_then(|v| v.as_str()) + .map(|s| s.to_string()) + .filter(|s| !s.is_empty()); + + // Extract state properties from the properties array. 
+ let mut checked = None; + let mut disabled = false; + let mut expanded = None; + let mut selected = false; + let mut level = None; + let mut pressed = None; + + if let Some(properties) = &ax.properties { + for prop in properties { + match prop.name { + AxPropertyName::Checked => { + checked = prop + .value + .value + .as_ref() + .and_then(|v| v.as_str()) + .map(|s| s.to_string()); + } + AxPropertyName::Disabled => { + disabled = prop + .value + .value + .as_ref() + .and_then(|v| v.as_bool()) + .unwrap_or(false); + } + AxPropertyName::Expanded => { + expanded = prop.value.value.as_ref().and_then(|v| v.as_bool()); + } + AxPropertyName::Selected => { + selected = prop + .value + .value + .as_ref() + .and_then(|v| v.as_bool()) + .unwrap_or(false); + } + AxPropertyName::Level => { + level = prop.value.value.as_ref().and_then(|v| v.as_i64()); + } + AxPropertyName::Pressed => { + pressed = prop + .value + .value + .as_ref() + .and_then(|v| v.as_str()) + .map(|s| s.to_string()); + } + _ => {} + } + } + } + + // Build children recursively. Ignored children are transparent — + // we walk through them to collect their non-ignored descendants. + let mut children = Vec::new(); + if let Some(child_ids) = &ax.child_ids { + for child_id in child_ids { + if let Some(&child_idx) = id_to_idx.get(child_id.inner()) { + if flat[child_idx].ignored { + children.extend(collect_children_of_ignored( + flat, child_idx, id_to_idx, next_index, node_ids, + )); + } else if let Some(child_node) = + build_node(flat, child_idx, id_to_idx, next_index, node_ids) + { + children.push(child_node); + } + } + } + } + + // Assign an index to interactive elements that have a BackendNodeId. 
+ let role_lower = role.to_lowercase(); + let is_interactive = INTERACTIVE_ROLES.contains(&role_lower.as_str()); + let element_index = + if let Some(backend_node_id) = ax.backend_dom_node_id.filter(|_| is_interactive) { + let idx = *next_index; + *next_index += 1; + node_ids.push(backend_node_id); + Some(idx) + } else { + None + }; + + // Skip structural nodes that have no name, no index, and only serve + // as tree wrappers — but only if they have exactly one child (pass + // through) or zero children (prune entirely). + if SKIP_UNNAMED_ROLES.contains(&role.as_str()) + && element_index.is_none() + && name.is_empty() + { + match children.len() { + 0 => return None, + 1 => return Some(children.into_iter().next().expect("len checked")), + _ => { + // Multiple children — keep the node as a container + // but only if children are non-trivial. + } + } + } + + Some(SnapshotNode { + role, + name, + index: element_index, + children, + checked, + disabled, + expanded, + selected, + level, + pressed, + value: value_text, + description, + }) + } + + let mut roots = Vec::new(); + for &root_idx in &root_indices { + if nodes[root_idx].ignored { + // Root itself is ignored — collect its non-ignored descendants. + roots.extend(collect_children_of_ignored( + &nodes, + root_idx, + &id_to_idx, + &mut next_index, + &mut node_ids, + )); + } else if let Some(node) = + build_node(&nodes, root_idx, &id_to_idx, &mut next_index, &mut node_ids) + { + roots.push(node); + } + } + + Self { roots, node_ids } + } + + /// Render the accessibility tree as compact YAML-like text for LLM consumption. + pub fn render(&self) -> String { + let mut output = String::with_capacity(4096); + for root in &self.roots { + render_snapshot_node(root, 0, &mut output); + } + output + } + + /// Look up the `BackendNodeId` for an element index. + pub fn backend_node_id_for_index(&self, index: usize) -> Option { + self.node_ids.get(index).copied() + } + + /// Total number of indexed interactive elements. 
+ pub fn element_count(&self) -> usize { + self.node_ids.len() + } +} + +fn render_snapshot_node(node: &SnapshotNode, depth: usize, output: &mut String) { + let indent = " ".repeat(depth); + + // Skip the root "RootWebArea" wrapper — just render children directly. + if node.role == "RootWebArea" || node.role == "rootWebArea" { + for child in &node.children { + render_snapshot_node(child, depth, output); + } + return; + } + + // Build the line: `- role "name" [attrs]:` + output.push_str(&indent); + output.push_str("- "); + output.push_str(&node.role); + + if !node.name.is_empty() { + output.push_str(" \""); + // Truncate very long names for context efficiency. + let display_name = if node.name.len() > 200 { + format!("{}...", &node.name[..200]) + } else { + node.name.clone() + }; + output.push_str(&display_name.replace('"', "\\\"")); + output.push('"'); + } + + // Attributes + if let Some(index) = node.index { + output.push_str(&format!(" [index={index}]")); + } + if let Some(level) = node.level { + output.push_str(&format!(" [level={level}]")); + } + if node.disabled { + output.push_str(" [disabled]"); + } + if node.selected { + output.push_str(" [selected]"); + } + if let Some(ref checked) = node.checked { + match checked.as_str() { + "true" => output.push_str(" [checked]"), + "false" => output.push_str(" [unchecked]"), + "mixed" => output.push_str(" [checked=mixed]"), + _ => {} + } + } + if let Some(ref pressed) = node.pressed { + match pressed.as_str() { + "true" => output.push_str(" [pressed]"), + "mixed" => output.push_str(" [pressed=mixed]"), + _ => {} + } + } + if let Some(true) = node.expanded { + output.push_str(" [expanded]"); + } else if let Some(false) = node.expanded { + output.push_str(" [collapsed]"); + } + + // Value (e.g., text input current value) + if let Some(ref value) = node.value { + let display_value = if value.len() > 100 { + format!("{}...", &value[..100]) + } else { + value.clone() + }; + output.push_str(&format!( + " value=\"{}\"", + 
display_value.replace('"', "\\\"") + )); + } + + let has_children = !node.children.is_empty(); + let has_description = node.description.is_some(); + + if has_children || has_description { + output.push_str(":\n"); + } else { + output.push('\n'); + } + + if let Some(ref description) = node.description { + output.push_str(&indent); + output.push_str(" /description: "); + output.push_str(description); + output.push('\n'); + } + + for child in &node.children { + render_snapshot_node(child, depth + 1, output); + } +} + +// Shared browser state + /// Opaque handle to shared browser state that persists across worker lifetimes. /// /// Held by `RuntimeConfig` when `persist_session = true`. All workers for the @@ -136,38 +619,22 @@ pub fn new_shared_browser_handle() -> SharedBrowserHandle { Arc::new(Mutex::new(BrowserState::new())) } -/// Tool for browser automation (worker-only). -#[derive(Debug, Clone)] -pub struct BrowserTool { - state: Arc>, - config: BrowserConfig, - screenshot_dir: PathBuf, -} - /// Internal browser state managed across tool invocations. /// /// When `persist_session` is enabled this struct lives in `RuntimeConfig` (via /// `SharedBrowserHandle`) and is shared across worker lifetimes. Otherwise each -/// `BrowserTool` owns its own instance. +/// tool set owns its own instance. pub struct BrowserState { browser: Option, - /// Background task driving the CDP WebSocket handler. _handler_task: Option>, - /// Tracked pages by target ID. pages: HashMap, - /// Currently active page target ID. active_target: Option, - /// Element ref map from the last snapshot, keyed by ref like "e1". - element_refs: HashMap, - /// Counter for generating element refs. - next_ref: usize, - /// Chrome's user data directory. For ephemeral sessions this is a random - /// temp dir cleaned up on drop. For persistent sessions this is a stable - /// path under the instance dir that survives agent restarts. + /// Cached accessibility snapshot from the last `browser_snapshot` call. 
+ /// Invalidated on navigation, tab switch, and explicit snapshot refresh. + snapshot: Option, user_data_dir: Option, /// When true, `user_data_dir` is a stable path that should NOT be deleted - /// on drop — it holds cookies, localStorage, and login sessions that must - /// survive across agent restarts. + /// on drop — it holds cookies, localStorage, and login sessions. persistent_profile: bool, } @@ -178,28 +645,27 @@ impl BrowserState { _handler_task: None, pages: HashMap::new(), active_target: None, - element_refs: HashMap::new(), - next_ref: 0, + snapshot: None, user_data_dir: None, persistent_profile: false, } } + + /// Invalidate the cached snapshot. Called after any page-mutating action. + fn invalidate_snapshot(&mut self) { + self.snapshot = None; + } } impl Drop for BrowserState { fn drop(&mut self) { - // Browser and handler task are dropped automatically — - // chromiumoxide's Child has kill_on_drop(true). - - // Persistent profiles store cookies, localStorage, and login sessions - // that must survive across agent restarts — never delete them. + // Persistent profiles store cookies and login sessions that must + // survive across agent restarts — never delete them. if self.persistent_profile { return; } if let Some(dir) = self.user_data_dir.take() { - // Offload sync fs cleanup to a blocking thread so we don't stall - // the tokio worker that's dropping this state. if let Ok(handle) = tokio::runtime::Handle::try_current() { handle.spawn_blocking(move || { if let Err(error) = std::fs::remove_dir_all(&dir) { @@ -210,14 +676,11 @@ impl Drop for BrowserState { ); } }); - } else { - // Dropped outside a tokio runtime (unlikely) — clean up inline. 
- if let Err(error) = std::fs::remove_dir_all(&dir) { - eprintln!( - "failed to clean up browser user data dir {}: {error}", - dir.display() - ); - } + } else if let Err(error) = std::fs::remove_dir_all(&dir) { + eprintln!( + "failed to clean up browser user data dir {}: {error}", + dir.display() + ); } } } @@ -229,53 +692,14 @@ impl std::fmt::Debug for BrowserState { .field("has_browser", &self.browser.is_some()) .field("pages", &self.pages.len()) .field("active_target", &self.active_target) - .field("element_refs", &self.element_refs.len()) + .field("has_snapshot", &self.snapshot.is_some()) .field("persistent_profile", &self.persistent_profile) .finish() } } -/// Stored info about an element from the accessibility tree snapshot. -#[derive(Debug, Clone)] -#[allow(dead_code)] -struct ElementRef { - role: String, - name: Option, - description: Option, - /// The AX node ID from the accessibility tree. - ax_node_id: String, - backend_node_id: Option, -} - -impl BrowserTool { - /// Create a tool with its own isolated browser state (default, non-persistent). - pub fn new(config: BrowserConfig, screenshot_dir: PathBuf) -> Self { - Self { - state: Arc::new(Mutex::new(BrowserState::new())), - config, - screenshot_dir, - } - } - - /// Create a tool backed by a shared browser state handle. - /// - /// Used when `persist_session = true`. Multiple workers share the same - /// `SharedBrowserHandle`, so the browser process and tabs survive across - /// worker lifetimes. - pub fn new_shared( - shared_state: SharedBrowserHandle, - config: BrowserConfig, - screenshot_dir: PathBuf, - ) -> Self { - Self { - state: shared_state, - config, - screenshot_dir, - } - } -} +// Error type -/// Error type for browser tool operations. #[derive(Debug, thiserror::Error)] #[error("Browser error: {message}")] pub struct BrowserError { @@ -290,104 +714,24 @@ impl BrowserError { } } -/// The action to perform. 
-#[derive(Debug, Clone, Deserialize, JsonSchema)] -#[serde(rename_all = "snake_case")] -pub enum BrowserAction { - /// Launch the browser. Must be called before any other action. - Launch, - /// Navigate the active tab to a URL. - Navigate, - /// Open a new tab, optionally at a URL. - Open, - /// List all open tabs. - Tabs, - /// Focus a tab by its target ID. - Focus, - /// Close a tab by its target ID (or the active tab if omitted). - CloseTab, - /// Get an accessibility tree snapshot of the active page with element refs. - Snapshot, - /// Perform an interaction on an element by ref. - Act, - /// Take a screenshot of the active page or a specific element. - Screenshot, - /// Evaluate JavaScript in the active page. - Evaluate, - /// Get the page HTML content. - Content, - /// Shut down the browser. - Close, -} - -/// The kind of interaction to perform via the `act` action. -#[derive(Debug, Clone, Deserialize, JsonSchema)] -#[serde(rename_all = "snake_case")] -pub enum ActKind { - /// Click an element by ref. - Click, - /// Type text into an element by ref. - Type, - /// Press a keyboard key (e.g., "Enter", "Tab", "Escape"). - PressKey, - /// Hover over an element by ref. - Hover, - /// Scroll an element into the viewport by ref. - ScrollIntoView, - /// Focus an element by ref. - Focus, -} - -/// Arguments for the browser tool. -#[derive(Debug, Deserialize, JsonSchema)] -pub struct BrowserArgs { - /// The action to perform. - pub action: BrowserAction, - /// URL for navigate/open actions. - pub url: Option, - /// Target ID for focus/close_tab actions. - pub target_id: Option, - /// Element reference (e.g., "e3") for act/screenshot actions. - pub element_ref: Option, - /// Kind of interaction for the act action. - pub act_kind: Option, - /// Text to type for act:type. - pub text: Option, - /// Key to press for act:press_key (e.g., "Enter", "Tab"). - pub key: Option, - /// Whether to take a full-page screenshot. 
- #[serde(default)] - pub full_page: bool, - /// JavaScript expression to evaluate. - pub script: Option, -} +// Common output type -/// Output from the browser tool. #[derive(Debug, Serialize)] pub struct BrowserOutput { - /// Whether the action succeeded. pub success: bool, - /// Human-readable result message. pub message: String, - /// Page title (when available). #[serde(skip_serializing_if = "Option::is_none")] pub title: Option, - /// Current URL (when available). #[serde(skip_serializing_if = "Option::is_none")] pub url: Option, - /// Element snapshot data from the accessibility tree. #[serde(skip_serializing_if = "Option::is_none")] - pub elements: Option>, - /// List of open tabs. + pub snapshot: Option, #[serde(skip_serializing_if = "Option::is_none")] pub tabs: Option>, - /// Path to saved screenshot file. #[serde(skip_serializing_if = "Option::is_none")] pub screenshot_path: Option, - /// JavaScript evaluation result. #[serde(skip_serializing_if = "Option::is_none")] pub eval_result: Option, - /// Page HTML content. #[serde(skip_serializing_if = "Option::is_none")] pub content: Option, } @@ -399,7 +743,7 @@ impl BrowserOutput { message: message.into(), title: None, url: None, - elements: None, + snapshot: None, tabs: None, screenshot_path: None, eval_result: None, @@ -414,25 +758,6 @@ impl BrowserOutput { } } -/// Summary of an interactive element from the accessibility tree. -#[derive(Debug, Clone, Serialize)] -pub struct ElementSummary { - /// Short ref like "e1", "e2" for use in subsequent act calls. - pub ref_id: String, - /// ARIA role (e.g., "button", "textbox", "link"). - pub role: String, - /// Accessible name. - #[serde(skip_serializing_if = "Option::is_none")] - pub name: Option, - /// Accessible description. - #[serde(skip_serializing_if = "Option::is_none")] - pub description: Option, - /// Current value (for inputs, sliders, etc.). 
- #[serde(skip_serializing_if = "Option::is_none")] - pub value: Option, -} - -/// Info about an open browser tab. #[derive(Debug, Clone, Serialize)] pub struct TabInfo { pub target_id: String, @@ -441,141 +766,333 @@ pub struct TabInfo { pub active: bool, } -/// Roles that are interactive and worth assigning refs to. -const INTERACTIVE_ROLES: &[&str] = &[ - "button", - "checkbox", - "combobox", - "link", - "listbox", - "menu", - "menubar", - "menuitem", - "menuitemcheckbox", - "menuitemradio", - "option", - "radio", - "scrollbar", - "searchbox", - "slider", - "spinbutton", - "switch", - "tab", - "textbox", - "treeitem", -]; +// Shared helper struct that all tools reference -/// Max elements to assign refs to in a single snapshot. -const MAX_ELEMENT_REFS: usize = 200; +/// Shared context cloned into each browser tool. Holds the browser state mutex, +/// config, screenshot directory, and optional secrets store for secure text entry. +#[derive(Debug, Clone)] +pub(crate) struct BrowserContext { + state: Arc>, + config: BrowserConfig, + screenshot_dir: PathBuf, + /// Secrets store for resolving secret names in `browser_type`. When present, + /// the `secret` parameter can look up credential values without exposing + /// them in tool arguments or output. + secrets: Option>, +} -impl Tool for BrowserTool { - const NAME: &'static str = "browser"; +impl BrowserContext { + fn new( + state: Arc>, + config: BrowserConfig, + screenshot_dir: PathBuf, + secrets: Option>, + ) -> Self { + Self { + state, + config, + screenshot_dir, + secrets, + } + } - type Error = BrowserError; - type Args = BrowserArgs; - type Output = BrowserOutput; + /// Get the active page or return an error. Does NOT hold the lock — caller + /// must pass a reference to the already-locked state. 
+ fn require_active_page<'a>( + &self, + state: &'a BrowserState, + ) -> Result<&'a chromiumoxide::Page, BrowserError> { + let target = state + .active_target + .as_ref() + .ok_or_else(|| BrowserError::new("no active tab — navigate or open a tab first"))?; + state + .pages + .get(target) + .ok_or_else(|| BrowserError::new("active tab no longer exists")) + } - async fn definition(&self, _prompt: String) -> ToolDefinition { - ToolDefinition { - name: Self::NAME.to_string(), - description: crate::prompts::text::get("tools/browser").to_string(), - parameters: serde_json::json!({ - "type": "object", - "properties": { - "action": { - "type": "string", - "enum": ["launch", "navigate", "open", "tabs", "focus", "close_tab", - "snapshot", "act", "screenshot", "evaluate", "content", "close"], - "description": "The browser action to perform" - }, - "url": { - "type": "string", - "description": "URL for navigate/open actions" - }, - "target_id": { - "type": "string", - "description": "Tab target ID for focus/close_tab actions" - }, - "element_ref": { - "type": "string", - "description": "Element reference from snapshot (e.g., \"e3\") for act/screenshot" - }, - "act_kind": { - "type": "string", - "enum": ["click", "type", "press_key", "hover", "scroll_into_view", "focus"], - "description": "Kind of interaction for the act action" - }, - "text": { - "type": "string", - "description": "Text to type for act:type" - }, - "key": { - "type": "string", - "description": "Key to press for act:press_key (e.g., \"Enter\", \"Tab\", \"Escape\")" - }, - "full_page": { - "type": "boolean", - "default": false, - "description": "Take full-page screenshot instead of viewport only" - }, - "script": { - "type": "string", - "description": "JavaScript expression to evaluate (requires evaluate_enabled in config)" - } - }, - "required": ["action"] - }), + /// Extract the accessibility snapshot from the active page via CDP. + /// Caches the result on `BrowserState` so repeated reads don't re-extract. 
+ async fn extract_snapshot<'a>( + &self, + state: &'a mut BrowserState, + ) -> Result<&'a AxSnapshot, BrowserError> { + // The `is_some` + `expect` pattern is intentional — `if let Some(s) = + // state.snapshot.as_ref()` would borrow `state` for `'a`, preventing + // the mutable assignment to `state.snapshot` in the cache-miss path. + #[allow(clippy::unnecessary_unwrap)] + if state.snapshot.is_some() { + return Ok(state.snapshot.as_ref().expect("just checked")); } + + let page = self.require_active_page(state)?; + + let result = page + .execute(GetFullAxTreeParams::default()) + .await + .map_err(|error| { + BrowserError::new(format!("accessibility tree extraction failed: {error}")) + })?; + + let ax_nodes = result.result.nodes; + + tracing::debug!( + node_count = ax_nodes.len(), + "CDP Accessibility.getFullAXTree returned" + ); + + let snapshot = AxSnapshot::from_ax_nodes(ax_nodes); + + tracing::debug!( + interactive_count = snapshot.element_count(), + "accessibility snapshot built" + ); + + state.snapshot = Some(snapshot); + Ok(state.snapshot.as_ref().expect("just stored")) } - async fn call(&self, args: Self::Args) -> Result { - match args.action { - BrowserAction::Launch => self.handle_launch().await, - BrowserAction::Navigate => self.handle_navigate(args.url).await, - BrowserAction::Open => self.handle_open(args.url).await, - BrowserAction::Tabs => self.handle_tabs().await, - BrowserAction::Focus => self.handle_focus(args.target_id).await, - BrowserAction::CloseTab => self.handle_close_tab(args.target_id).await, - BrowserAction::Snapshot => self.handle_snapshot().await, - BrowserAction::Act => { - self.handle_act(args.act_kind, args.element_ref, args.text, args.key) - .await + /// Resolve an element target to a `BackendNodeId` for interaction. + /// + /// When an index is provided, the cached snapshot's `BackendNodeId` map is + /// used. 
When a CSS selector string is provided, it's resolved via + /// `page.find_element()` — this path is a fallback for when the LLM already + /// knows a selector (e.g., `#login_field`). + async fn find_element( + &self, + state: &mut BrowserState, + target: &ElementTarget, + ) -> Result { + match target { + ElementTarget::Index(index) => { + self.extract_snapshot(state).await?; + let snapshot = state.snapshot.as_ref().expect("just extracted"); + + let backend_node_id = + snapshot.backend_node_id_for_index(*index).ok_or_else(|| { + BrowserError::new(format!( + "element index {index} not found — run browser_snapshot to get fresh \ + indices (max index in current snapshot: {})", + snapshot.element_count().saturating_sub(1) + )) + })?; + + Ok(ElementHandle::BackendNode(backend_node_id)) } - BrowserAction::Screenshot => { - self.handle_screenshot(args.element_ref, args.full_page) - .await + ElementTarget::Selector(selector) => { + let page = self.require_active_page(state)?; + let element = page.find_element(selector).await.map_err(|error| { + BrowserError::new(format!( + "element not found via selector '{selector}': {error}. \ + The page may have changed — run browser_snapshot again." + )) + })?; + Ok(ElementHandle::CssElement(element)) } - BrowserAction::Evaluate => self.handle_evaluate(args.script).await, - BrowserAction::Content => self.handle_content().await, - BrowserAction::Close => self.handle_close().await, } } -} -impl BrowserTool { - async fn handle_launch(&self) -> Result { - // Quick check under the lock — if a browser is already running and - // we're in persistent mode, reconnect to existing tabs. + /// Click an element resolved by `find_element`. Uses CDP mouse events for + /// `BackendNodeId` targets (avoids CSS selector fragility) and the + /// chromiumoxide `Element::click()` for CSS selector targets. 
+ async fn click_element( + &self, + state: &BrowserState, + handle: &ElementHandle, + ) -> Result<(), BrowserError> { + match handle { + ElementHandle::BackendNode(backend_node_id) => { + let page = self.require_active_page(state)?; + let (center_x, center_y) = get_element_center(page, *backend_node_id).await?; + + page.execute( + DispatchMouseEventParams::builder() + .r#type(DispatchMouseEventType::MousePressed) + .x(center_x) + .y(center_y) + .button(MouseButton::Left) + .click_count(1) + .build() + .map_err(|error| { + BrowserError::new(format!("failed to build mouse press event: {error}")) + })?, + ) + .await + .map_err(|error| BrowserError::new(format!("mouse press failed: {error}")))?; + + page.execute( + DispatchMouseEventParams::builder() + .r#type(DispatchMouseEventType::MouseReleased) + .x(center_x) + .y(center_y) + .button(MouseButton::Left) + .click_count(1) + .build() + .map_err(|error| { + BrowserError::new(format!( + "failed to build mouse release event: {error}" + )) + })?, + ) + .await + .map_err(|error| BrowserError::new(format!("mouse release failed: {error}")))?; + + Ok(()) + } + ElementHandle::CssElement(element) => { + element + .click() + .await + .map_err(|error| BrowserError::new(format!("click failed: {error}")))?; + Ok(()) + } + } + } + + /// Focus an element and type text into it. Uses CDP `DOM.focus` for + /// `BackendNodeId` targets. 
+ async fn focus_and_type( + &self, + state: &BrowserState, + handle: &ElementHandle, + text: &str, + clear: bool, + ) -> Result<(), BrowserError> { + let page = self.require_active_page(state)?; + + match handle { + ElementHandle::BackendNode(backend_node_id) => { + // Resolve BackendNodeId to RemoteObject so we can call JS on it + let resolve_result = page + .execute(ResolveNodeParams { + backend_node_id: Some(*backend_node_id), + ..Default::default() + }) + .await + .map_err(|error| { + BrowserError::new(format!("failed to resolve node for typing: {error}")) + })?; + + let object_id = resolve_result.result.object.object_id.ok_or_else(|| { + BrowserError::new( + "resolved node has no object_id — element may have been removed", + ) + })?; + + // Use callFunctionOn to focus, validate, clear, and set value + let text_json = serde_json::to_string(text).unwrap_or_default(); + let clear_js = if clear { + "if (this.isContentEditable) { this.textContent = ''; } else { this.value = ''; }" + } else { + "" + }; + + let js = format!( + r#"function() {{ + const tag = this.tagName; + const editable = this.isContentEditable; + if (tag !== 'INPUT' && tag !== 'TEXTAREA' && tag !== 'SELECT' && !editable) {{ + return 'element is a ' + tag.toLowerCase() + ', not a text input'; + }} + this.focus(); + {clear_js} + if (editable) {{ + this.textContent = {text_json}; + }} else {{ + this.value = {text_json}; + }} + this.dispatchEvent(new Event('input', {{bubbles: true}})); + this.dispatchEvent(new Event('change', {{bubbles: true}})); + return null; + }}"#, + ); + + let call_result = page + .execute( + chromiumoxide_cdp::cdp::js_protocol::runtime::CallFunctionOnParams::builder() + .function_declaration(js) + .object_id(object_id) + .return_by_value(true) + .user_gesture(true) + .silent(true) + .build() + .map_err(|error| { + BrowserError::new(format!( + "failed to build CallFunctionOn params: {error}" + )) + })?, + ) + .await + .map_err(|error| { + BrowserError::new(format!("type action 
failed: {error}")) + })?; + + // Check for validation error returned from the JS function. + // The function returns null on success, or an error string. + if let Some(value) = &call_result.result.result.value + && let Some(error_msg) = value.as_str() + { + return Err(BrowserError::new(error_msg.to_string())); + } + + Ok(()) + } + ElementHandle::CssElement(element) => { + // CSS selector path — click to focus, then use JS to type. + element + .click() + .await + .map_err(|error| BrowserError::new(format!("focus failed: {error}")))?; + + let text_json = serde_json::to_string(text).unwrap_or_default(); + let clear_js = if clear { "el.value = '';" } else { "" }; + let js = format!( + r#"(() => {{ + const el = document.activeElement; + if (!el) return 'no focused element after click'; + const tag = el.tagName; + const editable = el.isContentEditable; + if (tag !== 'INPUT' && tag !== 'TEXTAREA' && tag !== 'SELECT' && !editable) {{ + return 'element is a ' + tag.toLowerCase() + ', not a text input'; + }} + if (editable) {{ + {clear_editable} + el.textContent = {text_json}; + }} else {{ + {clear_js} + el.value = {text_json}; + }} + el.dispatchEvent(new Event('input', {{bubbles: true}})); + el.dispatchEvent(new Event('change', {{bubbles: true}})); + return null; + }})()"#, + clear_editable = if clear { "el.textContent = '';" } else { "" }, + ); + + page.evaluate(js) + .await + .map_err(|error| BrowserError::new(format!("type failed: {error}")))?; + + Ok(()) + } + } + } + + /// Launch the browser if not already running. Returns a status message. + async fn ensure_launched(&self) -> Result { { let mut state = self.state.lock().await; if state.browser.is_some() { if self.config.persist_session { return self.reconnect_existing_tabs(&mut state).await; } - return Ok(BrowserOutput::success("Browser already running")); + return Ok("Browser already running".to_string()); } } - // Resolve the Chrome executable path (may download ~150MB on first use): - // 1. 
Explicit config override - // 2. CHROME / CHROME_PATH env vars - // 3. chromiumoxide default detection (system PATH + well-known paths) - // 4. Auto-download via BrowserFetcher (cached in chrome_cache_dir) let executable = resolve_chrome_executable(&self.config).await?; - // Persistent sessions use a stable profile dir under chrome_cache_dir so - // cookies, localStorage, and login sessions survive across agent restarts. - // Ephemeral sessions use a random temp dir to avoid singleton lock collisions. let (user_data_dir, persistent_profile) = if self.config.persist_session { (self.config.chrome_cache_dir.join("profile"), true) } else { @@ -584,13 +1101,33 @@ impl BrowserTool { (dir, false) }; + if persistent_profile { + let lock_file = user_data_dir.join("SingletonLock"); + if lock_file.exists() { + tracing::debug!(path = %lock_file.display(), "removing stale Chrome SingletonLock"); + let _ = std::fs::remove_file(&lock_file); + } + } + let mut builder = ChromeConfig::builder() .no_sandbox() .chrome_executable(&executable) .user_data_dir(&user_data_dir); - if !self.config.headless { - builder = builder.with_head().window_size(1280, 900); + if self.config.headless { + // Headless has no real window — set an explicit viewport so + // screenshots render at a reasonable desktop size instead of + // the chromiumoxide default of 800x600. + builder = builder.viewport(Viewport { + width: 1280, + height: 900, + ..Default::default() + }); + } else { + // Headed mode: disable viewport emulation so the page fills + // the actual window. The default 800x600 viewport constrains + // page content to a smaller area than the window. + builder = builder.with_head().window_size(1280, 900).viewport(None); } let chrome_config = builder.build().map_err(|error| { @@ -610,18 +1147,12 @@ impl BrowserTool { let handler_task = tokio::spawn(async move { while handler.next().await.is_some() {} }); - // Re-acquire the lock only to store state. 
let mut state = self.state.lock().await; - // Guard against a concurrent launch that won the race. + // Guard against concurrent launch race if state.browser.is_some() { - // Another call launched while we were downloading/starting. Clean up - // the browser we just created and return success. drop(browser); handler_task.abort(); - // Clean up the temp user data dir — but only for ephemeral sessions. - // Persistent profiles use a shared stable path that the winner is - // actively using. if !persistent_profile { let dir = user_data_dir; tokio::spawn(async move { @@ -634,11 +1165,10 @@ impl BrowserTool { } }); } - if self.config.persist_session { return self.reconnect_existing_tabs(&mut state).await; } - return Ok(BrowserOutput::success("Browser already running")); + return Ok("Browser already running".to_string()); } state.browser = Some(browser); @@ -647,24 +1177,14 @@ impl BrowserTool { state.persistent_profile = persistent_profile; tracing::info!("browser launched"); - Ok(BrowserOutput::success("Browser launched successfully")) + Ok("Browser launched successfully".to_string()) } /// Discover existing tabs from the browser and rebuild the page map. - /// - /// Called when a worker connects to an already-running persistent browser. - /// Rebuilds `state.pages` from `browser.pages()` so stale entries (tabs - /// closed externally) are pruned. Validates that `active_target` still - /// points to a live page. - /// - /// Note: holds the mutex across CDP calls. `Browser::pages()` and the - /// per-tab title/url queries are quick CDP round-trips, and concurrent - /// browser use during reconnect is rare (workers typically call `launch` - /// once at the start of their run). 
async fn reconnect_existing_tabs( &self, state: &mut BrowserState, - ) -> Result { + ) -> Result { let browser = state .browser .as_ref() @@ -674,7 +1194,6 @@ impl BrowserTool { BrowserError::new(format!("failed to enumerate existing tabs: {error}")) })?; - // Rebuild the page map from the live browser, pruning stale entries. let previous_ids: std::collections::HashSet = state.pages.keys().cloned().collect(); let mut refreshed_pages = HashMap::with_capacity(pages.len()); for page in pages { @@ -685,9 +1204,10 @@ impl BrowserTool { .keys() .filter(|id| !previous_ids.contains(*id)) .count(); + state.pages = refreshed_pages; + state.invalidate_snapshot(); - // Ensure active_target points to a valid page. let active_valid = state .active_target .as_ref() @@ -697,387 +1217,489 @@ impl BrowserTool { } let tab_count = state.pages.len(); - let mut tabs = Vec::with_capacity(tab_count); - for (id, page) in &state.pages { - let title = page.get_title().await.ok().flatten(); - let url = page.url().await.ok().flatten(); - let is_active = state.active_target.as_deref() == Some(id); - tabs.push(TabInfo { - target_id: id.clone(), - url, - title, - active: is_active, - }); - } - tracing::info!(tab_count, discovered, "reconnected to persistent browser"); - Ok(BrowserOutput { - success: true, - message: format!( - "Connected to persistent browser ({tab_count} tab{} open, {discovered} newly discovered)", - if tab_count == 1 { "" } else { "s" } - ), - url: None, - title: None, - elements: None, - tabs: Some(tabs), - screenshot_path: None, - eval_result: None, - content: None, - }) + Ok(format!( + "Connected to persistent browser ({tab_count} tab{} open, {discovered} newly discovered)", + if tab_count == 1 { "" } else { "s" } + )) } +} - async fn handle_navigate(&self, url: Option) -> Result { - let Some(url) = url else { - return Err(BrowserError::new("url is required for navigate action")); - }; - - validate_url(&url)?; +// Tool: browser_launch - let mut state = 
self.state.lock().await; - let page = self.get_or_create_page(&mut state, Some(&url)).await?; +#[derive(Debug, Clone)] +pub struct BrowserLaunchTool { + context: BrowserContext, +} - page.goto(&url) - .await - .map_err(|error| BrowserError::new(format!("navigation failed: {error}")))?; +#[derive(Debug, Deserialize, JsonSchema)] +pub struct BrowserLaunchArgs {} - let title = page.get_title().await.ok().flatten(); - let current_url = page.url().await.ok().flatten(); +impl Tool for BrowserLaunchTool { + const NAME: &'static str = "browser_launch"; + type Error = BrowserError; + type Args = BrowserLaunchArgs; + type Output = BrowserOutput; - // Clear stale element refs on navigation - state.element_refs.clear(); - state.next_ref = 0; + async fn definition(&self, _prompt: String) -> ToolDefinition { + ToolDefinition { + name: Self::NAME.to_string(), + description: "Launch the browser. Must be called before any other browser tool." + .to_string(), + parameters: serde_json::json!({ "type": "object", "properties": {} }), + } + } - Ok( - BrowserOutput::success(format!("Navigated to {url}")) - .with_page_info(title, current_url), - ) + async fn call(&self, _args: Self::Args) -> Result { + let message = self.context.ensure_launched().await?; + Ok(BrowserOutput::success(message)) } +} - async fn handle_open(&self, url: Option) -> Result { - let mut state = self.state.lock().await; - let browser = state - .browser - .as_ref() - .ok_or_else(|| BrowserError::new("browser not launched — call launch first"))?; +// Tool: browser_navigate - let target_url = url.as_deref().unwrap_or("about:blank"); +#[derive(Debug, Clone)] +pub struct BrowserNavigateTool { + context: BrowserContext, +} - if target_url != "about:blank" { - validate_url(target_url)?; +#[derive(Debug, Deserialize, JsonSchema)] +pub struct BrowserNavigateArgs { + /// The URL to navigate to. 
+ pub url: String, +} + +impl Tool for BrowserNavigateTool { + const NAME: &'static str = "browser_navigate"; + type Error = BrowserError; + type Args = BrowserNavigateArgs; + type Output = BrowserOutput; + + async fn definition(&self, _prompt: String) -> ToolDefinition { + ToolDefinition { + name: Self::NAME.to_string(), + description: "Navigate the active tab to a URL. Auto-launches the browser if needed." + .to_string(), + parameters: serde_json::json!({ + "type": "object", + "properties": { + "url": { "type": "string", "description": "The URL to navigate to" } + }, + "required": ["url"] + }), } + } - let page = browser - .new_page(target_url) + async fn call(&self, args: Self::Args) -> Result { + validate_url(&args.url)?; + self.context.ensure_launched().await?; + + let mut state = self.context.state.lock().await; + + // Get or create the active page + let page = get_or_create_page(&self.context, &mut state, Some(&args.url)).await?; + + page.goto(&args.url) .await - .map_err(|error| BrowserError::new(format!("failed to open tab: {error}")))?; + .map_err(|error| BrowserError::new(format!("navigation failed: {error}")))?; + + // Wait briefly for SPA content to render. Many sites load a shell via + // the initial HTML then hydrate with JS — without this pause, + // `browser_snapshot` runs before any interactive elements exist. 
+ wait_for_page_ready(page).await; - let target_id = page_target_id(&page); let title = page.get_title().await.ok().flatten(); let current_url = page.url().await.ok().flatten(); + state.invalidate_snapshot(); - state.pages.insert(target_id.clone(), page); - state.active_target = Some(target_id.clone()); + Ok(BrowserOutput::success(format!("Navigated to {}", args.url)) + .with_page_info(title, current_url)) + } +} - // Clear refs when switching pages - state.element_refs.clear(); - state.next_ref = 0; +// Tool: browser_snapshot - Ok(BrowserOutput { - tabs: None, - elements: None, - screenshot_path: None, - eval_result: None, - content: None, - success: true, - message: format!("Opened new tab (target: {target_id})"), - title, - url: current_url, - }) - } +#[derive(Debug, Clone)] +pub struct BrowserSnapshotTool { + context: BrowserContext, +} - async fn handle_tabs(&self) -> Result { - let state = self.state.lock().await; +#[derive(Debug, Deserialize, JsonSchema)] +pub struct BrowserSnapshotArgs {} - let mut tabs = Vec::new(); - for (target_id, page) in &state.pages { - let title = page.get_title().await.ok().flatten(); - let url = page.url().await.ok().flatten(); - let active = state.active_target.as_ref() == Some(target_id); +impl Tool for BrowserSnapshotTool { + const NAME: &'static str = "browser_snapshot"; + type Error = BrowserError; + type Args = BrowserSnapshotArgs; + type Output = BrowserOutput; - tabs.push(TabInfo { - target_id: target_id.clone(), - title, - url, - active, - }); + async fn definition(&self, _prompt: String) -> ToolDefinition { + ToolDefinition { + name: Self::NAME.to_string(), + description: "Get the page's ARIA accessibility tree with numbered element indices. \ + Use the [index=N] values in browser_click, browser_type, etc. When \ + you see password fields or other sensitive inputs, use browser_type \ + with the `secret` parameter (not `text`) to type credentials securely." 
+ .to_string(), + parameters: serde_json::json!({ "type": "object", "properties": {} }), } + } + + async fn call(&self, _args: Self::Args) -> Result { + let mut state = self.context.state.lock().await; + + // Force a fresh snapshot + state.invalidate_snapshot(); + let snapshot = self.context.extract_snapshot(&mut state).await?; + + let rendered = snapshot.render(); + let element_count = snapshot.element_count(); + let title = self + .context + .require_active_page(&state)? + .get_title() + .await + .ok() + .flatten(); + let url = self + .context + .require_active_page(&state)? + .url() + .await + .ok() + .flatten(); - let count = tabs.len(); Ok(BrowserOutput { success: true, - message: format!("{count} tab(s) open"), - title: None, - url: None, - elements: None, - tabs: Some(tabs), + message: format!("{element_count} interactive element(s) found"), + title, + url, + snapshot: Some(rendered), + tabs: None, screenshot_path: None, eval_result: None, content: None, }) } +} - async fn handle_focus(&self, target_id: Option) -> Result { - let Some(target_id) = target_id else { - return Err(BrowserError::new("target_id is required for focus action")); - }; +// Shared element targeting — tools accept either an index or a CSS selector. - let mut state = self.state.lock().await; +/// A resolved element handle — either a CDP `BackendNodeId` (from snapshot +/// index lookup) or a chromiumoxide `Element` (from CSS selector query). +enum ElementHandle { + BackendNode(BackendNodeId), + CssElement(chromiumoxide::Element), +} - if !state.pages.contains_key(&target_id) { - return Err(BrowserError::new(format!( - "no tab with target_id '{target_id}'" - ))); - } +/// Resolved element target for click/type/press_key tools. +enum ElementTarget { + Index(usize), + Selector(String), +} - state.active_target = Some(target_id.clone()); - state.element_refs.clear(); - state.next_ref = 0; +impl ElementTarget { + /// Build from optional index + optional selector args. 
+ /// At least one must be provided; `selector` wins if both are present. + fn from_args(index: Option, selector: Option) -> Result { + match (selector, index) { + (Some(sel), _) if !sel.is_empty() => Ok(Self::Selector(sel)), + (_, Some(idx)) => Ok(Self::Index(idx)), + _ => Err(BrowserError::new( + "provide either `index` (from browser_snapshot) or `selector` (CSS selector)", + )), + } + } - Ok(BrowserOutput::success(format!("Focused tab {target_id}"))) + fn display(&self) -> String { + match self { + Self::Index(i) => format!("index {i}"), + Self::Selector(s) => format!("selector '{s}'"), + } } +} - async fn handle_close_tab( - &self, - target_id: Option, - ) -> Result { - let mut state = self.state.lock().await; +// Tool: browser_click - let id = target_id - .or_else(|| state.active_target.clone()) - .ok_or_else(|| BrowserError::new("no active tab to close"))?; +#[derive(Debug, Clone)] +pub struct BrowserClickTool { + context: BrowserContext, +} - let page = state - .pages - .remove(&id) - .ok_or_else(|| BrowserError::new(format!("no tab with target_id '{id}'")))?; +#[derive(Debug, Deserialize, JsonSchema)] +pub struct BrowserClickArgs { + /// The element index from the snapshot (e.g., 5). + pub index: Option, + /// CSS selector to target directly (e.g., "#login_field"). Use this when + /// you know the selector — it's more reliable than index for dynamic pages. 
+ pub selector: Option, +} - page.close() - .await - .map_err(|error| BrowserError::new(format!("failed to close tab: {error}")))?; +impl Tool for BrowserClickTool { + const NAME: &'static str = "browser_click"; + type Error = BrowserError; + type Args = BrowserClickArgs; + type Output = BrowserOutput; - if state.active_target.as_ref() == Some(&id) { - state.active_target = state.pages.keys().next().cloned(); + async fn definition(&self, _prompt: String) -> ToolDefinition { + ToolDefinition { + name: Self::NAME.to_string(), + description: "Click an element by index (from browser_snapshot) or CSS selector." + .to_string(), + parameters: serde_json::json!({ + "type": "object", + "properties": { + "index": { "type": "integer", "description": "Element index from snapshot" }, + "selector": { "type": "string", "description": "CSS selector (e.g. \"#my-button\", \"button.submit\")" } + } + }), } + } - state.element_refs.clear(); - state.next_ref = 0; + async fn call(&self, args: Self::Args) -> Result { + let target = ElementTarget::from_args(args.index, args.selector)?; + let label = target.display(); - Ok(BrowserOutput::success(format!("Closed tab {id}"))) + let mut state = self.context.state.lock().await; + let handle = self.context.find_element(&mut state, &target).await?; + + self.context.click_element(&state, &handle).await?; + + // Clicks often trigger navigation or DOM changes — give the page a + // moment to settle before the next snapshot. 
+ let page = self.context.require_active_page(&state)?; + wait_for_page_ready(page).await; + + state.invalidate_snapshot(); + + Ok(BrowserOutput::success(format!( + "Clicked element at {label}" + ))) } +} - async fn handle_snapshot(&self) -> Result { - let mut state = self.state.lock().await; - let page = self.require_active_page(&state)?.clone(); +// Tool: browser_type - // Enable accessibility domain if not already enabled - page.execute(AxEnableParams::default()) - .await - .map_err(|error| { - BrowserError::new(format!("failed to enable accessibility: {error}")) - })?; +#[derive(Debug, Clone)] +pub struct BrowserTypeTool { + context: BrowserContext, +} - let ax_tree = page - .execute(GetFullAxTreeParams::default()) - .await - .map_err(|error| { - BrowserError::new(format!("failed to get accessibility tree: {error}")) - })?; +#[derive(Debug, Deserialize, JsonSchema)] +pub struct BrowserTypeArgs { + /// The element index from the snapshot. + pub index: Option, + /// CSS selector to target directly (e.g., "#login_field", "input[name='email']"). + pub selector: Option, + /// The text to type into the element. Mutually exclusive with `secret`. + /// Do NOT put secret values (passwords, tokens, API keys) here — they will + /// appear in tool output. Use the `secret` parameter instead. + pub text: Option, + /// Name of a secret from the secret store to type into the element. + /// The secret value is resolved server-side and never appears in tool + /// arguments, output, or LLM context. Use this for passwords, tokens, + /// API keys, and any other sensitive values. Mutually exclusive with `text`. + pub secret: Option, + /// Whether to clear the field before typing. Defaults to true. 
+ #[serde(default = "default_true")] + pub clear: bool, +} + +fn default_true() -> bool { + true +} - state.element_refs.clear(); - state.next_ref = 0; +impl Tool for BrowserTypeTool { + const NAME: &'static str = "browser_type"; + type Error = BrowserError; + type Args = BrowserTypeArgs; + type Output = BrowserOutput; - let mut elements = Vec::new(); + async fn definition(&self, _prompt: String) -> ToolDefinition { + ToolDefinition { + name: Self::NAME.to_string(), + description: "Type text into an input element by index (from browser_snapshot) or \ + CSS selector. Provide either `text` for plain text or `secret` for \ + sensitive values (passwords, tokens, API keys). When using `secret`, \ + pass the secret name (e.g. \"GH_PASSWORD\") — the value is resolved \ + from the secret store and typed without ever appearing in tool \ + arguments or output. NEVER put passwords or credentials in the `text` \ + parameter — always use `secret` for sensitive values." + .to_string(), + parameters: serde_json::json!({ + "type": "object", + "properties": { + "index": { "type": "integer", "description": "Element index from snapshot" }, + "selector": { "type": "string", "description": "CSS selector (e.g. \"#login_field\", \"input[name='email']\")" }, + "text": { "type": "string", "description": "Plain text to type. Do NOT use for passwords or secrets — use the `secret` parameter instead." }, + "secret": { "type": "string", "description": "Name of a secret from the secret store (e.g. \"GH_PASSWORD\", \"NPM_TOKEN\"). The value is resolved securely and never appears in output." 
}, + "clear": { "type": "boolean", "default": true, "description": "Clear the field before typing (default true)" } + } + }), + } + } - for node in &ax_tree.result.nodes { - if node.ignored { - continue; + async fn call(&self, args: Self::Args) -> Result { + let target = ElementTarget::from_args(args.index, args.selector)?; + let label = target.display(); + + // Resolve the text to type: either from `secret` (secure) or `text` (plain). + let (text_value, is_secret) = match (&args.secret, &args.text) { + (Some(secret_name), None) => { + let store = self.context.secrets.as_ref().ok_or_else(|| { + BrowserError::new( + "secret store is not available — secrets cannot be resolved. \ + Add the secret via the API or use the `text` parameter instead.", + ) + })?; + let decrypted = store.get(secret_name).map_err(|error| { + BrowserError::new(format!("failed to resolve secret '{secret_name}': {error}")) + })?; + (decrypted.expose().to_string(), true) + } + (None, Some(text)) => (text.clone(), false), + (Some(_), Some(_)) => { + return Err(BrowserError::new( + "`text` and `secret` are mutually exclusive — provide one or the other", + )); } + (None, None) => { + return Err(BrowserError::new( + "provide either `text` (plain text) or `secret` (secret name from the \ + secret store) to type into the element", + )); + } + }; - let role = extract_ax_value_string(&node.role); - let Some(role) = role else { continue }; + let mut state = self.context.state.lock().await; + let handle = self.context.find_element(&mut state, &target).await?; - let role_lower = role.to_lowercase(); - let is_interactive = INTERACTIVE_ROLES.contains(&role_lower.as_str()); + self.context + .focus_and_type(&state, &handle, &text_value, args.clear) + .await?; - if !is_interactive { - continue; - } + state.invalidate_snapshot(); - if state.next_ref >= MAX_ELEMENT_REFS { - break; - } + // Secret values must never appear in tool output. 
+ let message = if is_secret { + format!( + "Typed secret '{}' into element at {label}", + args.secret.as_deref().unwrap_or("unknown") + ) + } else { + let display_text = if text_value.len() > 50 { + format!("{}...", &text_value[..50]) + } else { + text_value + }; + format!("Typed '{display_text}' into element at {label}") + }; - let name = extract_ax_value_string(&node.name); - let description = extract_ax_value_string(&node.description); - let value = extract_ax_value_string(&node.value); - let backend_node_id = node.backend_dom_node_id.as_ref().map(|id| *id.inner()); - - let ref_id = format!("e{}", state.next_ref); - state.next_ref += 1; - - state.element_refs.insert( - ref_id.clone(), - ElementRef { - role: role.clone(), - name: name.clone(), - description: description.clone(), - ax_node_id: node.node_id.inner().clone(), - backend_node_id, - }, - ); + Ok(BrowserOutput::success(message)) + } +} - elements.push(ElementSummary { - ref_id, - role, - name, - description, - value, - }); - } +// Tool: browser_press_key - let title = page.get_title().await.ok().flatten(); - let url = page.url().await.ok().flatten(); - let count = elements.len(); +#[derive(Debug, Clone)] +pub struct BrowserPressKeyTool { + context: BrowserContext, +} - Ok(BrowserOutput { - success: true, - message: format!("{count} interactive element(s) found"), - title, - url, - elements: Some(elements), - tabs: None, - screenshot_path: None, - eval_result: None, - content: None, - }) +#[derive(Debug, Deserialize, JsonSchema)] +pub struct BrowserPressKeyArgs { + /// The key to press (e.g., "Enter", "Tab", "Escape", "ArrowDown"). 
+ pub key: String, +} + +impl Tool for BrowserPressKeyTool { + const NAME: &'static str = "browser_press_key"; + type Error = BrowserError; + type Args = BrowserPressKeyArgs; + type Output = BrowserOutput; + + async fn definition(&self, _prompt: String) -> ToolDefinition { + ToolDefinition { + name: Self::NAME.to_string(), + description: "Press a keyboard key (e.g., \"Enter\", \"Tab\", \"Escape\").".to_string(), + parameters: serde_json::json!({ + "type": "object", + "properties": { + "key": { "type": "string", "description": "Key name (Enter, Tab, Escape, ArrowDown, etc.)" } + }, + "required": ["key"] + }), + } } - async fn handle_act( - &self, - act_kind: Option, - element_ref: Option, - text: Option, - key: Option, - ) -> Result { - let Some(act_kind) = act_kind else { - return Err(BrowserError::new("act_kind is required for act action")); - }; + async fn call(&self, args: Self::Args) -> Result { + let state = self.context.state.lock().await; + let page = self.context.require_active_page(&state)?; + dispatch_key_press(page, &args.key).await?; + Ok(BrowserOutput::success(format!( + "Pressed key '{}'", + args.key + ))) + } +} - let state = self.state.lock().await; - let page = self.require_active_page(&state)?; +// Tool: browser_screenshot - match act_kind { - ActKind::Click => { - let element = self.resolve_element_ref(&state, page, element_ref).await?; - element - .click() - .await - .map_err(|error| BrowserError::new(format!("click failed: {error}")))?; - Ok(BrowserOutput::success("Clicked element")) - } - ActKind::Type => { - let Some(text) = text else { - return Err(BrowserError::new("text is required for act:type")); - }; - let element = self.resolve_element_ref(&state, page, element_ref).await?; - element - .click() - .await - .map_err(|error| BrowserError::new(format!("focus failed: {error}")))?; - element - .type_str(&text) - .await - .map_err(|error| BrowserError::new(format!("type failed: {error}")))?; - Ok(BrowserOutput::success(format!( - "Typed '{}' 
into element", - truncate_for_display(&text, 50) - ))) - } - ActKind::PressKey => { - let Some(key) = key else { - return Err(BrowserError::new("key is required for act:press_key")); - }; - if element_ref.is_some() { - let element = self.resolve_element_ref(&state, page, element_ref).await?; - element - .press_key(&key) - .await - .map_err(|error| BrowserError::new(format!("press_key failed: {error}")))?; - } else { - dispatch_key_press(page, &key).await?; +#[derive(Debug, Clone)] +pub struct BrowserScreenshotTool { + context: BrowserContext, +} + +#[derive(Debug, Deserialize, JsonSchema)] +pub struct BrowserScreenshotArgs { + /// Whether to take a full-page screenshot. + #[serde(default)] + pub full_page: bool, +} + +impl Tool for BrowserScreenshotTool { + const NAME: &'static str = "browser_screenshot"; + type Error = BrowserError; + type Args = BrowserScreenshotArgs; + type Output = BrowserOutput; + + async fn definition(&self, _prompt: String) -> ToolDefinition { + ToolDefinition { + name: Self::NAME.to_string(), + description: + "Take a screenshot of the current page. Saves to disk and returns the file path." 
+ .to_string(), + parameters: serde_json::json!({ + "type": "object", + "properties": { + "full_page": { "type": "boolean", "default": false, "description": "Capture entire page, not just viewport" } } - Ok(BrowserOutput::success(format!("Pressed key '{key}'"))) - } - ActKind::Hover => { - let element = self.resolve_element_ref(&state, page, element_ref).await?; - element - .hover() - .await - .map_err(|error| BrowserError::new(format!("hover failed: {error}")))?; - Ok(BrowserOutput::success("Hovered over element")) - } - ActKind::ScrollIntoView => { - let element = self.resolve_element_ref(&state, page, element_ref).await?; - element.scroll_into_view().await.map_err(|error| { - BrowserError::new(format!("scroll_into_view failed: {error}")) - })?; - Ok(BrowserOutput::success("Scrolled element into view")) - } - ActKind::Focus => { - let element = self.resolve_element_ref(&state, page, element_ref).await?; - element - .focus() - .await - .map_err(|error| BrowserError::new(format!("focus failed: {error}")))?; - Ok(BrowserOutput::success("Focused element")) - } + }), } } - async fn handle_screenshot( - &self, - element_ref: Option, - full_page: bool, - ) -> Result { - let state = self.state.lock().await; - let page = self.require_active_page(&state)?; - - let screenshot_data = if let Some(ref_id) = element_ref { - let element = self.resolve_element_ref(&state, page, Some(ref_id)).await?; - element - .screenshot(CaptureScreenshotFormat::Png) - .await - .map_err(|error| BrowserError::new(format!("element screenshot failed: {error}")))? - } else { - let params = ScreenshotParams::builder() - .format(CaptureScreenshotFormat::Png) - .full_page(full_page) - .build(); - page.screenshot(params) - .await - .map_err(|error| BrowserError::new(format!("screenshot failed: {error}")))? 
- }; + async fn call(&self, args: Self::Args) -> Result { + let state = self.context.state.lock().await; + let page = self.context.require_active_page(&state)?; + + let params = ScreenshotParams::builder() + .format(CaptureScreenshotFormat::Png) + .full_page(args.full_page) + .build(); + + let screenshot_data = page + .screenshot(params) + .await + .map_err(|error| BrowserError::new(format!("screenshot failed: {error}")))?; - // Save to disk let filename = format!( "screenshot_{}.png", chrono::Utc::now().format("%Y%m%d_%H%M%S_%3f") ); - let filepath = self.screenshot_dir.join(&filename); + let filepath = self.context.screenshot_dir.join(&filename); - tokio::fs::create_dir_all(&self.screenshot_dir) + tokio::fs::create_dir_all(&self.context.screenshot_dir) .await .map_err(|error| { BrowserError::new(format!("failed to create screenshot dir: {error}")) @@ -1089,7 +1711,6 @@ impl BrowserTool { let path_str = filepath.to_string_lossy().to_string(); let size_kb = screenshot_data.len() / 1024; - tracing::debug!(path = %path_str, size_kb, "screenshot saved"); Ok(BrowserOutput { @@ -1097,30 +1718,61 @@ impl BrowserTool { message: format!("Screenshot saved ({size_kb}KB)"), title: None, url: None, - elements: None, + snapshot: None, tabs: None, screenshot_path: Some(path_str), eval_result: None, content: None, }) } +} + +// Tool: browser_evaluate + +#[derive(Debug, Clone)] +pub struct BrowserEvaluateTool { + context: BrowserContext, +} + +#[derive(Debug, Deserialize, JsonSchema)] +pub struct BrowserEvaluateArgs { + /// JavaScript expression to evaluate in the page. + pub script: String, +} + +impl Tool for BrowserEvaluateTool { + const NAME: &'static str = "browser_evaluate"; + type Error = BrowserError; + type Args = BrowserEvaluateArgs; + type Output = BrowserOutput; + + async fn definition(&self, _prompt: String) -> ToolDefinition { + ToolDefinition { + name: Self::NAME.to_string(), + description: "Evaluate JavaScript in the active page and return the result." 
+ .to_string(), + parameters: serde_json::json!({ + "type": "object", + "properties": { + "script": { "type": "string", "description": "JavaScript expression to evaluate" } + }, + "required": ["script"] + }), + } + } - async fn handle_evaluate(&self, script: Option) -> Result { - if !self.config.evaluate_enabled { + async fn call(&self, args: Self::Args) -> Result { + if !self.context.config.evaluate_enabled { return Err(BrowserError::new( "JavaScript evaluation is disabled in browser config (set evaluate_enabled = true)", )); } - let Some(script) = script else { - return Err(BrowserError::new("script is required for evaluate action")); - }; - - let state = self.state.lock().await; - let page = self.require_active_page(&state)?; + let state = self.context.state.lock().await; + let page = self.context.require_active_page(&state)?; let result = page - .evaluate(script) + .evaluate(args.script) .await .map_err(|error| BrowserError::new(format!("evaluate failed: {error}")))?; @@ -1131,75 +1783,244 @@ impl BrowserTool { message: "JavaScript evaluated".to_string(), title: None, url: None, - elements: None, + snapshot: None, tabs: None, screenshot_path: None, eval_result: value, content: None, }) } +} + +// Tool: browser_tab_open + +#[derive(Debug, Clone)] +pub struct BrowserTabOpenTool { + context: BrowserContext, +} + +#[derive(Debug, Deserialize, JsonSchema)] +pub struct BrowserTabOpenArgs { + /// URL to open in the new tab. Defaults to about:blank. 
+ #[serde(default)] + pub url: Option, +} + +impl Tool for BrowserTabOpenTool { + const NAME: &'static str = "browser_tab_open"; + type Error = BrowserError; + type Args = BrowserTabOpenArgs; + type Output = BrowserOutput; + + async fn definition(&self, _prompt: String) -> ToolDefinition { + ToolDefinition { + name: Self::NAME.to_string(), + description: "Open a new browser tab, optionally at a URL.".to_string(), + parameters: serde_json::json!({ + "type": "object", + "properties": { + "url": { "type": "string", "description": "URL to open (default: about:blank)" } + } + }), + } + } + + async fn call(&self, args: Self::Args) -> Result { + let target_url = args.url.as_deref().unwrap_or("about:blank"); + if target_url != "about:blank" { + validate_url(target_url)?; + } - async fn handle_content(&self) -> Result { - let state = self.state.lock().await; - let page = self.require_active_page(&state)?; + let mut state = self.context.state.lock().await; + let browser = state + .browser + .as_ref() + .ok_or_else(|| BrowserError::new("browser not launched — call browser_launch first"))?; - let html = page - .content() + let page = browser + .new_page(target_url) .await - .map_err(|error| BrowserError::new(format!("failed to get page content: {error}")))?; + .map_err(|error| BrowserError::new(format!("failed to open tab: {error}")))?; + let target_id = page_target_id(&page); let title = page.get_title().await.ok().flatten(); - let url = page.url().await.ok().flatten(); + let current_url = page.url().await.ok().flatten(); - // Truncate very large pages for LLM consumption - let truncated = if html.len() > 100_000 { - format!( - "{}... 
[truncated, {} bytes total]", - &html[..100_000], - html.len() - ) - } else { - html - }; + state.pages.insert(target_id.clone(), page); + state.active_target = Some(target_id.clone()); + state.invalidate_snapshot(); Ok(BrowserOutput { success: true, - message: "Page content retrieved".to_string(), + message: format!("Opened new tab (target: {target_id})"), title, - url, - elements: None, + url: current_url, + snapshot: None, tabs: None, screenshot_path: None, eval_result: None, - content: Some(truncated), + content: None, + }) + } +} + +// Tool: browser_tab_list + +#[derive(Debug, Clone)] +pub struct BrowserTabListTool { + context: BrowserContext, +} + +#[derive(Debug, Deserialize, JsonSchema)] +pub struct BrowserTabListArgs {} + +impl Tool for BrowserTabListTool { + const NAME: &'static str = "browser_tab_list"; + type Error = BrowserError; + type Args = BrowserTabListArgs; + type Output = BrowserOutput; + + async fn definition(&self, _prompt: String) -> ToolDefinition { + ToolDefinition { + name: Self::NAME.to_string(), + description: "List all open browser tabs with their target IDs, titles, and URLs." 
+ .to_string(), + parameters: serde_json::json!({ "type": "object", "properties": {} }), + } + } + + async fn call(&self, _args: Self::Args) -> Result { + let state = self.context.state.lock().await; + let mut tabs = Vec::new(); + for (target_id, page) in &state.pages { + let title = page.get_title().await.ok().flatten(); + let url = page.url().await.ok().flatten(); + let active = state.active_target.as_ref() == Some(target_id); + tabs.push(TabInfo { + target_id: target_id.clone(), + title, + url, + active, + }); + } + + let count = tabs.len(); + Ok(BrowserOutput { + success: true, + message: format!("{count} tab(s) open"), + title: None, + url: None, + snapshot: None, + tabs: Some(tabs), + screenshot_path: None, + eval_result: None, + content: None, }) } +} + +// Tool: browser_tab_close + +#[derive(Debug, Clone)] +pub struct BrowserTabCloseTool { + context: BrowserContext, +} + +#[derive(Debug, Deserialize, JsonSchema)] +pub struct BrowserTabCloseArgs { + /// Target ID of the tab to close. If omitted, closes the active tab. + #[serde(default)] + pub target_id: Option, +} + +impl Tool for BrowserTabCloseTool { + const NAME: &'static str = "browser_tab_close"; + type Error = BrowserError; + type Args = BrowserTabCloseArgs; + type Output = BrowserOutput; + + async fn definition(&self, _prompt: String) -> ToolDefinition { + ToolDefinition { + name: Self::NAME.to_string(), + description: "Close a browser tab by target_id, or the active tab if omitted." + .to_string(), + parameters: serde_json::json!({ + "type": "object", + "properties": { + "target_id": { "type": "string", "description": "Tab target ID (from browser_tab_list). Omit for active tab." 
} + } + }), + } + } + + async fn call(&self, args: Self::Args) -> Result { + let mut state = self.context.state.lock().await; + let id = args + .target_id + .or_else(|| state.active_target.clone()) + .ok_or_else(|| BrowserError::new("no active tab to close"))?; + + let page = state + .pages + .remove(&id) + .ok_or_else(|| BrowserError::new(format!("no tab with target_id '{id}'")))?; + + page.close() + .await + .map_err(|error| BrowserError::new(format!("failed to close tab: {error}")))?; + + if state.active_target.as_ref() == Some(&id) { + state.active_target = state.pages.keys().next().cloned(); + } + state.invalidate_snapshot(); + + Ok(BrowserOutput::success(format!("Closed tab {id}"))) + } +} + +// Tool: browser_close + +#[derive(Debug, Clone)] +pub struct BrowserCloseTool { + context: BrowserContext, +} + +#[derive(Debug, Deserialize, JsonSchema)] +pub struct BrowserCloseArgs {} + +impl Tool for BrowserCloseTool { + const NAME: &'static str = "browser_close"; + type Error = BrowserError; + type Args = BrowserCloseArgs; + type Output = BrowserOutput; + + async fn definition(&self, _prompt: String) -> ToolDefinition { + ToolDefinition { + name: Self::NAME.to_string(), + description: "Close or detach from the browser (behavior depends on config)." + .to_string(), + parameters: serde_json::json!({ "type": "object", "properties": {} }), + } + } - async fn handle_close(&self) -> Result { + async fn call(&self, _args: Self::Args) -> Result { use crate::config::ClosePolicy; - match self.config.close_policy { + match self.context.config.close_policy { ClosePolicy::Detach => { - let mut state = self.state.lock().await; - // Clear per-worker element refs but preserve tabs, browser, - // handler, and active_target so the next worker picks up - // exactly where this one left off. 
- state.element_refs.clear(); - state.next_ref = 0; + let mut state = self.context.state.lock().await; + state.invalidate_snapshot(); tracing::info!(policy = "detach", "worker detached from browser"); Ok(BrowserOutput::success( "Detached from browser (tabs and session preserved)", )) } ClosePolicy::CloseTabs => { - // Drain pages under the lock, then close them outside it so - // other workers aren't blocked by CDP round-trips. let pages_to_close: Vec<(String, chromiumoxide::Page)> = { - let mut state = self.state.lock().await; + let mut state = self.context.state.lock().await; let pages = state.pages.drain().collect(); state.active_target = None; - state.element_refs.clear(); - state.next_ref = 0; + state.invalidate_snapshot(); pages }; @@ -1229,18 +2050,15 @@ impl BrowserTool { )) } ClosePolicy::CloseBrowser => { - // Take everything out of state under the lock, then do the - // actual teardown outside it. let (browser, handler_task, user_data_dir, persistent_profile) = { - let mut state = self.state.lock().await; + let mut state = self.context.state.lock().await; let browser = state.browser.take(); let handler_task = state._handler_task.take(); let user_data_dir = state.user_data_dir.take(); let persistent_profile = state.persistent_profile; state.pages.clear(); state.active_target = None; - state.element_refs.clear(); - state.next_ref = 0; + state.invalidate_snapshot(); (browser, handler_task, user_data_dir, persistent_profile) }; @@ -1256,9 +2074,6 @@ impl BrowserTool { return Err(BrowserError::new(message)); } - // Clean up the user data dir — but only for ephemeral sessions. - // Persistent profiles hold cookies and login sessions that must - // survive browser restarts. if !persistent_profile && let Some(dir) = user_data_dir { tokio::spawn(async move { if let Err(error) = tokio::fs::remove_dir_all(&dir).await { @@ -1276,80 +2091,104 @@ impl BrowserTool { } } } +} - /// Get the active page, or create a first one if the browser has no pages yet. 
- async fn get_or_create_page<'a>( - &self, - state: &'a mut BrowserState, - url: Option<&str>, - ) -> Result<&'a chromiumoxide::Page, BrowserError> { - if let Some(target) = state.active_target.as_ref() - && state.pages.contains_key(target) - { - return Ok(&state.pages[target]); - } +// Tool registration helper - let browser = state - .browser - .as_ref() - .ok_or_else(|| BrowserError::new("browser not launched — call launch first"))?; +/// Register all browser tools on a `ToolServer`. The tools share a single +/// `BrowserState` (via `SharedBrowserHandle` for persistent sessions, or a +/// fresh instance for ephemeral sessions). +pub fn register_browser_tools( + server: rig::tool::server::ToolServer, + config: BrowserConfig, + screenshot_dir: PathBuf, + runtime_config: &crate::config::RuntimeConfig, +) -> rig::tool::server::ToolServer { + let state = if let Some(shared) = runtime_config + .shared_browser + .as_ref() + .filter(|_| config.persist_session) + { + shared.clone() + } else { + Arc::new(Mutex::new(BrowserState::new())) + }; - let target_url = url.unwrap_or("about:blank"); - let page = browser - .new_page(target_url) - .await - .map_err(|error| BrowserError::new(format!("failed to create page: {error}")))?; + let secrets = runtime_config.secrets.load().as_ref().as_ref().cloned(); - let target_id = page_target_id(&page); - state.pages.insert(target_id.clone(), page); - state.active_target = Some(target_id.clone()); + let context = BrowserContext::new(state, config, screenshot_dir, secrets); - Ok(&state.pages[&target_id]) - } + server + .tool(BrowserLaunchTool { + context: context.clone(), + }) + .tool(BrowserNavigateTool { + context: context.clone(), + }) + .tool(BrowserSnapshotTool { + context: context.clone(), + }) + .tool(BrowserClickTool { + context: context.clone(), + }) + .tool(BrowserTypeTool { + context: context.clone(), + }) + .tool(BrowserPressKeyTool { + context: context.clone(), + }) + .tool(BrowserScreenshotTool { + context: context.clone(), + 
}) + .tool(BrowserEvaluateTool { + context: context.clone(), + }) + .tool(BrowserTabOpenTool { + context: context.clone(), + }) + .tool(BrowserTabListTool { + context: context.clone(), + }) + .tool(BrowserTabCloseTool { + context: context.clone(), + }) + .tool(BrowserCloseTool { context }) +} - /// Get the active page or return an error. - fn require_active_page<'a>( - &self, - state: &'a BrowserState, - ) -> Result<&'a chromiumoxide::Page, BrowserError> { - let target = state - .active_target - .as_ref() - .ok_or_else(|| BrowserError::new("no active tab — navigate or open a tab first"))?; +// Shared helpers - state - .pages - .get(target) - .ok_or_else(|| BrowserError::new("active tab no longer exists")) +/// Get the active page, or create a first one if the browser has no pages yet. +async fn get_or_create_page<'a>( + context: &BrowserContext, + state: &'a mut BrowserState, + url: Option<&str>, +) -> Result<&'a chromiumoxide::Page, BrowserError> { + if let Some(target) = state.active_target.as_ref() + && state.pages.contains_key(target) + { + return Ok(&state.pages[target]); } - /// Resolve an element ref (like "e3") to a chromiumoxide Element on the page. 
- async fn resolve_element_ref( - &self, - state: &BrowserState, - page: &chromiumoxide::Page, - element_ref: Option, - ) -> Result { - let Some(ref_id) = element_ref else { - return Err(BrowserError::new("element_ref is required for this action")); - }; + let browser = state + .browser + .as_ref() + .ok_or_else(|| BrowserError::new("browser not launched — call browser_launch first"))?; - let elem_ref = state.element_refs.get(&ref_id).ok_or_else(|| { - BrowserError::new(format!( - "unknown element ref '{ref_id}' — run snapshot first to get element refs" - )) - })?; + let target_url = url.unwrap_or("about:blank"); + let page = browser + .new_page(target_url) + .await + .map_err(|error| BrowserError::new(format!("failed to create page: {error}")))?; - // Use backend_node_id to find the element via CSS selector derived from role+name, - // or fall back to XPath with aria role and name attributes - let selector = build_selector_for_ref(elem_ref); + let target_id = page_target_id(&page); + state.pages.insert(target_id.clone(), page); + state.active_target = Some(target_id.clone()); - page.find_element(&selector).await.map_err(|error| { - BrowserError::new(format!( - "failed to find element for ref '{ref_id}' (selector: {selector}): {error}" - )) - }) - } + // Suppress the "unused variable" warning — we need `context` for the type + // signature to match the pattern used by the navigate tool. + let _ = context; + + Ok(&state.pages[&target_id]) } /// Dispatch a key press event to the page via CDP Input domain. @@ -1377,52 +2216,47 @@ async fn dispatch_key_press(page: &chromiumoxide::Page, key: &str) -> Result<(), Ok(()) } -/// Extract the string value from an AxValue option. 
/// Extract the CDP target ID string from a `Page`.
fn page_target_id(page: &chromiumoxide::Page) -> String {
    page.target_id().inner().clone()
}

/// Get the center coordinates of an element identified by `BackendNodeId`.
/// Uses `DOM.getBoxModel` to get the content box, then computes its center.
async fn get_element_center(
    page: &chromiumoxide::Page,
    backend_node_id: BackendNodeId,
) -> Result<(f64, f64), BrowserError> {
    let box_response = page
        .execute(GetBoxModelParams {
            backend_node_id: Some(backend_node_id),
            ..Default::default()
        })
        .await
        .map_err(|error| {
            BrowserError::new(format!(
                "failed to get box model for element: {error}. The element may not be visible — try scrolling or taking a screenshot."
            ))
        })?;

    // The content quad is a flat array of 8 values: [x1,y1, x2,y2, x3,y3, x4,y4].
    let content_quad = &box_response.result.model.content;
    if content_quad.inner().len() < 8 {
        return Err(BrowserError::new(
            "element has no visible content box — it may be hidden or zero-sized",
        ));
    }

    // Average the four corners to get the center point.
    let points = content_quad.inner();
    let center_x = (points[0] + points[2] + points[4] + points[6]) / 4.0;
    let center_y = (points[1] + points[3] + points[5] + points[7]) / 4.0;
if truncated. -fn truncate_for_display(text: &str, max_len: usize) -> String { - if text.len() <= max_len { - text.to_string() - } else { - format!("{}...", &text[..max_len]) - } + Ok((center_x, center_y)) } -/// Resolve the Chrome/Chromium executable path using a layered detection chain: -/// -/// 1. Explicit config override (`executable_path` in TOML) -/// 2. `CHROME` / `CHROME_PATH` environment variables -/// 3. chromiumoxide default detection (system PATH + well-known install paths) -/// 4. Auto-download via `BrowserFetcher` (cached in `chrome_cache_dir`) +// Chrome executable resolution + async fn resolve_chrome_executable(config: &BrowserConfig) -> Result { - // 1. Explicit config if let Some(path) = &config.executable_path { let path = PathBuf::from(path); if path.exists() { @@ -1435,19 +2269,16 @@ async fn resolve_chrome_executable(config: &BrowserConfig) -> Result Result Option { for variable in ["CHROME", "CHROME_PATH"] { if let Ok(value) = std::env::var(variable) { @@ -1468,8 +2298,6 @@ fn detect_chrome_from_env() -> Option { None } -/// Download Chromium using chromiumoxide's built-in fetcher. -/// The binary is cached in `cache_dir` and reused on subsequent launches. async fn fetch_chrome(cache_dir: &Path) -> Result { tokio::fs::create_dir_all(cache_dir) .await diff --git a/src/tools/file.rs b/src/tools/file.rs index 88fc62819..b69bb4e28 100644 --- a/src/tools/file.rs +++ b/src/tools/file.rs @@ -1,4 +1,8 @@ -//! File tool for reading/writing/listing files (task workers only). +//! File tools for reading, writing, editing, and listing files (task workers only). +//! +//! Provides a suite of separate tools (`file_read`, `file_write`, `file_edit`, +//! `file_list`) backed by a shared `FileContext` that handles sandbox-aware path +//! validation. This mirrors the flat-tool pattern used by the browser tools. 
use crate::sandbox::Sandbox; use rig::completion::ToolDefinition; @@ -8,20 +12,19 @@ use serde::{Deserialize, Serialize}; use std::path::{Path, PathBuf}; use std::sync::Arc; -/// Tool for file operations with sandbox-aware path validation. -/// -/// When sandbox mode is enabled, file access is restricted to the workspace -/// boundary. When sandbox is disabled, any path accessible to the process is -/// allowed (relative paths are still resolved against the workspace root). +// Shared context + +/// Shared context cloned into each file tool. Holds workspace root and sandbox +/// for path validation, mirroring how `BrowserContext` is shared across browser +/// tools. #[derive(Debug, Clone)] -pub struct FileTool { +pub(crate) struct FileContext { workspace: PathBuf, sandbox: Arc, } -impl FileTool { - /// Create a new file tool with sandbox-aware path validation. - pub fn new(workspace: PathBuf, sandbox: Arc) -> Self { +impl FileContext { + fn new(workspace: PathBuf, sandbox: Arc) -> Self { Self { workspace, sandbox } } @@ -86,6 +89,27 @@ impl FileTool { Ok(canonical) } + + /// Check whether writing to a path is blocked by identity file protection. + /// Only applies when sandbox is enabled. + fn check_identity_protection(&self, path: &Path) -> Result<(), FileError> { + if !self.sandbox.mode_enabled() { + return Ok(()); + } + let file_name = path.file_name().and_then(|n| n.to_str()).unwrap_or(""); + const PROTECTED_FILES: &[&str] = &["SOUL.md", "IDENTITY.md", "USER.md"]; + if PROTECTED_FILES + .iter() + .any(|f| file_name.eq_ignore_ascii_case(f)) + { + return Err(FileError( + "ACCESS DENIED: Identity files are protected and cannot be modified \ + through file operations. Use the identity management API instead." + .to_string(), + )); + } + Ok(()) + } } /// Canonicalize as much of the path as possible. For paths where the final @@ -118,30 +142,16 @@ fn best_effort_canonicalize(path: &Path) -> PathBuf { result } -/// Error type for file tool. 
+// Error type + +/// Error type for file tools. #[derive(Debug, thiserror::Error)] #[error("File operation failed: {0}")] pub struct FileError(String); -/// Arguments for file tool. -#[derive(Debug, Deserialize, JsonSchema)] -pub struct FileArgs { - /// The operation to perform. - pub operation: String, - /// The file or directory path. - pub path: String, - /// Content to write (required for write operation). - pub content: Option, - /// Whether to create parent directories if they don't exist (for write operations). - #[serde(default = "default_create_dirs")] - pub create_dirs: bool, -} - -fn default_create_dirs() -> bool { - true -} +// Shared output types -/// Output from file tool. +/// Output from file tools. #[derive(Debug, Serialize)] pub struct FileOutput { /// Whether the operation succeeded. @@ -151,10 +161,13 @@ pub struct FileOutput { /// The file/directory path. pub path: String, /// File content (for read operations). + #[serde(skip_serializing_if = "Option::is_none")] pub content: Option, /// Directory entries (for list operations). + #[serde(skip_serializing_if = "Option::is_none")] pub entries: Option>, /// Error message if operation failed. + #[serde(skip_serializing_if = "Option::is_none")] pub error: Option, } @@ -169,128 +182,357 @@ pub struct FileEntryOutput { pub size: u64, } -impl Tool for FileTool { - const NAME: &'static str = "file"; +// Tool: file_read + +#[derive(Debug, Clone)] +pub struct FileReadTool { + context: FileContext, +} + +/// Arguments for file_read. +#[derive(Debug, Deserialize, JsonSchema)] +pub struct FileReadArgs { + /// The file path. Relative paths are resolved from the workspace root. + pub path: String, + /// Line number to start reading from (1-indexed). Omit to start from the beginning. + pub offset: Option, + /// Maximum number of lines to return. Omit to read the entire file. 
+ pub limit: Option, +} + +impl Tool for FileReadTool { + const NAME: &'static str = "file_read"; type Error = FileError; - type Args = FileArgs; + type Args = FileReadArgs; type Output = FileOutput; async fn definition(&self, _prompt: String) -> ToolDefinition { ToolDefinition { name: Self::NAME.to_string(), - description: crate::prompts::text::get("tools/file").to_string(), + description: crate::prompts::text::get("tools/file_read").to_string(), parameters: serde_json::json!({ "type": "object", "properties": { - "operation": { + "path": { "type": "string", - "enum": ["read", "write", "list"], - "description": "The file operation to perform" + "description": "The file path to read. Relative paths are resolved from the workspace root." + }, + "offset": { + "type": "integer", + "description": "Line number to start reading from (1-indexed). Omit to start from the beginning." }, + "limit": { + "type": "integer", + "description": "Maximum number of lines to return. Omit to read the entire file (up to size limit)." 
+ } + }, + "required": ["path"] + }), + } + } + + async fn call(&self, args: Self::Args) -> Result { + let path = self.context.resolve_path(&args.path)?; + + let raw = tokio::fs::read_to_string(&path) + .await + .map_err(|error| FileError(format!("Failed to read file: {error}")))?; + + // Apply line-based offset/limit if requested + let content = if args.offset.is_some() || args.limit.is_some() { + let lines: Vec<&str> = raw.lines().collect(); + let total_lines = lines.len(); + + // offset is 1-indexed; default to line 1 + let start = args.offset.unwrap_or(1).saturating_sub(1).min(total_lines); + let end = if let Some(limit) = args.limit { + (start + limit).min(total_lines) + } else { + total_lines + }; + + let selected: Vec = lines[start..end] + .iter() + .enumerate() + .map(|(i, line)| format!("{}: {}", start + i + 1, line)) + .collect(); + + let mut result = selected.join("\n"); + if end < total_lines { + result.push_str(&format!( + "\n\n[showing lines {}-{} of {total_lines}. Use offset={} to see more]", + start + 1, + end, + end + 1 + )); + } + result + } else { + crate::tools::truncate_output(&raw, crate::tools::MAX_TOOL_OUTPUT_BYTES) + }; + + Ok(FileOutput { + success: true, + operation: "read".to_string(), + path: path.to_string_lossy().to_string(), + content: Some(content), + entries: None, + error: None, + }) + } +} + +// Tool: file_write + +#[derive(Debug, Clone)] +pub struct FileWriteTool { + context: FileContext, +} + +/// Arguments for file_write. +#[derive(Debug, Deserialize, JsonSchema)] +pub struct FileWriteArgs { + /// The file path to write. Relative paths are resolved from the workspace root. + pub path: String, + /// The content to write to the file. + pub content: String, + /// Whether to create parent directories if they don't exist. Defaults to true. 
+ #[serde(default = "default_create_dirs")] + pub create_dirs: bool, +} + +fn default_create_dirs() -> bool { + true +} + +impl Tool for FileWriteTool { + const NAME: &'static str = "file_write"; + + type Error = FileError; + type Args = FileWriteArgs; + type Output = FileOutput; + + async fn definition(&self, _prompt: String) -> ToolDefinition { + ToolDefinition { + name: Self::NAME.to_string(), + description: crate::prompts::text::get("tools/file_write").to_string(), + parameters: serde_json::json!({ + "type": "object", + "properties": { "path": { "type": "string", - "description": "The file or directory path. Relative paths are resolved from the workspace root." + "description": "The file path to write. Relative paths are resolved from the workspace root." }, "content": { "type": "string", - "description": "Content to write to the file (required for write operation)" + "description": "The content to write to the file." }, "create_dirs": { "type": "boolean", "default": true, - "description": "For write operations: create parent directories if they don't exist" + "description": "Create parent directories if they don't exist." } }, - "required": ["operation", "path"] + "required": ["path", "content"] }), } } async fn call(&self, args: Self::Args) -> Result { - let path = self.resolve_path(&args.path)?; - - match args.operation.as_str() { - "read" => do_file_read(&path).await, - "write" => { - // When sandbox is enabled, identity files must go through the - // dedicated identity API to keep the update flow consistent. - // When sandbox is disabled, the user has opted into full access. - if self.sandbox.mode_enabled() { - let file_name = path.file_name().and_then(|n| n.to_str()).unwrap_or(""); - const PROTECTED_FILES: &[&str] = &["SOUL.md", "IDENTITY.md", "USER.md"]; - if PROTECTED_FILES - .iter() - .any(|f| file_name.eq_ignore_ascii_case(f)) - { - return Err(FileError( - "ACCESS DENIED: Identity files are protected and cannot be modified \ - through file operations. 
Use the identity management API instead." - .to_string(), - )); - } - } + let path = self.context.resolve_path(&args.path)?; + self.context.check_identity_protection(&path)?; - let content = args.content.ok_or_else(|| { - FileError("Content is required for write operation".to_string()) - })?; - do_file_write(&path, content, args.create_dirs).await - } - "list" => do_file_list(&path).await, - _ => Err(FileError(format!("Unknown operation: {}", args.operation))), + // Ensure parent directory exists if requested + if args.create_dirs + && let Some(parent) = path.parent() + { + tokio::fs::create_dir_all(parent) + .await + .map_err(|error| FileError(format!("Failed to create directory: {error}")))?; } + + tokio::fs::write(&path, &args.content) + .await + .map_err(|error| FileError(format!("Failed to write file: {error}")))?; + + Ok(FileOutput { + success: true, + operation: "write".to_string(), + path: path.to_string_lossy().to_string(), + content: None, + entries: None, + error: None, + }) } } -async fn do_file_read(path: &Path) -> Result { - let raw = tokio::fs::read_to_string(path) - .await - .map_err(|e| FileError(format!("Failed to read file: {e}")))?; +// Tool: file_edit - let content = crate::tools::truncate_output(&raw, crate::tools::MAX_TOOL_OUTPUT_BYTES); +#[derive(Debug, Clone)] +pub struct FileEditTool { + context: FileContext, +} - Ok(FileOutput { - success: true, - operation: "read".to_string(), - path: path.to_string_lossy().to_string(), - content: Some(content), - entries: None, - error: None, - }) +/// Arguments for file_edit. +#[derive(Debug, Deserialize, JsonSchema)] +pub struct FileEditArgs { + /// The file path to edit. Relative paths are resolved from the workspace root. + pub path: String, + /// The exact text to find in the file. + pub old_string: String, + /// The replacement text. + pub new_string: String, + /// Replace all occurrences instead of just the first. Defaults to false. 
+ #[serde(default)] + pub replace_all: bool, } -async fn do_file_write( - path: &Path, - content: String, - create_dirs: bool, -) -> Result { - // Ensure parent directory exists if requested - if create_dirs && let Some(parent) = path.parent() { - tokio::fs::create_dir_all(parent) +impl Tool for FileEditTool { + const NAME: &'static str = "file_edit"; + + type Error = FileError; + type Args = FileEditArgs; + type Output = FileOutput; + + async fn definition(&self, _prompt: String) -> ToolDefinition { + ToolDefinition { + name: Self::NAME.to_string(), + description: crate::prompts::text::get("tools/file_edit").to_string(), + parameters: serde_json::json!({ + "type": "object", + "properties": { + "path": { + "type": "string", + "description": "The file path to edit. Relative paths are resolved from the workspace root." + }, + "old_string": { + "type": "string", + "description": "The exact text to find in the file." + }, + "new_string": { + "type": "string", + "description": "The replacement text." + }, + "replace_all": { + "type": "boolean", + "default": false, + "description": "Replace all occurrences instead of just the first." + } + }, + "required": ["path", "old_string", "new_string"] + }), + } + } + + async fn call(&self, args: Self::Args) -> Result { + let path = self.context.resolve_path(&args.path)?; + self.context.check_identity_protection(&path)?; + + let original = tokio::fs::read_to_string(&path) .await - .map_err(|e| FileError(format!("Failed to create directory: {e}")))?; + .map_err(|error| FileError(format!("Failed to read file: {error}")))?; + + // Count occurrences to provide useful feedback + let match_count = original.matches(&args.old_string).count(); + + if match_count == 0 { + return Err(FileError(format!( + "old_string not found in {}. Make sure the text matches exactly, \ + including whitespace and indentation. 
Use file_read to verify \ + the current file contents.", + args.path + ))); + } + + if !args.replace_all && match_count > 1 { + return Err(FileError(format!( + "Found {match_count} matches for old_string in {}. Provide more \ + surrounding context in old_string to identify a unique match, \ + or set replace_all to true to replace every occurrence.", + args.path + ))); + } + + let updated = if args.replace_all { + original.replace(&args.old_string, &args.new_string) + } else { + original.replacen(&args.old_string, &args.new_string, 1) + }; + + tokio::fs::write(&path, &updated) + .await + .map_err(|error| FileError(format!("Failed to write file: {error}")))?; + + let replacements = if args.replace_all { match_count } else { 1 }; + + Ok(FileOutput { + success: true, + operation: "edit".to_string(), + path: path.to_string_lossy().to_string(), + content: Some(format!( + "Replaced {replacements} occurrence{} in {}", + if replacements != 1 { "s" } else { "" }, + args.path + )), + entries: None, + error: None, + }) } +} - tokio::fs::write(path, content) - .await - .map_err(|e| FileError(format!("Failed to write file: {e}")))?; +// Tool: file_list - Ok(FileOutput { - success: true, - operation: "write".to_string(), - path: path.to_string_lossy().to_string(), - content: None, - entries: None, - error: None, - }) +#[derive(Debug, Clone)] +pub struct FileListTool { + context: FileContext, +} + +/// Arguments for file_list. +#[derive(Debug, Deserialize, JsonSchema)] +pub struct FileListArgs { + /// The directory path to list. Relative paths are resolved from the workspace root. 
+ pub path: String, +} + +impl Tool for FileListTool { + const NAME: &'static str = "file_list"; + + type Error = FileError; + type Args = FileListArgs; + type Output = FileOutput; + + async fn definition(&self, _prompt: String) -> ToolDefinition { + ToolDefinition { + name: Self::NAME.to_string(), + description: crate::prompts::text::get("tools/file_list").to_string(), + parameters: serde_json::json!({ + "type": "object", + "properties": { + "path": { + "type": "string", + "description": "The directory path to list. Relative paths are resolved from the workspace root." + } + }, + "required": ["path"] + }), + } + } + + async fn call(&self, args: Self::Args) -> Result { + let path = self.context.resolve_path(&args.path)?; + do_file_list(&path).await + } } +// Internal helpers + async fn do_file_list(path: &Path) -> Result { let mut entries = Vec::new(); let mut reader = tokio::fs::read_dir(path) .await - .map_err(|e| FileError(format!("Failed to read directory: {e}")))?; + .map_err(|error| FileError(format!("Failed to read directory: {error}")))?; let max_entries = crate::tools::MAX_DIR_ENTRIES; let mut total_count = 0usize; @@ -298,7 +540,7 @@ async fn do_file_list(path: &Path) -> Result { while let Some(entry) = reader .next_entry() .await - .map_err(|e| FileError(format!("Failed to read entry: {e}")))? + .map_err(|error| FileError(format!("Failed to read entry: {error}")))? { total_count += 1; @@ -306,7 +548,7 @@ async fn do_file_list(path: &Path) -> Result { let metadata = entry .metadata() .await - .map_err(|e| FileError(format!("Failed to read metadata: {e}")))?; + .map_err(|error| FileError(format!("Failed to read metadata: {error}")))?; let entry_type = if metadata.is_file() { "file".to_string() @@ -356,6 +598,32 @@ async fn do_file_list(path: &Path) -> Result { }) } +// Tool registration helper + +/// Register all file tools on a `ToolServer`. The tools share a single +/// `FileContext` for path validation and sandbox enforcement. 
+pub fn register_file_tools( + server: rig::tool::server::ToolServer, + workspace: PathBuf, + sandbox: Arc, +) -> rig::tool::server::ToolServer { + let context = FileContext::new(workspace, sandbox); + + server + .tool(FileReadTool { + context: context.clone(), + }) + .tool(FileWriteTool { + context: context.clone(), + }) + .tool(FileEditTool { + context: context.clone(), + }) + .tool(FileListTool { context }) +} + +// Legacy types (used by system-internal callers) + /// File entry metadata (legacy). #[derive(Debug, Clone)] pub struct FileEntry { @@ -372,34 +640,41 @@ pub enum FileType { Other, } -/// System-internal file operations that bypass workspace containment. -/// These are used by the system itself (not LLM-facing) and operate on -/// arbitrary paths. +/// System-internal file read that bypasses workspace containment. +/// Used by the system itself (not LLM-facing) and operates on arbitrary paths. pub async fn file_read(path: impl AsRef) -> crate::error::Result { - do_file_read(path.as_ref()) + let raw = tokio::fs::read_to_string(path.as_ref()) .await - .map(|output| output.content.unwrap_or_default()) - .map_err(|e| crate::error::AgentError::Other(e.into()).into()) + .map_err(|error| { + crate::error::AgentError::Other(anyhow::anyhow!("Failed to read file: {error}")) + })?; + + let content = crate::tools::truncate_output(&raw, crate::tools::MAX_TOOL_OUTPUT_BYTES); + Ok(content) } +/// System-internal file write that bypasses workspace containment. 
pub async fn file_write( path: impl AsRef, content: impl AsRef<[u8]>, ) -> crate::error::Result<()> { - do_file_write( - path.as_ref(), - String::from_utf8_lossy(content.as_ref()).to_string(), - true, - ) - .await - .map(|_| ()) - .map_err(|e| crate::error::AgentError::Other(e.into()).into()) + let path = path.as_ref(); + if let Some(parent) = path.parent() { + tokio::fs::create_dir_all(parent).await.map_err(|error| { + crate::error::AgentError::Other(anyhow::anyhow!("Failed to create directory: {error}")) + })?; + } + tokio::fs::write(path, content).await.map_err(|error| { + crate::error::AgentError::Other(anyhow::anyhow!("Failed to write file: {error}")) + })?; + Ok(()) } +/// System-internal directory list that bypasses workspace containment. pub async fn file_list(path: impl AsRef) -> crate::error::Result> { let output = do_file_list(path.as_ref()) .await - .map_err(|e| crate::error::AgentError::Other(e.into()))?; + .map_err(|error| crate::error::AgentError::Other(error.into()))?; let entries = output.entries.ok_or_else(|| { crate::error::AgentError::Other(anyhow::anyhow!("No entries in list result")) @@ -407,18 +682,20 @@ pub async fn file_list(path: impl AsRef) -> crate::error::Result FileType::File, "directory" => FileType::Directory, _ => FileType::Other, }, - size: e.size, + size: entry.size, }) .collect()) } +// Tests + #[cfg(test)] mod tests { use super::*; @@ -434,6 +711,11 @@ mod tests { Arc::new(Sandbox::new_for_test(config, workspace.to_path_buf())) } + fn make_context(mode: SandboxMode, workspace: &Path) -> FileContext { + let sandbox = create_sandbox(mode, workspace); + FileContext::new(workspace.to_path_buf(), sandbox) + } + #[tokio::test] async fn sandbox_enabled_rejects_read_outside_workspace() { let temp_dir = tempfile::tempdir().expect("failed to create temp dir"); @@ -445,15 +727,16 @@ mod tests { let file = outside.join("secret.txt"); fs::write(&file, "secret data").expect("failed to write file"); - let sandbox = 
create_sandbox(SandboxMode::Enabled, &workspace); - let tool = FileTool::new(workspace, sandbox); + let context = make_context(SandboxMode::Enabled, &workspace); + let tool = FileReadTool { + context: context.clone(), + }; let result = tool - .call(FileArgs { - operation: "read".to_string(), + .call(FileReadArgs { path: file.to_string_lossy().into_owned(), - content: None, - create_dirs: false, + offset: None, + limit: None, }) .await; @@ -474,15 +757,14 @@ mod tests { let file = outside.join("report.txt"); fs::write(&file, "public data").expect("failed to write file"); - let sandbox = create_sandbox(SandboxMode::Disabled, &workspace); - let tool = FileTool::new(workspace, sandbox); + let context = make_context(SandboxMode::Disabled, &workspace); + let tool = FileReadTool { context }; let result = tool - .call(FileArgs { - operation: "read".to_string(), + .call(FileReadArgs { path: file.to_string_lossy().into_owned(), - content: None, - create_dirs: false, + offset: None, + limit: None, }) .await .expect("should succeed when sandbox is disabled"); @@ -501,14 +783,13 @@ mod tests { let file = outside.join("output.txt"); - let sandbox = create_sandbox(SandboxMode::Disabled, &workspace); - let tool = FileTool::new(workspace, sandbox); + let context = make_context(SandboxMode::Disabled, &workspace); + let tool = FileWriteTool { context }; let result = tool - .call(FileArgs { - operation: "write".to_string(), + .call(FileWriteArgs { path: file.to_string_lossy().into_owned(), - content: Some("written outside workspace".to_string()), + content: "written outside workspace".to_string(), create_dirs: false, }) .await @@ -525,14 +806,15 @@ mod tests { let workspace = temp_dir.path().join("workspace"); fs::create_dir_all(&workspace).expect("failed to create workspace"); - let sandbox = create_sandbox(SandboxMode::Enabled, &workspace); - let tool = FileTool::new(workspace.clone(), sandbox); + let context = make_context(SandboxMode::Enabled, &workspace); + let tool = 
FileWriteTool { + context: context.clone(), + }; let result = tool - .call(FileArgs { - operation: "write".to_string(), + .call(FileWriteArgs { path: workspace.join("SOUL.md").to_string_lossy().into_owned(), - content: Some("overwritten".to_string()), + content: "overwritten".to_string(), create_dirs: false, }) .await; @@ -552,14 +834,13 @@ mod tests { let workspace = temp_dir.path().join("workspace"); fs::create_dir_all(&workspace).expect("failed to create workspace"); - let sandbox = create_sandbox(SandboxMode::Disabled, &workspace); - let tool = FileTool::new(workspace.clone(), sandbox); + let context = make_context(SandboxMode::Disabled, &workspace); + let tool = FileWriteTool { context }; let result = tool - .call(FileArgs { - operation: "write".to_string(), + .call(FileWriteArgs { path: workspace.join("IDENTITY.md").to_string_lossy().into_owned(), - content: Some("new identity".to_string()), + content: "new identity".to_string(), create_dirs: false, }) .await @@ -570,4 +851,177 @@ mod tests { fs::read_to_string(workspace.join("IDENTITY.md")).expect("failed to read file"); assert_eq!(written, "new identity"); } + + #[tokio::test] + async fn file_edit_replaces_single_occurrence() { + let temp_dir = tempfile::tempdir().expect("failed to create temp dir"); + let workspace = temp_dir.path().join("workspace"); + fs::create_dir_all(&workspace).expect("failed to create workspace"); + + let file = workspace.join("test.txt"); + fs::write(&file, "hello world").expect("failed to write file"); + + let context = make_context(SandboxMode::Disabled, &workspace); + let tool = FileEditTool { context }; + + let result = tool + .call(FileEditArgs { + path: file.to_string_lossy().into_owned(), + old_string: "hello".to_string(), + new_string: "goodbye".to_string(), + replace_all: false, + }) + .await + .expect("edit should succeed"); + + assert!(result.success); + let content = fs::read_to_string(&file).expect("failed to read file"); + assert_eq!(content, "goodbye world"); + } + + 
#[tokio::test] + async fn file_edit_rejects_ambiguous_match() { + let temp_dir = tempfile::tempdir().expect("failed to create temp dir"); + let workspace = temp_dir.path().join("workspace"); + fs::create_dir_all(&workspace).expect("failed to create workspace"); + + let file = workspace.join("test.txt"); + fs::write(&file, "aaa bbb aaa").expect("failed to write file"); + + let context = make_context(SandboxMode::Disabled, &workspace); + let tool = FileEditTool { context }; + + let result = tool + .call(FileEditArgs { + path: file.to_string_lossy().into_owned(), + old_string: "aaa".to_string(), + new_string: "ccc".to_string(), + replace_all: false, + }) + .await; + + let error = result + .expect_err("should reject ambiguous match") + .to_string(); + assert!( + error.contains("Found 2 matches"), + "unexpected error: {error}" + ); + } + + #[tokio::test] + async fn file_edit_replace_all() { + let temp_dir = tempfile::tempdir().expect("failed to create temp dir"); + let workspace = temp_dir.path().join("workspace"); + fs::create_dir_all(&workspace).expect("failed to create workspace"); + + let file = workspace.join("test.txt"); + fs::write(&file, "aaa bbb aaa").expect("failed to write file"); + + let context = make_context(SandboxMode::Disabled, &workspace); + let tool = FileEditTool { context }; + + let result = tool + .call(FileEditArgs { + path: file.to_string_lossy().into_owned(), + old_string: "aaa".to_string(), + new_string: "ccc".to_string(), + replace_all: true, + }) + .await + .expect("replace_all should succeed"); + + assert!(result.success); + let content = fs::read_to_string(&file).expect("failed to read file"); + assert_eq!(content, "ccc bbb ccc"); + } + + #[tokio::test] + async fn file_edit_not_found() { + let temp_dir = tempfile::tempdir().expect("failed to create temp dir"); + let workspace = temp_dir.path().join("workspace"); + fs::create_dir_all(&workspace).expect("failed to create workspace"); + + let file = workspace.join("test.txt"); + 
fs::write(&file, "hello world").expect("failed to write file"); + + let context = make_context(SandboxMode::Disabled, &workspace); + let tool = FileEditTool { context }; + + let result = tool + .call(FileEditArgs { + path: file.to_string_lossy().into_owned(), + old_string: "nonexistent".to_string(), + new_string: "replacement".to_string(), + replace_all: false, + }) + .await; + + let error = result.expect_err("should fail when not found").to_string(); + assert!( + error.contains("old_string not found"), + "unexpected error: {error}" + ); + } + + #[tokio::test] + async fn file_edit_blocks_identity_file() { + let temp_dir = tempfile::tempdir().expect("failed to create temp dir"); + let workspace = temp_dir.path().join("workspace"); + fs::create_dir_all(&workspace).expect("failed to create workspace"); + fs::write(workspace.join("SOUL.md"), "original").expect("failed to write file"); + + let context = make_context(SandboxMode::Enabled, &workspace); + let tool = FileEditTool { context }; + + let result = tool + .call(FileEditArgs { + path: workspace.join("SOUL.md").to_string_lossy().into_owned(), + old_string: "original".to_string(), + new_string: "hacked".to_string(), + replace_all: false, + }) + .await; + + let error = result + .expect_err("should block identity file edit") + .to_string(); + assert!( + error.contains("Identity files are protected"), + "unexpected error: {error}" + ); + } + + #[tokio::test] + async fn file_read_with_offset_and_limit() { + let temp_dir = tempfile::tempdir().expect("failed to create temp dir"); + let workspace = temp_dir.path().join("workspace"); + fs::create_dir_all(&workspace).expect("failed to create workspace"); + + let file = workspace.join("lines.txt"); + fs::write(&file, "line1\nline2\nline3\nline4\nline5").expect("failed to write file"); + + let context = make_context(SandboxMode::Disabled, &workspace); + let tool = FileReadTool { context }; + + let result = tool + .call(FileReadArgs { + path: 
file.to_string_lossy().into_owned(), + offset: Some(2), + limit: Some(2), + }) + .await + .expect("read with offset should succeed"); + + assert!(result.success); + let content = result.content.unwrap(); + assert!(content.contains("2: line2"), "should contain line 2"); + assert!(content.contains("3: line3"), "should contain line 3"); + assert!(!content.contains("line1"), "should not contain line 1"); + assert!(!content.contains("line4"), "should not contain line 4"); + assert!( + content.contains("showing lines 2-3 of 5"), + "should have continuation notice" + ); + } } diff --git a/src/tools/set_status.rs b/src/tools/set_status.rs index 8ad89b35d..ee2e0f092 100644 --- a/src/tools/set_status.rs +++ b/src/tools/set_status.rs @@ -54,6 +54,12 @@ pub struct SetStatusError(String); /// way the worker can describe). Workers **must** emit an `outcome` status /// before finishing; the system will nudge them back to work if they try to /// stop without one. +/// +/// NOTE: The outcome gate only checks *whether* an outcome was signaled, not +/// *whether all task steps are actually complete*. Premature outcome signaling +/// (e.g. after 2 of 7 steps) is handled via prompt-level instructions, not +/// structural enforcement. See the worker prompt for the anti-premature-exit +/// language. #[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Deserialize, Serialize, JsonSchema)] #[serde(rename_all = "snake_case")] pub enum StatusKind { @@ -110,7 +116,7 @@ impl Tool for SetStatusTool { "type": "string", "enum": ["progress", "outcome"], "default": "progress", - "description": "Use \"progress\" for intermediate updates. Use \"outcome\" when the task has reached a terminal result (success or failure) and you are ready to finish." + "description": "Use \"progress\" for intermediate updates. Use \"outcome\" ONLY when ALL steps of the task have reached a terminal result (success or failure) and you are ready to finish. 
Do not signal outcome if there are remaining steps — premature outcome signaling causes the task to be incorrectly reported as complete." } }, "required": ["status"] diff --git a/src/tools/spawn_worker.rs b/src/tools/spawn_worker.rs index 31c8ad32d..969bc1975 100644 --- a/src/tools/spawn_worker.rs +++ b/src/tools/spawn_worker.rs @@ -87,7 +87,14 @@ impl Tool for SpawnWorkerTool { let web_search_enabled = rc.brave_search_key.load().is_some(); let opencode_enabled = rc.opencode.load().enabled; - let mut tools_list = vec!["shell", "file", "exec"]; + let mut tools_list = vec![ + "shell", + "file_read", + "file_write", + "file_edit", + "file_list", + "exec", + ]; if browser_enabled { tools_list.push("browser"); }