diff --git a/README.md b/README.md index 2b0f956..3daf0db 100644 --- a/README.md +++ b/README.md @@ -35,6 +35,8 @@ The proxy supports the following OpenAI-compatible parameters in the `/v1/chat/c - **`temperature`** (number): Controls randomness (passed to the engine). - **`max_tokens`** (number): Limits the length of the generated response. - **`reasoning_effort`** (string): For models with reasoning capabilities (e.g., `low`, `medium`, `high`). +- **`tools` / `tool_choice`**: Standard OpenAI tool-calling fields used by agentic clients. +- **`browseros_mode`** (boolean): Optional strict mode toggle for BrowserOS-like agentic clients. When tools are provided, this mode is **enabled by default** unless you explicitly set `browseros_mode: false`. While the mode is active, an omitted `tool_choice` defaults to `"required"`. ## Quick Start @@ -70,6 +72,18 @@ curl -N -X POST http://localhost:8080/v1/chat/completions \ - **Port**: Set via `PORT` environment variable (defaults to 8080). - **Models**: The proxy automatically queries your local Codex installation for available model slugs. +### BrowserOS Configuration + +If your BrowserOS agent sends tool definitions but the model replies with text like _"I’m unable to control the browser from this environment."_, verify: + +- you are sending `tools` in your `/v1/chat/completions` request body (this auto-enables BrowserOS strict mode) +- optionally set `browseros_mode: true` explicitly for clarity +- keep sending `tool_choice` when your client supports it + +To disable strict BrowserOS behavior for non-agentic use-cases, set `browseros_mode: false`. + +In this mode the proxy supplies only the LLM/provider behavior; BrowserOS continues to execute the actual browser tools on its side. + ## Architecture This project uses a typed `CodexClient` that manages a persistent `codex app-server` background process. Communication happens over a high-speed JSON-RPC channel on `stdio`, ensuring that the model state remains warm and ready for immediate inference. 
diff --git a/src/codex-client.ts b/src/codex-client.ts index 8c4fff0..6e400b9 100644 --- a/src/codex-client.ts +++ b/src/codex-client.ts @@ -121,7 +121,12 @@ export class CodexClient { async *chatCompletionStream( messages: Message[], - options: { model: string; tools?: any[]; tool_choice?: any }, + options: { + model: string; + tools?: any[]; + tool_choice?: any; + browseros_mode?: boolean; + }, ): AsyncGenerator { const hasTools = options.tools && options.tools.length > 0; @@ -151,6 +156,19 @@ export class CodexClient { baseInstructions = (baseInstructions || "") + toolBlock; } + if (hasTools && options.browseros_mode) { + const browserOSToolModeInstructions = + `\n\n## BrowserOS Tool Execution Mode\n\n` + + `You are running as a tool-calling planner inside BrowserOS. ` + + `You can and must control the browser by emitting tool calls. ` + + `Do not claim you cannot access or control the browser/environment. ` + + `If the request is actionable with available tools, your response MUST include at least one block. ` + + `Prefer tool-call-only output for action steps. ` + + `For shopping workflows, adding products to cart is permitted; avoid checkout/payment unless explicitly requested. ` + + `If an action needs multiple steps, emit only the next required tool call(s) for the current step.\n`; + baseInstructions = (baseInstructions || "") + browserOSToolModeInstructions; + } + // --- Format conversation messages into prompt --- let fullPrompt = ""; for (const msg of nonSystemMessages) { @@ -276,6 +294,9 @@ export class CodexClient { } eventQueue.push({ type: "tool_calls", calls: toolCalls }); } else { + console.warn( + `[CodexClient] Tools provided but no tool calls parsed. 
Assistant preview: ${accumulatedText.slice(0, 300).replace(/\s+/g, " ")}`, + ); // No tool calls found, emit as plain message eventQueue.push({ type: "message", text: accumulatedText }); } diff --git a/src/codex.ts b/src/codex.ts index fcfe9c7..bf74e12 100644 --- a/src/codex.ts +++ b/src/codex.ts @@ -38,6 +38,7 @@ export interface CodexOptions { signal?: AbortSignal; tools?: any[]; tool_choice?: any; + browseros_mode?: boolean; } export interface ParsedToolCall { @@ -74,27 +75,77 @@ export type CodexStreamEvent = */ export function parseToolCalls(text: string): ParsedToolCall[] { const calls: ParsedToolCall[] = []; - const regex = /([\s\S]*?)<\/tool_call>/g; - let match; + const seen = new Set(); let callIndex = 0; - while ((match = regex.exec(text)) !== null) { + + const pushCall = (raw: any) => { + const name = raw?.name || raw?.toolName || raw?.function?.name || ""; + const argsRaw = + raw?.arguments ?? raw?.input ?? raw?.parameters ?? raw?.function?.arguments; + if (!name) return; + const args = + typeof argsRaw === "string" + ? argsRaw + : JSON.stringify(argsRaw ?? {}); + const key = `${name}::${args}`; + if (seen.has(key)) return; + seen.add(key); + calls.push({ + id: `call_${Date.now()}_${callIndex++}`, + type: "function", + function: { + name, + arguments: args, + }, + }); + }; + + // Format 1: explicit ... blocks. + const taggedRegex = /([\s\S]*?)<\/tool_call>/g; + let match; + while ((match = taggedRegex.exec(text)) !== null) { + try { + pushCall(JSON.parse(match[1].trim())); + } catch { + // Ignore malformed block. + } + } + + // Format 2: JSON fenced blocks that contain a single call, call list, or tool_calls. 
+ const fencedJsonRegex = /```(?:json)?\s*([\s\S]*?)```/g; + while ((match = fencedJsonRegex.exec(text)) !== null) { + const candidate = match[1].trim(); try { - const parsed = JSON.parse(match[1].trim()); - calls.push({ - id: `call_${Date.now()}_${callIndex++}`, - type: "function", - function: { - name: parsed.name || parsed.function?.name || "", - arguments: - typeof parsed.arguments === "string" - ? parsed.arguments - : JSON.stringify(parsed.arguments ?? parsed.parameters ?? {}), - }, - }); + const parsed = JSON.parse(candidate); + if (Array.isArray(parsed)) { + for (const item of parsed) pushCall(item); + } else if (parsed?.tool_calls && Array.isArray(parsed.tool_calls)) { + for (const item of parsed.tool_calls) pushCall(item); + } else { + pushCall(parsed); + } + } catch { + // Not valid JSON; ignore. + } + } + + // Format 3: whole response is a JSON object/array describing tool calls. + const trimmed = text.trim(); + if (trimmed.startsWith("{") || trimmed.startsWith("[")) { + try { + const parsed = JSON.parse(trimmed); + if (Array.isArray(parsed)) { + for (const item of parsed) pushCall(item); + } else if (parsed?.tool_calls && Array.isArray(parsed.tool_calls)) { + for (const item of parsed.tool_calls) pushCall(item); + } else { + pushCall(parsed); + } } catch { - // Skip malformed tool calls + // Not parseable as JSON; ignore. } } + return calls; } @@ -104,7 +155,19 @@ export function parseToolCalls(text: string): ParsedToolCall[] { * are available and the expected output format. */ export function buildToolInstructions(tools: any[], tool_choice?: any): string { - let block = `\n\n## Available Tools\n\nYou have access to the following tools to perform actions. You MUST use these tools to fulfill the user's request. 
Do NOT describe steps or give instructions — instead, call the appropriate tool.\n\nTo call a tool, output one or more tool calls in this exact format (you may output multiple for parallel execution):\n{"name": "tool_name", "arguments": {"param": "value"}}\n\nIMPORTANT RULES:\n- ALWAYS use tool calls to act. NEVER respond with step-by-step instructions when a tool can do the job.\n- You can call multiple tools in a single response.\n- After a tool call, wait for the result before proceeding.\n- If the user asks you to navigate somewhere, use the navigate tool. If they ask you to click, use the click tool. Etc.\n\nHere are the tools:\n\n`; + let block = + `\n\n## Available Tools\n\n` + + `You are an agentic planner operating through external tools. ` + + `When tools are available, your next action MUST be emitted as tool calls, not prose refusals.\n\n` + + `Tool call output format (required):\n` + + `{"name": "tool_name", "arguments": {"param": "value"}}\n\n` + + `IMPORTANT RULES:\n` + + `- If a user request is actionable with provided tools, emit one or more blocks.\n` + + `- Do not say you cannot access the browser/environment when browser tools are provided.\n` + + `- Keep normal text minimal. Prefer tool-call-only responses for action steps.\n` + + `- After tool results are returned, emit the next tool call(s) needed to continue.\n` + + `- For commerce tasks, adding an item to cart is allowed; do not attempt checkout/payment unless user explicitly requests it.\n\n` + + `Here are the tools:\n\n`; for (const tool of tools) { if (tool.type === "function" && tool.function) { @@ -115,6 +178,16 @@ export function buildToolInstructions(tools: any[], tool_choice?: any): string { block += `Parameters: ${JSON.stringify(fn.parameters)}\n`; } block += `\n`; + } else if (tool?.name) { + // Support alternate tool schemas used by some providers/agents. 
+ block += `### ${tool.name}\n`; + if (tool.description) block += `${tool.description}\n`; + if (tool.input_schema) { + block += `Parameters: ${JSON.stringify(tool.input_schema)}\n`; + } else if (tool.parameters) { + block += `Parameters: ${JSON.stringify(tool.parameters)}\n`; + } + block += `\n`; } } @@ -141,5 +214,6 @@ export async function* execCodexStream( model: options.model, tools: options.tools, tool_choice: options.tool_choice, + browseros_mode: options.browseros_mode, }); } diff --git a/src/index.ts b/src/index.ts index f4bd520..d9d6d57 100644 --- a/src/index.ts +++ b/src/index.ts @@ -56,8 +56,13 @@ Bun.serve({ const temperature = body.temperature; const max_tokens = body.max_tokens; const reasoning_effort = body.reasoning_effort; - const tools = body.tools; - const tool_choice = body.tool_choice; + const tools = Array.isArray(body.tools) ? body.tools : undefined; + // Default to BrowserOS-style strict tool mode whenever tools are supplied, + // unless callers explicitly disable it with browseros_mode: false. + const browseros_mode = + tools && tools.length > 0 ? body.browseros_mode !== false : false; + const tool_choice = + body.tool_choice ?? (browseros_mode ? "required" : undefined); const stream = body.stream === true; @@ -69,6 +74,16 @@ Bun.serve({ if (tools) { console.log(`[Proxy] Tools count: ${tools.length}`); } + if (tools && tools.length > 0) { + console.log( + `[Proxy] BrowserOS mode: ${browseros_mode ? "enabled" : "disabled"}`, + ); + if (body.browseros_mode === undefined && browseros_mode) { + console.log( + `[Proxy] BrowserOS mode auto-enabled because tools were provided`, + ); + } + } if (stream) { const responseId = `chatcmpl-${Date.now()}`; @@ -88,6 +103,7 @@ Bun.serve({ signal: req.signal, tools, tool_choice, + browseros_mode, })) { if (req.signal.aborted) break; @@ -269,6 +285,7 @@ Bun.serve({ signal: req.signal, tools, tool_choice, + browseros_mode, })) { if (req.signal.aborted) break; if (event.type === "message") {