From 1476e2463cfbad30ecc2b79608efe3c735f5b8c0 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 28 Mar 2026 13:54:08 +0000 Subject: [PATCH 1/4] feat: add browseros tool-calling mode Agent-Logs-Url: https://github.com/copsys/codex-app-proxy/sessions/41e79ae7-148a-4f6d-970c-4f5ad05386fd Co-authored-by: copsys <31281180+copsys@users.noreply.github.com> --- README.md | 11 +++++++++++ src/codex-client.ts | 18 +++++++++++++++++- src/codex.ts | 2 ++ src/index.ts | 6 ++++++ 4 files changed, 36 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 2b0f956..918bfd6 100644 --- a/README.md +++ b/README.md @@ -35,6 +35,8 @@ The proxy supports the following OpenAI-compatible parameters in the `/v1/chat/c - **`temperature`** (number): Controls randomness (passed to the engine). - **`max_tokens`** (number): Limits the length of the generated response. - **`reasoning_effort`** (string): For models with reasoning capabilities (e.g., `low`, `medium`, `high`). +- **`tools` / `tool_choice`**: Standard OpenAI tool-calling fields used by agentic clients. +- **`browseros_mode`** (boolean): Optional mode for BrowserOS-like agentic clients. When `true` (and tools are provided), the proxy adds stronger tool-execution instructions so the model emits tool calls instead of environment-limitation refusals. ## Quick Start @@ -70,6 +72,15 @@ curl -N -X POST http://localhost:8080/v1/chat/completions \ - **Port**: Set via `PORT` environment variable (defaults to 8080). - **Models**: The proxy automatically queries your local Codex installation for available model slugs. 
+### BrowserOS Configuration + +If your BrowserOS agent sends tool definitions but the model replies with text like _"I’m unable to control the browser from this environment."_, set: + +- `browseros_mode: true` in your `/v1/chat/completions` request body +- keep sending `tools` and (optionally) `tool_choice` + +This proxy mode is designed to use the proxy only for LLM/provider behavior while BrowserOS continues to execute the actual browser tools on its side. + ## Architecture This project uses a typed `CodexClient` that manages a persistent `codex app-server` background process. Communication happens over a high-speed JSON-RPC channel on `stdio`, ensuring that the model state remains warm and ready for immediate inference. diff --git a/src/codex-client.ts b/src/codex-client.ts index 8c4fff0..21ce40a 100644 --- a/src/codex-client.ts +++ b/src/codex-client.ts @@ -121,7 +121,12 @@ export class CodexClient { async *chatCompletionStream( messages: Message[], - options: { model: string; tools?: any[]; tool_choice?: any }, + options: { + model: string; + tools?: any[]; + tool_choice?: any; + browseros_mode?: boolean; + }, ): AsyncGenerator { const hasTools = options.tools && options.tools.length > 0; @@ -151,6 +156,17 @@ export class CodexClient { baseInstructions = (baseInstructions || "") + toolBlock; } + if (hasTools && options.browseros_mode) { + const browserOsToolModeInstructions = + `\n\n## BrowserOS Tool Execution Mode\n\n` + + `You are running as a tool-calling planner inside BrowserOS. ` + + `You can and must control the browser by emitting tool calls. ` + + `Do not claim you cannot access or control the browser/environment. ` + + `When a browser action is requested, respond with tool calls only (and brief coordinating text only when necessary). 
` + + `If an action needs multiple steps, emit the next required tool call(s) for the current step.\n`; + baseInstructions = (baseInstructions || "") + browserOsToolModeInstructions; + } + // --- Format conversation messages into prompt --- let fullPrompt = ""; for (const msg of nonSystemMessages) { diff --git a/src/codex.ts b/src/codex.ts index fcfe9c7..9528fca 100644 --- a/src/codex.ts +++ b/src/codex.ts @@ -38,6 +38,7 @@ export interface CodexOptions { signal?: AbortSignal; tools?: any[]; tool_choice?: any; + browseros_mode?: boolean; } export interface ParsedToolCall { @@ -141,5 +142,6 @@ export async function* execCodexStream( model: options.model, tools: options.tools, tool_choice: options.tool_choice, + browseros_mode: options.browseros_mode, }); } diff --git a/src/index.ts b/src/index.ts index f4bd520..2d0b979 100644 --- a/src/index.ts +++ b/src/index.ts @@ -58,6 +58,7 @@ Bun.serve({ const reasoning_effort = body.reasoning_effort; const tools = body.tools; const tool_choice = body.tool_choice; + const browseros_mode = body.browseros_mode === true; const stream = body.stream === true; @@ -69,6 +70,9 @@ Bun.serve({ if (tools) { console.log(`[Proxy] Tools count: ${tools.length}`); } + if (browseros_mode) { + console.log(`[Proxy] BrowserOS mode enabled`); + } if (stream) { const responseId = `chatcmpl-${Date.now()}`; @@ -88,6 +92,7 @@ Bun.serve({ signal: req.signal, tools, tool_choice, + browseros_mode, })) { if (req.signal.aborted) break; @@ -269,6 +274,7 @@ Bun.serve({ signal: req.signal, tools, tool_choice, + browseros_mode, })) { if (req.signal.aborted) break; if (event.type === "message") { From 27ad1519e7d21f95e5f4ddfa1a1d03ff3304a3fd Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 28 Mar 2026 13:54:31 +0000 Subject: [PATCH 2/4] chore: address code review naming feedback Agent-Logs-Url: https://github.com/copsys/codex-app-proxy/sessions/41e79ae7-148a-4f6d-970c-4f5ad05386fd Co-authored-by: 
copsys <31281180+copsys@users.noreply.github.com> --- src/codex-client.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/codex-client.ts b/src/codex-client.ts index 21ce40a..dbf917c 100644 --- a/src/codex-client.ts +++ b/src/codex-client.ts @@ -157,14 +157,14 @@ export class CodexClient { } if (hasTools && options.browseros_mode) { - const browserOsToolModeInstructions = + const browserOSToolModeInstructions = `\n\n## BrowserOS Tool Execution Mode\n\n` + `You are running as a tool-calling planner inside BrowserOS. ` + `You can and must control the browser by emitting tool calls. ` + `Do not claim you cannot access or control the browser/environment. ` + `When a browser action is requested, respond with tool calls only (and brief coordinating text only when necessary). ` + `If an action needs multiple steps, emit the next required tool call(s) for the current step.\n`; - baseInstructions = (baseInstructions || "") + browserOsToolModeInstructions; + baseInstructions = (baseInstructions || "") + browserOSToolModeInstructions; } // --- Format conversation messages into prompt --- From 5be8919dda83ee2d238178c92c370b4466903d2e Mon Sep 17 00:00:00 2001 From: Chetan Khobragade <31281180+copsys@users.noreply.github.com> Date: Sat, 28 Mar 2026 15:13:14 +0000 Subject: [PATCH 3/4] feat: auto-enable BrowserOS mode when tools are provided --- README.md | 11 +++++++---- src/codex-client.ts | 6 ++++-- src/codex.ts | 24 +++++++++++++++++++++++- src/index.ts | 18 ++++++++++++++---- 4 files changed, 48 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 918bfd6..3daf0db 100644 --- a/README.md +++ b/README.md @@ -36,7 +36,7 @@ The proxy supports the following OpenAI-compatible parameters in the `/v1/chat/c - **`max_tokens`** (number): Limits the length of the generated response. - **`reasoning_effort`** (string): For models with reasoning capabilities (e.g., `low`, `medium`, `high`). - **`tools` / `tool_choice`**: Standard OpenAI tool-calling fields used by agentic clients. 
-- **`browseros_mode`** (boolean): Optional mode for BrowserOS-like agentic clients. When `true` (and tools are provided), the proxy adds stronger tool-execution instructions so the model emits tool calls instead of environment-limitation refusals. +- **`browseros_mode`** (boolean): Optional strict mode toggle for BrowserOS-like agentic clients. When tools are provided, this mode is **enabled by default** unless you explicitly set `browseros_mode: false`. ## Quick Start @@ -74,10 +74,13 @@ curl -N -X POST http://localhost:8080/v1/chat/completions \ ### BrowserOS Configuration -If your BrowserOS agent sends tool definitions but the model replies with text like _"I’m unable to control the browser from this environment."_, set: +If your BrowserOS agent sends tool definitions but the model replies with text like _"I’m unable to control the browser from this environment."_, verify: -- `browseros_mode: true` in your `/v1/chat/completions` request body -- keep sending `tools` and (optionally) `tool_choice` +- you are sending `tools` in your `/v1/chat/completions` request body (this auto-enables BrowserOS strict mode) +- optionally set `browseros_mode: true` explicitly for clarity +- keep sending `tool_choice` when your client supports it + +To disable strict BrowserOS behavior for non-agentic use-cases, set `browseros_mode: false`. This proxy mode is designed to use the proxy only for LLM/provider behavior while BrowserOS continues to execute the actual browser tools on its side. diff --git a/src/codex-client.ts b/src/codex-client.ts index dbf917c..636f830 100644 --- a/src/codex-client.ts +++ b/src/codex-client.ts @@ -162,8 +162,10 @@ export class CodexClient { `You are running as a tool-calling planner inside BrowserOS. ` + `You can and must control the browser by emitting tool calls. ` + `Do not claim you cannot access or control the browser/environment. 
` + - `When a browser action is requested, respond with tool calls only (and brief coordinating text only when necessary). ` + - `If an action needs multiple steps, emit the next required tool call(s) for the current step.\n`; + `If the request is actionable with available tools, your response MUST include at least one block. ` + + `Prefer tool-call-only output for action steps. ` + + `For shopping workflows, adding products to cart is permitted; avoid checkout/payment unless explicitly requested. ` + + `If an action needs multiple steps, emit only the next required tool call(s) for the current step.\n`; baseInstructions = (baseInstructions || "") + browserOSToolModeInstructions; } diff --git a/src/codex.ts b/src/codex.ts index 9528fca..caa23c3 100644 --- a/src/codex.ts +++ b/src/codex.ts @@ -105,7 +105,19 @@ export function parseToolCalls(text: string): ParsedToolCall[] { * are available and the expected output format. */ export function buildToolInstructions(tools: any[], tool_choice?: any): string { - let block = `\n\n## Available Tools\n\nYou have access to the following tools to perform actions. You MUST use these tools to fulfill the user's request. Do NOT describe steps or give instructions — instead, call the appropriate tool.\n\nTo call a tool, output one or more tool calls in this exact format (you may output multiple for parallel execution):\n{"name": "tool_name", "arguments": {"param": "value"}}\n\nIMPORTANT RULES:\n- ALWAYS use tool calls to act. NEVER respond with step-by-step instructions when a tool can do the job.\n- You can call multiple tools in a single response.\n- After a tool call, wait for the result before proceeding.\n- If the user asks you to navigate somewhere, use the navigate tool. If they ask you to click, use the click tool. Etc.\n\nHere are the tools:\n\n`; + let block = + `\n\n## Available Tools\n\n` + + `You are an agentic planner operating through external tools. 
` + + `When tools are available, your next action MUST be emitted as tool calls, not prose refusals.\n\n` + + `Tool call output format (required):\n` + + `{"name": "tool_name", "arguments": {"param": "value"}}\n\n` + + `IMPORTANT RULES:\n` + + `- If a user request is actionable with provided tools, emit one or more blocks.\n` + + `- Do not say you cannot access the browser/environment when browser tools are provided.\n` + + `- Keep normal text minimal. Prefer tool-call-only responses for action steps.\n` + + `- After tool results are returned, emit the next tool call(s) needed to continue.\n` + + `- For commerce tasks, adding an item to cart is allowed; do not attempt checkout/payment unless user explicitly requests it.\n\n` + + `Here are the tools:\n\n`; for (const tool of tools) { if (tool.type === "function" && tool.function) { @@ -116,6 +128,16 @@ export function buildToolInstructions(tools: any[], tool_choice?: any): string { block += `Parameters: ${JSON.stringify(fn.parameters)}\n`; } block += `\n`; + } else if (tool?.name) { + // Support alternate tool schemas used by some providers/agents. + block += `### ${tool.name}\n`; + if (tool.description) block += `${tool.description}\n`; + if (tool.input_schema) { + block += `Parameters: ${JSON.stringify(tool.input_schema)}\n`; + } else if (tool.parameters) { + block += `Parameters: ${JSON.stringify(tool.parameters)}\n`; + } + block += `\n`; } } diff --git a/src/index.ts b/src/index.ts index 2d0b979..ae61346 100644 --- a/src/index.ts +++ b/src/index.ts @@ -56,9 +56,12 @@ Bun.serve({ const temperature = body.temperature; const max_tokens = body.max_tokens; const reasoning_effort = body.reasoning_effort; - const tools = body.tools; + const tools = Array.isArray(body.tools) ? 
body.tools : undefined; const tool_choice = body.tool_choice; - const browseros_mode = body.browseros_mode === true; + // Default to BrowserOS-style strict tool mode whenever tools are supplied, + // unless callers explicitly disable it with browseros_mode: false. + const browseros_mode = + tools && tools.length > 0 ? body.browseros_mode !== false : false; const stream = body.stream === true; @@ -70,8 +73,15 @@ Bun.serve({ if (tools) { console.log(`[Proxy] Tools count: ${tools.length}`); } - if (browseros_mode) { - console.log(`[Proxy] BrowserOS mode enabled`); + if (tools && tools.length > 0) { + console.log( + `[Proxy] BrowserOS mode: ${browseros_mode ? "enabled" : "disabled"}`, + ); + if (body.browseros_mode === undefined && browseros_mode) { + console.log( + `[Proxy] BrowserOS mode auto-enabled because tools were provided`, + ); + } } if (stream) { From dda3e7453318a59658a516dcc87f0bde5423892a Mon Sep 17 00:00:00 2001 From: Chetan Khobragade <31281180+copsys@users.noreply.github.com> Date: Sat, 28 Mar 2026 15:19:07 +0000 Subject: [PATCH 4/4] fix: parse fenced and bare-JSON tool calls; default tool_choice to required --- src/codex-client.ts | 3 ++ src/codex.ts | 82 ++++++++++++++++++++++++++++++++++++--------- src/index.ts | 3 +- 3 files changed, 71 insertions(+), 17 deletions(-) diff --git a/src/codex-client.ts b/src/codex-client.ts index 636f830..6e400b9 100644 --- a/src/codex-client.ts +++ b/src/codex-client.ts @@ -294,6 +294,9 @@ export class CodexClient { } eventQueue.push({ type: "tool_calls", calls: toolCalls }); } else { + console.warn( + `[CodexClient] Tools provided but no tool calls parsed. 
Assistant preview: ${accumulatedText.slice(0, 300).replace(/\s+/g, " ")}`, + ); // No tool calls found, emit as plain message eventQueue.push({ type: "message", text: accumulatedText }); } diff --git a/src/codex.ts b/src/codex.ts index caa23c3..bf74e12 100644 --- a/src/codex.ts +++ b/src/codex.ts @@ -75,27 +75,77 @@ export type CodexStreamEvent = */ export function parseToolCalls(text: string): ParsedToolCall[] { const calls: ParsedToolCall[] = []; - const regex = /([\s\S]*?)<\/tool_call>/g; - let match; + const seen = new Set(); let callIndex = 0; - while ((match = regex.exec(text)) !== null) { + + const pushCall = (raw: any) => { + const name = raw?.name || raw?.toolName || raw?.function?.name || ""; + const argsRaw = + raw?.arguments ?? raw?.input ?? raw?.parameters ?? raw?.function?.arguments; + if (!name) return; + const args = + typeof argsRaw === "string" + ? argsRaw + : JSON.stringify(argsRaw ?? {}); + const key = `${name}::${args}`; + if (seen.has(key)) return; + seen.add(key); + calls.push({ + id: `call_${Date.now()}_${callIndex++}`, + type: "function", + function: { + name, + arguments: args, + }, + }); + }; + + // Format 1: explicit ... blocks. + const taggedRegex = /([\s\S]*?)<\/tool_call>/g; + let match; + while ((match = taggedRegex.exec(text)) !== null) { + try { + pushCall(JSON.parse(match[1].trim())); + } catch { + // Ignore malformed block. + } + } + + // Format 2: JSON fenced blocks that contain a single call, call list, or tool_calls. + const fencedJsonRegex = /```(?:json)?\s*([\s\S]*?)```/g; + while ((match = fencedJsonRegex.exec(text)) !== null) { + const candidate = match[1].trim(); try { - const parsed = JSON.parse(match[1].trim()); - calls.push({ - id: `call_${Date.now()}_${callIndex++}`, - type: "function", - function: { - name: parsed.name || parsed.function?.name || "", - arguments: - typeof parsed.arguments === "string" - ? parsed.arguments - : JSON.stringify(parsed.arguments ?? parsed.parameters ?? 
{}), - }, - }); + const parsed = JSON.parse(candidate); + if (Array.isArray(parsed)) { + for (const item of parsed) pushCall(item); + } else if (parsed?.tool_calls && Array.isArray(parsed.tool_calls)) { + for (const item of parsed.tool_calls) pushCall(item); + } else { + pushCall(parsed); + } } catch { - // Skip malformed tool calls + // Not valid JSON; ignore. } } + + // Format 3: whole response is a JSON object/array describing tool calls. + const trimmed = text.trim(); + if (trimmed.startsWith("{") || trimmed.startsWith("[")) { + try { + const parsed = JSON.parse(trimmed); + if (Array.isArray(parsed)) { + for (const item of parsed) pushCall(item); + } else if (parsed?.tool_calls && Array.isArray(parsed.tool_calls)) { + for (const item of parsed.tool_calls) pushCall(item); + } else { + pushCall(parsed); + } + } catch { + // Not parseable as JSON; ignore. + } + } + return calls; } diff --git a/src/index.ts b/src/index.ts index ae61346..d9d6d57 100644 --- a/src/index.ts +++ b/src/index.ts @@ -57,11 +57,12 @@ Bun.serve({ const max_tokens = body.max_tokens; const reasoning_effort = body.reasoning_effort; const tools = Array.isArray(body.tools) ? body.tools : undefined; - const tool_choice = body.tool_choice; // Default to BrowserOS-style strict tool mode whenever tools are supplied, // unless callers explicitly disable it with browseros_mode: false. const browseros_mode = tools && tools.length > 0 ? body.browseros_mode !== false : false; + const tool_choice = + body.tool_choice ?? (browseros_mode ? "required" : undefined); const stream = body.stream === true;