From 1476e2463cfbad30ecc2b79608efe3c735f5b8c0 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sat, 28 Mar 2026 13:54:08 +0000
Subject: [PATCH 1/4] feat: add browseros tool-calling mode

Agent-Logs-Url: https://github.com/copsys/codex-app-proxy/sessions/41e79ae7-148a-4f6d-970c-4f5ad05386fd

Co-authored-by: copsys <31281180+copsys@users.noreply.github.com>
---
 README.md           | 11 +++++++++++
 src/codex-client.ts | 18 +++++++++++++++++-
 src/codex.ts        |  2 ++
 src/index.ts        |  6 ++++++
 4 files changed, 36 insertions(+), 1 deletion(-)
diff --git a/README.md b/README.md
index 2b0f956..918bfd6 100644
--- a/README.md
+++ b/README.md
@@ -35,6 +35,8 @@ The proxy supports the following OpenAI-compatible parameters in the `/v1/chat/c
 - **`temperature`** (number): Controls randomness (passed to the engine).
 - **`max_tokens`** (number): Limits the length of the generated response.
 - **`reasoning_effort`** (string): For models with reasoning capabilities (e.g., `low`, `medium`, `high`).
+- **`tools` / `tool_choice`**: Standard OpenAI tool-calling fields used by agentic clients.
+- **`browseros_mode`** (boolean): Optional mode for BrowserOS-like agentic clients. When `true` (and tools are provided), the proxy adds stronger tool-execution instructions so the model emits tool calls instead of environment-limitation refusals.
 
 ## Quick Start
 
@@ -70,6 +72,15 @@ curl -N -X POST http://localhost:8080/v1/chat/completions \
 - **Port**: Set via `PORT` environment variable (defaults to 8080).
 - **Models**: The proxy automatically queries your local Codex installation for available model slugs.
 
+### BrowserOS Configuration
+
+If your BrowserOS agent sends tool definitions but the model replies with text like _"I’m unable to control the browser from this environment."_, set:
+
+- `browseros_mode: true` in your `/v1/chat/completions` request body
+- keep sending `tools` and (optionally) `tool_choice`
+
+This proxy mode is designed to use the proxy only for LLM/provider behavior while BrowserOS continues to execute the actual browser tools on its side.
+
 ## Architecture
 
 This project uses a typed `CodexClient` that manages a persistent `codex app-server` background process. Communication happens over a high-speed JSON-RPC channel on `stdio`, ensuring that the model state remains warm and ready for immediate inference.
diff --git a/src/codex-client.ts b/src/codex-client.ts
index 8c4fff0..21ce40a 100644
--- a/src/codex-client.ts
+++ b/src/codex-client.ts
@@ -121,7 +121,12 @@ export class CodexClient {
 
   async *chatCompletionStream(
     messages: Message[],
-    options: { model: string; tools?: any[]; tool_choice?: any },
+    options: {
+      model: string;
+      tools?: any[];
+      tool_choice?: any;
+      browseros_mode?: boolean;
+    },
   ): AsyncGenerator<CodexStreamEvent> {
     const hasTools = options.tools && options.tools.length > 0;
 
@@ -151,6 +156,17 @@ export class CodexClient {
       baseInstructions = (baseInstructions || "") + toolBlock;
     }
 
+    if (hasTools && options.browseros_mode) {
+      const browserOsToolModeInstructions =
+        `\n\n## BrowserOS Tool Execution Mode\n\n` +
+        `You are running as a tool-calling planner inside BrowserOS. ` +
+        `You can and must control the browser by emitting tool calls. ` +
+        `Do not claim you cannot access or control the browser/environment. ` +
+        `When a browser action is requested, respond with tool calls only (and brief coordinating text only when necessary). ` +
+        `If an action needs multiple steps, emit the next required tool call(s) for the current step.\n`;
+      baseInstructions = (baseInstructions || "") + browserOsToolModeInstructions;
+    }
+
     // --- Format conversation messages into prompt ---
     let fullPrompt = "";
     for (const msg of nonSystemMessages) {
diff --git a/src/codex.ts b/src/codex.ts
index fcfe9c7..9528fca 100644
--- a/src/codex.ts
+++ b/src/codex.ts
@@ -38,6 +38,7 @@ export interface CodexOptions {
   signal?: AbortSignal;
   tools?: any[];
   tool_choice?: any;
+  browseros_mode?: boolean;
 }
 
 export interface ParsedToolCall {
@@ -141,5 +142,6 @@ export async function* execCodexStream(
     model: options.model,
     tools: options.tools,
     tool_choice: options.tool_choice,
+    browseros_mode: options.browseros_mode,
   });
 }
diff --git a/src/index.ts b/src/index.ts
index f4bd520..2d0b979 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -58,6 +58,7 @@ Bun.serve({
         const reasoning_effort = body.reasoning_effort;
         const tools = body.tools;
         const tool_choice = body.tool_choice;
+        const browseros_mode = body.browseros_mode === true;
 
         const stream = body.stream === true;
 
@@ -69,6 +70,9 @@ Bun.serve({
         if (tools) {
           console.log(`[Proxy] Tools count: ${tools.length}`);
         }
+        if (browseros_mode) {
+          console.log(`[Proxy] BrowserOS mode enabled`);
+        }
 
         if (stream) {
           const responseId = `chatcmpl-${Date.now()}`;
@@ -88,6 +92,7 @@ Bun.serve({
                   signal: req.signal,
                   tools,
                   tool_choice,
+                  browseros_mode,
                 })) {
                   if (req.signal.aborted) break;
 
@@ -269,6 +274,7 @@ Bun.serve({
             signal: req.signal,
             tools,
             tool_choice,
+            browseros_mode,
           })) {
             if (req.signal.aborted) break;
             if (event.type === "message") {

From 27ad1519e7d21f95e5f4ddfa1a1d03ff3304a3fd Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sat, 28 Mar 2026 13:54:31 +0000
Subject: [PATCH 2/4] chore: address code review naming feedback

Agent-Logs-Url: https://github.com/copsys/codex-app-proxy/sessions/41e79ae7-148a-4f6d-970c-4f5ad05386fd

Co-authored-by: copsys <31281180+copsys@users.noreply.github.com>
---
 src/codex-client.ts | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/codex-client.ts b/src/codex-client.ts
index 21ce40a..dbf917c 100644
--- a/src/codex-client.ts
+++ b/src/codex-client.ts
@@ -157,14 +157,14 @@ export class CodexClient {
     }
 
     if (hasTools && options.browseros_mode) {
-      const browserOsToolModeInstructions =
+      const browserOSToolModeInstructions =
         `\n\n## BrowserOS Tool Execution Mode\n\n` +
         `You are running as a tool-calling planner inside BrowserOS. ` +
         `You can and must control the browser by emitting tool calls. ` +
         `Do not claim you cannot access or control the browser/environment. ` +
         `When a browser action is requested, respond with tool calls only (and brief coordinating text only when necessary). ` +
         `If an action needs multiple steps, emit the next required tool call(s) for the current step.\n`;
-      baseInstructions = (baseInstructions || "") + browserOsToolModeInstructions;
+      baseInstructions = (baseInstructions || "") + browserOSToolModeInstructions;
     }
 
     // --- Format conversation messages into prompt ---

From 5be8919dda83ee2d238178c92c370b4466903d2e Mon Sep 17 00:00:00 2001
From: Chetan Khobragade <31281180+copsys@users.noreply.github.com>
Date: Sat, 28 Mar 2026 15:13:14 +0000
Subject: [PATCH 3/4] feat: auto-enable browseros mode when tools are provided

---
 README.md           | 11 +++++++----
 src/codex-client.ts |  6 ++++--
 src/codex.ts        | 24 +++++++++++++++++++++++-
 src/index.ts        | 18 ++++++++++++++----
 4 files changed, 48 insertions(+), 11 deletions(-)

diff --git a/README.md b/README.md
index 918bfd6..3daf0db 100644
--- a/README.md
+++ b/README.md
@@ -36,7 +36,7 @@ The proxy supports the following OpenAI-compatible parameters in the `/v1/chat/c
 - **`max_tokens`** (number): Limits the length of the generated response.
 - **`reasoning_effort`** (string): For models with reasoning capabilities (e.g., `low`, `medium`, `high`).
 - **`tools` / `tool_choice`**: Standard OpenAI tool-calling fields used by agentic clients.
-- **`browseros_mode`** (boolean): Optional mode for BrowserOS-like agentic clients. When `true` (and tools are provided), the proxy adds stronger tool-execution instructions so the model emits tool calls instead of environment-limitation refusals.
+- **`browseros_mode`** (boolean): Optional strict mode toggle for BrowserOS-like agentic clients. When a non-empty `tools` array is provided, this mode is **enabled by default** unless you explicitly set `browseros_mode: false`.
 
 ## Quick Start
 
@@ -74,10 +74,13 @@ curl -N -X POST http://localhost:8080/v1/chat/completions \
 
 ### BrowserOS Configuration
 
-If your BrowserOS agent sends tool definitions but the model replies with text like _"I’m unable to control the browser from this environment."_, set:
+If your BrowserOS agent sends tool definitions but the model replies with text like _"I’m unable to control the browser from this environment."_, verify:
 
-- `browseros_mode: true` in your `/v1/chat/completions` request body
-- keep sending `tools` and (optionally) `tool_choice`
+- you are sending a non-empty `tools` array in your `/v1/chat/completions` request body (this auto-enables BrowserOS strict mode)
+- optionally set `browseros_mode: true` explicitly for clarity
+- keep sending `tool_choice` when your client supports it
+
+To disable strict BrowserOS behavior for non-agentic use-cases, set `browseros_mode: false`.
 
 This proxy mode is designed to use the proxy only for LLM/provider behavior while BrowserOS continues to execute the actual browser tools on its side.
 
diff --git a/src/codex-client.ts b/src/codex-client.ts
index dbf917c..636f830 100644
--- a/src/codex-client.ts
+++ b/src/codex-client.ts
@@ -162,8 +162,10 @@ export class CodexClient {
         `You are running as a tool-calling planner inside BrowserOS. ` +
         `You can and must control the browser by emitting tool calls. ` +
         `Do not claim you cannot access or control the browser/environment. ` +
-        `When a browser action is requested, respond with tool calls only (and brief coordinating text only when necessary). ` +
-        `If an action needs multiple steps, emit the next required tool call(s) for the current step.\n`;
+        `If the request is actionable with available tools, your response MUST include at least one <tool_call> block. ` +
+        `Prefer tool-call-only output for action steps. ` +
+        `For shopping workflows, adding products to cart is permitted; avoid checkout/payment unless explicitly requested. ` +
+        `If an action needs multiple steps, emit only the next required tool call(s) for the current step.\n`;
       baseInstructions = (baseInstructions || "") + browserOSToolModeInstructions;
     }
 
diff --git a/src/codex.ts b/src/codex.ts
index 9528fca..caa23c3 100644
--- a/src/codex.ts
+++ b/src/codex.ts
@@ -105,7 +105,19 @@ export function parseToolCalls(text: string): ParsedToolCall[] {
  * are available and the expected output format.
  */
 export function buildToolInstructions(tools: any[], tool_choice?: any): string {
-  let block = `\n\n## Available Tools\n\nYou have access to the following tools to perform actions. You MUST use these tools to fulfill the user's request. Do NOT describe steps or give instructions — instead, call the appropriate tool.\n\nTo call a tool, output one or more tool calls in this exact format (you may output multiple for parallel execution):\n<tool_call>{"name": "tool_name", "arguments": {"param": "value"}}</tool_call>\n\nIMPORTANT RULES:\n- ALWAYS use tool calls to act. NEVER respond with step-by-step instructions when a tool can do the job.\n- You can call multiple tools in a single response.\n- After a tool call, wait for the result before proceeding.\n- If the user asks you to navigate somewhere, use the navigate tool. If they ask you to click, use the click tool. Etc.\n\nHere are the tools:\n\n`;
+  let block =
+    `\n\n## Available Tools\n\n` +
+    `You are an agentic planner operating through external tools. ` +
+    `When tools are available, your next action MUST be emitted as tool calls, not prose refusals.\n\n` +
+    `Tool call output format (required):\n` +
+    `<tool_call>{"name": "tool_name", "arguments": {"param": "value"}}</tool_call>\n\n` +
+    `IMPORTANT RULES:\n` +
+    `- If a user request is actionable with provided tools, emit one or more <tool_call> blocks.\n` +
+    `- Do not say you cannot access the browser/environment when browser tools are provided.\n` +
+    `- Keep normal text minimal. Prefer tool-call-only responses for action steps.\n` +
+    `- After tool results are returned, emit the next tool call(s) needed to continue.\n` +
+    `- For commerce tasks, adding an item to cart is allowed; do not attempt checkout/payment unless user explicitly requests it.\n\n` +
+    `Here are the tools:\n\n`;
 
   for (const tool of tools) {
     if (tool.type === "function" && tool.function) {
@@ -116,6 +128,16 @@ export function buildToolInstructions(tools: any[], tool_choice?: any): string {
         block += `Parameters: ${JSON.stringify(fn.parameters)}\n`;
       }
       block += `\n`;
+    } else if (tool?.name) {
+      // Support alternate tool schemas used by some providers/agents.
+      block += `### ${tool.name}\n`;
+      if (tool.description) block += `${tool.description}\n`;
+      if (tool.input_schema) {
+        block += `Parameters: ${JSON.stringify(tool.input_schema)}\n`;
+      } else if (tool.parameters) {
+        block += `Parameters: ${JSON.stringify(tool.parameters)}\n`;
+      }
+      block += `\n`;
     }
   }
 
diff --git a/src/index.ts b/src/index.ts
index 2d0b979..ae61346 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -56,9 +56,12 @@ Bun.serve({
         const temperature = body.temperature;
         const max_tokens = body.max_tokens;
         const reasoning_effort = body.reasoning_effort;
-        const tools = body.tools;
+        const tools = Array.isArray(body.tools) ? body.tools : undefined;
         const tool_choice = body.tool_choice;
-        const browseros_mode = body.browseros_mode === true;
+        // Default to BrowserOS-style strict tool mode whenever tools are supplied,
+        // unless callers explicitly disable it with browseros_mode: false.
+        const browseros_mode =
+          tools && tools.length > 0 ? body.browseros_mode !== false : false;
 
         const stream = body.stream === true;
 
@@ -70,8 +73,15 @@ Bun.serve({
         if (tools) {
           console.log(`[Proxy] Tools count: ${tools.length}`);
         }
-        if (browseros_mode) {
-          console.log(`[Proxy] BrowserOS mode enabled`);
+        if (tools && tools.length > 0) {
+          console.log(
+            `[Proxy] BrowserOS mode: ${browseros_mode ? "enabled" : "disabled"}`,
+          );
+          if (body.browseros_mode === undefined && browseros_mode) {
+            console.log(
+              `[Proxy] BrowserOS mode auto-enabled because tools were provided`,
+            );
+          }
         }
 
         if (stream) {

From dda3e7453318a59658a516dcc87f0bde5423892a Mon Sep 17 00:00:00 2001
From: Chetan Khobragade <31281180+copsys@users.noreply.github.com>
Date: Sat, 28 Mar 2026 15:19:07 +0000
Subject: [PATCH 4/4] fix: parse fenced and bare JSON tool calls; default tool_choice to "required"

---
 src/codex-client.ts |  3 ++
 src/codex.ts        | 82 ++++++++++++++++++++++++++++++++++++---------
 src/index.ts        |  3 +-
 3 files changed, 71 insertions(+), 17 deletions(-)

diff --git a/src/codex-client.ts b/src/codex-client.ts
index 636f830..6e400b9 100644
--- a/src/codex-client.ts
+++ b/src/codex-client.ts
@@ -294,6 +294,9 @@ export class CodexClient {
                 }
                 eventQueue.push({ type: "tool_calls", calls: toolCalls });
               } else {
+                console.warn(
+                  `[CodexClient] Tools provided but no tool calls parsed. Assistant preview: ${accumulatedText.slice(0, 300).replace(/\s+/g, " ")}`,
+                );
                 // No tool calls found, emit as plain message
                 eventQueue.push({ type: "message", text: accumulatedText });
               }
diff --git a/src/codex.ts b/src/codex.ts
index caa23c3..bf74e12 100644
--- a/src/codex.ts
+++ b/src/codex.ts
@@ -75,27 +75,77 @@ export type CodexStreamEvent =
  */
 export function parseToolCalls(text: string): ParsedToolCall[] {
   const calls: ParsedToolCall[] = [];
-  const regex = /<tool_call>([\s\S]*?)<\/tool_call>/g;
-  let match;
+  const seen = new Set<string>();
   let callIndex = 0;
-  while ((match = regex.exec(text)) !== null) {
+
+  const pushCall = (raw: any) => {
+    const name = raw?.name || raw?.toolName || raw?.function?.name || "";
+    const argsRaw =
+      raw?.arguments ?? raw?.input ?? raw?.parameters ?? raw?.function?.arguments;
+    if (!name) return;
+    const args =
+      typeof argsRaw === "string"
+        ? argsRaw
+        : JSON.stringify(argsRaw ?? {});
+    const key = `${name}::${args}`;
+    if (seen.has(key)) return;
+    seen.add(key);
+    calls.push({
+      id: `call_${Date.now()}_${callIndex++}`,
+      type: "function",
+      function: {
+        name,
+        arguments: args,
+      },
+    });
+  };
+
+  // Format 1: explicit <tool_call>...</tool_call> blocks.
+  const taggedRegex = /<tool_call>([\s\S]*?)<\/tool_call>/g;
+  let match;
+  while ((match = taggedRegex.exec(text)) !== null) {
+    try {
+      pushCall(JSON.parse(match[1].trim()));
+    } catch {
+      // Ignore malformed block.
+    }
+  }
+
+  // Format 2: JSON fenced blocks that contain a single call, call list, or tool_calls.
+  const fencedJsonRegex = /```(?:json)?\s*([\s\S]*?)```/g;
+  while ((match = fencedJsonRegex.exec(text)) !== null) {
+    const candidate = match[1].trim();
     try {
-      const parsed = JSON.parse(match[1].trim());
-      calls.push({
-        id: `call_${Date.now()}_${callIndex++}`,
-        type: "function",
-        function: {
-          name: parsed.name || parsed.function?.name || "",
-          arguments:
-            typeof parsed.arguments === "string"
-              ? parsed.arguments
-              : JSON.stringify(parsed.arguments ?? parsed.parameters ?? {}),
-        },
-      });
+      const parsed = JSON.parse(candidate);
+      if (Array.isArray(parsed)) {
+        for (const item of parsed) pushCall(item);
+      } else if (parsed?.tool_calls && Array.isArray(parsed.tool_calls)) {
+        for (const item of parsed.tool_calls) pushCall(item);
+      } else {
+        pushCall(parsed);
+      }
     } catch {
-      // Skip malformed tool calls
+      // Not valid JSON; ignore.
     }
   }
+
+  // Format 3: whole response is a JSON object/array describing tool calls.
+  const trimmed = text.trim();
+  if (trimmed.startsWith("{") || trimmed.startsWith("[")) {
+    try {
+      const parsed = JSON.parse(trimmed);
+      if (Array.isArray(parsed)) {
+        for (const item of parsed) pushCall(item);
+      } else if (parsed?.tool_calls && Array.isArray(parsed.tool_calls)) {
+        for (const item of parsed.tool_calls) pushCall(item);
+      } else {
+        pushCall(parsed);
+      }
+    } catch {
+      // Not parseable as JSON; ignore.
+    }
+  }
+
   return calls;
 }
 
diff --git a/src/index.ts b/src/index.ts
index ae61346..d9d6d57 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -57,11 +57,12 @@ Bun.serve({
         const max_tokens = body.max_tokens;
         const reasoning_effort = body.reasoning_effort;
         const tools = Array.isArray(body.tools) ? body.tools : undefined;
-        const tool_choice = body.tool_choice;
         // Default to BrowserOS-style strict tool mode whenever tools are supplied,
         // unless callers explicitly disable it with browseros_mode: false.
         const browseros_mode =
           tools && tools.length > 0 ? body.browseros_mode !== false : false;
+        const tool_choice =
+          body.tool_choice ?? (browseros_mode ? "required" : undefined);
 
         const stream = body.stream === true;