From b69e48e564e8660d2982bf279f31d7079dda8ff7 Mon Sep 17 00:00:00 2001
From: kiyo-e <kiyo-e@users.noreply.github.com>
Date: Wed, 6 Aug 2025 21:41:13 +0900
Subject: [PATCH 1/2] feat: add REASONING_EFFORT configuration option for
 enhanced reasoning control

---
 CLAUDE.md        |   1 +
 README.md        |   5 +
 src/index.ts     | 260 +++++++++++++++++-----------------
 src/transform.ts | 353 +++++++++++++++++++++++++++++++++++++++++++++++
 wrangler.toml    |   1 +
 5 files changed, 489 insertions(+), 131 deletions(-)
 create mode 100644 src/transform.ts

diff --git a/CLAUDE.md b/CLAUDE.md
index cad6001..ecebcf7 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -59,6 +59,7 @@ Configure via `wrangler.toml` or environment:
 - `COMPLETION_MODEL` - Model for completion requests (default: openai/gpt-4.1)
 - `REASONING_MAX_TOKENS` - Max tokens for reasoning model (optional)
 - `COMPLETION_MAX_TOKENS` - Max tokens for completion model (optional)
+- `REASONING_EFFORT` - Reasoning effort level for reasoning model (optional, e.g., "low", "medium", "high")
 - `DEBUG` - Enable debug logging (default: false)
 - `PORT` - Server port for Node.js mode (default: 3000)
 
diff --git a/README.md b/README.md
index 78ab55a..1e4e833 100644
--- a/README.md
+++ b/README.md
@@ -58,6 +58,7 @@ docker run -d -p 3000:3000 \
   -e ANTHROPIC_PROXY_BASE_URL=https://openrouter.ai/api/v1 \
   -e REASONING_MODEL=deepseek/deepseek-r1-0528:free \
   -e COMPLETION_MODEL=deepseek/deepseek-r1-0528:free \
+  -e REASONING_EFFORT=high \
   ghcr.io/kiyo-e/claude-code-proxy:latest
 
 # Use with Claude Code
@@ -75,6 +76,7 @@ REASONING_MODEL=deepseek/deepseek-r1-0528:free
 COMPLETION_MODEL=deepseek/deepseek-r1-0528:free
 REASONING_MAX_TOKENS=4096
 COMPLETION_MAX_TOKENS=2048
+REASONING_EFFORT=high
 DEBUG=false
 EOF
 
@@ -190,6 +192,7 @@ npm publish
 - `COMPLETION_MODEL` - Model for completion requests (default: openai/gpt-4.1)
 - `REASONING_MAX_TOKENS` - Max tokens for reasoning model (optional)
 - `COMPLETION_MAX_TOKENS` - Max tokens for completion model (optional)
+- `REASONING_EFFORT` - Reasoning effort level for reasoning model (optional, e.g., "low", "medium", "high")
 - `DEBUG` - Enable debug logging (default: false)
 - `PORT` - Server port for CLI mode (default: 3000)
 
@@ -205,6 +208,7 @@ npx wrangler secret put ANTHROPIC_PROXY_BASE_URL
 # Set regular environment variables
 npx wrangler env put REASONING_MODEL "deepseek/deepseek-r1-0528:free"
 npx wrangler env put COMPLETION_MODEL "deepseek/deepseek-r1-0528:free"
+npx wrangler env put REASONING_EFFORT "high"
 npx wrangler env put DEBUG "false"
 ```
 
@@ -214,6 +218,7 @@ Alternatively, configure via `wrangler.toml`:
 [env.production.vars]
 REASONING_MODEL = "deepseek/deepseek-r1-0528:free"
 COMPLETION_MODEL = "deepseek/deepseek-r1-0528:free"
+REASONING_EFFORT = "high"
 DEBUG = "false"
 ```
 
diff --git a/src/index.ts b/src/index.ts
index 0e86b1e..240837b 100755
--- a/src/index.ts
+++ b/src/index.ts
@@ -1,8 +1,10 @@
 import { Hono } from 'hono'
 import { env } from 'hono/adapter'
+import { transformOpenAIToClaude, transformClaudeToOpenAI, removeUriFormat } from './transform'
 
 const app = new Hono<{
   Bindings: {
+    REASONING_EFFORT?: string
     ANTHROPIC_PROXY_BASE_URL?: string
     CLAUDE_CODE_PROXY_API_KEY?: string
     REASONING_MODEL?: string
@@ -18,7 +20,7 @@ const defaultModel = 'openai/gpt-4.1'
 
 // Health check endpoint
 app.get('/', (c) => {
-  const { ANTHROPIC_PROXY_BASE_URL, REASONING_MODEL, COMPLETION_MODEL, REASONING_MAX_TOKENS, COMPLETION_MAX_TOKENS} = env(c)
+  const { ANTHROPIC_PROXY_BASE_URL, REASONING_MODEL, COMPLETION_MODEL, REASONING_MAX_TOKENS, COMPLETION_MAX_TOKENS, REASONING_EFFORT } = env(c)
 
   return c.json({
     status: 'ok',
@@ -28,14 +30,15 @@ app.get('/', (c) => {
       REASONING_MODEL,
       COMPLETION_MODEL,
       REASONING_MAX_TOKENS,
-      COMPLETION_MAX_TOKENS
+      COMPLETION_MAX_TOKENS,
+      REASONING_EFFORT
     }
   })
 })
 
 app.post('/v1/messages', async (c) => {
   // Get environment variables from context
-  const { CLAUDE_CODE_PROXY_API_KEY, ANTHROPIC_PROXY_BASE_URL, REASONING_MODEL, COMPLETION_MODEL, REASONING_MAX_TOKENS, COMPLETION_MAX_TOKENS, DEBUG } = env(c)
+  const { CLAUDE_CODE_PROXY_API_KEY, ANTHROPIC_PROXY_BASE_URL, REASONING_MODEL, COMPLETION_MODEL, REASONING_MAX_TOKENS, COMPLETION_MAX_TOKENS, DEBUG, REASONING_EFFORT } = env(c)
 
   try {
     const baseUrl = ANTHROPIC_PROXY_BASE_URL || 'https://models.github.ai/inference'
@@ -54,106 +57,75 @@ app.post('/v1/messages', async (c) => {
       return value.replace(/Bearer\s+(\S+)/g, 'Bearer ********')
     }
 
-    const payload = await c.req.json()
-
-    // Helper to normalize a message's content
-    const normalizeContent = (content: any): string | null => {
-      if (typeof content === 'string') return content
-      if (Array.isArray(content)) {
-        return content.map(item => item.text).join(' ')
-      }
-      return null
-    }
+    const claudePayload = await c.req.json()
+    
+    // Transform Claude format to OpenAI format
+    const { claudeRequest, droppedParams } = transformOpenAIToClaude(claudePayload)
 
-    // Build messages array for the OpenAI payload
+    // Convert messages from Claude to OpenAI format for upstream API
     const messages: any[] = []
-    if (payload.system && Array.isArray(payload.system)) {
-      payload.system.forEach((sysMsg: any) => {
-        const normalized = normalizeContent(sysMsg.text || sysMsg.content)
-        if (normalized) {
-          messages.push({
-            role: 'system',
-            content: normalized
-          })
-        }
+    
+    // Add system messages
+    if (claudeRequest.system) {
+      messages.push({
+        role: 'system',
+        content: claudeRequest.system
       })
     }
-
-    // Then add user (or other) messages
-    if (payload.messages && Array.isArray(payload.messages)) {
-      payload.messages.forEach((msg: any) => {
-        // Skip messages with unsupported roles for some APIs
-        if (!['user', 'assistant', 'system', 'tool', 'function'].includes(msg.role)) {
-          console.warn(`Skipping message with unsupported role: ${msg.role}`)
-          return
-        }
-        const toolCalls = (Array.isArray(msg.content) ? msg.content : [])
-          .filter((item: any) => item.type === 'tool_use')
-          .map((toolCall: any) => ({
-            id: toolCall.id,
-            type: 'function',
-            function: {
-              name: toolCall.name,
-              arguments: JSON.stringify(toolCall.input),
+    
+    // Process regular messages
+    if (claudeRequest.messages && Array.isArray(claudeRequest.messages)) {
+      for (const msg of claudeRequest.messages) {
+        if (msg.role === 'user' || msg.role === 'assistant') {
+          // Handle content blocks
+          if (Array.isArray(msg.content)) {
+            const textParts: string[] = []
+            const toolCalls: any[] = []
+            const toolResults: any[] = []
+            
+            for (const block of msg.content) {
+              if (block.type === 'text') {
+                textParts.push(block.text)
+              } else if (block.type === 'tool_use') {
+                toolCalls.push({
+                  id: block.id,
+                  type: 'function',
+                  function: {
+                    name: block.name,
+                    arguments: JSON.stringify(block.input)
+                  }
+                })
+              } else if (block.type === 'tool_result') {
+                toolResults.push({
+                  role: 'tool',
+                  content: block.content || '',
+                  tool_call_id: block.tool_use_id
+                })
+              }
             }
-          }))
-
-        const newMsg: any = { role: msg.role }
-        const normalized = normalizeContent(msg.content)
-        if (normalized) newMsg.content = normalized
-        if (toolCalls.length > 0) newMsg.tool_calls = toolCalls
-        if (newMsg.content || newMsg.tool_calls) messages.push(newMsg)
-
-        if (Array.isArray(msg.content)) {
-          const toolResults = msg.content.filter((item: any) => item.type === 'tool_result')
-          toolResults.forEach((toolResult: any) => {
+            
+            // Add main message if it has content
+            if (textParts.length > 0 || toolCalls.length > 0) {
+              const openAIMsg: any = { role: msg.role }
+              if (textParts.length > 0) openAIMsg.content = textParts.join(' ')
+              if (toolCalls.length > 0) openAIMsg.tool_calls = toolCalls
+              messages.push(openAIMsg)
+            }
+            
+            // Add tool result messages
+            messages.push(...toolResults)
+          } else if (typeof msg.content === 'string') {
             messages.push({
-              role: 'tool',
-              content: toolResult.text || toolResult.content,
-              tool_call_id: toolResult.tool_use_id,
+              role: msg.role,
+              content: msg.content
             })
-          })
-        }
-      })
-    }
-
-    // Helper function to recursively traverse JSON schema and remove format: 'uri'
-    const removeUriFormat = (schema: any): any => {
-      if (!schema || typeof schema !== 'object') return schema
-
-      // If this is a string type with uri format, remove the format
-      if (schema.type === 'string' && schema.format === 'uri') {
-        const { format, ...rest } = schema
-        return rest
-      }
-
-      // Handle array of schemas
-      if (Array.isArray(schema)) {
-        return schema.map(item => removeUriFormat(item))
-      }
-
-      // Recursively process all properties
-      const result: any = {}
-      for (const key in schema) {
-        if (key === 'properties' && typeof schema[key] === 'object') {
-          result[key] = {}
-          for (const propKey in schema[key]) {
-            result[key][propKey] = removeUriFormat(schema[key][propKey])
           }
-        } else if (key === 'items' && typeof schema[key] === 'object') {
-          result[key] = removeUriFormat(schema[key])
-        } else if (key === 'additionalProperties' && typeof schema[key] === 'object') {
-          result[key] = removeUriFormat(schema[key])
-        } else if (['anyOf', 'allOf', 'oneOf'].includes(key) && Array.isArray(schema[key])) {
-          result[key] = schema[key].map((item: any) => removeUriFormat(item))
-        } else {
-          result[key] = removeUriFormat(schema[key])
         }
       }
-      return result
     }
 
-    const tools = (payload.tools || [])
+    // Process tools
+    const tools = (claudeRequest.tools || [])
       .filter((tool: any) => !['BatchTool'].includes(tool.name))
       .map((tool: any) => ({
         type: 'function',
@@ -165,19 +137,21 @@ app.post('/v1/messages', async (c) => {
       }))
 
     const openaiPayload: any = {
-      model: payload.thinking ? models.reasoning : models.completion,
+      // Existing fields kept as before
+
+      model: claudePayload.thinking ? models.reasoning : models.completion,
       messages,
-      temperature: payload.temperature !== undefined ? payload.temperature : 1,
-      stream: payload.stream === true,
+      temperature: claudeRequest.temperature !== undefined ? claudeRequest.temperature : 1,
+      stream: claudeRequest.stream === true,
     }
     
     // Only add max_tokens if it's provided and not null/undefined
-    if (payload.max_tokens !== null && payload.max_tokens !== undefined) {
-      openaiPayload.max_tokens = payload.max_tokens
+    if (claudeRequest.max_tokens !== null && claudeRequest.max_tokens !== undefined) {
+      openaiPayload.max_tokens = claudeRequest.max_tokens
     }
 
     // Apply max_tokens override if configured
-    const selectedModel = payload.thinking ? models.reasoning : models.completion
+    const selectedModel = claudePayload.thinking ? models.reasoning : models.completion
     const reasoningMaxTokens = REASONING_MAX_TOKENS ? parseInt(REASONING_MAX_TOKENS) : undefined
     const completionMaxTokens = COMPLETION_MAX_TOKENS ? parseInt(COMPLETION_MAX_TOKENS) : undefined
     
@@ -186,9 +160,28 @@ app.post('/v1/messages', async (c) => {
     } else if (selectedModel === models.completion && completionMaxTokens) {
       openaiPayload.max_tokens = completionMaxTokens
     }
+    
+    // Apply reasoning_effort if configured and model is reasoning
+    if (selectedModel === models.reasoning && REASONING_EFFORT) {
+      openaiPayload.reasoning_effort = REASONING_EFFORT
+    }
+    // Add tool_choice if present
+    if (claudeRequest.tool_choice) {
+      if (claudeRequest.tool_choice.type === 'none') {
+        openaiPayload.tool_choice = 'none'
+      } else if (claudeRequest.tool_choice.type === 'auto') {
+        openaiPayload.tool_choice = 'auto'
+      } else if (claudeRequest.tool_choice.type === 'tool' && claudeRequest.tool_choice.name) {
+        openaiPayload.tool_choice = {
+          type: 'function',
+          function: { name: claudeRequest.tool_choice.name }
+        }
+      }
+    }
+    
     if (tools.length > 0) openaiPayload.tools = tools
     
-    debug('OpenAI payload:', openaiPayload)
+    debug('OpenAI payload:', JSON.stringify(openaiPayload, null, 2))
 
     const headers: Record<string, string> = {
       'Content-Type': 'application/json'
@@ -214,63 +207,67 @@ app.post('/v1/messages', async (c) => {
       body: JSON.stringify(openaiPayload)
     })
 
+    // Add X-Dropped-Params header if any params were dropped
+    if (droppedParams.length > 0) {
+      c.header('X-Dropped-Params', droppedParams.join(', '))
+    }
+
     if (!openaiResponse.ok) {
       const errorDetails = await openaiResponse.text()
       console.error(`OpenAI API error (${openaiResponse.status}):`, errorDetails)
       console.error('Failed request payload:', JSON.stringify(openaiPayload, null, 2))
+      console.error('Dropped parameters:', droppedParams)
       return c.json({ error: errorDetails }, openaiResponse.status as any)
     }
 
     // If stream is not enabled, process the complete response
     if (!openaiPayload.stream) {
       const data: any = await openaiResponse.json()
-      debug('OpenAI response:', data)
+      debug('OpenAI response:', JSON.stringify(data, null, 2))
       if (data.error) {
         throw new Error(data.error.message)
       }
 
+      // Create Claude response from OpenAI data
       const choice = data.choices[0]
       const openaiMessage = choice.message
-
-      // Map finish_reason to anthropic stop_reason
-      const stopReason = mapStopReason(choice.finish_reason)
-      const toolCalls = openaiMessage.tool_calls || []
-
-      // Create a message id
-      const messageId = data.id
-        ? data.id.replace('chatcmpl', 'msg')
-        : 'msg_' + Math.random().toString(36).substring(2, 26)
-
-      const anthropicResponse = {
-        content: [
-          {
-            text: openaiMessage.content,
-            type: 'text'
-          },
-          ...toolCalls.map((toolCall: any) => ({
+      
+      // Build content blocks
+      const content: any[] = []
+      if (openaiMessage.content) {
+        content.push({
+          type: 'text',
+          text: openaiMessage.content
+        })
+      }
+      
+      if (openaiMessage.tool_calls) {
+        for (const toolCall of openaiMessage.tool_calls) {
+          content.push({
             type: 'tool_use',
             id: toolCall.id,
             name: toolCall.function.name,
-            input: JSON.parse(toolCall.function.arguments),
-          })),
-        ],
-        id: messageId,
+            input: JSON.parse(toolCall.function.arguments)
+          })
+        }
+      }
+      
+      const claudeResponse = {
+        id: data.id ? data.id.replace('chatcmpl', 'msg') : 'msg_' + Math.random().toString(36).substring(2, 26),
+        type: 'message',
+        role: 'assistant',
         model: openaiPayload.model,
-        role: openaiMessage.role,
-        stop_reason: stopReason,
+        content,
+        stop_reason: mapStopReason(choice.finish_reason),
         stop_sequence: null,
-        type: 'message',
         usage: {
-          input_tokens: data.usage
-            ? data.usage.prompt_tokens
-            : messages.reduce((acc, msg) => acc + (msg.content?.split(' ').length || 0), 0),
-          output_tokens: data.usage
-            ? data.usage.completion_tokens
-            : openaiMessage.content.split(' ').length,
+          input_tokens: data.usage?.prompt_tokens || 0,
+          output_tokens: data.usage?.completion_tokens || 0
         }
       }
-
-      return c.json(anthropicResponse)
+      
+      debug('Claude response:', JSON.stringify(claudeResponse, null, 2))
+      return c.json(claudeResponse)
     }
 
     // Streaming response using Server-Sent Events
@@ -279,6 +276,7 @@ app.post('/v1/messages', async (c) => {
         const encoder = new TextEncoder()
 
         const sendSSE = (event: string, data: any) => {
+          debug('Sending SSE:', { event, data: JSON.stringify(data, null, 2) })
           const sseMessage = `event: ${event}\ndata: ${JSON.stringify(data)}\n\n`
           controller.enqueue(encoder.encode(sseMessage))
         }
diff --git a/src/transform.ts b/src/transform.ts
new file mode 100644
index 0000000..9765695
--- /dev/null
+++ b/src/transform.ts
@@ -0,0 +1,353 @@
+/**
+ * Transform module for converting between OpenAI and Claude API formats
+ * Design document reference: https://github.com/kiyo-e/claude-code-proxy/issues
+ * Related classes: src/index.ts - Main proxy service implementation
+ */
+
+// OpenAI-only request keys that sanitizeRoot deletes from the request and reports as dropped
+const DROP_KEYS = [
+  'n',
+  'presence_penalty',
+  'frequency_penalty',
+  'best_of',
+  'logit_bias',
+  'seed',
+  'stream_options',
+  'logprobs',
+  'top_logprobs',
+  'user',
+  'response_format',
+  'service_tier',
+  'parallel_tool_calls',
+  'functions',
+  'function_call'
+]
+
+interface DroppedParams {
+  keys: string[] // names of the request keys sanitizeRoot removed
+}
+
+/**
+ * Normalize root-level request fields in place and report what was removed.
+ *
+ * Mutates `req`. Returns `{ keys }` with the name of every removed parameter.
+ */
+export function sanitizeRoot(req: any): DroppedParams {
+  const removed: string[] = []
+
+  // `stop` (single value or array) is renamed to the array-valued `stop_sequences`
+  if (req.stop !== undefined) {
+    const stop = req.stop
+    delete req.stop
+    req.stop_sequences = Array.isArray(stop) ? stop : [stop]
+  }
+
+  // A truthy `user` is copied into `metadata.user_id`, then removed and reported
+  if (req.user) {
+    const userId = req.user
+    delete req.user
+    removed.push('user')
+    req.metadata = { ...req.metadata, user_id: userId }
+  }
+
+  // Remove every key listed in DROP_KEYS that is still present on the request
+  for (const name of DROP_KEYS.filter((candidate) => candidate in req)) {
+    removed.push(name)
+    delete req[name]
+  }
+
+  // max_tokens is always set: null/undefined falls back to 4096
+  if (req.max_tokens === null || req.max_tokens === undefined) {
+    req.max_tokens = 4096
+  }
+  return { keys: removed }
+}
+
+/**
+ * Rewrite `req.tools` in place as Claude-style tools: { name, description, input_schema }.
+ * Legacy `req.functions` entries are appended after the existing tools, then deleted.
+ */
+export function mapTools(req: any): void {
+  const declaredTools = req.tools ?? []
+  // Give each legacy function the same { type, function } wrapper a tool entry has
+  const wrappedFunctions = (req.functions ?? []).map((fn: any) => ({ type: 'function', function: fn }))
+
+  // Nested `function.*` fields win; flat name/description/input_schema are the fallback
+  req.tools = declaredTools.concat(wrappedFunctions).map((entry: any) => {
+    const fn = entry.function
+    return {
+      name: fn?.name ?? entry.name,
+      description: fn?.description ?? entry.description,
+      input_schema: removeUriFormat(fn?.parameters ?? entry.input_schema)
+    }
+  })
+
+  delete req.functions
+}
+
+/**
+ * Translate the legacy OpenAI `function_call` field into a Claude `tool_choice`, in place.
+ *
+ * - 'none' becomes { type: 'none' }; any other string becomes { type: 'auto' }.
+ * - An object with a truthy `name` becomes { type: 'tool', name }.
+ * - Other truthy values set no `tool_choice`; a truthy `function_call` is always deleted.
+ * Nothing happens (and nothing is deleted) when `function_call` is absent or falsy.
+ */
+export function mapToolChoice(req: any): void {
+  const requested = req.function_call
+  if (!requested) return
+
+  let choice: any
+  if (typeof requested === 'string') {
+    choice = { type: requested === 'none' ? 'none' : 'auto' }
+  } else if (typeof requested === 'object' && requested.name) {
+    choice = { type: 'tool', name: requested.name }
+  }
+
+  // Only assign when a mapping was found, so an existing tool_choice is left alone otherwise
+  if (choice !== undefined) req.tool_choice = choice
+
+  delete req.function_call
+}
+
+/**
+ * Rewrite `req.messages` in place from OpenAI chat messages to Claude content blocks.
+ *
+ * Per-role handling:
+ * - `system`: content is collected and later joined with blank lines into `req.system`.
+ * - `function`: becomes a `user` message holding one `tool_result` block whose
+ *   `tool_use_id` is `tool_call_id`, falling back to the function `name`.
+ * - `tool`: becomes a `user` message holding one `tool_result` block.
+ * - `assistant` with `function_call`: becomes optional `text` plus one `tool_use` block;
+ *   a random `call_...` id is generated when the call carries none.
+ * - `assistant` with `tool_calls`: becomes optional `text` plus one `tool_use` per call.
+ * - anything else: forwarded unchanged.
+ *
+ * String `arguments` are parsed with JSON.parse, so malformed JSON throws.
+ * Does nothing when `req.messages` is missing or not an array.
+ */
+export function transformMessages(req: any): void {
+  if (!Array.isArray(req.messages)) return
+
+  const systemTexts: string[] = []
+  const converted: any[] = []
+
+  for (const message of req.messages) {
+    const role = message.role
+    if (role === 'system') {
+      // Held back here; merged into req.system after the loop
+      systemTexts.push(message.content)
+    } else if (role === 'function') {
+      // Legacy function result: handed back as a user-side tool_result
+      const resultBlock = {
+        type: 'tool_result',
+        tool_use_id: message.tool_call_id || message.name,
+        content: message.content
+      }
+      converted.push({
+        role: 'user',
+        content: [resultBlock]
+      })
+    } else if (role === 'assistant' && message.function_call) {
+      const call = message.function_call
+      const blocks: any[] = []
+
+      // Text, when present, always precedes the tool_use block
+      if (message.content) {
+        blocks.push({
+          type: 'text',
+          text: message.content
+        })
+      }
+
+      // Reuse the caller's id when there is one, otherwise invent a short random id
+      const callId = call.id || `call_${Math.random().toString(36).substring(2, 10)}`
+      let input = call.arguments
+      if (typeof input === 'string') {
+        input = JSON.parse(input)
+      }
+      blocks.push({
+        type: 'tool_use',
+        id: callId,
+        name: call.name,
+        input
+      })
+      converted.push({
+        role: 'assistant',
+        content: blocks
+      })
+    } else if (role === 'assistant' && message.tool_calls) {
+      const blocks: any[] = []
+
+      if (message.content) {
+        blocks.push({
+          type: 'text',
+          text: message.content
+        })
+      }
+
+      // One tool_use block per requested call, in the original order
+      for (const call of message.tool_calls) {
+        const rawArgs = call.function.arguments
+        blocks.push({
+          type: 'tool_use',
+          id: call.id,
+          name: call.function.name,
+          input: typeof rawArgs === 'string' ? JSON.parse(rawArgs) : rawArgs
+        })
+      }
+      converted.push({
+        role: 'assistant',
+        content: blocks
+      })
+    } else if (role === 'tool') {
+      converted.push({
+        role: 'user',
+        content: [
+          {
+            type: 'tool_result',
+            tool_use_id: message.tool_call_id,
+            content: message.content
+          }
+        ]
+      })
+    } else {
+      // Every message not matched above is forwarded as-is
+      converted.push(message)
+    }
+  }
+
+  // All collected system messages are merged into the single `req.system` string
+  if (systemTexts.length > 0) {
+    req.system = systemTexts.join('\n\n')
+  }
+  req.messages = converted
+}
+
+/**
+ * Return a copy of a JSON schema with `format: 'uri'` stripped from string schemas.
+ * The walk covers arrays, every object key, and each entry of a `properties` map.
+ * The input is not mutated; non-object values (and null/undefined) are returned unchanged.
+ */
+export function removeUriFormat(schema: any): any {
+  if (!schema || typeof schema !== 'object') return schema
+
+  // A string schema tagged as a URI: return a copy without `format`, other keys kept as-is
+  if (schema.type === 'string' && schema.format === 'uri') {
+    const stripped = { ...schema }
+    delete stripped.format
+    return stripped
+  }
+  if (Array.isArray(schema)) {
+    return schema.map((entry) => removeUriFormat(entry))
+  }
+
+  const cleaned: any = {}
+  for (const key in schema) {
+    const value = schema[key]
+    if (key === 'properties' && typeof value === 'object') {
+      // Each entry of a `properties` map is cleaned on its own; the map itself is never tested as a schema
+      const fields: any = {}
+      for (const fieldName in value) {
+        fields[fieldName] = removeUriFormat(value[fieldName])
+      }
+      cleaned[key] = fields
+    } else if (['anyOf', 'allOf', 'oneOf'].includes(key) && Array.isArray(value)) {
+      cleaned[key] = value.map((option: any) => removeUriFormat(option))
+    } else {
+      // `items`, `additionalProperties` and all remaining keys recurse the same way
+      cleaned[key] = removeUriFormat(value)
+    }
+  }
+  return cleaned
+}
+
+/**
+ * Convert an OpenAI-style request into a Claude-style request.
+ *
+ * Works on a JSON deep copy, so `openAIRequest` is never mutated. Returns the converted request
+ * plus the keys sanitizeRoot removed (keys consumed by mapTools/mapToolChoice are not reported).
+ */
+export function transformOpenAIToClaude(openAIRequest: any): { claudeRequest: any, droppedParams: string[] } {
+  const req = JSON.parse(JSON.stringify(openAIRequest))
+
+  // Map legacy `functions` / `function_call` first: both are listed in DROP_KEYS, so running
+  // sanitizeRoot earlier would delete them before mapTools / mapToolChoice could read them.
+  mapTools(req)
+  mapToolChoice(req)
+  const dropped = sanitizeRoot(req)
+  transformMessages(req)
+
+  return { claudeRequest: req, droppedParams: dropped.keys }
+}
+
+/**
+ * Build an OpenAI `chat.completion` object from a (non-streaming) Claude response.
+ *
+ * - `text` blocks are concatenated without a separator into `message.content`;
+ *   a plain string `content` is used as-is. With no text the content is ''.
+ * - `tool_use` blocks become `tool_calls` with JSON-stringified arguments;
+ *   the `tool_calls` key is omitted when there are none.
+ * - `stop_reason` maps 'tool_use' -> 'tool_calls' and 'max_tokens' -> 'length';
+ *   every other value (including 'end_turn') yields 'stop'.
+ * - Missing usage counters are reported as 0; a missing id gets a random `chatcmpl-` id.
+ *
+ * @param claudeResponse Claude response object to convert
+ * @param model Model name copied verbatim into the result
+ * @returns Chat completion object with a single choice at index 0
+ */
+export function transformClaudeToOpenAI(claudeResponse: any, model: string): any {
+  const inputTokens = claudeResponse.usage?.input_tokens || 0
+  const outputTokens = claudeResponse.usage?.output_tokens || 0
+
+  // Split the content blocks into plain text and tool invocations
+  const texts: string[] = []
+  const calls: any[] = []
+  const body = claudeResponse.content
+  if (typeof body === 'string') {
+    texts.push(body)
+  } else if (Array.isArray(body)) {
+    for (const block of body) {
+      switch (block.type) {
+        case 'text':
+          texts.push(block.text)
+          break
+        case 'tool_use':
+          calls.push({
+            id: block.id,
+            type: 'function',
+            function: {
+              name: block.name,
+              arguments: JSON.stringify(block.input)
+            }
+          })
+          break
+      }
+    }
+  }
+
+  // content is always a string here, even when only tool calls were returned
+  const message: any = { role: 'assistant', content: texts.join('') }
+  if (calls.length > 0) {
+    message.tool_calls = calls
+  }
+
+  // Anything that is not tool_use / max_tokens finishes as a normal stop
+  const stopReason = claudeResponse.stop_reason
+  const finishReason =
+    stopReason === 'tool_use' ? 'tool_calls' : stopReason === 'max_tokens' ? 'length' : 'stop'
+
+  // A random id is generated only when the Claude response carries none
+  return {
+    id: claudeResponse.id || `chatcmpl-${Math.random().toString(36).substring(2, 15)}`,
+    object: 'chat.completion',
+    created: Math.floor(Date.now() / 1000),
+    model,
+    choices: [{ index: 0, message, finish_reason: finishReason }],
+    usage: {
+      prompt_tokens: inputTokens,
+      completion_tokens: outputTokens,
+      total_tokens: inputTokens + outputTokens
+    }
+  }
+}
\ No newline at end of file
diff --git a/wrangler.toml b/wrangler.toml
index 933d81c..895acc8 100644
--- a/wrangler.toml
+++ b/wrangler.toml
@@ -7,3 +7,4 @@ compatibility_flags = ["nodejs_compat"]
 # DEBUG = "true"
 # REASONING_MODEL = "deepseek/deepseek-r1-0528:free"
 # COMPLETION_MODEL = "deepseek/deepseek-r1-0528:free"
+# REASONING_EFFORT = "high"

From 3d3140673136d2b2cb77a7399f16f400b283ec4e Mon Sep 17 00:00:00 2001
From: kiyo-e <kiyo-e@users.noreply.github.com>
Date: Wed, 6 Aug 2025 22:09:25 +0900
Subject: [PATCH 2/2] feat: update reasoning and completion models to
 z-ai/glm-4.5-air for improved performance

---
 .dev.vars.example |   4 +-
 .env.example      |   4 +-
 CLAUDE.md         | 146 ++++++++++++++++++++++++++++++++++++++--------
 README.md         |  20 +++----
 src/index.ts      |  17 ++----
 wrangler.toml     |   4 +-
 6 files changed, 143 insertions(+), 52 deletions(-)

diff --git a/.dev.vars.example b/.dev.vars.example
index 3c67657..a0aa6b5 100644
--- a/.dev.vars.example
+++ b/.dev.vars.example
@@ -6,8 +6,8 @@
 # ANTHROPIC_PROXY_BASE_URL=https://openrouter.ai/api/v1
 
 # Model configuration (optional)
-# REASONING_MODEL=deepseek/deepseek-r1-0528:free
-# COMPLETION_MODEL=deepseek/deepseek-r1-0528:free
+# REASONING_MODEL=z-ai/glm-4.5-air:free
+# COMPLETION_MODEL=z-ai/glm-4.5-air:free
 
 # Enable debug logging (optional)
 # DEBUG=true
diff --git a/.env.example b/.env.example
index 650104c..6da4575 100644
--- a/.env.example
+++ b/.env.example
@@ -6,8 +6,8 @@
 # ANTHROPIC_PROXY_BASE_URL=https://openrouter.ai/api/v1
 
 # Model configuration (optional)
-# REASONING_MODEL=deepseek/deepseek-r1-0528:free
-# COMPLETION_MODEL=deepseek/deepseek-r1-0528:free
+# REASONING_MODEL=z-ai/glm-4.5-air:free
+# COMPLETION_MODEL=z-ai/glm-4.5-air:free
 
 # Enable debug logging (optional)
 # DEBUG=true
diff --git a/CLAUDE.md b/CLAUDE.md
index ecebcf7..53299dd 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -9,17 +9,28 @@ This is a Claude Code proxy service that translates between Anthropic's Claude A
 ## Architecture
 
 ### Core Components
-- **`src/index.ts`** - Main Hono application with API proxy logic
+- **`src/index.ts`** - Main Hono application with API proxy logic (src/index.ts:39-516)
 - **`src/server.ts`** - Node.js server wrapper for CLI distribution with argument parsing
+- **`src/transform.ts`** - API format transformation utilities between OpenAI and Claude formats
 
-### API Translation Logic
-The proxy service handles (in `src/index.ts:32-450`):
+### API Translation Logic (src/index.ts:39-516)
+The proxy handles `/v1/messages` POST requests and transforms between formats:
 - **Message normalization**: Converts Claude's nested content arrays to OpenAI's flat structure
 - **Tool call mapping**: Transforms Claude's `tool_use`/`tool_result` to OpenAI's `tool_calls`/`tool` roles
 - **Schema transformation**: Removes `format: 'uri'` constraints from JSON schemas for compatibility
-- **Model routing**: Dynamically selects models based on request type (reasoning vs completion)
+- **Model routing**: Dynamically selects models based on `thinking` flag in request
 - **Streaming support**: Handles both streaming and non-streaming responses with SSE
 
+### Transformation Module (src/transform.ts)
+Key exported functions:
+- `transformOpenAIToClaude()`: Main transformation from OpenAI to Claude format
+- `sanitizeRoot()`: Drops unsupported OpenAI parameters and ensures Claude requirements
+- `mapTools()`: Converts OpenAI tools/functions to Claude tool format
+- `mapToolChoice()`: Maps OpenAI function_call to Claude tool_choice
+- `transformMessages()`: Converts message roles and content blocks
+- `removeUriFormat()`: Recursively removes format:'uri' from JSON schemas
+- `transformClaudeToOpenAI()`: Converts Claude responses back to OpenAI format
+
 ### Dual Runtime Support
 - **Cloudflare Workers**: Uses Hono's built-in fetch handler (`src/index.ts`)
 - **Node.js**: Uses `@hono/node-server` adapter (`src/server.ts`)
@@ -30,39 +41,66 @@ The proxy service handles (in `src/index.ts:32-450`):
 # Install dependencies
 bun install
 
-# Local development server (hot reload)
+# Local development server (hot reload on port 3000)
 bun run start
 
-# Cloudflare Workers development
+# Cloudflare Workers development (local testing)
 bun run dev
 
-# Build CLI package
+# Build CLI binary to ./bin
 bun run build
 
+# Test the built CLI
+bun run bin --help
+./bin --help
+./bin -p 8080  # Run on different port
+
 # Deploy to Cloudflare Workers
 bun run deploy
+
+# Set environment variables for Cloudflare Workers
+npx wrangler secret put CLAUDE_CODE_PROXY_API_KEY
+npx wrangler secret put REASONING_MODEL  # enter z-ai/glm-4.5-air:free when prompted
+
+# Publishing to npm
+npm version patch  # or minor/major
+npm publish
 ```
 
 ## CLI Package
 
 The project builds to an executable CLI via `bun run build`:
-- **Output**: `./bin` - Standalone Node.js executable
-- **Version management**: Reads from `package.json` dynamically
-- **CLI flags**: `-v/--version`, `--help`, `-p/--port`
+- **Output**: `./bin` - Standalone Node.js executable with ES module format
+- **Version management**: Reads dynamically from `package.json`
+- **CLI flags**:
+  - `-v/--version`: Show version
+  - `--help`: Show help information
+  - `-p/--port <PORT>`: Set server port (default: 3000)
+- **Build process**: Uses Bun's native TypeScript compilation with executable permission
 
 ## Environment Variables
 
 Configure via `wrangler.toml` or environment:
-- `CLAUDE_CODE_PROXY_API_KEY` - Bearer token for upstream API
+
+### Required
+- `CLAUDE_CODE_PROXY_API_KEY` - Bearer token for upstream API authentication
+
+### Optional Configuration
 - `ANTHROPIC_PROXY_BASE_URL` - Upstream API URL (default: https://models.github.ai/inference)
 - `REASONING_MODEL` - Model for reasoning requests (default: openai/gpt-4.1)
 - `COMPLETION_MODEL` - Model for completion requests (default: openai/gpt-4.1)
-- `REASONING_MAX_TOKENS` - Max tokens for reasoning model (optional)
-- `COMPLETION_MAX_TOKENS` - Max tokens for completion model (optional)
-- `REASONING_EFFORT` - Reasoning effort level for reasoning model (optional, e.g., "low", "medium", "high")
+- `REASONING_MAX_TOKENS` - Max tokens for reasoning model (overrides request setting)
+- `COMPLETION_MAX_TOKENS` - Max tokens for completion model (overrides request setting)
+- `REASONING_EFFORT` - Reasoning effort level for reasoning model (values: "low", "medium", "high")
 - `DEBUG` - Enable debug logging (default: false)
 - `PORT` - Server port for Node.js mode (default: 3000)
 
+### Model Selection Logic
+- When request contains `thinking: true`, uses `REASONING_MODEL`
+- Otherwise uses `COMPLETION_MODEL`
+- `REASONING_EFFORT` only applies when using reasoning models
+- Max tokens overrides take precedence over request-provided values
+
 ## Deployment Options
 
 ### Cloudflare Workers
@@ -72,10 +110,19 @@ bun run deploy
 ```
 
 ### Docker
-Multi-stage build with production optimization:
+Multi-stage build with production optimization using distroless image:
 ```bash
+# Build image
 docker build -t claude-code-proxy .
-docker run -d -p 3000:3000 claude-code-proxy
+
+# Run with environment variables
+docker run -d -p 3000:3000 \
+  -e CLAUDE_CODE_PROXY_API_KEY=your_token \
+  -e ANTHROPIC_PROXY_BASE_URL=https://models.github.ai/inference \
+  claude-code-proxy
+
+# Development with hot reload (using compose.yml)
+docker compose up
 ```
 
 ### NPM Package
@@ -87,14 +134,28 @@ claude-code-proxy --help
 
 ## GitHub Actions Integration
 
-Service container setup for `@claude` mentions:
+Service container setup for `@claude` mentions in GitHub Actions:
 ```yaml
-services:
-  claude-code-proxy:
-    image: ghcr.io/kiyo-e/claude-code-proxy:latest
-    ports: [3000:3000]
-    env:
-      CLAUDE_CODE_PROXY_API_KEY: ${{ secrets.GITHUB_TOKEN }}
+jobs:
+  review:
+    runs-on: ubuntu-latest
+    services:
+      claude-code-proxy:
+        image: ghcr.io/kiyo-e/claude-code-proxy:latest
+        ports:
+          - 3000:3000
+        env:
+          CLAUDE_CODE_PROXY_API_KEY: ${{ secrets.GITHUB_TOKEN }}
+          ANTHROPIC_PROXY_BASE_URL: https://models.github.ai/inference
+          REASONING_MODEL: openai/gpt-4.1
+          COMPLETION_MODEL: openai/gpt-4.1
+
+    steps:
+      - uses: actions/checkout@v4
+      - name: Run Claude Code
+        run: |
+          export ANTHROPIC_BASE_URL=http://localhost:3000
+          claude "Review the changes in this PR"
 ```
 
 ## Local Usage with Claude Code
@@ -121,6 +182,41 @@ ANTHROPIC_BASE_URL=http://localhost:3000 claude "Review the API code and suggest
 ```bash
 # Using environment file
 echo "ANTHROPIC_PROXY_BASE_URL=https://openrouter.ai/api/v1" > .env
-echo "REASONING_MODEL=deepseek/deepseek-r1-0528:free" >> .env
+echo "REASONING_MODEL=z-ai/glm-4.5-air:free" >> .env
+echo "COMPLETION_MODEL=z-ai/glm-4.5-air:free" >> .env
+echo "REASONING_EFFORT=high" >> .env
 docker run -d -p 3000:3000 --env-file .env ghcr.io/kiyo-e/claude-code-proxy:latest
-```
\ No newline at end of file
+```
+
+### Development with Local Claude Code
+```bash
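+# Start the proxy (it reads CLAUDE_CODE_PROXY_API_KEY from its own environment)
+CLAUDE_CODE_PROXY_API_KEY=your_token bun run start
+
+# In another terminal, use with Claude Code
+# (the client only needs the base URL pointed at the proxy)
+ANTHROPIC_BASE_URL=http://localhost:3000 \
+claude "Review this code and suggest improvements"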
+
+# Debug mode
+DEBUG=true bun run start
+```
+
+## Important Implementation Notes
+
+### Request Flow
+1. Client sends Claude API format request to `/v1/messages`
+2. Proxy transforms the request to OpenAI format (see `src/index.ts` and `src/transform.ts`)
+3. Request forwarded to upstream API (configured via `ANTHROPIC_PROXY_BASE_URL`)
+4. Response transformed back to Claude format
+5. Streaming responses handled with Server-Sent Events (SSE)
+
+### Error Handling
+- HTTP errors from upstream API: Returns same status code with error details
+- API errors in response body: Returns 500 with error message
+- Dropped parameters tracked in `X-Dropped-Params` header
+
+### Debugging
+- Enable `DEBUG=true` to log request/response payloads
+- Bearer tokens are automatically masked in logs
+- Check health endpoint at `/` for configuration status
\ No newline at end of file
diff --git a/README.md b/README.md
index 1e4e833..6bb04c1 100644
--- a/README.md
+++ b/README.md
@@ -56,8 +56,8 @@ docker run -d -p 3000:3000 -e CLAUDE_CODE_PROXY_API_KEY=your_github_token ghcr.i
 docker run -d -p 3000:3000 \
   -e CLAUDE_CODE_PROXY_API_KEY=your_openrouter_key \
   -e ANTHROPIC_PROXY_BASE_URL=https://openrouter.ai/api/v1 \
-  -e REASONING_MODEL=deepseek/deepseek-r1-0528:free \
-  -e COMPLETION_MODEL=deepseek/deepseek-r1-0528:free \
+  -e REASONING_MODEL=z-ai/glm-4.5-air:free \
+  -e COMPLETION_MODEL=z-ai/glm-4.5-air:free \
   -e REASONING_EFFORT=high \
   ghcr.io/kiyo-e/claude-code-proxy:latest
 
@@ -72,8 +72,8 @@ ANTHROPIC_BASE_URL=http://localhost:3000 claude "Help me review this code"
 cat > .env << EOF
 CLAUDE_CODE_PROXY_API_KEY=your_api_key
 ANTHROPIC_PROXY_BASE_URL=https://openrouter.ai/api/v1
-REASONING_MODEL=deepseek/deepseek-r1-0528:free
-COMPLETION_MODEL=deepseek/deepseek-r1-0528:free
+REASONING_MODEL=z-ai/glm-4.5-air:free
+COMPLETION_MODEL=z-ai/glm-4.5-air:free
 REASONING_MAX_TOKENS=4096
 COMPLETION_MAX_TOKENS=2048
 REASONING_EFFORT=high
@@ -131,9 +131,9 @@ npx wrangler secret put CLAUDE_CODE_PROXY_API_KEY
 npx wrangler secret put ANTHROPIC_PROXY_BASE_URL
 # Enter: https://openrouter.ai/api/v1
 npx wrangler secret put REASONING_MODEL
-# Enter: deepseek/deepseek-r1-0528:free
+# Enter: z-ai/glm-4.5-air:free
 npx wrangler secret put COMPLETION_MODEL
-# Enter: deepseek/deepseek-r1-0528:free
+# Enter: z-ai/glm-4.5-air:free
 ```
 
 3. **Test the deployment:**
@@ -206,8 +206,8 @@ npx wrangler secret put CLAUDE_CODE_PROXY_API_KEY
 npx wrangler secret put ANTHROPIC_PROXY_BASE_URL
 
 # Set regular environment variables
-npx wrangler env put REASONING_MODEL "deepseek/deepseek-r1-0528:free"
-npx wrangler env put COMPLETION_MODEL "deepseek/deepseek-r1-0528:free"
+npx wrangler env put REASONING_MODEL "z-ai/glm-4.5-air:free"
+npx wrangler env put COMPLETION_MODEL "z-ai/glm-4.5-air:free"
 npx wrangler env put REASONING_EFFORT "high"
 npx wrangler env put DEBUG "false"
 ```
@@ -216,8 +216,8 @@ Alternatively, configure via `wrangler.toml`:
 
 ```toml
 [env.production.vars]
-REASONING_MODEL = "deepseek/deepseek-r1-0528:free"
-COMPLETION_MODEL = "deepseek/deepseek-r1-0528:free"
+REASONING_MODEL = "z-ai/glm-4.5-air:free"
+COMPLETION_MODEL = "z-ai/glm-4.5-air:free"
 REASONING_EFFORT = "high"
 DEBUG = "false"
 ```
diff --git a/src/index.ts b/src/index.ts
index 240837b..bf20ea3 100755
--- a/src/index.ts
+++ b/src/index.ts
@@ -167,16 +167,10 @@ app.post('/v1/messages', async (c) => {
     }
     // Add tool_choice if present
     if (claudeRequest.tool_choice) {
-      if (claudeRequest.tool_choice.type === 'none') {
-        openaiPayload.tool_choice = 'none'
-      } else if (claudeRequest.tool_choice.type === 'auto') {
-        openaiPayload.tool_choice = 'auto'
-      } else if (claudeRequest.tool_choice.type === 'tool' && claudeRequest.tool_choice.name) {
-        openaiPayload.tool_choice = {
-          type: 'function',
-          function: { name: claudeRequest.tool_choice.name }
-        }
-      }
+      const { type, name } = claudeRequest.tool_choice
+      openaiPayload.tool_choice = 
+        type === 'tool' && name ? { type: 'function', function: { name } } :
+        type === 'none' || type === 'auto' ? type : undefined
     }
     
     if (tools.length > 0) openaiPayload.tools = tools
@@ -225,7 +219,8 @@ app.post('/v1/messages', async (c) => {
       const data: any = await openaiResponse.json()
       debug('OpenAI response:', JSON.stringify(data, null, 2))
       if (data.error) {
-        throw new Error(data.error.message)
+        console.error('OpenAI API returned error in response body:', data.error)
+        return c.json({ error: data.error.message || 'Unknown error' }, 500)
       }
 
       // Create Claude response from OpenAI data
diff --git a/wrangler.toml b/wrangler.toml
index 895acc8..4b426cc 100644
--- a/wrangler.toml
+++ b/wrangler.toml
@@ -5,6 +5,6 @@ compatibility_flags = ["nodejs_compat"]
 
 [vars]
 # DEBUG = "true"
-# REASONING_MODEL = "deepseek/deepseek-r1-0528:free"
-# COMPLETION_MODEL = "deepseek/deepseek-r1-0528:free"
+# REASONING_MODEL = "z-ai/glm-4.5-air:free"
+# COMPLETION_MODEL = "z-ai/glm-4.5-air:free"
 # REASONING_EFFORT = "high"