From b69e48e564e8660d2982bf279f31d7079dda8ff7 Mon Sep 17 00:00:00 2001 From: kiyo-e Date: Wed, 6 Aug 2025 21:41:13 +0900 Subject: [PATCH 1/2] feat: add REASONING_EFFORT configuration option for enhanced reasoning control --- CLAUDE.md | 1 + README.md | 5 + src/index.ts | 260 +++++++++++++++++----------------- src/transform.ts | 353 +++++++++++++++++++++++++++++++++++++++++++++++ wrangler.toml | 1 + 5 files changed, 489 insertions(+), 131 deletions(-) create mode 100644 src/transform.ts diff --git a/CLAUDE.md b/CLAUDE.md index cad6001..ecebcf7 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -59,6 +59,7 @@ Configure via `wrangler.toml` or environment: - `COMPLETION_MODEL` - Model for completion requests (default: openai/gpt-4.1) - `REASONING_MAX_TOKENS` - Max tokens for reasoning model (optional) - `COMPLETION_MAX_TOKENS` - Max tokens for completion model (optional) +- `REASONING_EFFORT` - Reasoning effort level for reasoning model (optional, e.g., "low", "medium", "high") - `DEBUG` - Enable debug logging (default: false) - `PORT` - Server port for Node.js mode (default: 3000) diff --git a/README.md b/README.md index 78ab55a..1e4e833 100644 --- a/README.md +++ b/README.md @@ -58,6 +58,7 @@ docker run -d -p 3000:3000 \ -e ANTHROPIC_PROXY_BASE_URL=https://openrouter.ai/api/v1 \ -e REASONING_MODEL=deepseek/deepseek-r1-0528:free \ -e COMPLETION_MODEL=deepseek/deepseek-r1-0528:free \ + -e REASONING_EFFORT=high \ ghcr.io/kiyo-e/claude-code-proxy:latest # Use with Claude Code @@ -75,6 +76,7 @@ REASONING_MODEL=deepseek/deepseek-r1-0528:free COMPLETION_MODEL=deepseek/deepseek-r1-0528:free REASONING_MAX_TOKENS=4096 COMPLETION_MAX_TOKENS=2048 +REASONING_EFFORT=high DEBUG=false EOF @@ -190,6 +192,7 @@ npm publish - `COMPLETION_MODEL` - Model for completion requests (default: openai/gpt-4.1) - `REASONING_MAX_TOKENS` - Max tokens for reasoning model (optional) - `COMPLETION_MAX_TOKENS` - Max tokens for completion model (optional) +- `REASONING_EFFORT` - Reasoning effort level for 
reasoning model (optional, e.g., "low", "medium", "high") - `DEBUG` - Enable debug logging (default: false) - `PORT` - Server port for CLI mode (default: 3000) @@ -205,6 +208,7 @@ npx wrangler secret put ANTHROPIC_PROXY_BASE_URL # Set regular environment variables npx wrangler env put REASONING_MODEL "deepseek/deepseek-r1-0528:free" npx wrangler env put COMPLETION_MODEL "deepseek/deepseek-r1-0528:free" +npx wrangler env put REASONING_EFFORT "high" npx wrangler env put DEBUG "false" ``` @@ -214,6 +218,7 @@ Alternatively, configure via `wrangler.toml`: [env.production.vars] REASONING_MODEL = "deepseek/deepseek-r1-0528:free" COMPLETION_MODEL = "deepseek/deepseek-r1-0528:free" +REASONING_EFFORT = "high" DEBUG = "false" ``` diff --git a/src/index.ts b/src/index.ts index 0e86b1e..240837b 100755 --- a/src/index.ts +++ b/src/index.ts @@ -1,8 +1,10 @@ import { Hono } from 'hono' import { env } from 'hono/adapter' +import { transformOpenAIToClaude, transformClaudeToOpenAI, removeUriFormat } from './transform' const app = new Hono<{ Bindings: { + REASONING_EFFORT?: string ANTHROPIC_PROXY_BASE_URL?: string CLAUDE_CODE_PROXY_API_KEY?: string REASONING_MODEL?: string @@ -18,7 +20,7 @@ const defaultModel = 'openai/gpt-4.1' // Health check endpoint app.get('/', (c) => { - const { ANTHROPIC_PROXY_BASE_URL, REASONING_MODEL, COMPLETION_MODEL, REASONING_MAX_TOKENS, COMPLETION_MAX_TOKENS} = env(c) + const { ANTHROPIC_PROXY_BASE_URL, REASONING_MODEL, COMPLETION_MODEL, REASONING_MAX_TOKENS, COMPLETION_MAX_TOKENS, REASONING_EFFORT } = env(c) return c.json({ status: 'ok', @@ -28,14 +30,15 @@ app.get('/', (c) => { REASONING_MODEL, COMPLETION_MODEL, REASONING_MAX_TOKENS, - COMPLETION_MAX_TOKENS + COMPLETION_MAX_TOKENS, + REASONING_EFFORT } }) }) app.post('/v1/messages', async (c) => { // Get environment variables from context - const { CLAUDE_CODE_PROXY_API_KEY, ANTHROPIC_PROXY_BASE_URL, REASONING_MODEL, COMPLETION_MODEL, REASONING_MAX_TOKENS, COMPLETION_MAX_TOKENS, DEBUG } = env(c) + const 
{ CLAUDE_CODE_PROXY_API_KEY, ANTHROPIC_PROXY_BASE_URL, REASONING_MODEL, COMPLETION_MODEL, REASONING_MAX_TOKENS, COMPLETION_MAX_TOKENS, DEBUG, REASONING_EFFORT } = env(c) try { const baseUrl = ANTHROPIC_PROXY_BASE_URL || 'https://models.github.ai/inference' @@ -54,106 +57,75 @@ app.post('/v1/messages', async (c) => { return value.replace(/Bearer\s+(\S+)/g, 'Bearer ********') } - const payload = await c.req.json() - - // Helper to normalize a message's content - const normalizeContent = (content: any): string | null => { - if (typeof content === 'string') return content - if (Array.isArray(content)) { - return content.map(item => item.text).join(' ') - } - return null - } + const claudePayload = await c.req.json() + + // Transform Claude format to OpenAI format + const { claudeRequest, droppedParams } = transformOpenAIToClaude(claudePayload) - // Build messages array for the OpenAI payload + // Convert messages from Claude to OpenAI format for upstream API const messages: any[] = [] - if (payload.system && Array.isArray(payload.system)) { - payload.system.forEach((sysMsg: any) => { - const normalized = normalizeContent(sysMsg.text || sysMsg.content) - if (normalized) { - messages.push({ - role: 'system', - content: normalized - }) - } + + // Add system messages + if (claudeRequest.system) { + messages.push({ + role: 'system', + content: claudeRequest.system }) } - - // Then add user (or other) messages - if (payload.messages && Array.isArray(payload.messages)) { - payload.messages.forEach((msg: any) => { - // Skip messages with unsupported roles for some APIs - if (!['user', 'assistant', 'system', 'tool', 'function'].includes(msg.role)) { - console.warn(`Skipping message with unsupported role: ${msg.role}`) - return - } - const toolCalls = (Array.isArray(msg.content) ? 
msg.content : []) - .filter((item: any) => item.type === 'tool_use') - .map((toolCall: any) => ({ - id: toolCall.id, - type: 'function', - function: { - name: toolCall.name, - arguments: JSON.stringify(toolCall.input), + + // Process regular messages + if (claudeRequest.messages && Array.isArray(claudeRequest.messages)) { + for (const msg of claudeRequest.messages) { + if (msg.role === 'user' || msg.role === 'assistant') { + // Handle content blocks + if (Array.isArray(msg.content)) { + const textParts: string[] = [] + const toolCalls: any[] = [] + const toolResults: any[] = [] + + for (const block of msg.content) { + if (block.type === 'text') { + textParts.push(block.text) + } else if (block.type === 'tool_use') { + toolCalls.push({ + id: block.id, + type: 'function', + function: { + name: block.name, + arguments: JSON.stringify(block.input) + } + }) + } else if (block.type === 'tool_result') { + toolResults.push({ + role: 'tool', + content: block.content || '', + tool_call_id: block.tool_use_id + }) + } } - })) - - const newMsg: any = { role: msg.role } - const normalized = normalizeContent(msg.content) - if (normalized) newMsg.content = normalized - if (toolCalls.length > 0) newMsg.tool_calls = toolCalls - if (newMsg.content || newMsg.tool_calls) messages.push(newMsg) - - if (Array.isArray(msg.content)) { - const toolResults = msg.content.filter((item: any) => item.type === 'tool_result') - toolResults.forEach((toolResult: any) => { + + // Add main message if it has content + if (textParts.length > 0 || toolCalls.length > 0) { + const openAIMsg: any = { role: msg.role } + if (textParts.length > 0) openAIMsg.content = textParts.join(' ') + if (toolCalls.length > 0) openAIMsg.tool_calls = toolCalls + messages.push(openAIMsg) + } + + // Add tool result messages + messages.push(...toolResults) + } else if (typeof msg.content === 'string') { messages.push({ - role: 'tool', - content: toolResult.text || toolResult.content, - tool_call_id: toolResult.tool_use_id, + 
role: msg.role, + content: msg.content }) - }) - } - }) - } - - // Helper function to recursively traverse JSON schema and remove format: 'uri' - const removeUriFormat = (schema: any): any => { - if (!schema || typeof schema !== 'object') return schema - - // If this is a string type with uri format, remove the format - if (schema.type === 'string' && schema.format === 'uri') { - const { format, ...rest } = schema - return rest - } - - // Handle array of schemas - if (Array.isArray(schema)) { - return schema.map(item => removeUriFormat(item)) - } - - // Recursively process all properties - const result: any = {} - for (const key in schema) { - if (key === 'properties' && typeof schema[key] === 'object') { - result[key] = {} - for (const propKey in schema[key]) { - result[key][propKey] = removeUriFormat(schema[key][propKey]) } - } else if (key === 'items' && typeof schema[key] === 'object') { - result[key] = removeUriFormat(schema[key]) - } else if (key === 'additionalProperties' && typeof schema[key] === 'object') { - result[key] = removeUriFormat(schema[key]) - } else if (['anyOf', 'allOf', 'oneOf'].includes(key) && Array.isArray(schema[key])) { - result[key] = schema[key].map((item: any) => removeUriFormat(item)) - } else { - result[key] = removeUriFormat(schema[key]) } } - return result } - const tools = (payload.tools || []) + // Process tools + const tools = (claudeRequest.tools || []) .filter((tool: any) => !['BatchTool'].includes(tool.name)) .map((tool: any) => ({ type: 'function', @@ -165,19 +137,21 @@ app.post('/v1/messages', async (c) => { })) const openaiPayload: any = { - model: payload.thinking ? models.reasoning : models.completion, + // Existing fields kept as before + + model: claudePayload.thinking ? models.reasoning : models.completion, messages, - temperature: payload.temperature !== undefined ? payload.temperature : 1, - stream: payload.stream === true, + temperature: claudeRequest.temperature !== undefined ? 
claudeRequest.temperature : 1, + stream: claudeRequest.stream === true, } // Only add max_tokens if it's provided and not null/undefined - if (payload.max_tokens !== null && payload.max_tokens !== undefined) { - openaiPayload.max_tokens = payload.max_tokens + if (claudeRequest.max_tokens !== null && claudeRequest.max_tokens !== undefined) { + openaiPayload.max_tokens = claudeRequest.max_tokens } // Apply max_tokens override if configured - const selectedModel = payload.thinking ? models.reasoning : models.completion + const selectedModel = claudePayload.thinking ? models.reasoning : models.completion const reasoningMaxTokens = REASONING_MAX_TOKENS ? parseInt(REASONING_MAX_TOKENS) : undefined const completionMaxTokens = COMPLETION_MAX_TOKENS ? parseInt(COMPLETION_MAX_TOKENS) : undefined @@ -186,9 +160,28 @@ app.post('/v1/messages', async (c) => { } else if (selectedModel === models.completion && completionMaxTokens) { openaiPayload.max_tokens = completionMaxTokens } + + // Apply reasoning_effort if configured and model is reasoning + if (selectedModel === models.reasoning && REASONING_EFFORT) { + openaiPayload.reasoning_effort = REASONING_EFFORT + } + // Add tool_choice if present + if (claudeRequest.tool_choice) { + if (claudeRequest.tool_choice.type === 'none') { + openaiPayload.tool_choice = 'none' + } else if (claudeRequest.tool_choice.type === 'auto') { + openaiPayload.tool_choice = 'auto' + } else if (claudeRequest.tool_choice.type === 'tool' && claudeRequest.tool_choice.name) { + openaiPayload.tool_choice = { + type: 'function', + function: { name: claudeRequest.tool_choice.name } + } + } + } + if (tools.length > 0) openaiPayload.tools = tools - debug('OpenAI payload:', openaiPayload) + debug('OpenAI payload:', JSON.stringify(openaiPayload, null, 2)) const headers: Record = { 'Content-Type': 'application/json' @@ -214,63 +207,67 @@ app.post('/v1/messages', async (c) => { body: JSON.stringify(openaiPayload) }) + // Add X-Dropped-Params header if any params 
were dropped + if (droppedParams.length > 0) { + c.header('X-Dropped-Params', droppedParams.join(', ')) + } + if (!openaiResponse.ok) { const errorDetails = await openaiResponse.text() console.error(`OpenAI API error (${openaiResponse.status}):`, errorDetails) console.error('Failed request payload:', JSON.stringify(openaiPayload, null, 2)) + console.error('Dropped parameters:', droppedParams) return c.json({ error: errorDetails }, openaiResponse.status as any) } // If stream is not enabled, process the complete response if (!openaiPayload.stream) { const data: any = await openaiResponse.json() - debug('OpenAI response:', data) + debug('OpenAI response:', JSON.stringify(data, null, 2)) if (data.error) { throw new Error(data.error.message) } + // Create Claude response from OpenAI data const choice = data.choices[0] const openaiMessage = choice.message - - // Map finish_reason to anthropic stop_reason - const stopReason = mapStopReason(choice.finish_reason) - const toolCalls = openaiMessage.tool_calls || [] - - // Create a message id - const messageId = data.id - ? data.id.replace('chatcmpl', 'msg') - : 'msg_' + Math.random().toString(36).substring(2, 26) - - const anthropicResponse = { - content: [ - { - text: openaiMessage.content, - type: 'text' - }, - ...toolCalls.map((toolCall: any) => ({ + + // Build content blocks + const content: any[] = [] + if (openaiMessage.content) { + content.push({ + type: 'text', + text: openaiMessage.content + }) + } + + if (openaiMessage.tool_calls) { + for (const toolCall of openaiMessage.tool_calls) { + content.push({ type: 'tool_use', id: toolCall.id, name: toolCall.function.name, - input: JSON.parse(toolCall.function.arguments), - })), - ], - id: messageId, + input: JSON.parse(toolCall.function.arguments) + }) + } + } + + const claudeResponse = { + id: data.id ? 
data.id.replace('chatcmpl', 'msg') : 'msg_' + Math.random().toString(36).substring(2, 26), + type: 'message', + role: 'assistant', model: openaiPayload.model, - role: openaiMessage.role, - stop_reason: stopReason, + content, + stop_reason: mapStopReason(choice.finish_reason), stop_sequence: null, - type: 'message', usage: { - input_tokens: data.usage - ? data.usage.prompt_tokens - : messages.reduce((acc, msg) => acc + (msg.content?.split(' ').length || 0), 0), - output_tokens: data.usage - ? data.usage.completion_tokens - : openaiMessage.content.split(' ').length, + input_tokens: data.usage?.prompt_tokens || 0, + output_tokens: data.usage?.completion_tokens || 0 } } - - return c.json(anthropicResponse) + + debug('Claude response:', JSON.stringify(claudeResponse, null, 2)) + return c.json(claudeResponse) } // Streaming response using Server-Sent Events @@ -279,6 +276,7 @@ app.post('/v1/messages', async (c) => { const encoder = new TextEncoder() const sendSSE = (event: string, data: any) => { + debug('Sending SSE:', { event, data: JSON.stringify(data, null, 2) }) const sseMessage = `event: ${event}\ndata: ${JSON.stringify(data)}\n\n` controller.enqueue(encoder.encode(sseMessage)) } diff --git a/src/transform.ts b/src/transform.ts new file mode 100644 index 0000000..9765695 --- /dev/null +++ b/src/transform.ts @@ -0,0 +1,353 @@ +/** + * Transform module for converting between OpenAI and Claude API formats + * Design document reference: https://github.com/kiyo-e/claude-code-proxy/issues + * Related classes: src/index.ts - Main proxy service implementation + */ + +// OpenAI-specific parameters that Claude doesn't support +const DROP_KEYS = [ + 'n', + 'presence_penalty', + 'frequency_penalty', + 'best_of', + 'logit_bias', + 'seed', + 'stream_options', + 'logprobs', + 'top_logprobs', + 'user', + 'response_format', + 'service_tier', + 'parallel_tool_calls', + 'functions', + 'function_call' +] + +interface DroppedParams { + keys: string[] +} + +/** + * Sanitize root-level 
parameters from OpenAI to Claude format + */ +export function sanitizeRoot(req: any): DroppedParams { + const dropped: string[] = [] + + // Rename stop → stop_sequences + if (req.stop !== undefined) { + req.stop_sequences = Array.isArray(req.stop) ? req.stop : [req.stop] + delete req.stop + + } + + // Convert user → metadata.user_id + if (req.user) { + req.metadata = { ...req.metadata, user_id: req.user } + dropped.push('user') + delete req.user + } + + // Drop all unsupported OpenAI parameters + for (const key of DROP_KEYS) { + if (key in req) { + dropped.push(key) + delete req[key] + } + } + + // Ensure max_tokens is set (Claude requirement) + if (req.max_tokens == null) { + req.max_tokens = 4096 // Default max tokens + } + + return { keys: dropped } +} + +/** + * Map OpenAI tools/functions to Claude tools format + */ +export function mapTools(req: any): void { + // Combine tools and functions into a unified array + const openAITools = (req.tools ?? []) + .concat((req.functions ?? []).map((f: any) => ({ + type: 'function', + function: f + }))) + + // Convert to Claude tool format + req.tools = openAITools.map((t: any) => ({ + name: t.function?.name ?? t.name, + description: t.function?.description ?? t.description, + input_schema: removeUriFormat(t.function?.parameters ?? t.input_schema) + })) + + // Clean up original fields + delete req.functions +} + +/** + * Map OpenAI function_call to Claude tool_choice + */ +export function mapToolChoice(req: any): void { + if (!req.function_call) return + + const fc = req.function_call + + // Convert to Claude tool_choice format + if (typeof fc === 'string') { + // Handle string values: 'auto', 'none' + req.tool_choice = { + type: fc === 'none' ? 
'none' : 'auto' + } + } else if (fc && typeof fc === 'object' && fc.name) { + // Handle specific function call + req.tool_choice = { + type: 'tool', + name: fc.name + } + } + + delete req.function_call +} + +/** + * Transform messages from OpenAI to Claude format + */ +export function transformMessages(req: any): void { + if (!req.messages || !Array.isArray(req.messages)) return + + const transformedMessages: any[] = [] + let systemMessages: string[] = [] + + for (const msg of req.messages) { + // Extract system messages + if (msg.role === 'system') { + systemMessages.push(msg.content) + continue + } + + // Handle function role → user role with tool_result + if (msg.role === 'function') { + transformedMessages.push({ + role: 'user', + content: [{ + type: 'tool_result', + tool_use_id: msg.tool_call_id || msg.name, + content: msg.content + }] + }) + continue + } + + // Handle assistant messages with function_call + if (msg.role === 'assistant' && msg.function_call) { + const content: any[] = [] + + // Add text content if present + if (msg.content) { + content.push({ + type: 'text', + text: msg.content + }) + } + + // Add tool_use block + content.push({ + type: 'tool_use', + id: msg.function_call.id || `call_${Math.random().toString(36).substring(2, 10)}`, + name: msg.function_call.name, + input: typeof msg.function_call.arguments === 'string' + ? JSON.parse(msg.function_call.arguments) + : msg.function_call.arguments + }) + + transformedMessages.push({ + role: 'assistant', + content + }) + continue + } + + // Handle assistant messages with tool_calls + if (msg.role === 'assistant' && msg.tool_calls) { + const content: any[] = [] + + // Add text content if present + if (msg.content) { + content.push({ + type: 'text', + text: msg.content + }) + } + + // Add tool_use blocks + for (const toolCall of msg.tool_calls) { + content.push({ + type: 'tool_use', + id: toolCall.id, + name: toolCall.function.name, + input: typeof toolCall.function.arguments === 'string' + ? 
JSON.parse(toolCall.function.arguments) + : toolCall.function.arguments + }) + } + + transformedMessages.push({ + role: 'assistant', + content + }) + continue + } + + // Handle tool role → user role with tool_result + if (msg.role === 'tool') { + transformedMessages.push({ + role: 'user', + content: [{ + type: 'tool_result', + tool_use_id: msg.tool_call_id, + content: msg.content + }] + }) + continue + } + + // Pass through other messages + transformedMessages.push(msg) + } + + // Set system message (Claude takes a single system string, not array) + if (systemMessages.length > 0) { + req.system = systemMessages.join('\n\n') + } + + req.messages = transformedMessages +} + +/** + * Recursively remove format: 'uri' from JSON schemas + */ +export function removeUriFormat(schema: any): any { + if (!schema || typeof schema !== 'object') return schema + + // If this is a string type with uri format, remove the format + if (schema.type === 'string' && schema.format === 'uri') { + const { format, ...rest } = schema + return rest + } + + // Handle array of schemas + if (Array.isArray(schema)) { + return schema.map(item => removeUriFormat(item)) + } + + // Recursively process all properties + const result: any = {} + for (const key in schema) { + if (key === 'properties' && typeof schema[key] === 'object') { + result[key] = {} + for (const propKey in schema[key]) { + result[key][propKey] = removeUriFormat(schema[key][propKey]) + } + } else if (key === 'items' && typeof schema[key] === 'object') { + result[key] = removeUriFormat(schema[key]) + } else if (key === 'additionalProperties' && typeof schema[key] === 'object') { + result[key] = removeUriFormat(schema[key]) + } else if (['anyOf', 'allOf', 'oneOf'].includes(key) && Array.isArray(schema[key])) { + result[key] = schema[key].map((item: any) => removeUriFormat(item)) + } else { + result[key] = removeUriFormat(schema[key]) + } + } + return result +} + +/** + * Main transformation function from OpenAI to Claude format + */ 
+export function transformOpenAIToClaude(openAIRequest: any): { claudeRequest: any, droppedParams: string[] } { + // Deep clone to avoid mutating original + const req = JSON.parse(JSON.stringify(openAIRequest)) + + // Apply transformations in order + const dropped = sanitizeRoot(req) + mapTools(req) + mapToolChoice(req) + transformMessages(req) + + return { + claudeRequest: req, + droppedParams: dropped.keys + } +} + +/** + * Transform Claude response back to OpenAI format + */ +export function transformClaudeToOpenAI(claudeResponse: any, model: string): any { + // Handle non-streaming response + const openAIResponse: any = { + id: claudeResponse.id || `chatcmpl-${Math.random().toString(36).substring(2, 15)}`, + object: 'chat.completion', + created: Math.floor(Date.now() / 1000), + model: model, + choices: [], + usage: { + prompt_tokens: claudeResponse.usage?.input_tokens || 0, + completion_tokens: claudeResponse.usage?.output_tokens || 0, + total_tokens: (claudeResponse.usage?.input_tokens || 0) + (claudeResponse.usage?.output_tokens || 0) + } + } + + // Build the message from Claude content blocks + const message: any = { + role: 'assistant', + content: null + } + + const textParts: string[] = [] + const toolCalls: any[] = [] + + if (Array.isArray(claudeResponse.content)) { + for (const block of claudeResponse.content) { + if (block.type === 'text') { + textParts.push(block.text) + } else if (block.type === 'tool_use') { + toolCalls.push({ + id: block.id, + type: 'function', + function: { + name: block.name, + arguments: JSON.stringify(block.input) + } + }) + } + } + } else if (typeof claudeResponse.content === 'string') { + textParts.push(claudeResponse.content) + } + + // Set content and tool_calls + message.content = textParts.join('') + if (toolCalls.length > 0) { + message.tool_calls = toolCalls + } + + // Map stop_reason to finish_reason + let finishReason = 'stop' + if (claudeResponse.stop_reason === 'tool_use') { + finishReason = 'tool_calls' + } else if 
(claudeResponse.stop_reason === 'max_tokens') { + finishReason = 'length' + } else if (claudeResponse.stop_reason === 'end_turn') { + finishReason = 'stop' + } + + openAIResponse.choices.push({ + index: 0, + message, + finish_reason: finishReason + }) + + return openAIResponse +} \ No newline at end of file diff --git a/wrangler.toml b/wrangler.toml index 933d81c..895acc8 100644 --- a/wrangler.toml +++ b/wrangler.toml @@ -7,3 +7,4 @@ compatibility_flags = ["nodejs_compat"] # DEBUG = "true" # REASONING_MODEL = "deepseek/deepseek-r1-0528:free" # COMPLETION_MODEL = "deepseek/deepseek-r1-0528:free" +# REASONING_EFFORT = "high" From 3d3140673136d2b2cb77a7399f16f400b283ec4e Mon Sep 17 00:00:00 2001 From: kiyo-e Date: Wed, 6 Aug 2025 22:09:25 +0900 Subject: [PATCH 2/2] feat: update reasoning and completion models to z-ai/glm-4.5-air for improved performance --- .dev.vars.example | 4 +- .env.example | 4 +- CLAUDE.md | 146 ++++++++++++++++++++++++++++++++++++++-------- README.md | 20 +++---- src/index.ts | 17 ++---- wrangler.toml | 4 +- 6 files changed, 143 insertions(+), 52 deletions(-) diff --git a/.dev.vars.example b/.dev.vars.example index 3c67657..a0aa6b5 100644 --- a/.dev.vars.example +++ b/.dev.vars.example @@ -6,8 +6,8 @@ # ANTHROPIC_PROXY_BASE_URL=https://openrouter.ai/api/v1 # Model configuration (optional) -# REASONING_MODEL=deepseek/deepseek-r1-0528:free -# COMPLETION_MODEL=deepseek/deepseek-r1-0528:free +# REASONING_MODEL=z-ai/glm-4.5-air:free +# COMPLETION_MODEL=z-ai/glm-4.5-air:free # Enable debug logging (optional) # DEBUG=true diff --git a/.env.example b/.env.example index 650104c..6da4575 100644 --- a/.env.example +++ b/.env.example @@ -6,8 +6,8 @@ # ANTHROPIC_PROXY_BASE_URL=https://openrouter.ai/api/v1 # Model configuration (optional) -# REASONING_MODEL=deepseek/deepseek-r1-0528:free -# COMPLETION_MODEL=deepseek/deepseek-r1-0528:free +# REASONING_MODEL=z-ai/glm-4.5-air:free +# COMPLETION_MODEL=z-ai/glm-4.5-air:free # Enable debug logging (optional) # 
DEBUG=true diff --git a/CLAUDE.md b/CLAUDE.md index ecebcf7..53299dd 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -9,17 +9,28 @@ This is a Claude Code proxy service that translates between Anthropic's Claude A ## Architecture ### Core Components -- **`src/index.ts`** - Main Hono application with API proxy logic +- **`src/index.ts`** - Main Hono application with API proxy logic (src/index.ts:39-516) - **`src/server.ts`** - Node.js server wrapper for CLI distribution with argument parsing +- **`src/transform.ts`** - API format transformation utilities between OpenAI and Claude formats -### API Translation Logic -The proxy service handles (in `src/index.ts:32-450`): +### API Translation Logic (src/index.ts:39-516) +The proxy handles `/v1/messages` POST requests and transforms between formats: - **Message normalization**: Converts Claude's nested content arrays to OpenAI's flat structure - **Tool call mapping**: Transforms Claude's `tool_use`/`tool_result` to OpenAI's `tool_calls`/`tool` roles - **Schema transformation**: Removes `format: 'uri'` constraints from JSON schemas for compatibility -- **Model routing**: Dynamically selects models based on request type (reasoning vs completion) +- **Model routing**: Dynamically selects models based on `thinking` flag in request - **Streaming support**: Handles both streaming and non-streaming responses with SSE +### Transformation Module (src/transform.ts) +Key exported functions: +- `transformOpenAIToClaude()`: Main transformation from OpenAI to Claude format +- `sanitizeRoot()`: Drops unsupported OpenAI parameters and ensures Claude requirements +- `mapTools()`: Converts OpenAI tools/functions to Claude tool format +- `mapToolChoice()`: Maps OpenAI function_call to Claude tool_choice +- `transformMessages()`: Converts message roles and content blocks +- `removeUriFormat()`: Recursively removes format:'uri' from JSON schemas +- `transformClaudeToOpenAI()`: Converts Claude responses back to OpenAI format + ### Dual Runtime 
Support - **Cloudflare Workers**: Uses Hono's built-in fetch handler (`src/index.ts`) - **Node.js**: Uses `@hono/node-server` adapter (`src/server.ts`) @@ -30,39 +41,66 @@ The proxy service handles (in `src/index.ts:32-450`): # Install dependencies bun install -# Local development server (hot reload) +# Local development server (hot reload on port 3000) bun run start -# Cloudflare Workers development +# Cloudflare Workers development (local testing) bun run dev -# Build CLI package +# Build CLI binary to ./bin bun run build +# Test the built CLI +bun run bin --help +./bin --help +./bin -p 8080 # Run on different port + # Deploy to Cloudflare Workers bun run deploy + +# Set environment variables for Cloudflare Workers +npx wrangler secret put CLAUDE_CODE_PROXY_API_KEY +npx wrangler env put REASONING_MODEL "z-ai/glm-4.5-air:free" + +# Publishing to npm +npm version patch # or minor/major +npm publish ``` ## CLI Package The project builds to an executable CLI via `bun run build`: -- **Output**: `./bin` - Standalone Node.js executable -- **Version management**: Reads from `package.json` dynamically -- **CLI flags**: `-v/--version`, `--help`, `-p/--port` +- **Output**: `./bin` - Standalone Node.js executable with ES module format +- **Version management**: Reads dynamically from `package.json` +- **CLI flags**: + - `-v/--version`: Show version + - `--help`: Show help information + - `-p/--port `: Set server port (default: 3000) +- **Build process**: Uses Bun's native TypeScript compilation with executable permission ## Environment Variables Configure via `wrangler.toml` or environment: -- `CLAUDE_CODE_PROXY_API_KEY` - Bearer token for upstream API + +### Required +- `CLAUDE_CODE_PROXY_API_KEY` - Bearer token for upstream API authentication + +### Optional Configuration - `ANTHROPIC_PROXY_BASE_URL` - Upstream API URL (default: https://models.github.ai/inference) - `REASONING_MODEL` - Model for reasoning requests (default: openai/gpt-4.1) - `COMPLETION_MODEL` - Model for 
completion requests (default: openai/gpt-4.1) -- `REASONING_MAX_TOKENS` - Max tokens for reasoning model (optional) -- `COMPLETION_MAX_TOKENS` - Max tokens for completion model (optional) -- `REASONING_EFFORT` - Reasoning effort level for reasoning model (optional, e.g., "low", "medium", "high") +- `REASONING_MAX_TOKENS` - Max tokens for reasoning model (overrides request setting) +- `COMPLETION_MAX_TOKENS` - Max tokens for completion model (overrides request setting) +- `REASONING_EFFORT` - Reasoning effort level for reasoning model (values: "low", "medium", "high") - `DEBUG` - Enable debug logging (default: false) - `PORT` - Server port for Node.js mode (default: 3000) +### Model Selection Logic +- When request contains `thinking: true`, uses `REASONING_MODEL` +- Otherwise uses `COMPLETION_MODEL` +- `REASONING_EFFORT` only applies when using reasoning models +- Max tokens overrides take precedence over request-provided values + ## Deployment Options ### Cloudflare Workers @@ -72,10 +110,19 @@ bun run deploy ``` ### Docker -Multi-stage build with production optimization: +Multi-stage build with production optimization using distroless image: ```bash +# Build image docker build -t claude-code-proxy . 
-docker run -d -p 3000:3000 claude-code-proxy + +# Run with environment variables +docker run -d -p 3000:3000 \ + -e CLAUDE_CODE_PROXY_API_KEY=your_token \ + -e ANTHROPIC_PROXY_BASE_URL=https://models.github.ai/inference \ + claude-code-proxy + +# Development with hot reload (using compose.yml) +docker compose up ``` ### NPM Package @@ -87,14 +134,28 @@ claude-code-proxy --help ## GitHub Actions Integration -Service container setup for `@claude` mentions: +Service container setup for `@claude` mentions in GitHub Actions: ```yaml -services: - claude-code-proxy: - image: ghcr.io/kiyo-e/claude-code-proxy:latest - ports: [3000:3000] - env: - CLAUDE_CODE_PROXY_API_KEY: ${{ secrets.GITHUB_TOKEN }} +jobs: + review: + runs-on: ubuntu-latest + services: + claude-code-proxy: + image: ghcr.io/kiyo-e/claude-code-proxy:latest + ports: + - 3000:3000 + env: + CLAUDE_CODE_PROXY_API_KEY: ${{ secrets.GITHUB_TOKEN }} + ANTHROPIC_PROXY_BASE_URL: https://models.github.ai/inference + REASONING_MODEL: openai/gpt-4.1 + COMPLETION_MODEL: openai/gpt-4.1 + + steps: + - uses: actions/checkout@v4 + - name: Run Claude Code + run: | + export ANTHROPIC_BASE_URL=http://localhost:3000 + claude "Review the changes in this PR" ``` ## Local Usage with Claude Code @@ -121,6 +182,41 @@ ANTHROPIC_BASE_URL=http://localhost:3000 claude "Review the API code and suggest ```bash # Using environment file echo "ANTHROPIC_PROXY_BASE_URL=https://openrouter.ai/api/v1" > .env -echo "REASONING_MODEL=deepseek/deepseek-r1-0528:free" >> .env +echo "REASONING_MODEL=z-ai/glm-4.5-air:free" >> .env +echo "COMPLETION_MODEL=z-ai/glm-4.5-air:free" >> .env +echo "REASONING_EFFORT=high" >> .env docker run -d -p 3000:3000 --env-file .env ghcr.io/kiyo-e/claude-code-proxy:latest -``` \ No newline at end of file +``` + +### Development with Local Claude Code +```bash +# Start the proxy +bun run start + +# In another terminal, use with Claude Code +ANTHROPIC_BASE_URL=http://localhost:3000 \ +CLAUDE_CODE_PROXY_API_KEY=your_token \ 
+claude "Review this code and suggest improvements" + +# Debug mode +DEBUG=true bun run start +``` + +## Important Implementation Notes + +### Request Flow +1. Client sends Claude API format request to `/v1/messages` +2. Proxy transforms to OpenAI format using `transformClaudeToOpenAI()` +3. Request forwarded to upstream API (configured via `ANTHROPIC_PROXY_BASE_URL`) +4. Response transformed back to Claude format +5. Streaming responses handled with Server-Sent Events (SSE) + +### Error Handling +- HTTP errors from upstream API: Returns same status code with error details +- API errors in response body: Returns 500 with error message +- Dropped parameters tracked in `X-Dropped-Params` header + +### Debugging +- Enable `DEBUG=true` to log request/response payloads +- Bearer tokens are automatically masked in logs +- Check health endpoint at `/` for configuration status \ No newline at end of file diff --git a/README.md b/README.md index 1e4e833..6bb04c1 100644 --- a/README.md +++ b/README.md @@ -56,8 +56,8 @@ docker run -d -p 3000:3000 -e CLAUDE_CODE_PROXY_API_KEY=your_github_token ghcr.i docker run -d -p 3000:3000 \ -e CLAUDE_CODE_PROXY_API_KEY=your_openrouter_key \ -e ANTHROPIC_PROXY_BASE_URL=https://openrouter.ai/api/v1 \ - -e REASONING_MODEL=deepseek/deepseek-r1-0528:free \ - -e COMPLETION_MODEL=deepseek/deepseek-r1-0528:free \ + -e REASONING_MODEL=z-ai/glm-4.5-air:free \ + -e COMPLETION_MODEL=z-ai/glm-4.5-air:free \ -e REASONING_EFFORT=high \ ghcr.io/kiyo-e/claude-code-proxy:latest @@ -72,8 +72,8 @@ ANTHROPIC_BASE_URL=http://localhost:3000 claude "Help me review this code" cat > .env << EOF CLAUDE_CODE_PROXY_API_KEY=your_api_key ANTHROPIC_PROXY_BASE_URL=https://openrouter.ai/api/v1 -REASONING_MODEL=deepseek/deepseek-r1-0528:free -COMPLETION_MODEL=deepseek/deepseek-r1-0528:free +REASONING_MODEL=z-ai/glm-4.5-air:free +COMPLETION_MODEL=z-ai/glm-4.5-air:free REASONING_MAX_TOKENS=4096 COMPLETION_MAX_TOKENS=2048 REASONING_EFFORT=high @@ -131,9 +131,9 @@ npx wrangler
secret put CLAUDE_CODE_PROXY_API_KEY npx wrangler secret put ANTHROPIC_PROXY_BASE_URL # Enter: https://openrouter.ai/api/v1 npx wrangler secret put REASONING_MODEL -# Enter: deepseek/deepseek-r1-0528:free +# Enter: z-ai/glm-4.5-air:free npx wrangler secret put COMPLETION_MODEL -# Enter: deepseek/deepseek-r1-0528:free +# Enter: z-ai/glm-4.5-air:free ``` 3. **Test the deployment:** @@ -206,8 +206,8 @@ npx wrangler secret put CLAUDE_CODE_PROXY_API_KEY npx wrangler secret put ANTHROPIC_PROXY_BASE_URL # Set regular environment variables -npx wrangler env put REASONING_MODEL "deepseek/deepseek-r1-0528:free" -npx wrangler env put COMPLETION_MODEL "deepseek/deepseek-r1-0528:free" +npx wrangler env put REASONING_MODEL "z-ai/glm-4.5-air:free" +npx wrangler env put COMPLETION_MODEL "z-ai/glm-4.5-air:free" npx wrangler env put REASONING_EFFORT "high" npx wrangler env put DEBUG "false" ``` @@ -216,8 +216,8 @@ Alternatively, configure via `wrangler.toml`: ```toml [env.production.vars] -REASONING_MODEL = "deepseek/deepseek-r1-0528:free" -COMPLETION_MODEL = "deepseek/deepseek-r1-0528:free" +REASONING_MODEL = "z-ai/glm-4.5-air:free" +COMPLETION_MODEL = "z-ai/glm-4.5-air:free" REASONING_EFFORT = "high" DEBUG = "false" ``` diff --git a/src/index.ts b/src/index.ts index 240837b..bf20ea3 100755 --- a/src/index.ts +++ b/src/index.ts @@ -167,16 +167,10 @@ app.post('/v1/messages', async (c) => { } // Add tool_choice if present if (claudeRequest.tool_choice) { - if (claudeRequest.tool_choice.type === 'none') { - openaiPayload.tool_choice = 'none' - } else if (claudeRequest.tool_choice.type === 'auto') { - openaiPayload.tool_choice = 'auto' - } else if (claudeRequest.tool_choice.type === 'tool' && claudeRequest.tool_choice.name) { - openaiPayload.tool_choice = { - type: 'function', - function: { name: claudeRequest.tool_choice.name } - } - } + const { type, name } = claudeRequest.tool_choice + openaiPayload.tool_choice = + type === 'tool' && name ? 
{ type: 'function', function: { name } } : + type === 'none' || type === 'auto' ? type : undefined } if (tools.length > 0) openaiPayload.tools = tools @@ -225,7 +219,8 @@ app.post('/v1/messages', async (c) => { const data: any = await openaiResponse.json() debug('OpenAI response:', JSON.stringify(data, null, 2)) if (data.error) { - throw new Error(data.error.message) + console.error('OpenAI API returned error in response body:', data.error) + return c.json({ error: data.error.message || 'Unknown error' }, 500) } // Create Claude response from OpenAI data diff --git a/wrangler.toml b/wrangler.toml index 895acc8..4b426cc 100644 --- a/wrangler.toml +++ b/wrangler.toml @@ -5,6 +5,6 @@ compatibility_flags = ["nodejs_compat"] [vars] # DEBUG = "true" -# REASONING_MODEL = "deepseek/deepseek-r1-0528:free" -# COMPLETION_MODEL = "deepseek/deepseek-r1-0528:free" +# REASONING_MODEL = "z-ai/glm-4.5-air:free" +# COMPLETION_MODEL = "z-ai/glm-4.5-air:free" # REASONING_EFFORT = "high"