From 97fa164bb2f663e1138f7d462c0ca89df5d236e9 Mon Sep 17 00:00:00 2001
From: kiyo-e <kiyo-e@users.noreply.github.com>
Date: Tue, 3 Jun 2025 23:54:26 +0900
Subject: [PATCH] fix: add max tokens configuration for reasoning and
 completion models

---
 CLAUDE.md    |   2 +
 README.md    | 146 +++++++++++++++++++++++++++++++++++++++++++++++++--
 src/index.ts |  21 ++++++--
 3 files changed, 162 insertions(+), 7 deletions(-)

diff --git a/CLAUDE.md b/CLAUDE.md
index e930e29..cad6001 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -57,6 +57,8 @@ Configure via `wrangler.toml` or environment:
 - `ANTHROPIC_PROXY_BASE_URL` - Upstream API URL (default: https://models.github.ai/inference)
 - `REASONING_MODEL` - Model for reasoning requests (default: openai/gpt-4.1)
 - `COMPLETION_MODEL` - Model for completion requests (default: openai/gpt-4.1)
+- `REASONING_MAX_TOKENS` - Overrides `max_tokens` on upstream requests for the reasoning model (optional)
+- `COMPLETION_MAX_TOKENS` - Overrides `max_tokens` on upstream requests for the completion model (optional)
 - `DEBUG` - Enable debug logging (default: false)
 - `PORT` - Server port for Node.js mode (default: 3000)
 
diff --git a/README.md b/README.md
index ee64839..e26d859 100644
--- a/README.md
+++ b/README.md
@@ -54,6 +54,8 @@ CLAUDE_CODE_PROXY_API_KEY=your_api_key
 ANTHROPIC_PROXY_BASE_URL=https://openrouter.ai/api/v1
 REASONING_MODEL=deepseek/deepseek-r1-0528:free
 COMPLETION_MODEL=deepseek/deepseek-r1-0528:free
+REASONING_MAX_TOKENS=4096
+COMPLETION_MAX_TOKENS=2048
 DEBUG=false
 EOF
 
@@ -61,6 +63,75 @@ EOF
 docker run -d -p 3000:3000 --env-file .env ghcr.io/kiyo-e/claude-code-proxy:latest
 ```
 
+### Cloudflare Workers
+
+```bash
+# Deploy to Cloudflare Workers
+bun run deploy
+
+# Configure environment variables in Workers dashboard
+# Or set them via wrangler CLI:
+npx wrangler secret put CLAUDE_CODE_PROXY_API_KEY
+npx wrangler secret put ANTHROPIC_PROXY_BASE_URL
+```
+
+After deployment, your proxy will be available at `https://your-worker-name.your-subdomain.workers.dev`
+
+#### Using with Claude Code
+
+```bash
+# Set your deployed Worker URL as the base URL
+export ANTHROPIC_BASE_URL=https://your-worker-name.your-subdomain.workers.dev
+
+# Now use Claude Code normally
+claude "Help me review this code"
+claude "Explain this function and suggest improvements"
+```
+
+#### Complete Setup Example
+
+1. **Deploy the proxy:**
+```bash
+git clone https://github.com/kiyo-e/claude-code-proxy
+cd claude-code-proxy
+bun install
+bun run deploy
+```
+
+2. **Set environment variables:**
+```bash
+# Option A: GitHub Models (recommended; default upstream, no other variables needed)
+npx wrangler secret put CLAUDE_CODE_PROXY_API_KEY
+# Enter your GitHub Personal Access Token
+
+# Option B: OpenRouter (use these commands instead of Option A)
+npx wrangler secret put CLAUDE_CODE_PROXY_API_KEY
+# Enter your OpenRouter API key
+npx wrangler secret put ANTHROPIC_PROXY_BASE_URL
+# Enter: https://openrouter.ai/api/v1
+npx wrangler secret put REASONING_MODEL
+# Enter: deepseek/deepseek-r1-0528:free
+npx wrangler secret put COMPLETION_MODEL
+# Enter: deepseek/deepseek-r1-0528:free
+```
+
+3. **Test the deployment:**
+```bash
+curl https://your-worker-name.your-subdomain.workers.dev
+```
+
+4. **Use with Claude Code:**
+```bash
+# Install Claude Code if not already installed
+npm install -g @anthropic-ai/claude-code
+
+# Set the proxy URL
+export ANTHROPIC_BASE_URL=https://your-worker-name.your-subdomain.workers.dev
+
+# Use Claude Code
+claude "Review this TypeScript code and suggest improvements"
+```
+
 ## Development
 
 ### Local Development
@@ -82,12 +153,9 @@ bun run build
 ./bin --help
 ```
 
-### Deploy
+### Build and Publish
 
 ```bash
-# Deploy to Cloudflare Workers
-bun run deploy
-
 # Build and publish npm package
 bun run build
 npm publish
@@ -101,9 +169,35 @@ npm publish
 - `ANTHROPIC_PROXY_BASE_URL` - Upstream API URL (default: https://models.github.ai/inference)
 - `REASONING_MODEL` - Model for reasoning requests (default: openai/gpt-4.1)
 - `COMPLETION_MODEL` - Model for completion requests (default: openai/gpt-4.1)
+- `REASONING_MAX_TOKENS` - Overrides `max_tokens` on upstream requests for the reasoning model (optional)
+- `COMPLETION_MAX_TOKENS` - Overrides `max_tokens` on upstream requests for the completion model (optional)
 - `DEBUG` - Enable debug logging (default: false)
 - `PORT` - Server port for CLI mode (default: 3000)
 
+### Cloudflare Workers Configuration
+
+For Cloudflare Workers deployment, set environment variables using the Workers dashboard or wrangler CLI:
+
+```bash
+# Set secrets (recommended for sensitive data)
+npx wrangler secret put CLAUDE_CODE_PROXY_API_KEY
+npx wrangler secret put ANTHROPIC_PROXY_BASE_URL
+
+# Set regular (non-secret) variables at deploy time; wrangler has no `env put` command
+npx wrangler deploy --var REASONING_MODEL:"deepseek/deepseek-r1-0528:free" \
+  --var COMPLETION_MODEL:"deepseek/deepseek-r1-0528:free" \
+  --var DEBUG:false
+```
+
+Alternatively, configure via `wrangler.toml`:
+
+```toml
+[env.production.vars]
+REASONING_MODEL = "deepseek/deepseek-r1-0528:free"
+COMPLETION_MODEL = "deepseek/deepseek-r1-0528:free"
+DEBUG = "false"
+```
+
 ### CLI Options
 
 ```bash
@@ -148,6 +242,50 @@ jobs:
           ANTHROPIC_BASE_URL: http://localhost:3000
 ```
 
+## Usage Examples
+
+### Claude Code with Cloudflare Workers
+
+Once you have deployed the proxy to Cloudflare Workers:
+
+```bash
+# Set your Worker URL as the API base
+export ANTHROPIC_BASE_URL=https://claude-proxy.your-subdomain.workers.dev
+
+# Use Claude Code for various tasks
+claude "Review this JavaScript function for potential bugs"
+claude "Generate TypeScript interfaces for this API response"
+claude "Optimize this React component for better performance"
+claude "Explain what this complex regex pattern does"
+
+# Use with specific files
+claude "Check this package.json for security vulnerabilities" package.json
+claude "Suggest improvements for this README" README.md
+```
+
+### Direct API Usage
+
+You can also use the proxy directly with HTTP requests:
+
+```bash
+# Health check
+curl https://claude-proxy.your-subdomain.workers.dev
+
+# Send a message (example)
+curl -X POST https://claude-proxy.your-subdomain.workers.dev/v1/messages \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "claude-3-sonnet-20240229",
+    "max_tokens": 1024,
+    "messages": [
+      {
+        "role": "user",
+        "content": "Hello, Claude!"
+      }
+    ]
+  }'
+```
+
 ## API Endpoints
 
 - `GET /` - Health check and configuration status
diff --git a/src/index.ts b/src/index.ts
index c80d674..b678955 100755
--- a/src/index.ts
+++ b/src/index.ts
@@ -7,6 +7,8 @@ const app = new Hono<{
     CLAUDE_CODE_PROXY_API_KEY?: string
     REASONING_MODEL?: string
     COMPLETION_MODEL?: string
+    REASONING_MAX_TOKENS?: string
+    COMPLETION_MAX_TOKENS?: string
     DEBUG?: string
   }
 }>()
@@ -16,7 +18,7 @@ const defaultModel = 'openai/gpt-4.1'
 
 // Health check endpoint
 app.get('/', (c) => {
-  const { ANTHROPIC_PROXY_BASE_URL, REASONING_MODEL, COMPLETION_MODEL} = env(c)
+  const { ANTHROPIC_PROXY_BASE_URL, REASONING_MODEL, COMPLETION_MODEL, REASONING_MAX_TOKENS, COMPLETION_MAX_TOKENS} = env(c)
 
   return c.json({
     status: 'ok',
@@ -24,14 +26,16 @@ app.get('/', (c) => {
     config: {
       ANTHROPIC_PROXY_BASE_URL,
       REASONING_MODEL,
-      COMPLETION_MODEL
+      COMPLETION_MODEL,
+      REASONING_MAX_TOKENS,
+      COMPLETION_MAX_TOKENS
     }
   })
 })
 
 app.post('/v1/messages', async (c) => {
   // Get environment variables from context
-  const { CLAUDE_CODE_PROXY_API_KEY, ANTHROPIC_PROXY_BASE_URL, REASONING_MODEL, COMPLETION_MODEL, DEBUG } = env(c)
+  const { CLAUDE_CODE_PROXY_API_KEY, ANTHROPIC_PROXY_BASE_URL, REASONING_MODEL, COMPLETION_MODEL, REASONING_MAX_TOKENS, COMPLETION_MAX_TOKENS, DEBUG } = env(c)
 
   try {
     const baseUrl = ANTHROPIC_PROXY_BASE_URL || 'https://models.github.ai/inference'
@@ -160,6 +164,17 @@ app.post('/v1/messages', async (c) => {
       temperature: payload.temperature !== undefined ? payload.temperature : 1,
       stream: payload.stream === true,
     }
+
+    // Apply the configured max_tokens override for this request type.
+    // Choose by payload.thinking rather than by comparing model names: when both
+    // models share a name, a name comparison always picks the reasoning limit.
+    const maxTokensSetting = payload.thinking ? REASONING_MAX_TOKENS : COMPLETION_MAX_TOKENS
+    const maxTokensOverride = maxTokensSetting ? Number.parseInt(maxTokensSetting, 10) : NaN
+
+    // Skip unset, non-numeric, zero or negative values so max_tokens stays untouched.
+    if (Number.isInteger(maxTokensOverride) && maxTokensOverride > 0) {
+      openaiPayload.max_tokens = maxTokensOverride
+    }
     if (tools.length > 0) openaiPayload.tools = tools
     debug('OpenAI payload:', openaiPayload)