From 97fa164bb2f663e1138f7d462c0ca89df5d236e9 Mon Sep 17 00:00:00 2001 From: kiyo-e Date: Tue, 3 Jun 2025 23:54:26 +0900 Subject: [PATCH] fix: add max tokens configuration for reasoning and completion models --- CLAUDE.md | 2 + README.md | 146 +++++++++++++++++++++++++++++++++++++++++++++++++-- src/index.ts | 21 ++++++-- 3 files changed, 162 insertions(+), 7 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index e930e29..cad6001 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -57,6 +57,8 @@ Configure via `wrangler.toml` or environment: - `ANTHROPIC_PROXY_BASE_URL` - Upstream API URL (default: https://models.github.ai/inference) - `REASONING_MODEL` - Model for reasoning requests (default: openai/gpt-4.1) - `COMPLETION_MODEL` - Model for completion requests (default: openai/gpt-4.1) +- `REASONING_MAX_TOKENS` - Max tokens for reasoning model (optional) +- `COMPLETION_MAX_TOKENS` - Max tokens for completion model (optional) - `DEBUG` - Enable debug logging (default: false) - `PORT` - Server port for Node.js mode (default: 3000) diff --git a/README.md b/README.md index ee64839..e26d859 100644 --- a/README.md +++ b/README.md @@ -54,6 +54,8 @@ CLAUDE_CODE_PROXY_API_KEY=your_api_key ANTHROPIC_PROXY_BASE_URL=https://openrouter.ai/api/v1 REASONING_MODEL=deepseek/deepseek-r1-0528:free COMPLETION_MODEL=deepseek/deepseek-r1-0528:free +REASONING_MAX_TOKENS=4096 +COMPLETION_MAX_TOKENS=2048 DEBUG=false EOF @@ -61,6 +63,75 @@ EOF docker run -d -p 3000:3000 --env-file .env ghcr.io/kiyo-e/claude-code-proxy:latest ``` +### Cloudflare Workers + +```bash +# Deploy to Cloudflare Workers +bun run deploy + +# Configure environment variables in Workers dashboard +# Or set them via wrangler CLI: +npx wrangler secret put CLAUDE_CODE_PROXY_API_KEY +npx wrangler secret put ANTHROPIC_PROXY_BASE_URL +``` + +After deployment, your proxy will be available at `https://your-worker-name.your-subdomain.workers.dev` + +#### Using with Claude Code + +```bash +# Set your deployed Worker URL as the base URL 
+export ANTHROPIC_BASE_URL=https://your-worker-name.your-subdomain.workers.dev + +# Now use Claude Code normally +claude "Help me review this code" +claude "Explain this function and suggest improvements" +``` + +#### Complete Setup Example + +1. **Deploy the proxy:** +```bash +git clone https://github.com/kiyo-e/claude-code-proxy +cd claude-code-proxy +bun install +bun run deploy +``` + +2. **Set environment variables:** +```bash +# For GitHub Models (recommended) +npx wrangler secret put CLAUDE_CODE_PROXY_API_KEY +# Enter your GitHub Personal Access Token + +# For OpenRouter +npx wrangler secret put CLAUDE_CODE_PROXY_API_KEY +# Enter your OpenRouter API key +npx wrangler secret put ANTHROPIC_PROXY_BASE_URL +# Enter: https://openrouter.ai/api/v1 +npx wrangler secret put REASONING_MODEL +# Enter: deepseek/deepseek-r1-0528:free +npx wrangler secret put COMPLETION_MODEL +# Enter: deepseek/deepseek-r1-0528:free +``` + +3. **Test the deployment:** +```bash +curl https://your-worker-name.your-subdomain.workers.dev +``` + +4. 
**Use with Claude Code:** +```bash +# Install Claude Code if not already installed +npm install -g @anthropic-ai/claude-code + +# Set the proxy URL +export ANTHROPIC_BASE_URL=https://your-worker-name.your-subdomain.workers.dev + +# Use Claude Code +claude "Review this TypeScript code and suggest improvements" +``` + ## Development ### Local Development @@ -82,12 +153,9 @@ bun run build ./bin --help ``` -### Deploy +### Build and Publish ```bash -# Deploy to Cloudflare Workers -bun run deploy - # Build and publish npm package bun run build npm publish @@ -101,9 +169,35 @@ npm publish - `ANTHROPIC_PROXY_BASE_URL` - Upstream API URL (default: https://models.github.ai/inference) - `REASONING_MODEL` - Model for reasoning requests (default: openai/gpt-4.1) - `COMPLETION_MODEL` - Model for completion requests (default: openai/gpt-4.1) +- `REASONING_MAX_TOKENS` - Max tokens for reasoning model (optional) +- `COMPLETION_MAX_TOKENS` - Max tokens for completion model (optional) - `DEBUG` - Enable debug logging (default: false) - `PORT` - Server port for CLI mode (default: 3000) +### Cloudflare Workers Configuration + +For Cloudflare Workers deployment, set environment variables using the Workers dashboard or wrangler CLI: + +```bash +# Set secrets (recommended for sensitive data) +npx wrangler secret put CLAUDE_CODE_PROXY_API_KEY +npx wrangler secret put ANTHROPIC_PROXY_BASE_URL + +# Set regular environment variables at deploy time +npx wrangler deploy --var REASONING_MODEL:deepseek/deepseek-r1-0528:free \ + --var COMPLETION_MODEL:deepseek/deepseek-r1-0528:free \ + --var DEBUG:false +``` + +Alternatively, configure via `wrangler.toml`: + +```toml +[env.production.vars] +REASONING_MODEL = "deepseek/deepseek-r1-0528:free" +COMPLETION_MODEL = "deepseek/deepseek-r1-0528:free" +DEBUG = "false" +``` + ### CLI Options ```bash @@ -148,6 +242,50 @@ jobs: ANTHROPIC_BASE_URL: http://localhost:3000 ``` +## Usage Examples + +### Claude Code with Cloudflare Workers + +Once you have 
deployed the proxy to Cloudflare Workers: + +```bash +# Set your Worker URL as the API base +export ANTHROPIC_BASE_URL=https://claude-proxy.your-subdomain.workers.dev + +# Use Claude Code for various tasks +claude "Review this JavaScript function for potential bugs" +claude "Generate TypeScript interfaces for this API response" +claude "Optimize this React component for better performance" +claude "Explain what this complex regex pattern does" + +# Use with specific files +claude "Check this package.json for security vulnerabilities" package.json +claude "Suggest improvements for this README" README.md +``` + +### Direct API Usage + +You can also use the proxy directly with HTTP requests: + +```bash +# Health check +curl https://claude-proxy.your-subdomain.workers.dev + +# Send a message (example) +curl -X POST https://claude-proxy.your-subdomain.workers.dev/v1/messages \ + -H "Content-Type: application/json" \ + -d '{ + "model": "claude-3-sonnet-20240229", + "max_tokens": 1024, + "messages": [ + { + "role": "user", + "content": "Hello, Claude!" 
+ } + ] + }' +``` + ## API Endpoints - `GET /` - Health check and configuration status diff --git a/src/index.ts b/src/index.ts index c80d674..b678955 100755 --- a/src/index.ts +++ b/src/index.ts @@ -7,6 +7,8 @@ const app = new Hono<{ CLAUDE_CODE_PROXY_API_KEY?: string REASONING_MODEL?: string COMPLETION_MODEL?: string + REASONING_MAX_TOKENS?: string + COMPLETION_MAX_TOKENS?: string DEBUG?: string } }>() @@ -16,7 +18,7 @@ const defaultModel = 'openai/gpt-4.1' // Health check endpoint app.get('/', (c) => { - const { ANTHROPIC_PROXY_BASE_URL, REASONING_MODEL, COMPLETION_MODEL} = env(c) + const { ANTHROPIC_PROXY_BASE_URL, REASONING_MODEL, COMPLETION_MODEL, REASONING_MAX_TOKENS, COMPLETION_MAX_TOKENS} = env(c) return c.json({ status: 'ok', @@ -24,14 +26,16 @@ app.get('/', (c) => { config: { ANTHROPIC_PROXY_BASE_URL, REASONING_MODEL, - COMPLETION_MODEL + COMPLETION_MODEL, + REASONING_MAX_TOKENS, + COMPLETION_MAX_TOKENS } }) }) app.post('/v1/messages', async (c) => { // Get environment variables from context - const { CLAUDE_CODE_PROXY_API_KEY, ANTHROPIC_PROXY_BASE_URL, REASONING_MODEL, COMPLETION_MODEL, DEBUG } = env(c) + const { CLAUDE_CODE_PROXY_API_KEY, ANTHROPIC_PROXY_BASE_URL, REASONING_MODEL, COMPLETION_MODEL, REASONING_MAX_TOKENS, COMPLETION_MAX_TOKENS, DEBUG } = env(c) try { const baseUrl = ANTHROPIC_PROXY_BASE_URL || 'https://models.github.ai/inference' @@ -160,6 +164,17 @@ app.post('/v1/messages', async (c) => { temperature: payload.temperature !== undefined ? payload.temperature : 1, stream: payload.stream === true, } + + // Apply max_tokens override if configured + const selectedModel = payload.thinking ? models.reasoning : models.completion + const reasoningMaxTokens = REASONING_MAX_TOKENS ? parseInt(REASONING_MAX_TOKENS) : undefined + const completionMaxTokens = COMPLETION_MAX_TOKENS ? 
parseInt(COMPLETION_MAX_TOKENS) : undefined + + if (selectedModel === models.reasoning && reasoningMaxTokens) { + openaiPayload.max_tokens = reasoningMaxTokens + } else if (selectedModel === models.completion && completionMaxTokens) { + openaiPayload.max_tokens = completionMaxTokens + } if (tools.length > 0) openaiPayload.tools = tools debug('OpenAI payload:', openaiPayload)