From 96b8587c5bdee686ea25b3b377a271ec784fa181 Mon Sep 17 00:00:00 2001 From: "CJACK." Date: Tue, 7 Apr 2026 08:27:03 +0800 Subject: [PATCH 1/4] Fix token usage propagation and remove stale env docs --- README.MD | 1 - README.en.md | 1 - internal/adapter/openai/prompt_build_test.go | 9 +- internal/adapter/openai/responses_handler.go | 13 ++- .../openai/responses_stream_runtime_core.go | 18 +++- internal/adapter/openai/stream_status_test.go | 94 +++++++++++++++++++ tests/node/chat-stream.test.js | 22 ++++- 7 files changed, 143 insertions(+), 15 deletions(-) diff --git a/README.MD b/README.MD index 6e5411d4..5d2ee4e1 100644 --- a/README.MD +++ b/README.MD @@ -344,7 +344,6 @@ cp opencode.json.example opencode.json | `DS2API_CONFIG_PATH` | 配置文件路径 | `config.json` | | `DS2API_CONFIG_JSON` | 直接注入配置(JSON 或 Base64) | — | | `DS2API_ENV_WRITEBACK` | 环境变量模式下自动写回配置文件并切换文件模式(`1/true/yes/on`) | 关闭 | -| `DS2API_POW_CONCURRENCY` | PoW 并行计算协程数(可选) | 默认 CPU 核心数 | | `DS2API_STATIC_ADMIN_DIR` | 管理台静态文件目录 | `static/admin` | | `DS2API_AUTO_BUILD_WEBUI` | 启动时自动构建 WebUI | 本地开启,Vercel 关闭 | | `DS2API_DEV_PACKET_CAPTURE` | 本地开发抓包开关(记录最近会话请求/响应体) | 本地非 Vercel 默认开启 | diff --git a/README.en.md b/README.en.md index df764ab3..6753bc02 100644 --- a/README.en.md +++ b/README.en.md @@ -344,7 +344,6 @@ cp opencode.json.example opencode.json | `DS2API_CONFIG_PATH` | Config file path | `config.json` | | `DS2API_CONFIG_JSON` | Inline config (JSON or Base64) | — | | `DS2API_ENV_WRITEBACK` | Auto-write env-backed config to file and transition to file mode (`1/true/yes/on`) | Disabled | -| `DS2API_POW_CONCURRENCY` | PoW parallel solver goroutine count (optional) | Default CPU core count | | `DS2API_STATIC_ADMIN_DIR` | Admin static assets dir | `static/admin` | | `DS2API_AUTO_BUILD_WEBUI` | Auto-build WebUI on startup | Enabled locally, disabled on Vercel | | `DS2API_ACCOUNT_MAX_INFLIGHT` | Max in-flight requests per account | `2` | diff --git a/internal/adapter/openai/prompt_build_test.go b/internal/adapter/openai/prompt_build_test.go index 223689b9..390cbd48 100644 --- a/internal/adapter/openai/prompt_build_test.go +++ b/internal/adapter/openai/prompt_build_test.go @@ -74,16 +74,13 @@ func TestBuildOpenAIFinalPrompt_VercelPreparePathKeepsFinalAnswerInstruction(t * } finalPrompt, _ := buildOpenAIFinalPrompt(messages, tools, "") - if !strings.Contains(finalPrompt, "After receiving a tool result, use it directly.") { - t.Fatalf("vercel prepare finalPrompt missing final-answer instruction: %q", finalPrompt) - } - if !strings.Contains(finalPrompt, "Only call another tool if the result is insufficient.") { - t.Fatalf("vercel prepare finalPrompt missing retry guard instruction: %q", finalPrompt) + if !strings.Contains(finalPrompt, "Remember: Output ONLY the ... 
XML block when calling tools.") { + t.Fatalf("vercel prepare finalPrompt missing final tool-call anchor instruction: %q", finalPrompt) } if !strings.Contains(finalPrompt, "TOOL CALL FORMAT") { t.Fatalf("vercel prepare finalPrompt missing xml format instruction: %q", finalPrompt) } - if !strings.Contains(finalPrompt, "Do NOT wrap the XML in markdown code fences") { + if !strings.Contains(finalPrompt, "Do NOT wrap XML in markdown fences") { t.Fatalf("vercel prepare finalPrompt missing no-fence xml instruction: %q", finalPrompt) } if strings.Contains(finalPrompt, "```json") { diff --git a/internal/adapter/openai/responses_handler.go b/internal/adapter/openai/responses_handler.go index 7cb7ec30..1bd5b1c2 100644 --- a/internal/adapter/openai/responses_handler.go +++ b/internal/adapter/openai/responses_handler.go @@ -130,12 +130,17 @@ func (h *Handler) handleResponsesNonStream(w http.ResponseWriter, resp *http.Res } responseObj := openaifmt.BuildResponseObject(responseID, model, finalPrompt, sanitizedThinking, sanitizedText, toolNames) - if result.OutputTokens > 0 { + if result.PromptTokens > 0 || result.OutputTokens > 0 { if usage, ok := responseObj["usage"].(map[string]any); ok { - usage["output_tokens"] = result.OutputTokens - if input, ok := usage["input_tokens"].(int); ok { - usage["total_tokens"] = input + result.OutputTokens + if result.PromptTokens > 0 { + usage["input_tokens"] = result.PromptTokens } + if result.OutputTokens > 0 { + usage["output_tokens"] = result.OutputTokens + } + input, _ := usage["input_tokens"].(int) + output, _ := usage["output_tokens"].(int) + usage["total_tokens"] = input + output } } h.getResponseStore().put(owner, responseID, responseObj) diff --git a/internal/adapter/openai/responses_stream_runtime_core.go b/internal/adapter/openai/responses_stream_runtime_core.go index 8072ccbb..ff9ea269 100644 --- a/internal/adapter/openai/responses_stream_runtime_core.go +++ b/internal/adapter/openai/responses_stream_runtime_core.go @@ -51,6 +51,7 @@ type responsesStreamRuntime struct { messagePartAdded bool sequence int failed bool + promptTokens int outputTokens int persistResponse func(obj map[string]any) @@ -152,9 +153,19 @@ func (s *responsesStreamRuntime) finalize() { if s.outputTokens > 0 { if usage, ok := obj["usage"].(map[string]any); ok { usage["output_tokens"] = s.outputTokens - if input, ok := usage["input_tokens"].(int); ok { - usage["total_tokens"] = input + s.outputTokens + } + } + if s.promptTokens > 0 || s.outputTokens > 0 { + if usage, ok := obj["usage"].(map[string]any); ok { + if s.promptTokens > 0 { + usage["input_tokens"] = s.promptTokens } + if s.outputTokens > 0 { + usage["output_tokens"] = s.outputTokens + } + input, _ := usage["input_tokens"].(int) + output, _ := usage["output_tokens"].(int) + usage["total_tokens"] = input + output } } if s.persistResponse != nil { @@ -185,6 +196,9 @@ func (s *responsesStreamRuntime) onParsed(parsed sse.LineResult) streamengine.Pa if !parsed.Parsed { return streamengine.ParsedDecision{} } + if parsed.PromptTokens > 0 { + s.promptTokens = parsed.PromptTokens + } if parsed.OutputTokens > 0 { s.outputTokens = parsed.OutputTokens } diff --git a/internal/adapter/openai/stream_status_test.go b/internal/adapter/openai/stream_status_test.go index 6352141e..1601a7ce 100644 --- a/internal/adapter/openai/stream_status_test.go +++ b/internal/adapter/openai/stream_status_test.go @@ -238,3 +238,97 @@ func TestChatCompletionsStreamContentFilterStopsNormallyWithoutLeak(t *testing.T t.Fatalf("expected finish_reason=stop for 
content-filter upstream stop, got %#v", choice["finish_reason"]) } } + +func TestResponsesStreamUsageOverridesFromBatchAccumulatedTokenUsage(t *testing.T) { + statuses := make([]int, 0, 1) + h := &Handler{ + Store: mockOpenAIConfig{wideInput: true}, + Auth: streamStatusAuthStub{}, + DS: streamStatusDSStub{resp: makeOpenAISSEHTTPResponse( + `data: {"p":"response/content","v":"hello"}`, + `data: {"p":"response","o":"BATCH","v":[{"p":"accumulated_token_usage","v":190},{"p":"quasi_status","v":"FINISHED"}]}`, + )}, + } + r := chi.NewRouter() + r.Use(captureStatusMiddleware(&statuses)) + RegisterRoutes(r, h) + + reqBody := `{"model":"deepseek-chat","input":"hi","stream":true}` + req := httptest.NewRequest(http.MethodPost, "/v1/responses", strings.NewReader(reqBody)) + req.Header.Set("Authorization", "Bearer direct-token") + req.Header.Set("Content-Type", "application/json") + rec := httptest.NewRecorder() + r.ServeHTTP(rec, req) + + if rec.Code != http.StatusOK { + t.Fatalf("expected 200, got %d body=%s", rec.Code, rec.Body.String()) + } + if len(statuses) != 1 || statuses[0] != http.StatusOK { + t.Fatalf("expected captured status 200, got %#v", statuses) + } + frames, done := parseSSEDataFrames(t, rec.Body.String()) + if !done { + t.Fatalf("expected [DONE], body=%s", rec.Body.String()) + } + if len(frames) == 0 { + t.Fatalf("expected at least one json frame, body=%s", rec.Body.String()) + } + last := frames[len(frames)-1] + resp, _ := last["response"].(map[string]any) + if resp == nil { + t.Fatalf("expected response payload in final frame, got %#v", last) + } + usage, _ := resp["usage"].(map[string]any) + if usage == nil { + t.Fatalf("expected usage in response payload, got %#v", resp) + } + if got, _ := usage["output_tokens"].(float64); int(got) != 190 { + t.Fatalf("expected output_tokens=190, got %#v", usage["output_tokens"]) + } +} + +func TestResponsesNonStreamUsageOverridesPromptAndOutputTokenUsage(t *testing.T) { + statuses := make([]int, 0, 1) + h := &Handler{ + Store: mockOpenAIConfig{wideInput: true}, + Auth: streamStatusAuthStub{}, + DS: streamStatusDSStub{resp: makeOpenAISSEHTTPResponse( + `data: {"p":"response/content","v":"ok"}`, + `data: {"p":"response","o":"BATCH","v":[{"p":"token_usage","v":{"prompt_tokens":11,"completion_tokens":29}},{"p":"quasi_status","v":"FINISHED"}]}`, + )}, + } + r := chi.NewRouter() + r.Use(captureStatusMiddleware(&statuses)) + RegisterRoutes(r, h) + + reqBody := `{"model":"deepseek-chat","input":"hi","stream":false}` + req := httptest.NewRequest(http.MethodPost, "/v1/responses", strings.NewReader(reqBody)) + req.Header.Set("Authorization", "Bearer direct-token") + req.Header.Set("Content-Type", "application/json") + rec := httptest.NewRecorder() + r.ServeHTTP(rec, req) + + if rec.Code != http.StatusOK { + t.Fatalf("expected 200, got %d body=%s", rec.Code, rec.Body.String()) + } + if len(statuses) != 1 || statuses[0] != http.StatusOK { + t.Fatalf("expected captured status 200, got %#v", statuses) + } + var out map[string]any + if err := json.Unmarshal(rec.Body.Bytes(), &out); err != nil { + t.Fatalf("decode response failed: %v body=%s", err, rec.Body.String()) + } + usage, _ := out["usage"].(map[string]any) + if usage == nil { + t.Fatalf("expected usage object, got %#v", out) + } + if got, _ := usage["input_tokens"].(float64); int(got) != 11 { + t.Fatalf("expected input_tokens=11, got %#v", usage["input_tokens"]) + } + if got, _ := usage["output_tokens"].(float64); int(got) != 29 { + t.Fatalf("expected output_tokens=29, got %#v", usage["output_tokens"]) + 
} + if got, _ := usage["total_tokens"].(float64); int(got) != 40 { + t.Fatalf("expected total_tokens=40, got %#v", usage["total_tokens"]) + } +} diff --git a/tests/node/chat-stream.test.js b/tests/node/chat-stream.test.js index 96818430..d1cc8595 100644 --- a/tests/node/chat-stream.test.js +++ b/tests/node/chat-stream.test.js @@ -275,7 +275,7 @@ test('parseChunkForContent keeps error branches distinct from content_filter sta assert.equal(parsed.finished, true); assert.equal(parsed.contentFilter, false); assert.equal(parsed.errorMessage.length > 0, true); - assert.equal(parsed.outputTokens, 0); + assert.equal(parsed.outputTokens, 88); assert.deepEqual(parsed.parts, []); }); @@ -292,6 +292,26 @@ test('parseChunkForContent preserves output tokens on FINISHED lines', () => { assert.deepEqual(parsed.parts, []); }); +test('parseChunkForContent captures output tokens from response BATCH status snapshots', () => { + const parsed = parseChunkForContent( + { + p: 'response', + o: 'BATCH', + v: [ + { p: 'accumulated_token_usage', v: 190 }, + { p: 'quasi_status', v: 'FINISHED' }, + ], + }, + false, + 'text', + ); + assert.equal(parsed.parsed, true); + assert.equal(parsed.finished, false); + assert.equal(parsed.contentFilter, false); + assert.equal(parsed.outputTokens, 190); + assert.deepEqual(parsed.parts, []); +}); + test('parseChunkForContent matches FINISHED case-insensitively on status paths', () => { const parsed = parseChunkForContent( { p: 'response/status', v: ' finished ', accumulated_token_usage: 190 }, From 5bcea3d727c5f70cb290be10e9722edbb62bf86c Mon Sep 17 00:00:00 2001 From: "CJACK." Date: Tue, 7 Apr 2026 10:16:00 +0800 Subject: [PATCH 2/4] Propagate upstream token usage across Gemini usage metadata --- API.en.md | 2 ++ API.md | 2 ++ internal/adapter/gemini/handler_generate.go | 11 +++++--- .../adapter/gemini/handler_stream_runtime.go | 6 ++++- internal/adapter/gemini/handler_test.go | 26 +++++++++++++++++++ 5 files changed, 42 insertions(+), 5 deletions(-) diff --git a/API.en.md b/API.en.md index 7276c867..2b83245a 100644 --- a/API.en.md +++ b/API.en.md @@ -267,6 +267,7 @@ data: [DONE] - `deepseek-reasoner` / `deepseek-reasoner-search` models emit `delta.reasoning_content` - Text emits `delta.content` - Last chunk includes `finish_reason` and `usage` +- Token counting prefers pass-through from upstream DeepSeek SSE (`accumulated_token_usage` / `token_usage`), and only falls back to local estimation when upstream usage is absent #### Tool Calls @@ -535,6 +536,7 @@ Returns SSE (`text/event-stream`), each chunk as `data: `: - regular text: incremental text chunks - `tools` mode: buffered and emitted as `functionCall` at finalize phase - final chunk: includes `finishReason: "STOP"` and `usageMetadata` +- Token counting prefers pass-through from upstream DeepSeek SSE (`accumulated_token_usage` / `token_usage`), and only falls back to local estimation when upstream usage is absent --- diff --git a/API.md b/API.md index 85520524..1caa9845 100644 --- a/API.md +++ b/API.md @@ -267,6 +267,7 @@ data: [DONE] - `deepseek-reasoner` / `deepseek-reasoner-search` 模型输出 `delta.reasoning_content` - 普通文本输出 `delta.content` - 最后一段包含 `finish_reason` 和 `usage` +- token 计数优先透传上游 DeepSeek SSE(如 `accumulated_token_usage` / `token_usage`);仅在上游缺失时回退本地估算 #### Tool Calls @@ -541,6 +542,7 @@ data: {"type":"message_stop"} - 常规文本:持续返回增量文本 chunk - `tools` 场景:会缓冲并在结束时输出 `functionCall` 结构 - 结束 chunk:包含 `finishReason: "STOP"` 与 `usageMetadata` +- token 计数优先透传上游 DeepSeek SSE(如 `accumulated_token_usage` / 
`token_usage`);仅在上游缺失时回退本地估算 --- diff --git a/internal/adapter/gemini/handler_generate.go b/internal/adapter/gemini/handler_generate.go index b03b3ead..56cc0e63 100644 --- a/internal/adapter/gemini/handler_generate.go +++ b/internal/adapter/gemini/handler_generate.go @@ -149,14 +149,15 @@ func (h *Handler) handleNonStreamGenerateContent(w http.ResponseWriter, resp *ht cleanVisibleOutput(result.Thinking, stripReferenceMarkers), cleanVisibleOutput(result.Text, stripReferenceMarkers), toolNames, + result.PromptTokens, result.OutputTokens, )) } //nolint:unused // retained for native Gemini non-stream handling path. -func buildGeminiGenerateContentResponse(model, finalPrompt, finalThinking, finalText string, toolNames []string, outputTokens int) map[string]any { +func buildGeminiGenerateContentResponse(model, finalPrompt, finalThinking, finalText string, toolNames []string, promptTokens, outputTokens int) map[string]any { parts := buildGeminiPartsFromFinal(finalText, finalThinking, toolNames) - usage := buildGeminiUsage(finalPrompt, finalThinking, finalText, outputTokens) + usage := buildGeminiUsage(finalPrompt, finalThinking, finalText, promptTokens, outputTokens) return map[string]any{ "candidates": []map[string]any{ { @@ -174,8 +175,10 @@ func buildGeminiGenerateContentResponse(model, finalPrompt, finalThinking, final } //nolint:unused // retained for native Gemini non-stream handling path. -func buildGeminiUsage(finalPrompt, finalThinking, finalText string, outputTokens int) map[string]any { - promptTokens := util.EstimateTokens(finalPrompt) +func buildGeminiUsage(finalPrompt, finalThinking, finalText string, promptTokens, outputTokens int) map[string]any { + if promptTokens <= 0 { + promptTokens = util.EstimateTokens(finalPrompt) + } reasoningTokens := util.EstimateTokens(finalThinking) completionTokens := util.EstimateTokens(finalText) if outputTokens > 0 { diff --git a/internal/adapter/gemini/handler_stream_runtime.go b/internal/adapter/gemini/handler_stream_runtime.go index e7c9b87c..b8d2701a 100644 --- a/internal/adapter/gemini/handler_stream_runtime.go +++ b/internal/adapter/gemini/handler_stream_runtime.go @@ -67,6 +67,7 @@ type geminiStreamRuntime struct { thinking strings.Builder text strings.Builder + promptTokens int outputTokens int } @@ -112,6 +113,9 @@ func (s *geminiStreamRuntime) onParsed(parsed sse.LineResult) streamengine.Parse if !parsed.Parsed { return streamengine.ParsedDecision{} } + if parsed.PromptTokens > 0 { + s.promptTokens = parsed.PromptTokens + } if parsed.OutputTokens > 0 { s.outputTokens = parsed.OutputTokens } @@ -198,6 +202,6 @@ func (s *geminiStreamRuntime) finalize() { }, }, "modelVersion": s.model, - "usageMetadata": buildGeminiUsage(s.finalPrompt, finalThinking, finalText, s.outputTokens), + "usageMetadata": buildGeminiUsage(s.finalPrompt, finalThinking, finalText, s.promptTokens, s.outputTokens), }) } diff --git a/internal/adapter/gemini/handler_test.go b/internal/adapter/gemini/handler_test.go index b7aea1bb..aa3ae465 100644 --- a/internal/adapter/gemini/handler_test.go +++ b/internal/adapter/gemini/handler_test.go @@ -296,6 +296,32 @@ func TestGenerateContentOpenAIProxyErrorUsesGeminiEnvelope(t *testing.T) { } } +func TestBuildGeminiUsageOverridesPromptAndOutputTokensWhenProvided(t *testing.T) { + usage := buildGeminiUsage("prompt", "thinking", "answer", 11, 29) + if got, _ := usage["promptTokenCount"].(int); got != 11 { + t.Fatalf("expected promptTokenCount=11, got %#v", usage["promptTokenCount"]) + } + if got, _ := 
usage["candidatesTokenCount"].(int); got != 29 { + t.Fatalf("expected candidatesTokenCount=29, got %#v", usage["candidatesTokenCount"]) + } + if got, _ := usage["totalTokenCount"].(int); got != 40 { + t.Fatalf("expected totalTokenCount=40, got %#v", usage["totalTokenCount"]) + } +} + +func TestBuildGeminiUsageFallsBackToEstimateWhenNoUpstreamUsage(t *testing.T) { + usage := buildGeminiUsage("abcdef", "", "ghijkl", 0, 0) + if got, _ := usage["promptTokenCount"].(int); got <= 0 { + t.Fatalf("expected positive promptTokenCount estimate, got %#v", usage["promptTokenCount"]) + } + if got, _ := usage["candidatesTokenCount"].(int); got <= 0 { + t.Fatalf("expected positive candidatesTokenCount estimate, got %#v", usage["candidatesTokenCount"]) + } + if got, _ := usage["totalTokenCount"].(int); got <= 0 { + t.Fatalf("expected positive totalTokenCount estimate, got %#v", usage["totalTokenCount"]) + } +} + func extractGeminiSSEFrames(t *testing.T, body string) []map[string]any { t.Helper() scanner := bufio.NewScanner(strings.NewReader(body)) From 668b9c26bdbf01772d19f81bd160cc6df2019b26 Mon Sep 17 00:00:00 2001 From: "CJACK." Date: Tue, 7 Apr 2026 10:16:23 +0800 Subject: [PATCH 3/4] Unify token usage pass-through on OpenAI translate pipeline --- API.en.md | 2 + API.md | 2 + internal/adapter/gemini/handler_generate.go | 11 +-- .../adapter/gemini/handler_stream_runtime.go | 6 +- internal/adapter/gemini/handler_test.go | 26 ----- internal/translatorcliproxy/bridge_test.go | 20 ++++ internal/translatorcliproxy/stream_writer.go | 97 +++++++++++++++++++ .../translatorcliproxy/stream_writer_test.go | 8 ++ 8 files changed, 134 insertions(+), 38 deletions(-) diff --git a/API.en.md b/API.en.md index 2b83245a..0238a428 100644 --- a/API.en.md +++ b/API.en.md @@ -384,6 +384,7 @@ Business auth required. Returns OpenAI-compatible embeddings shape. ## Claude-Compatible API Besides `/anthropic/v1/*`, DS2API also supports shortcut paths: `/v1/messages`, `/messages`, `/v1/messages/count_tokens`, `/messages/count_tokens`. +Implementation-wise this path is unified on the OpenAI Chat Completions parse-and-translate pipeline to avoid maintaining divergent parsing chains. ### `GET /anthropic/v1/models` @@ -518,6 +519,7 @@ Supported paths: - `/v1/models/{model}:streamGenerateContent` (compat path) Authentication is the same as other business routes (`Authorization: Bearer ` or `x-api-key`). +Implementation-wise this path is unified on the OpenAI Chat Completions parse-and-translate pipeline to avoid maintaining divergent parsing chains. 
### `POST /v1beta/models/{model}:generateContent` diff --git a/API.md b/API.md index 1caa9845..d2eb1f04 100644 --- a/API.md +++ b/API.md @@ -390,6 +390,7 @@ data: [DONE] ## Claude 兼容接口 除标准路径 `/anthropic/v1/*` 外,还支持快捷路径 `/v1/messages`、`/messages`、`/v1/messages/count_tokens`、`/messages/count_tokens`。 +实现上统一走 OpenAI Chat Completions 解析与回译链路,避免多套解析逻辑分叉维护。 ### `GET /anthropic/v1/models` @@ -524,6 +525,7 @@ data: {"type":"message_stop"} - `/v1/models/{model}:streamGenerateContent`(兼容路径) 鉴权方式同业务接口(`Authorization: Bearer ` 或 `x-api-key`)。 +实现上统一走 OpenAI Chat Completions 解析与回译链路,避免多套解析逻辑分叉维护。 ### `POST /v1beta/models/{model}:generateContent` diff --git a/internal/adapter/gemini/handler_generate.go b/internal/adapter/gemini/handler_generate.go index 56cc0e63..b03b3ead 100644 --- a/internal/adapter/gemini/handler_generate.go +++ b/internal/adapter/gemini/handler_generate.go @@ -149,15 +149,14 @@ func (h *Handler) handleNonStreamGenerateContent(w http.ResponseWriter, resp *ht cleanVisibleOutput(result.Thinking, stripReferenceMarkers), cleanVisibleOutput(result.Text, stripReferenceMarkers), toolNames, - result.PromptTokens, result.OutputTokens, )) } //nolint:unused // retained for native Gemini non-stream handling path. -func buildGeminiGenerateContentResponse(model, finalPrompt, finalThinking, finalText string, toolNames []string, promptTokens, outputTokens int) map[string]any { +func buildGeminiGenerateContentResponse(model, finalPrompt, finalThinking, finalText string, toolNames []string, outputTokens int) map[string]any { parts := buildGeminiPartsFromFinal(finalText, finalThinking, toolNames) - usage := buildGeminiUsage(finalPrompt, finalThinking, finalText, promptTokens, outputTokens) + usage := buildGeminiUsage(finalPrompt, finalThinking, finalText, outputTokens) return map[string]any{ "candidates": []map[string]any{ { @@ -175,10 +174,8 @@ func buildGeminiGenerateContentResponse(model, finalPrompt, finalThinking, final } //nolint:unused // retained for native Gemini non-stream handling path. 
-func buildGeminiUsage(finalPrompt, finalThinking, finalText string, promptTokens, outputTokens int) map[string]any { - if promptTokens <= 0 { - promptTokens = util.EstimateTokens(finalPrompt) - } +func buildGeminiUsage(finalPrompt, finalThinking, finalText string, outputTokens int) map[string]any { + promptTokens := util.EstimateTokens(finalPrompt) reasoningTokens := util.EstimateTokens(finalThinking) completionTokens := util.EstimateTokens(finalText) if outputTokens > 0 { diff --git a/internal/adapter/gemini/handler_stream_runtime.go b/internal/adapter/gemini/handler_stream_runtime.go index b8d2701a..e7c9b87c 100644 --- a/internal/adapter/gemini/handler_stream_runtime.go +++ b/internal/adapter/gemini/handler_stream_runtime.go @@ -67,7 +67,6 @@ type geminiStreamRuntime struct { thinking strings.Builder text strings.Builder - promptTokens int outputTokens int } @@ -113,9 +112,6 @@ func (s *geminiStreamRuntime) onParsed(parsed sse.LineResult) streamengine.Parse if !parsed.Parsed { return streamengine.ParsedDecision{} } - if parsed.PromptTokens > 0 { - s.promptTokens = parsed.PromptTokens - } if parsed.OutputTokens > 0 { s.outputTokens = parsed.OutputTokens } @@ -202,6 +198,6 @@ func (s *geminiStreamRuntime) finalize() { }, }, "modelVersion": s.model, - "usageMetadata": buildGeminiUsage(s.finalPrompt, finalThinking, finalText, s.promptTokens, s.outputTokens), + "usageMetadata": buildGeminiUsage(s.finalPrompt, finalThinking, finalText, s.outputTokens), }) } diff --git a/internal/adapter/gemini/handler_test.go b/internal/adapter/gemini/handler_test.go index aa3ae465..b7aea1bb 100644 --- a/internal/adapter/gemini/handler_test.go +++ b/internal/adapter/gemini/handler_test.go @@ -296,32 +296,6 @@ func TestGenerateContentOpenAIProxyErrorUsesGeminiEnvelope(t *testing.T) { } } -func TestBuildGeminiUsageOverridesPromptAndOutputTokensWhenProvided(t *testing.T) { - usage := buildGeminiUsage("prompt", "thinking", "answer", 11, 29) - if got, _ := usage["promptTokenCount"].(int); got != 11 { - t.Fatalf("expected promptTokenCount=11, got %#v", usage["promptTokenCount"]) - } - if got, _ := usage["candidatesTokenCount"].(int); got != 29 { - t.Fatalf("expected candidatesTokenCount=29, got %#v", usage["candidatesTokenCount"]) - } - if got, _ := usage["totalTokenCount"].(int); got != 40 { - t.Fatalf("expected totalTokenCount=40, got %#v", usage["totalTokenCount"]) - } -} - -func TestBuildGeminiUsageFallsBackToEstimateWhenNoUpstreamUsage(t *testing.T) { - usage := buildGeminiUsage("abcdef", "", "ghijkl", 0, 0) - if got, _ := usage["promptTokenCount"].(int); got <= 0 { - t.Fatalf("expected positive promptTokenCount estimate, got %#v", usage["promptTokenCount"]) - } - if got, _ := usage["candidatesTokenCount"].(int); got <= 0 { - t.Fatalf("expected positive candidatesTokenCount estimate, got %#v", usage["candidatesTokenCount"]) - } - if got, _ := usage["totalTokenCount"].(int); got <= 0 { - t.Fatalf("expected positive totalTokenCount estimate, got %#v", usage["totalTokenCount"]) - } -} - func extractGeminiSSEFrames(t *testing.T, body string) []map[string]any { t.Helper() scanner := bufio.NewScanner(strings.NewReader(body)) diff --git a/internal/translatorcliproxy/bridge_test.go b/internal/translatorcliproxy/bridge_test.go index 5f0979f1..cdd9cf70 100644 --- a/internal/translatorcliproxy/bridge_test.go +++ b/internal/translatorcliproxy/bridge_test.go @@ -26,6 +26,26 @@ func TestFromOpenAINonStreamClaude(t *testing.T) { } } +func TestFromOpenAINonStreamClaudePreservesUsageFromOpenAI(t *testing.T) { + original := 
[]byte(`{"model":"claude-sonnet-4-5","messages":[{"role":"user","content":"hi"}],"stream":false}`) + translatedReq := []byte(`{"model":"claude-sonnet-4-5","messages":[{"role":"user","content":"hi"}],"stream":false}`) + openaibody := []byte(`{"id":"chatcmpl_1","object":"chat.completion","created":1,"model":"claude-sonnet-4-5","choices":[{"index":0,"message":{"role":"assistant","content":"hello"},"finish_reason":"stop"}],"usage":{"prompt_tokens":11,"completion_tokens":29,"total_tokens":40}}`) + got := string(FromOpenAINonStream(sdktranslator.FormatClaude, "claude-sonnet-4-5", original, translatedReq, openaibody)) + if !strings.Contains(got, `"input_tokens":11`) || !strings.Contains(got, `"output_tokens":29`) { + t.Fatalf("expected claude usage to preserve prompt/completion tokens, got: %s", got) + } +} + +func TestFromOpenAINonStreamGeminiPreservesUsageFromOpenAI(t *testing.T) { + original := []byte(`{"contents":[{"role":"user","parts":[{"text":"hi"}]}]}`) + translatedReq := []byte(`{"model":"gemini-2.5-pro","messages":[{"role":"user","content":"hi"}],"stream":false}`) + openaibody := []byte(`{"id":"chatcmpl_1","object":"chat.completion","created":1,"model":"gemini-2.5-pro","choices":[{"index":0,"message":{"role":"assistant","content":"hello"},"finish_reason":"stop"}],"usage":{"prompt_tokens":11,"completion_tokens":29,"total_tokens":40}}`) + got := string(FromOpenAINonStream(sdktranslator.FormatGemini, "gemini-2.5-pro", original, translatedReq, openaibody)) + if !strings.Contains(got, `"promptTokenCount":11`) || !strings.Contains(got, `"candidatesTokenCount":29`) || !strings.Contains(got, `"totalTokenCount":40`) { + t.Fatalf("expected gemini usageMetadata to preserve prompt/completion tokens, got: %s", got) + } +} + func TestParseFormatAliases(t *testing.T) { cases := map[string]sdktranslator.Format{ "responses": sdktranslator.FormatOpenAIResponse, diff --git a/internal/translatorcliproxy/stream_writer.go b/internal/translatorcliproxy/stream_writer.go index 07c4bcb9..b1b87478 100644 --- a/internal/translatorcliproxy/stream_writer.go +++ b/internal/translatorcliproxy/stream_writer.go @@ -3,7 +3,9 @@ package translatorcliproxy import ( "bytes" "context" + "encoding/json" "net/http" + "strings" sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator" ) @@ -77,7 +79,13 @@ func (w *OpenAIStreamTranslatorWriter) Write(p []byte) (int, error) { if !bytes.HasPrefix(trimmed, []byte("data:")) { continue } + usage, hasUsage := extractOpenAIUsage(trimmed) chunks := sdktranslator.TranslateStream(context.Background(), sdktranslator.FormatOpenAI, w.target, w.model, w.originalReq, w.translatedReq, trimmed, &w.param) + if hasUsage { + for i := range chunks { + chunks[i] = injectStreamUsageMetadata(chunks[i], w.target, usage) + } + } for i := range chunks { if len(chunks[i]) == 0 { continue @@ -118,3 +126,92 @@ func (w *OpenAIStreamTranslatorWriter) readOneLine() ([]byte, bool) { w.lineBuf.Next(idx + 1) return line, true } + +type openAIUsage struct { + PromptTokens int + CompletionTokens int + TotalTokens int +} + +func extractOpenAIUsage(line []byte) (openAIUsage, bool) { + raw := strings.TrimSpace(strings.TrimPrefix(string(line), "data:")) + if raw == "" || raw == "[DONE]" { + return openAIUsage{}, false + } + var payload map[string]any + if err := json.Unmarshal([]byte(raw), &payload); err != nil { + return openAIUsage{}, false + } + usageObj, _ := payload["usage"].(map[string]any) + if usageObj == nil { + return openAIUsage{}, false + } + p := toInt(usageObj["prompt_tokens"]) + c := 
toInt(usageObj["completion_tokens"]) + t := toInt(usageObj["total_tokens"]) + if p <= 0 && c <= 0 && t <= 0 { + return openAIUsage{}, false + } + if t <= 0 { + t = p + c + } + return openAIUsage{PromptTokens: p, CompletionTokens: c, TotalTokens: t}, true +} + +func injectStreamUsageMetadata(chunk []byte, target sdktranslator.Format, usage openAIUsage) []byte { + if target != sdktranslator.FormatGemini { + return chunk + } + text := strings.TrimSpace(string(chunk)) + if text == "" { + return chunk + } + var ( + hasDataPrefix bool + jsonText = text + ) + if strings.HasPrefix(jsonText, "data:") { + hasDataPrefix = true + jsonText = strings.TrimSpace(strings.TrimPrefix(jsonText, "data:")) + } + if jsonText == "" || jsonText == "[DONE]" { + return chunk + } + obj := map[string]any{} + if err := json.Unmarshal([]byte(jsonText), &obj); err != nil { + return chunk + } + if _, ok := obj["candidates"]; !ok { + return chunk + } + obj["usageMetadata"] = map[string]any{ + "promptTokenCount": usage.PromptTokens, + "candidatesTokenCount": usage.CompletionTokens, + "totalTokenCount": usage.TotalTokens, + } + b, err := json.Marshal(obj) + if err != nil { + return chunk + } + if hasDataPrefix { + return []byte("data: " + string(b)) + } + return b +} + +func toInt(v any) int { + switch x := v.(type) { + case int: + return x + case int32: + return int(x) + case int64: + return int(x) + case float64: + return int(x) + case float32: + return int(x) + default: + return 0 + } +} diff --git a/internal/translatorcliproxy/stream_writer_test.go b/internal/translatorcliproxy/stream_writer_test.go index 979d36e5..77d2936c 100644 --- a/internal/translatorcliproxy/stream_writer_test.go +++ b/internal/translatorcliproxy/stream_writer_test.go @@ -18,12 +18,16 @@ func TestOpenAIStreamTranslatorWriterClaude(t *testing.T) { w.WriteHeader(200) _, _ = w.Write([]byte("data: {\"id\":\"chatcmpl_1\",\"object\":\"chat.completion.chunk\",\"created\":1,\"model\":\"claude-sonnet-4-5\",\"choices\":[{\"index\":0,\"delta\":{\"role\":\"assistant\"},\"finish_reason\":null}]}\n\n")) _, _ = w.Write([]byte("data: {\"id\":\"chatcmpl_1\",\"object\":\"chat.completion.chunk\",\"created\":1,\"model\":\"claude-sonnet-4-5\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"hi\"},\"finish_reason\":null}]}\n\n")) + _, _ = w.Write([]byte("data: {\"id\":\"chatcmpl_1\",\"object\":\"chat.completion.chunk\",\"created\":1,\"model\":\"claude-sonnet-4-5\",\"choices\":[{\"index\":0,\"delta\":{},\"finish_reason\":\"stop\"}],\"usage\":{\"prompt_tokens\":11,\"completion_tokens\":29,\"total_tokens\":40}}\n\n")) _, _ = w.Write([]byte("data: [DONE]\n\n")) body := rec.Body.String() if !strings.Contains(body, "event: message_start") { t.Fatalf("expected claude message_start event, got: %s", body) } + if !strings.Contains(body, `"output_tokens":29`) { + t.Fatalf("expected claude stream usage to preserve output tokens, got: %s", body) + } } func TestOpenAIStreamTranslatorWriterGemini(t *testing.T) { @@ -35,12 +39,16 @@ func TestOpenAIStreamTranslatorWriterGemini(t *testing.T) { w.Header().Set("Content-Type", "text/event-stream") w.WriteHeader(200) _, _ = w.Write([]byte("data: {\"id\":\"chatcmpl_1\",\"object\":\"chat.completion.chunk\",\"created\":1,\"model\":\"gemini-2.5-pro\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"hi\"},\"finish_reason\":null}]}\n\n")) + _, _ = w.Write([]byte("data: 
{\"id\":\"chatcmpl_1\",\"object\":\"chat.completion.chunk\",\"created\":1,\"model\":\"gemini-2.5-pro\",\"choices\":[{\"index\":0,\"delta\":{},\"finish_reason\":\"stop\"}],\"usage\":{\"prompt_tokens\":11,\"completion_tokens\":29,\"total_tokens\":40}}\n\n")) _, _ = w.Write([]byte("data: [DONE]\n\n")) body := rec.Body.String() if !strings.Contains(body, "candidates") { t.Fatalf("expected gemini stream payload, got: %s", body) } + if !strings.Contains(body, `"promptTokenCount":11`) || !strings.Contains(body, `"candidatesTokenCount":29`) { + t.Fatalf("expected gemini stream usageMetadata to preserve usage, got: %s", body) + } } func TestOpenAIStreamTranslatorWriterPreservesKeepAliveComment(t *testing.T) { From 86ecbc89bd31a80e97ee516ad40178dd4440634d Mon Sep 17 00:00:00 2001 From: "CJACK." Date: Tue, 7 Apr 2026 10:59:27 +0800 Subject: [PATCH 4/4] Preserve SSE frame delimiters when injecting Gemini usage --- internal/translatorcliproxy/stream_writer.go | 12 +++++++++++- internal/translatorcliproxy/stream_writer_test.go | 12 ++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/internal/translatorcliproxy/stream_writer.go b/internal/translatorcliproxy/stream_writer.go index b1b87478..e80ce690 100644 --- a/internal/translatorcliproxy/stream_writer.go +++ b/internal/translatorcliproxy/stream_writer.go @@ -162,6 +162,13 @@ func injectStreamUsageMetadata(chunk []byte, target sdktranslator.Format, usage if target != sdktranslator.FormatGemini { return chunk } + suffix := "" + switch { + case bytes.HasSuffix(chunk, []byte("\n\n")): + suffix = "\n\n" + case bytes.HasSuffix(chunk, []byte("\n")): + suffix = "\n" + } text := strings.TrimSpace(string(chunk)) if text == "" { return chunk @@ -194,7 +201,10 @@ func injectStreamUsageMetadata(chunk []byte, target sdktranslator.Format, usage return chunk } if hasDataPrefix { - return []byte("data: " + string(b)) + return []byte("data: " + string(b) + suffix) + } + if suffix != "" { + return append(b, []byte(suffix)...) } return b } diff --git a/internal/translatorcliproxy/stream_writer_test.go b/internal/translatorcliproxy/stream_writer_test.go index 77d2936c..94d70b8d 100644 --- a/internal/translatorcliproxy/stream_writer_test.go +++ b/internal/translatorcliproxy/stream_writer_test.go @@ -63,3 +63,15 @@ func TestOpenAIStreamTranslatorWriterPreservesKeepAliveComment(t *testing.T) { t.Fatalf("expected keep-alive comment passthrough, got %q", body) } } + +func TestInjectStreamUsageMetadataPreservesSSEFrameTerminator(t *testing.T) { + chunk := []byte("data: {\"candidates\":[{\"index\":0}],\"model\":\"gemini-2.5-pro\"}\n\n") + usage := openAIUsage{PromptTokens: 11, CompletionTokens: 29, TotalTokens: 40} + got := injectStreamUsageMetadata(chunk, sdktranslator.FormatGemini, usage) + if !strings.HasSuffix(string(got), "\n\n") { + t.Fatalf("expected injected chunk to preserve \\n\\n frame terminator, got %q", string(got)) + } + if !strings.Contains(string(got), `"usageMetadata"`) { + t.Fatalf("expected usageMetadata injected, got %q", string(got)) + } +}