diff --git a/backend/src/api/v1/messages.ts b/backend/src/api/v1/messages.ts index 420ebb6..bf9ae3d 100644 --- a/backend/src/api/v1/messages.ts +++ b/backend/src/api/v1/messages.ts @@ -20,7 +20,10 @@ import { } from "@/plugins/apiKeyRateLimitPlugin"; import { rateLimitPlugin } from "@/plugins/rateLimitPlugin"; import { + DEFAULT_FAILOVER_CONFIG, executeWithFailover, + fetchWithTimeout, + isRetriableNetworkError, selectMultipleCandidates, type FailoverConfig, } from "@/services/failover"; @@ -34,7 +37,7 @@ import { PROVIDER_HEADER, } from "@/utils/api-helpers"; import { addCompletions, type Completion } from "@/utils/completions"; -import { safeParseToolArgs } from "@/utils/json"; +import { parseJsonResponse, safeParseToolArgs } from "@/utils/json"; import { createLogger } from "@/utils/logger"; import { getProviderProxy } from "@/utils/proxy-fetch"; import { @@ -142,6 +145,22 @@ const tAnthropicMessageCreate = tLooseObject({ metadata: t.Optional(tAnthropicMetadata), }); +// Anthropic count_tokens API request schema. Keep this route intentionally loose +// so we can transparently forward newer content block shapes upstream. 
+const tAnthropicMessageCountTokens = tLooseObject({ + model: t.String(), + messages: t.Array( + tLooseObject({ + role: t.String(), + content: t.Union([t.String(), t.Array(t.Unknown())]), + }), + ), + system: t.Optional(t.Union([t.String(), t.Array(t.Unknown())])), + tools: t.Optional(t.Array(t.Unknown())), + tool_choice: t.Optional(t.Unknown()), + thinking: t.Optional(t.Unknown()), +}); + /** * Build completion record for logging */ @@ -490,6 +509,96 @@ const MESSAGES_FAILOVER_CONFIG: Partial<FailoverConfig> = { timeoutMs: 120000, // 2 minutes for messages }; +const COUNT_TOKENS_FAILOVER_CONFIG: FailoverConfig = { + ...DEFAULT_FAILOVER_CONFIG, + maxProviderAttempts: 3, + sameProviderRetries: 0, + retriableStatusCodes: [404, 405, 429, 500, 502, 503, 504], + timeoutMs: 30000, +}; + +function normalizeAnthropicBaseUrl(baseUrl: string): string { + let normalized = baseUrl.replace(/\/+$/, ""); + if (normalized.endsWith("/v1")) { + normalized = normalized.slice(0, -3); + } + return normalized; +} + +function buildAnthropicCountTokensRequest( + body: Record<string, unknown>, + provider: ModelWithProvider, + extraHeaders?: Record<string, string>, +): { + url: string; + init: RequestInit; + proxy?: string; +} { + const baseUrl = normalizeAnthropicBaseUrl(provider.provider.baseUrl); + const url = `${baseUrl}/v1/messages/count_tokens`; + const remoteModel = provider.model.remoteId ?? 
provider.model.systemName; + const headers: Record<string, string> = { + "Content-Type": "application/json", + "anthropic-version": provider.provider.apiVersion || "2023-06-01", + }; + + if (provider.provider.apiKey) { + headers["x-api-key"] = provider.provider.apiKey; + } + if (extraHeaders) { + Object.assign(headers, extraHeaders); + } + + return { + url, + init: { + method: "POST", + headers, + body: JSON.stringify({ + ...body, + model: remoteModel, + }), + }, + proxy: getProviderProxy(provider.provider), + }; +} + +async function parseUpstreamJsonBody( + response: Response, + context: string, +): Promise<Record<string, unknown>> { + const text = await response.text(); + return parseJsonResponse<Record<string, unknown>>(text, context); +} + +async function parseUpstreamErrorBody(response: Response): Promise< + Record<string, unknown> +> { + const text = await response.text(); + if (!text) { + return { + type: "error", + error: { + type: "api_error", + message: `Upstream returned HTTP ${response.status}`, + }, + }; + } + + try { + return JSON.parse(text) as Record<string, unknown>; + } catch { + return { + type: "error", + error: { + type: "api_error", + message: text, + code: "unparseable_error", + }, + }; + } +} + export const messagesApi = new Elysia({ detail: { security: [{ apiKey: [] }], @@ -498,6 +607,162 @@ .use(apiKeyPlugin) .use(apiKeyRateLimitPlugin) .use(rateLimitPlugin) + .post( + "/messages/count_tokens", + async function ({ body, set, request }) { + try { + const reqHeaders = request.headers; + + const { systemName, targetProvider } = parseModelProvider( + body.model, + reqHeaders.get(PROVIDER_HEADER), + ); + + const modelsWithProviders = await getModelsWithProviderBySystemName( + systemName, + "chat", + ); + + if (modelsWithProviders.length === 0) { + set.status = 404; + return { + type: "error", + error: { + type: "not_found_error", + message: `Model '${systemName}' not found`, + }, + }; + } + + const filteredCandidates = filterCandidates( + modelsWithProviders as ModelWithProvider[], + targetProvider, + 
); + + if (filteredCandidates.length === 0) { + set.status = 404; + return { + type: "error", + error: { + type: "not_found_error", + message: `No available provider for model '${systemName}'`, + }, + }; + } + + const candidates = selectMultipleCandidates( + filteredCandidates, + COUNT_TOKENS_FAILOVER_CONFIG.maxProviderAttempts, + ); + const extraHeaders = extractUpstreamHeaders(reqHeaders); + const upstreamBody = body as Record<string, unknown>; + + let lastResponse: Response | undefined; + let lastError: Error | undefined; + + for (const candidate of candidates) { + const { url, init, proxy } = buildAnthropicCountTokensRequest( + upstreamBody, + candidate, + extraHeaders, + ); + + try { + const response = await fetchWithTimeout( + url, + init, + COUNT_TOKENS_FAILOVER_CONFIG.timeoutMs, + proxy, + ); + + if (response.ok) { + return await parseUpstreamJsonBody( + response, + "Anthropic count_tokens", + ); + } + + lastResponse = response; + const shouldTryNext = + COUNT_TOKENS_FAILOVER_CONFIG.retriableStatusCodes.includes( + response.status, + ) && candidate !== candidates[candidates.length - 1]; + + logger.warn("count_tokens upstream request failed", { + provider: candidate.provider.name, + providerType: candidate.provider.type, + status: response.status, + shouldTryNext, + }); + + if (!shouldTryNext) { + set.status = response.status; + return await parseUpstreamErrorBody(response); + } + } catch (error) { + const err = + error instanceof Error ? 
error : new Error(String(error)); + lastError = err; + const shouldTryNext = + isRetriableNetworkError(err, COUNT_TOKENS_FAILOVER_CONFIG) && + candidate !== candidates[candidates.length - 1]; + + logger.warn("count_tokens upstream network error", { + provider: candidate.provider.name, + providerType: candidate.provider.type, + error: err.message, + shouldTryNext, + }); + + if (!shouldTryNext) { + set.status = 502; + return { + type: "error", + error: { + type: "api_error", + message: `Count tokens request failed: ${err.message}`, + }, + }; + } + } + } + + if (lastResponse) { + set.status = lastResponse.status; + return await parseUpstreamErrorBody(lastResponse); + } + + set.status = 502; + return { + type: "error", + error: { + type: "api_error", + message: + lastError?.message || + "All upstream providers failed for token counting", + }, + }; + } catch (error) { + logger.error("count_tokens handler failed", error); + set.status = 502; + return { + type: "error", + error: { + type: "api_error", + message: "Failed to process count_tokens response", + }, + }; + } + }, + { + body: tAnthropicMessageCountTokens, + checkApiKey: true, + apiKeyRateLimit: true, + rateLimit: { + identifier: (body: unknown) => (body as { model: string }).model, + }, + }, + ) .post( "/messages", async function ({ body, set, bearer, request, apiKeyRecord }) { diff --git a/scripts/verify-anthropic-count-tokens.sh b/scripts/verify-anthropic-count-tokens.sh new file mode 100755 index 0000000..4f9d4f0 --- /dev/null +++ b/scripts/verify-anthropic-count-tokens.sh @@ -0,0 +1,227 @@ +#!/usr/bin/env bash +set -euo pipefail + +# Verify /v1/messages/count_tokens support for Anthropic-compatible coding gateways. +# API keys are read from environment variables and never written to disk. 
+ +: "${KIMI_API_KEY:?KIMI_API_KEY is required}" +: "${DASHSCOPE_API_KEY:?DASHSCOPE_API_KEY is required}" +: "${VOLCENGINE_API_KEY:?VOLCENGINE_API_KEY is required}" + +KIMI_BASE_URL="${KIMI_BASE_URL:-https://api.kimi.com/coding}" +DASHSCOPE_BASE_URL="${DASHSCOPE_BASE_URL:-https://coding.dashscope.aliyuncs.com/apps/anthropic}" +VOLCENGINE_BASE_URL="${VOLCENGINE_BASE_URL:-https://ark.cn-beijing.volces.com/api/coding}" + +REQUEST_TIMEOUT="${REQUEST_TIMEOUT:-25}" + +# Optional explicit models per provider. +KIMI_MODEL="${KIMI_MODEL:-claude-sonnet-4-6}" +DASHSCOPE_MODEL="${DASHSCOPE_MODEL:-qwen3.5-plus}" +VOLCENGINE_MODEL="${VOLCENGINE_MODEL:-ark-code-latest}" + +TMP_DIR="$(mktemp -d)" +trap 'rm -rf "$TMP_DIR"' EXIT + +call_api() { + local base_url="$1" + local api_key="$2" + local method="$3" + local path="$4" + local body="$5" + local out_body="$6" + + local out_headers="${out_body}.headers" + local status + + if [[ "$method" == "GET" ]]; then + status="$(curl -sS --max-time "$REQUEST_TIMEOUT" -o "$out_body" -D "$out_headers" \ + -w "%{http_code}" \ + -X GET "${base_url}${path}" \ + -H "anthropic-version: 2023-06-01" \ + -H "x-api-key: ${api_key}" \ + -H "authorization: Bearer ${api_key}")" + else + status="$(curl -sS --max-time "$REQUEST_TIMEOUT" -o "$out_body" -D "$out_headers" \ + -w "%{http_code}" \ + -X "$method" "${base_url}${path}" \ + -H "content-type: application/json" \ + -H "anthropic-version: 2023-06-01" \ + -H "x-api-key: ${api_key}" \ + -H "authorization: Bearer ${api_key}" \ + --data "$body")" + fi + + printf "%s" "$status" +} + +json_snippet() { + local file="$1" + tr '\n' ' ' < "$file" | sed -E 's/[[:space:]]+/ /g' | cut -c1-280 +} + +discover_models() { + local base_url="$1" + local api_key="$2" + local out_file="$TMP_DIR/models.json" + local status + + status="$(call_api "$base_url" "$api_key" "GET" "/v1/models" "" "$out_file")" + if [[ "$status" != "200" ]]; then + echo "" + return 0 + fi + + if jq -e '.data and (.data | type == "array")' 
"$out_file" >/dev/null 2>&1; then + jq -r '.data[]?.id // empty' "$out_file" + fi +} + +pick_model() { + local provider_name="$1" + local base_url="$2" + local api_key="$3" + local preferred_model="$4" + + local -a candidates=() + + if [[ -n "$preferred_model" ]]; then + candidates+=("$preferred_model") + fi + + while IFS= read -r m; do + if [[ -n "$m" ]]; then + candidates+=("$m") + fi + done < <(discover_models "$base_url" "$api_key") + + candidates+=( + "claude-sonnet-4-6" + "claude-sonnet-4-5" + "claude-sonnet-4-20250514" + "claude-3-7-sonnet-20250219" + "claude-3-5-sonnet-20241022" + "qwen3.5-plus" + "qwen3-coder-plus" + "qwen-plus" + "ark-code-latest" + "deepseek-v3" + "kimi-for-coding" + "doubao-seed-1-6-thinking-250715" + ) + + local dedup_file="$TMP_DIR/candidates-${provider_name}.txt" + printf "%s\n" "${candidates[@]}" | awk 'NF && !seen[$0]++' > "$dedup_file" + + local test_out="$TMP_DIR/${provider_name}.messages.pick.json" + local picked_success="" + local picked_fallback="" + + while IFS= read -r model; do + local payload + payload="$(jq -cn --arg model "$model" '{model:$model,max_tokens:1,messages:[{role:"user",content:"ping"}]}')" + + local status + status="$(call_api "$base_url" "$api_key" "POST" "/v1/messages" "$payload" "$test_out")" + + # Prefer a truly usable model first. + if [[ "$status" == "200" ]]; then + picked_success="$model" + break + fi + + # Keep a fallback model for providers that reject all candidate model names. 
+ if [[ -z "$picked_fallback" ]] && [[ "$status" == "400" || "$status" == "422" || "$status" == "429" ]]; then + picked_fallback="$model" + continue + fi + + if [[ "$status" == "401" || "$status" == "403" ]]; then + echo "" + return 0 + fi + done < "$dedup_file" + + if [[ -n "$picked_success" ]]; then + echo "$picked_success" + else + echo "$picked_fallback" + fi +} + +validate_provider() { + local provider_name="$1" + local base_url="$2" + local api_key="$3" + local preferred_model="$4" + + local out_messages="$TMP_DIR/${provider_name}.messages.json" + local out_count="$TMP_DIR/${provider_name}.count.json" + + echo "" + echo "=== ${provider_name} ===" + echo "Base URL: ${base_url}" + + local model + model="$(pick_model "$provider_name" "$base_url" "$api_key" "$preferred_model")" + + if [[ -z "$model" ]]; then + echo "Model pick: failed (likely auth failure or strict model restrictions)" + local auth_probe_payload + auth_probe_payload='{"model":"claude-sonnet-4-5","max_tokens":1,"messages":[{"role":"user","content":"ping"}]}' + local auth_status + auth_status="$(call_api "$base_url" "$api_key" "POST" "/v1/messages" "$auth_probe_payload" "$out_messages")" + echo "/v1/messages HTTP: ${auth_status}" + echo "messages body: $(json_snippet "$out_messages")" + echo "Verdict: INCONCLUSIVE" + return 0 + fi + + echo "Selected model: ${model}" + + local msg_payload + msg_payload="$(jq -cn --arg model "$model" '{model:$model,max_tokens:8,messages:[{role:"user",content:"Return one short word."}]}')" + + local msg_status + msg_status="$(call_api "$base_url" "$api_key" "POST" "/v1/messages" "$msg_payload" "$out_messages")" + echo "/v1/messages HTTP: ${msg_status}" + echo "messages body: $(json_snippet "$out_messages")" + + local count_payload + count_payload="$(jq -cn --arg model "$model" '{model:$model,messages:[{role:"user",content:"Return one short word."}]}')" + + local count_status + count_status="$(call_api "$base_url" "$api_key" "POST" "/v1/messages/count_tokens" 
"$count_payload" "$out_count")" + echo "/v1/messages/count_tokens HTTP: ${count_status}" + echo "count_tokens body: $(json_snippet "$out_count")" + + local verdict="INCONCLUSIVE" + + if [[ "$count_status" == "200" ]] && jq -e '.input_tokens | numbers' "$out_count" >/dev/null 2>&1; then + verdict="SUPPORTED" + elif [[ "$count_status" == "404" || "$count_status" == "405" ]]; then + verdict="NOT_SUPPORTED" + elif [[ "$count_status" == "401" || "$count_status" == "403" ]]; then + if [[ "$msg_status" == "200" || "$msg_status" == "400" || "$msg_status" == "422" || "$msg_status" == "429" ]]; then + verdict="AUTH_OR_PERMISSION_ISSUE_ON_COUNT_TOKENS" + fi + elif [[ "$count_status" == "400" || "$count_status" == "422" ]]; then + if jq -e '.error.message? // .error?.message? // .error?.code? // "" | tostring | test("not found|No such|resource|path"; "i")' "$out_count" >/dev/null 2>&1; then + verdict="NOT_SUPPORTED" + elif [[ "$msg_status" == "200" ]]; then + verdict="LIKELY_SUPPORTED_BUT_REQUEST_SCHEMA_DIFF" + else + verdict="INCONCLUSIVE" + fi + fi + + echo "Verdict: ${verdict}" +} + +echo "Running Anthropic count_tokens validation at $(date '+%Y-%m-%d %H:%M:%S %z')" + +validate_provider "kimi" "$KIMI_BASE_URL" "$KIMI_API_KEY" "$KIMI_MODEL" +validate_provider "dashscope" "$DASHSCOPE_BASE_URL" "$DASHSCOPE_API_KEY" "$DASHSCOPE_MODEL" +validate_provider "volcengine" "$VOLCENGINE_BASE_URL" "$VOLCENGINE_API_KEY" "$VOLCENGINE_MODEL" + +echo "" +echo "Done."