From 150a5f85b36e8882344c5c4be3de07ede23de6e2 Mon Sep 17 00:00:00 2001
From: James Grugett
Date: Sat, 25 Apr 2026 14:31:06 -0700
Subject: [PATCH 1/3] Route Kimi K2.6 requests through CanopyWave
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Backend-only wiring. No agent or freebuff-model changes — current behavior
is unchanged because nothing in the codebase requests moonshotai/kimi-k2.6
yet. Sets the stage for switching the freebuff "smart" model in a follow-up
PR.

- Add moonshotai/kimi-k2.6 to CANOPYWAVE_MODEL_MAP so isCanopyWaveModel
  picks it up.
- Refactor canopywave pricing into a per-model map and add Kimi pricing
  ($0.60/$0.15/$2.50 per 1M in/cache/out, approximate Moonshot rates).
- Flip useCanopyWave from `false` to isCanopyWaveModel(...) in _post.ts
  (stream + non-stream). For models not in the map this is a no-op — only
  minimax-m2.5 and kimi-k2.6 are affected, neither of which is currently
  used.

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 web/src/app/api/v1/chat/completions/_post.ts | 15 +++----
 web/src/llm-api/canopywave.ts                | 41 +++++++++++++++-----
 2 files changed, 39 insertions(+), 17 deletions(-)

diff --git a/web/src/app/api/v1/chat/completions/_post.ts b/web/src/app/api/v1/chat/completions/_post.ts
index 1f71b7792..13baada65 100644
--- a/web/src/app/api/v1/chat/completions/_post.ts
+++ b/web/src/app/api/v1/chat/completions/_post.ts
@@ -532,9 +532,10 @@ export async function postChatCompletions(params: {
   if (bodyStream) {
     // Streaming request — route to SiliconFlow/CanopyWave/Fireworks for supported models
     const useSiliconFlow = false // isSiliconFlowModel(typedBody.model)
-    const useCanopyWave = false // isCanopyWaveModel(typedBody.model)
-    const useFireworks = isFireworksModel(typedBody.model)
-    const useOpenAIDirect = !useFireworks && isOpenAIDirectModel(typedBody.model)
+    const useCanopyWave = isCanopyWaveModel(typedBody.model)
+    const useFireworks = !useCanopyWave && isFireworksModel(typedBody.model)
+    const useOpenAIDirect =
+      !useCanopyWave && !useFireworks && isOpenAIDirectModel(typedBody.model)
     const stream = useSiliconFlow
       ? await handleSiliconFlowStream({
           body: typedBody,
@@ -606,12 +607,12 @@
     })
   } else {
     // Non-streaming request — route to SiliconFlow/CanopyWave/Fireworks for supported models
-    // TEMPORARILY DISABLED: route through OpenRouter
     const model = typedBody.model
     const useSiliconFlow = false // isSiliconFlowModel(model)
-    const useCanopyWave = false // isCanopyWaveModel(model)
-    const useFireworks = isFireworksModel(model)
-    const shouldUseOpenAIEndpoint = !useFireworks && isOpenAIDirectModel(model)
+    const useCanopyWave = isCanopyWaveModel(model)
+    const useFireworks = !useCanopyWave && isFireworksModel(model)
+    const shouldUseOpenAIEndpoint =
+      !useCanopyWave && !useFireworks && isOpenAIDirectModel(model)
     const nonStreamRequest = useSiliconFlow
       ? handleSiliconFlowNonStream({
diff --git a/web/src/llm-api/canopywave.ts b/web/src/llm-api/canopywave.ts
index 0db3e0f9c..7854953d2 100644
--- a/web/src/llm-api/canopywave.ts
+++ b/web/src/llm-api/canopywave.ts
@@ -29,6 +29,7 @@ const canopywaveAgent = new Agent({
 /** Map from OpenRouter model IDs to CanopyWave model IDs */
 const CANOPYWAVE_MODEL_MAP: Record<string, string> = {
   'minimax/minimax-m2.5': 'minimax/minimax-m2.5',
+  'moonshotai/kimi-k2.6': 'moonshotai/kimi-k2.6',
 }

 export function isCanopyWaveModel(model: string): boolean {
@@ -85,12 +86,31 @@ function createCanopyWaveRequest(params: {
   })
 }

-// CanopyWave per-token pricing (dollars per token) for MiniMax M2.5
-const CANOPYWAVE_INPUT_COST_PER_TOKEN = 0.27 / 1_000_000
-const CANOPYWAVE_CACHED_INPUT_COST_PER_TOKEN = 0.03 / 1_000_000
-const CANOPYWAVE_OUTPUT_COST_PER_TOKEN = 1.08 / 1_000_000
+// CanopyWave per-token pricing (dollars per token), keyed by OpenRouter model ID
+interface CanopyWavePricing {
+  inputCostPerToken: number
+  cachedInputCostPerToken: number
+  outputCostPerToken: number
+}
+
+const CANOPYWAVE_PRICING_MAP: Record<string, CanopyWavePricing> = {
+  'minimax/minimax-m2.5': {
+    inputCostPerToken: 0.27 / 1_000_000,
+    cachedInputCostPerToken: 0.03 / 1_000_000,
+    outputCostPerToken: 1.08 / 1_000_000,
+  },
+  'moonshotai/kimi-k2.6': {
+    inputCostPerToken: 0.60 / 1_000_000,
+    cachedInputCostPerToken: 0.15 / 1_000_000,
+    outputCostPerToken: 2.50 / 1_000_000,
+  },
+}
+
+function getCanopyWavePricing(model: string): CanopyWavePricing {
+  return CANOPYWAVE_PRICING_MAP[model] ?? CANOPYWAVE_PRICING_MAP['moonshotai/kimi-k2.6']
+}

-function extractUsageAndCost(usage: Record<string, unknown> | undefined | null): UsageData {
+function extractUsageAndCost(usage: Record<string, unknown> | undefined | null, model: string): UsageData {
   if (!usage) return { inputTokens: 0, outputTokens: 0, cacheReadInputTokens: 0, reasoningTokens: 0, cost: 0 }
   const promptDetails = usage.prompt_tokens_details as Record<string, unknown> | undefined | null
   const completionDetails = usage.completion_tokens_details as Record<string, unknown> | undefined | null
@@ -100,11 +120,12 @@
   const cacheReadInputTokens = typeof promptDetails?.cached_tokens === 'number' ? promptDetails.cached_tokens : 0
   const reasoningTokens = typeof completionDetails?.reasoning_tokens === 'number' ? completionDetails.reasoning_tokens : 0

+  const pricing = getCanopyWavePricing(model)
   const nonCachedInputTokens = Math.max(0, inputTokens - cacheReadInputTokens)
   const cost =
-    nonCachedInputTokens * CANOPYWAVE_INPUT_COST_PER_TOKEN +
-    cacheReadInputTokens * CANOPYWAVE_CACHED_INPUT_COST_PER_TOKEN +
-    outputTokens * CANOPYWAVE_OUTPUT_COST_PER_TOKEN
+    nonCachedInputTokens * pricing.inputCostPerToken +
+    cacheReadInputTokens * pricing.cachedInputCostPerToken +
+    outputTokens * pricing.outputCostPerToken

   return { inputTokens, outputTokens, cacheReadInputTokens, reasoningTokens, cost }
 }
@@ -139,7 +160,7 @@ export async function handleCanopyWaveNonStream({
   const data = await response.json()
   const content = data.choices?.[0]?.message?.content ?? ''
   const reasoningText = data.choices?.[0]?.message?.reasoning_content ?? data.choices?.[0]?.message?.reasoning ?? ''
-  const usageData = extractUsageAndCost(data.usage)
+  const usageData = extractUsageAndCost(data.usage, originalModel)

   insertMessageToBigQuery({
     messageId: data.id,
@@ -453,7 +474,7 @@ async function handleResponse({
     return { state }
   }

-  const usageData = extractUsageAndCost(data.usage as Record<string, unknown>)
+  const usageData = extractUsageAndCost(data.usage as Record<string, unknown>, originalModel)
   const messageId = typeof data.id === 'string' ? data.id : 'unknown'

   state.billedAlready = true
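
For reviewers, the effective precedence both branches now implement is
CanopyWave, then Fireworks, then OpenAI-direct, with everything else still
falling through to OpenRouter. A minimal sketch of that order (illustrative
only, not applied code; the declared predicates stand in for the real
helpers already imported in _post.ts):

    // Routing order after this patch, reduced to a single helper for illustration.
    declare function isCanopyWaveModel(model: string): boolean
    declare function isFireworksModel(model: string): boolean
    declare function isOpenAIDirectModel(model: string): boolean

    type Route = 'canopywave' | 'fireworks' | 'openai-direct' | 'openrouter'

    function pickRoute(model: string): Route {
      // SiliconFlow stays hard-disabled upstream, so it never wins here.
      if (isCanopyWaveModel(model)) return 'canopywave'
      if (isFireworksModel(model)) return 'fireworks'
      if (isOpenAIDirectModel(model)) return 'openai-direct'
      return 'openrouter' // unchanged behavior for every other model
    }

    // pickRoute('moonshotai/kimi-k2.6') returns 'canopywave' once the map entry
    // above lands; a model in none of the maps still goes through OpenRouter.
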
From eedf5285780d69fa5b8c90dbd18fb50f94a02827 Mon Sep 17 00:00:00 2001
From: James Grugett
Date: Sat, 25 Apr 2026 14:36:56 -0700
Subject: [PATCH 2/3] Address Greptile review: merge canopywave model + pricing maps
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Combine CANOPYWAVE_MODEL_MAP and CANOPYWAVE_PRICING_MAP into a single
CANOPYWAVE_MODELS map keyed by OpenRouter model ID. Removes the silent
Kimi-pricing fallback in getCanopyWavePricing — it now throws on unknown
models, since callers are expected to gate on isCanopyWaveModel first.
Eliminates the drift risk if a future model is added to one map but not
the other.

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 web/src/llm-api/canopywave.ts | 70 ++++++++++++++++++++---------------
 1 file changed, 40 insertions(+), 30 deletions(-)

diff --git a/web/src/llm-api/canopywave.ts b/web/src/llm-api/canopywave.ts
index 7854953d2..686d2b3d2 100644
--- a/web/src/llm-api/canopywave.ts
+++ b/web/src/llm-api/canopywave.ts
@@ -26,18 +26,52 @@ const canopywaveAgent = new Agent({
   bodyTimeout: 0,
 })

-/** Map from OpenRouter model IDs to CanopyWave model IDs */
-const CANOPYWAVE_MODEL_MAP: Record<string, string> = {
-  'minimax/minimax-m2.5': 'minimax/minimax-m2.5',
-  'moonshotai/kimi-k2.6': 'moonshotai/kimi-k2.6',
+// CanopyWave per-token pricing (dollars per token)
+interface CanopyWavePricing {
+  inputCostPerToken: number
+  cachedInputCostPerToken: number
+  outputCostPerToken: number
+}
+
+/** Single source of truth: which OpenRouter model IDs we route through
+ * CanopyWave, the corresponding CanopyWave model ID, and per-model pricing.
+ * Kept as one map so adding a model can't drift between routing and billing. */
+const CANOPYWAVE_MODELS: Record<
+  string,
+  { canopywaveId: string; pricing: CanopyWavePricing }
+> = {
+  'minimax/minimax-m2.5': {
+    canopywaveId: 'minimax/minimax-m2.5',
+    pricing: {
+      inputCostPerToken: 0.27 / 1_000_000,
+      cachedInputCostPerToken: 0.03 / 1_000_000,
+      outputCostPerToken: 1.08 / 1_000_000,
+    },
+  },
+  'moonshotai/kimi-k2.6': {
+    canopywaveId: 'moonshotai/kimi-k2.6',
+    pricing: {
+      inputCostPerToken: 0.60 / 1_000_000,
+      cachedInputCostPerToken: 0.15 / 1_000_000,
+      outputCostPerToken: 2.50 / 1_000_000,
+    },
+  },
 }

 export function isCanopyWaveModel(model: string): boolean {
-  return model in CANOPYWAVE_MODEL_MAP
+  return model in CANOPYWAVE_MODELS
 }

 function getCanopyWaveModelId(openrouterModel: string): string {
-  return CANOPYWAVE_MODEL_MAP[openrouterModel] ?? openrouterModel
+  return CANOPYWAVE_MODELS[openrouterModel]?.canopywaveId ?? openrouterModel
+}
+
+function getCanopyWavePricing(model: string): CanopyWavePricing {
+  const entry = CANOPYWAVE_MODELS[model]
+  if (!entry) {
+    throw new Error(`No CanopyWave pricing found for model: ${model}`)
+  }
+  return entry.pricing
 }

 type StreamState = { responseText: string; reasoningText: string; ttftMs: number | null; billedAlready: boolean }
@@ -86,30 +120,6 @@ function createCanopyWaveRequest(params: {
   })
 }

-// CanopyWave per-token pricing (dollars per token), keyed by OpenRouter model ID
-interface CanopyWavePricing {
-  inputCostPerToken: number
-  cachedInputCostPerToken: number
-  outputCostPerToken: number
-}
-
-const CANOPYWAVE_PRICING_MAP: Record<string, CanopyWavePricing> = {
-  'minimax/minimax-m2.5': {
-    inputCostPerToken: 0.27 / 1_000_000,
-    cachedInputCostPerToken: 0.03 / 1_000_000,
-    outputCostPerToken: 1.08 / 1_000_000,
-  },
-  'moonshotai/kimi-k2.6': {
-    inputCostPerToken: 0.60 / 1_000_000,
-    cachedInputCostPerToken: 0.15 / 1_000_000,
-    outputCostPerToken: 2.50 / 1_000_000,
-  },
-}
-
-function getCanopyWavePricing(model: string): CanopyWavePricing {
-  return CANOPYWAVE_PRICING_MAP[model] ?? CANOPYWAVE_PRICING_MAP['moonshotai/kimi-k2.6']
-}
-
 function extractUsageAndCost(usage: Record<string, unknown> | undefined | null, model: string): UsageData {
   if (!usage) return { inputTokens: 0, outputTokens: 0, cacheReadInputTokens: 0, reasoningTokens: 0, cost: 0 }
   const promptDetails = usage.prompt_tokens_details as Record<string, unknown> | undefined | null
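
The call pattern this refactor assumes inside canopywave.ts, sketched with an
example model string (these helpers are module-internal; the point is that
getCanopyWavePricing is only reached behind an isCanopyWaveModel check now
that the fallback is gone):

    const model = 'moonshotai/kimi-k2.6' // example OpenRouter model ID

    if (isCanopyWaveModel(model)) {
      const canopywaveId = getCanopyWaveModelId(model) // 'moonshotai/kimi-k2.6'
      const pricing = getCanopyWavePricing(model) // safe: gated on the same map
      // ... build the CanopyWave request with canopywaveId and bill with pricing ...
    } else {
      // Unknown model: getCanopyWavePricing(model) would throw, so never call it here.
    }
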
From 52a5bba0f098982cfc90281b05153763f7502abb Mon Sep 17 00:00:00 2001
From: James Grugett
Date: Sat, 25 Apr 2026 14:44:27 -0700
Subject: [PATCH 3/3] Update Kimi K2.6 pricing to $0.95/$0.16/$4.00 per 1M

Confirmed CanopyWave rates. Was using approximate Moonshot K2 numbers as a
placeholder.

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 scripts/test-canopywave-long.ts | 7 +++----
 web/src/llm-api/canopywave.ts   | 6 +++---
 2 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/scripts/test-canopywave-long.ts b/scripts/test-canopywave-long.ts
index 827bd4c7f..052ba1c07 100644
--- a/scripts/test-canopywave-long.ts
+++ b/scripts/test-canopywave-long.ts
@@ -33,11 +33,10 @@ const MODEL_CONFIGS: Record = {
     outputCostPerToken: 1.20 / 1_000_000,
   },
   kimi: {
-    // Pricing is approximate — based on public Moonshot k2 rates; CanopyWave may differ.
     id: 'moonshotai/kimi-k2.6',
-    inputCostPerToken: 0.60 / 1_000_000,
-    cachedInputCostPerToken: 0.15 / 1_000_000,
-    outputCostPerToken: 2.50 / 1_000_000,
+    inputCostPerToken: 0.95 / 1_000_000,
+    cachedInputCostPerToken: 0.16 / 1_000_000,
+    outputCostPerToken: 4.00 / 1_000_000,
   },
 }

diff --git a/web/src/llm-api/canopywave.ts b/web/src/llm-api/canopywave.ts
index 686d2b3d2..9a5b2ba12 100644
--- a/web/src/llm-api/canopywave.ts
+++ b/web/src/llm-api/canopywave.ts
@@ -51,9 +51,9 @@ const CANOPYWAVE_MODELS: Record<
   'moonshotai/kimi-k2.6': {
     canopywaveId: 'moonshotai/kimi-k2.6',
     pricing: {
-      inputCostPerToken: 0.60 / 1_000_000,
-      cachedInputCostPerToken: 0.15 / 1_000_000,
-      outputCostPerToken: 2.50 / 1_000_000,
+      inputCostPerToken: 0.95 / 1_000_000,
+      cachedInputCostPerToken: 0.16 / 1_000_000,
+      outputCostPerToken: 4.00 / 1_000_000,
     },
   },
 }
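
As a sanity check on the confirmed rates, a worked example of what
extractUsageAndCost now bills for one hypothetical Kimi K2.6 response (the
token counts are made up; the per-token rates are the ones set above):

    // 100k prompt tokens, 20k of them cache hits, 5k completion tokens.
    const inputTokens = 100_000
    const cacheReadInputTokens = 20_000
    const outputTokens = 5_000

    const pricing = {
      inputCostPerToken: 0.95 / 1_000_000,
      cachedInputCostPerToken: 0.16 / 1_000_000,
      outputCostPerToken: 4.00 / 1_000_000,
    }

    const nonCachedInputTokens = Math.max(0, inputTokens - cacheReadInputTokens) // 80_000
    const cost =
      nonCachedInputTokens * pricing.inputCostPerToken + // $0.0760
      cacheReadInputTokens * pricing.cachedInputCostPerToken + // $0.0032
      outputTokens * pricing.outputCostPerToken // $0.0200
    // cost comes out to roughly $0.0992 for this request.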