From 150a5f85b36e8882344c5c4be3de07ede23de6e2 Mon Sep 17 00:00:00 2001
From: James Grugett
Date: Sat, 25 Apr 2026 14:31:06 -0700
Subject: [PATCH 1/3] Route Kimi K2.6 requests through CanopyWave
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Backend-only wiring. No agent or freebuff-model changes — current behavior
is unchanged because nothing in the codebase requests moonshotai/kimi-k2.6
yet. Sets the stage for switching the freebuff "smart" model in a follow-up
PR.

- Add moonshotai/kimi-k2.6 to CANOPYWAVE_MODEL_MAP so isCanopyWaveModel
  picks it up.
- Refactor canopywave pricing into a per-model map and add Kimi pricing
  ($0.60/$0.15/$2.50 per 1M in/cache/out, approximate Moonshot rates).
- Flip useCanopyWave from `false` to isCanopyWaveModel(...) in _post.ts
  (stream + non-stream). For models not in the map this is a no-op — only
  minimax-m2.5 and kimi-k2.6 are affected, neither of which is currently
  used.

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 web/src/app/api/v1/chat/completions/_post.ts | 15 +++----
 web/src/llm-api/canopywave.ts                | 41 +++++++++++++++-----
 2 files changed, 39 insertions(+), 17 deletions(-)

diff --git a/web/src/app/api/v1/chat/completions/_post.ts b/web/src/app/api/v1/chat/completions/_post.ts
index 1f71b7792..13baada65 100644
--- a/web/src/app/api/v1/chat/completions/_post.ts
+++ b/web/src/app/api/v1/chat/completions/_post.ts
@@ -532,9 +532,10 @@ export async function postChatCompletions(params: {
   if (bodyStream) {
     // Streaming request — route to SiliconFlow/CanopyWave/Fireworks for supported models
     const useSiliconFlow = false // isSiliconFlowModel(typedBody.model)
-    const useCanopyWave = false // isCanopyWaveModel(typedBody.model)
-    const useFireworks = isFireworksModel(typedBody.model)
-    const useOpenAIDirect = !useFireworks && isOpenAIDirectModel(typedBody.model)
+    const useCanopyWave = isCanopyWaveModel(typedBody.model)
+    const useFireworks = !useCanopyWave && isFireworksModel(typedBody.model)
+    const useOpenAIDirect =
+      !useCanopyWave && !useFireworks && isOpenAIDirectModel(typedBody.model)
     const stream = useSiliconFlow
       ? await handleSiliconFlowStream({
           body: typedBody,
@@ -606,12 +607,12 @@
     })
   } else {
     // Non-streaming request — route to SiliconFlow/CanopyWave/Fireworks for supported models
-    // TEMPORARILY DISABLED: route through OpenRouter
     const model = typedBody.model
     const useSiliconFlow = false // isSiliconFlowModel(model)
-    const useCanopyWave = false // isCanopyWaveModel(model)
-    const useFireworks = isFireworksModel(model)
-    const shouldUseOpenAIEndpoint = !useFireworks && isOpenAIDirectModel(model)
+    const useCanopyWave = isCanopyWaveModel(model)
+    const useFireworks = !useCanopyWave && isFireworksModel(model)
+    const shouldUseOpenAIEndpoint =
+      !useCanopyWave && !useFireworks && isOpenAIDirectModel(model)
     const nonStreamRequest = useSiliconFlow
       ? handleSiliconFlowNonStream({
diff --git a/web/src/llm-api/canopywave.ts b/web/src/llm-api/canopywave.ts
index 0db3e0f9c..7854953d2 100644
--- a/web/src/llm-api/canopywave.ts
+++ b/web/src/llm-api/canopywave.ts
@@ -29,6 +29,7 @@ const canopywaveAgent = new Agent({
 /** Map from OpenRouter model IDs to CanopyWave model IDs */
 const CANOPYWAVE_MODEL_MAP: Record<string, string> = {
   'minimax/minimax-m2.5': 'minimax/minimax-m2.5',
+  'moonshotai/kimi-k2.6': 'moonshotai/kimi-k2.6',
 }

 export function isCanopyWaveModel(model: string): boolean {
@@ -85,12 +86,31 @@ function createCanopyWaveRequest(params: {
   })
 }

-// CanopyWave per-token pricing (dollars per token) for MiniMax M2.5
-const CANOPYWAVE_INPUT_COST_PER_TOKEN = 0.27 / 1_000_000
-const CANOPYWAVE_CACHED_INPUT_COST_PER_TOKEN = 0.03 / 1_000_000
-const CANOPYWAVE_OUTPUT_COST_PER_TOKEN = 1.08 / 1_000_000
+// CanopyWave per-token pricing (dollars per token), keyed by OpenRouter model ID
+interface CanopyWavePricing {
+  inputCostPerToken: number
+  cachedInputCostPerToken: number
+  outputCostPerToken: number
+}
+
+const CANOPYWAVE_PRICING_MAP: Record<string, CanopyWavePricing> = {
+  'minimax/minimax-m2.5': {
+    inputCostPerToken: 0.27 / 1_000_000,
+    cachedInputCostPerToken: 0.03 / 1_000_000,
+    outputCostPerToken: 1.08 / 1_000_000,
+  },
+  'moonshotai/kimi-k2.6': {
+    inputCostPerToken: 0.60 / 1_000_000,
+    cachedInputCostPerToken: 0.15 / 1_000_000,
+    outputCostPerToken: 2.50 / 1_000_000,
+  },
+}
+
+function getCanopyWavePricing(model: string): CanopyWavePricing {
+  return CANOPYWAVE_PRICING_MAP[model] ?? CANOPYWAVE_PRICING_MAP['moonshotai/kimi-k2.6']
+}

-function extractUsageAndCost(usage: Record<string, unknown> | undefined | null): UsageData {
+function extractUsageAndCost(usage: Record<string, unknown> | undefined | null, model: string): UsageData {
   if (!usage) return { inputTokens: 0, outputTokens: 0, cacheReadInputTokens: 0, reasoningTokens: 0, cost: 0 }
   const promptDetails = usage.prompt_tokens_details as Record<string, unknown> | undefined | null
   const completionDetails = usage.completion_tokens_details as Record<string, unknown> | undefined | null
@@ -100,11 +120,12 @@
   const cacheReadInputTokens = typeof promptDetails?.cached_tokens === 'number' ? promptDetails.cached_tokens : 0
   const reasoningTokens = typeof completionDetails?.reasoning_tokens === 'number' ? completionDetails.reasoning_tokens : 0

+  const pricing = getCanopyWavePricing(model)
   const nonCachedInputTokens = Math.max(0, inputTokens - cacheReadInputTokens)
   const cost =
-    nonCachedInputTokens * CANOPYWAVE_INPUT_COST_PER_TOKEN +
-    cacheReadInputTokens * CANOPYWAVE_CACHED_INPUT_COST_PER_TOKEN +
-    outputTokens * CANOPYWAVE_OUTPUT_COST_PER_TOKEN
+    nonCachedInputTokens * pricing.inputCostPerToken +
+    cacheReadInputTokens * pricing.cachedInputCostPerToken +
+    outputTokens * pricing.outputCostPerToken

   return { inputTokens, outputTokens, cacheReadInputTokens, reasoningTokens, cost }
 }
@@ -139,7 +160,7 @@ export async function handleCanopyWaveNonStream({
   const data = await response.json()
   const content = data.choices?.[0]?.message?.content ?? ''
   const reasoningText = data.choices?.[0]?.message?.reasoning_content ?? data.choices?.[0]?.message?.reasoning ?? ''
-  const usageData = extractUsageAndCost(data.usage)
+  const usageData = extractUsageAndCost(data.usage, originalModel)

   insertMessageToBigQuery({
     messageId: data.id,
@@ -453,7 +474,7 @@ async function handleResponse({
     return { state }
   }

-  const usageData = extractUsageAndCost(data.usage as Record<string, unknown>)
+  const usageData = extractUsageAndCost(data.usage as Record<string, unknown>, originalModel)
   const messageId = typeof data.id === 'string' ? data.id : 'unknown'

   state.billedAlready = true
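
For reviewers, the effective precedence both branches now implement is
CanopyWave, then Fireworks, then OpenAI-direct, with everything else still
falling through to OpenRouter. A minimal sketch of that order (illustrative
only, not applied code; the declared predicates stand in for the real
helpers already imported in _post.ts):

    // Routing order after this patch, reduced to a single helper for illustration.
    declare function isCanopyWaveModel(model: string): boolean
    declare function isFireworksModel(model: string): boolean
    declare function isOpenAIDirectModel(model: string): boolean

    type Route = 'canopywave' | 'fireworks' | 'openai-direct' | 'openrouter'

    function pickRoute(model: string): Route {
      // SiliconFlow stays hard-disabled upstream, so it never wins here.
      if (isCanopyWaveModel(model)) return 'canopywave'
      if (isFireworksModel(model)) return 'fireworks'
      if (isOpenAIDirectModel(model)) return 'openai-direct'
      return 'openrouter' // unchanged behavior for every other model
    }

    // pickRoute('moonshotai/kimi-k2.6') returns 'canopywave' once the map entry
    // above lands; a model in none of the maps still goes through OpenRouter.
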
From eedf5285780d69fa5b8c90dbd18fb50f94a02827 Mon Sep 17 00:00:00 2001
From: James Grugett
Date: Sat, 25 Apr 2026 14:36:56 -0700
Subject: [PATCH 2/3] Address Greptile review: merge canopywave model + pricing maps
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Combine CANOPYWAVE_MODEL_MAP and CANOPYWAVE_PRICING_MAP into a single
CANOPYWAVE_MODELS map keyed by OpenRouter model ID. Removes the silent
Kimi-pricing fallback in getCanopyWavePricing — it now throws on unknown
models, since callers are expected to gate on isCanopyWaveModel first.
Eliminates the drift risk if a future model is added to one map but not
the other.

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 web/src/llm-api/canopywave.ts | 70 ++++++++++++++++++++---------------
 1 file changed, 40 insertions(+), 30 deletions(-)

diff --git a/web/src/llm-api/canopywave.ts b/web/src/llm-api/canopywave.ts
index 7854953d2..686d2b3d2 100644
--- a/web/src/llm-api/canopywave.ts
+++ b/web/src/llm-api/canopywave.ts
@@ -26,18 +26,52 @@ const canopywaveAgent = new Agent({
   bodyTimeout: 0,
 })

-/** Map from OpenRouter model IDs to CanopyWave model IDs */
-const CANOPYWAVE_MODEL_MAP: Record<string, string> = {
-  'minimax/minimax-m2.5': 'minimax/minimax-m2.5',
-  'moonshotai/kimi-k2.6': 'moonshotai/kimi-k2.6',
+// CanopyWave per-token pricing (dollars per token)
+interface CanopyWavePricing {
+  inputCostPerToken: number
+  cachedInputCostPerToken: number
+  outputCostPerToken: number
+}
+
+/** Single source of truth: which OpenRouter model IDs we route through
+ * CanopyWave, the corresponding CanopyWave model ID, and per-model pricing.
+ * Kept as one map so adding a model can't drift between routing and billing. */
+const CANOPYWAVE_MODELS: Record<
+  string,
+  { canopywaveId: string; pricing: CanopyWavePricing }
+> = {
+  'minimax/minimax-m2.5': {
+    canopywaveId: 'minimax/minimax-m2.5',
+    pricing: {
+      inputCostPerToken: 0.27 / 1_000_000,
+      cachedInputCostPerToken: 0.03 / 1_000_000,
+      outputCostPerToken: 1.08 / 1_000_000,
+    },
+  },
+  'moonshotai/kimi-k2.6': {
+    canopywaveId: 'moonshotai/kimi-k2.6',
+    pricing: {
+      inputCostPerToken: 0.60 / 1_000_000,
+      cachedInputCostPerToken: 0.15 / 1_000_000,
+      outputCostPerToken: 2.50 / 1_000_000,
+    },
+  },
 }

 export function isCanopyWaveModel(model: string): boolean {
-  return model in CANOPYWAVE_MODEL_MAP
+  return model in CANOPYWAVE_MODELS
 }

 function getCanopyWaveModelId(openrouterModel: string): string {
-  return CANOPYWAVE_MODEL_MAP[openrouterModel] ?? openrouterModel
+  return CANOPYWAVE_MODELS[openrouterModel]?.canopywaveId ?? openrouterModel
+}
+
+function getCanopyWavePricing(model: string): CanopyWavePricing {
+  const entry = CANOPYWAVE_MODELS[model]
+  if (!entry) {
+    throw new Error(`No CanopyWave pricing found for model: ${model}`)
+  }
+  return entry.pricing
 }

 type StreamState = { responseText: string; reasoningText: string; ttftMs: number | null; billedAlready: boolean }
@@ -86,30 +120,6 @@ function createCanopyWaveRequest(params: {
   })
 }

-// CanopyWave per-token pricing (dollars per token), keyed by OpenRouter model ID
-interface CanopyWavePricing {
-  inputCostPerToken: number
-  cachedInputCostPerToken: number
-  outputCostPerToken: number
-}
-
-const CANOPYWAVE_PRICING_MAP: Record<string, CanopyWavePricing> = {
-  'minimax/minimax-m2.5': {
-    inputCostPerToken: 0.27 / 1_000_000,
-    cachedInputCostPerToken: 0.03 / 1_000_000,
-    outputCostPerToken: 1.08 / 1_000_000,
-  },
-  'moonshotai/kimi-k2.6': {
-    inputCostPerToken: 0.60 / 1_000_000,
-    cachedInputCostPerToken: 0.15 / 1_000_000,
-    outputCostPerToken: 2.50 / 1_000_000,
-  },
-}
-
-function getCanopyWavePricing(model: string): CanopyWavePricing {
-  return CANOPYWAVE_PRICING_MAP[model] ?? CANOPYWAVE_PRICING_MAP['moonshotai/kimi-k2.6']
-}
-
 function extractUsageAndCost(usage: Record<string, unknown> | undefined | null, model: string): UsageData {
   if (!usage) return { inputTokens: 0, outputTokens: 0, cacheReadInputTokens: 0, reasoningTokens: 0, cost: 0 }
   const promptDetails = usage.prompt_tokens_details as Record<string, unknown> | undefined | null
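
The call pattern this refactor assumes inside canopywave.ts, sketched with an
example model string (these helpers are module-internal; the point is that
getCanopyWavePricing is only reached behind an isCanopyWaveModel check now
that the fallback is gone):

    const model = 'moonshotai/kimi-k2.6' // example OpenRouter model ID

    if (isCanopyWaveModel(model)) {
      const canopywaveId = getCanopyWaveModelId(model) // 'moonshotai/kimi-k2.6'
      const pricing = getCanopyWavePricing(model) // safe: gated on the same map
      // ... build the CanopyWave request with canopywaveId and bill with pricing ...
    } else {
      // Unknown model: getCanopyWavePricing(model) would throw, so never call it here.
    }
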
From 52a5bba0f098982cfc90281b05153763f7502abb Mon Sep 17 00:00:00 2001
From: James Grugett
Date: Sat, 25 Apr 2026 14:44:27 -0700
Subject: [PATCH 3/3] Update Kimi K2.6 pricing to $0.95/$0.16/$4.00 per 1M

Confirmed CanopyWave rates. Was using approximate Moonshot K2 numbers as a
placeholder.

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 scripts/test-canopywave-long.ts | 7 +++----
 web/src/llm-api/canopywave.ts   | 6 +++---
 2 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/scripts/test-canopywave-long.ts b/scripts/test-canopywave-long.ts
index 827bd4c7f..052ba1c07 100644
--- a/scripts/test-canopywave-long.ts
+++ b/scripts/test-canopywave-long.ts
@@ -33,11 +33,10 @@ const MODEL_CONFIGS: Record = {
     outputCostPerToken: 1.20 / 1_000_000,
   },
   kimi: {
-    // Pricing is approximate — based on public Moonshot k2 rates; CanopyWave may differ.
     id: 'moonshotai/kimi-k2.6',
-    inputCostPerToken: 0.60 / 1_000_000,
-    cachedInputCostPerToken: 0.15 / 1_000_000,
-    outputCostPerToken: 2.50 / 1_000_000,
+    inputCostPerToken: 0.95 / 1_000_000,
+    cachedInputCostPerToken: 0.16 / 1_000_000,
+    outputCostPerToken: 4.00 / 1_000_000,
   },
 }

diff --git a/web/src/llm-api/canopywave.ts b/web/src/llm-api/canopywave.ts
index 686d2b3d2..9a5b2ba12 100644
--- a/web/src/llm-api/canopywave.ts
+++ b/web/src/llm-api/canopywave.ts
@@ -51,9 +51,9 @@ const CANOPYWAVE_MODELS: Record<
   'moonshotai/kimi-k2.6': {
     canopywaveId: 'moonshotai/kimi-k2.6',
     pricing: {
-      inputCostPerToken: 0.60 / 1_000_000,
-      cachedInputCostPerToken: 0.15 / 1_000_000,
-      outputCostPerToken: 2.50 / 1_000_000,
+      inputCostPerToken: 0.95 / 1_000_000,
+      cachedInputCostPerToken: 0.16 / 1_000_000,
+      outputCostPerToken: 4.00 / 1_000_000,
     },
   },
 }
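
As a sanity check on the confirmed rates, a worked example of what
extractUsageAndCost now bills for one hypothetical Kimi K2.6 response (the
token counts are made up; the per-token rates are the ones set above):

    // 100k prompt tokens, 20k of them cache hits, 5k completion tokens.
    const inputTokens = 100_000
    const cacheReadInputTokens = 20_000
    const outputTokens = 5_000

    const pricing = {
      inputCostPerToken: 0.95 / 1_000_000,
      cachedInputCostPerToken: 0.16 / 1_000_000,
      outputCostPerToken: 4.00 / 1_000_000,
    }

    const nonCachedInputTokens = Math.max(0, inputTokens - cacheReadInputTokens) // 80_000
    const cost =
      nonCachedInputTokens * pricing.inputCostPerToken + // $0.0760
      cacheReadInputTokens * pricing.cachedInputCostPerToken + // $0.0032
      outputTokens * pricing.outputCostPerToken // $0.0200
    // cost comes out to roughly $0.0992 for this request.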