Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 3 additions & 4 deletions scripts/test-canopywave-long.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,10 @@ const MODEL_CONFIGS: Record<string, ModelConfig> = {
outputCostPerToken: 1.20 / 1_000_000,
},
kimi: {
// Pricing is approximate — based on public Moonshot k2 rates; CanopyWave may differ.
id: 'moonshotai/kimi-k2.6',
inputCostPerToken: 0.60 / 1_000_000,
cachedInputCostPerToken: 0.15 / 1_000_000,
outputCostPerToken: 2.50 / 1_000_000,
inputCostPerToken: 0.95 / 1_000_000,
cachedInputCostPerToken: 0.16 / 1_000_000,
outputCostPerToken: 4.00 / 1_000_000,
},
}

Expand Down
15 changes: 8 additions & 7 deletions web/src/app/api/v1/chat/completions/_post.ts
Original file line number Diff line number Diff line change
Expand Up @@ -532,9 +532,10 @@ export async function postChatCompletions(params: {
if (bodyStream) {
// Streaming request — route to SiliconFlow/CanopyWave/Fireworks for supported models
const useSiliconFlow = false // isSiliconFlowModel(typedBody.model)
const useCanopyWave = false // isCanopyWaveModel(typedBody.model)
const useFireworks = isFireworksModel(typedBody.model)
const useOpenAIDirect = !useFireworks && isOpenAIDirectModel(typedBody.model)
const useCanopyWave = isCanopyWaveModel(typedBody.model)
const useFireworks = !useCanopyWave && isFireworksModel(typedBody.model)
const useOpenAIDirect =
!useCanopyWave && !useFireworks && isOpenAIDirectModel(typedBody.model)
const stream = useSiliconFlow
? await handleSiliconFlowStream({
body: typedBody,
Expand Down Expand Up @@ -606,12 +607,12 @@ export async function postChatCompletions(params: {
})
} else {
// Non-streaming request — route to SiliconFlow/CanopyWave/Fireworks for supported models
// TEMPORARILY DISABLED: route through OpenRouter
const model = typedBody.model
const useSiliconFlow = false // isSiliconFlowModel(model)
const useCanopyWave = false // isCanopyWaveModel(model)
const useFireworks = isFireworksModel(model)
const shouldUseOpenAIEndpoint = !useFireworks && isOpenAIDirectModel(model)
const useCanopyWave = isCanopyWaveModel(model)
const useFireworks = !useCanopyWave && isFireworksModel(model)
const shouldUseOpenAIEndpoint =
!useCanopyWave && !useFireworks && isOpenAIDirectModel(model)

const nonStreamRequest = useSiliconFlow
? handleSiliconFlowNonStream({
Expand Down
63 changes: 47 additions & 16 deletions web/src/llm-api/canopywave.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,17 +26,52 @@ const canopywaveAgent = new Agent({
bodyTimeout: 0,
})

/** Map from OpenRouter model IDs to CanopyWave model IDs */
const CANOPYWAVE_MODEL_MAP: Record<string, string> = {
'minimax/minimax-m2.5': 'minimax/minimax-m2.5',
// Dollar cost for a single token, as billed by CanopyWave.
interface CanopyWavePricing {
  inputCostPerToken: number
  cachedInputCostPerToken: number
  outputCostPerToken: number
}

// Convert a "dollars per million tokens" rate into dollars per token.
const perMillionTokens = (dollars: number): number => dollars / 1_000_000

/**
 * Single source of truth for CanopyWave routing and billing.
 *
 * Maps each OpenRouter model ID that we serve via CanopyWave to the model ID
 * CanopyWave itself expects, together with that model's per-token pricing.
 * Keeping both in one table means adding a model can't drift between the
 * routing check and the billing math.
 */
const CANOPYWAVE_MODELS: Record<
  string,
  { canopywaveId: string; pricing: CanopyWavePricing }
> = {
  'minimax/minimax-m2.5': {
    canopywaveId: 'minimax/minimax-m2.5',
    pricing: {
      inputCostPerToken: perMillionTokens(0.27),
      cachedInputCostPerToken: perMillionTokens(0.03),
      outputCostPerToken: perMillionTokens(1.08),
    },
  },
  'moonshotai/kimi-k2.6': {
    canopywaveId: 'moonshotai/kimi-k2.6',
    pricing: {
      inputCostPerToken: perMillionTokens(0.95),
      cachedInputCostPerToken: perMillionTokens(0.16),
      outputCostPerToken: perMillionTokens(4.00),
    },
  },
}

/** True when `model` (an OpenRouter model ID) is routed through CanopyWave. */
export function isCanopyWaveModel(model: string): boolean {
  return CANOPYWAVE_MODELS[model] !== undefined
}

/** Translate an OpenRouter model ID into the ID CanopyWave expects.
 *  Unknown models pass through unchanged. */
function getCanopyWaveModelId(openrouterModel: string): string {
  const entry = CANOPYWAVE_MODELS[openrouterModel]
  return entry ? entry.canopywaveId : openrouterModel
}

/** Look up per-token pricing for a CanopyWave-routed model.
 *  Throws for models missing from CANOPYWAVE_MODELS so a routing/billing
 *  mismatch fails loudly instead of silently billing $0. */
function getCanopyWavePricing(model: string): CanopyWavePricing {
  const entry = CANOPYWAVE_MODELS[model]
  if (entry) {
    return entry.pricing
  }
  throw new Error(`No CanopyWave pricing found for model: ${model}`)
}

type StreamState = { responseText: string; reasoningText: string; ttftMs: number | null; billedAlready: boolean }
Expand Down Expand Up @@ -85,12 +120,7 @@ function createCanopyWaveRequest(params: {
})
}

// CanopyWave per-token pricing (dollars per token) for MiniMax M2.5
const CANOPYWAVE_INPUT_COST_PER_TOKEN = 0.27 / 1_000_000
const CANOPYWAVE_CACHED_INPUT_COST_PER_TOKEN = 0.03 / 1_000_000
const CANOPYWAVE_OUTPUT_COST_PER_TOKEN = 1.08 / 1_000_000

function extractUsageAndCost(usage: Record<string, unknown> | undefined | null): UsageData {
function extractUsageAndCost(usage: Record<string, unknown> | undefined | null, model: string): UsageData {
if (!usage) return { inputTokens: 0, outputTokens: 0, cacheReadInputTokens: 0, reasoningTokens: 0, cost: 0 }
const promptDetails = usage.prompt_tokens_details as Record<string, unknown> | undefined | null
const completionDetails = usage.completion_tokens_details as Record<string, unknown> | undefined | null
Expand All @@ -100,11 +130,12 @@ function extractUsageAndCost(usage: Record<string, unknown> | undefined | null):
const cacheReadInputTokens = typeof promptDetails?.cached_tokens === 'number' ? promptDetails.cached_tokens : 0
const reasoningTokens = typeof completionDetails?.reasoning_tokens === 'number' ? completionDetails.reasoning_tokens : 0

const pricing = getCanopyWavePricing(model)
const nonCachedInputTokens = Math.max(0, inputTokens - cacheReadInputTokens)
const cost =
nonCachedInputTokens * CANOPYWAVE_INPUT_COST_PER_TOKEN +
cacheReadInputTokens * CANOPYWAVE_CACHED_INPUT_COST_PER_TOKEN +
outputTokens * CANOPYWAVE_OUTPUT_COST_PER_TOKEN
nonCachedInputTokens * pricing.inputCostPerToken +
cacheReadInputTokens * pricing.cachedInputCostPerToken +
outputTokens * pricing.outputCostPerToken

return { inputTokens, outputTokens, cacheReadInputTokens, reasoningTokens, cost }
}
Expand Down Expand Up @@ -139,7 +170,7 @@ export async function handleCanopyWaveNonStream({
const data = await response.json()
const content = data.choices?.[0]?.message?.content ?? ''
const reasoningText = data.choices?.[0]?.message?.reasoning_content ?? data.choices?.[0]?.message?.reasoning ?? ''
const usageData = extractUsageAndCost(data.usage)
const usageData = extractUsageAndCost(data.usage, originalModel)

insertMessageToBigQuery({
messageId: data.id,
Expand Down Expand Up @@ -453,7 +484,7 @@ async function handleResponse({
return { state }
}

const usageData = extractUsageAndCost(data.usage as Record<string, unknown>)
const usageData = extractUsageAndCost(data.usage as Record<string, unknown>, originalModel)
const messageId = typeof data.id === 'string' ? data.id : 'unknown'

state.billedAlready = true
Expand Down
Loading