From f58c850de1a7cfb5a491e440629c173ff8584e12 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Sat, 25 Apr 2026 14:30:07 -0700 Subject: [PATCH 1/2] Use waiting room Gravity placements (#549) --- cli/src/components/waiting-room-screen.tsx | 1 + cli/src/hooks/use-gravity-ad.ts | 7 +++++- web/src/app/api/v1/ads/_post.ts | 3 +++ web/src/lib/ad-providers/gravity.ts | 27 +++++++++++++++------- web/src/lib/ad-providers/types.ts | 4 ++++ 5 files changed, 33 insertions(+), 9 deletions(-) diff --git a/cli/src/components/waiting-room-screen.tsx b/cli/src/components/waiting-room-screen.tsx index 2bbee6c71..d48d986d2 100644 --- a/cli/src/components/waiting-room-screen.tsx +++ b/cli/src/components/waiting-room-screen.tsx @@ -90,6 +90,7 @@ export const WaitingRoomScreen: React.FC = ({ forceStart: true, provider: 'gravity', fallbackProvider: 'carbon', + surface: 'waiting_room', }) useFreebuffCtrlCExit() diff --git a/cli/src/hooks/use-gravity-ad.ts b/cli/src/hooks/use-gravity-ad.ts index 36a18faae..ea6977864 100644 --- a/cli/src/hooks/use-gravity-ad.ts +++ b/cli/src/hooks/use-gravity-ad.ts @@ -35,6 +35,7 @@ export type AdVariant = 'banner' | 'choice' * same normalized response shape, so the rest of the hook is provider-agnostic. */ export type AdProvider = 'gravity' | 'carbon' +export type AdSurface = 'waiting_room' export type AdData = | { variant: 'banner'; ad: AdResponse } @@ -112,11 +113,14 @@ export const useGravityAd = (options?: { provider?: AdProvider /** Backup ad network to try when the primary returns no fill or errors. */ fallbackProvider?: AdProvider + /** Product surface requesting the ad. The server maps this to placements. */ + surface?: AdSurface }): GravityAdState => { const enabled = options?.enabled ?? true const forceStart = options?.forceStart ?? false const provider: AdProvider = options?.provider ?? 'gravity' const fallbackProvider = options?.fallbackProvider + const surface = options?.surface const [ad, setAd] = useState(null) const [adData, setAdData] = useState(null) const [isLoading, setIsLoading] = useState(false) @@ -299,6 +303,7 @@ export const useGravityAd = (options?: { messages: adMessages, sessionId: useChatStore.getState().chatSessionId, device: getDeviceInfo(), + ...(surface ? { surface } : {}), // Carbon requires a real browser-ish useragent for targeting/fraud // detection. Gravity ignores it. We source one centrally so every // provider that needs it sees the same value. @@ -430,7 +435,7 @@ export const useGravityAd = (options?: { clearInterval(id) ctrlRef.current.intervalId = null } - }, [shouldStart, shouldHideAds, provider, fallbackProvider]) + }, [shouldStart, shouldHideAds, provider, fallbackProvider, surface]) // Don't return ad when ads should be hidden const visible = shouldStart && !shouldHideAds diff --git a/web/src/app/api/v1/ads/_post.ts b/web/src/app/api/v1/ads/_post.ts index fc1fa07a5..a56846b05 100644 --- a/web/src/app/api/v1/ads/_post.ts +++ b/web/src/app/api/v1/ads/_post.ts @@ -35,12 +35,14 @@ const deviceSchema = z.object({ }) const providerSchema = z.enum(['gravity', 'carbon']).default('gravity') +const surfaceSchema = z.enum(['waiting_room']) const bodySchema = z.object({ provider: providerSchema.optional(), messages: z.array(messageSchema).optional().default([]), sessionId: z.string().optional(), device: deviceSchema.optional(), + surface: surfaceSchema.optional(), /** Browser/CLI useragent passed through to providers that require it. */ userAgent: z.string().optional(), }) @@ -136,6 +138,7 @@ export async function postAds(params: { clientIp, userAgent, device: parsedBody.device, + surface: parsedBody.surface, messages: parsedBody.messages, testMode: serverEnv.CB_ENVIRONMENT !== 'prod', logger, diff --git a/web/src/lib/ad-providers/gravity.ts b/web/src/lib/ad-providers/gravity.ts index ed9209cb0..4ae33b514 100644 --- a/web/src/lib/ad-providers/gravity.ts +++ b/web/src/lib/ad-providers/gravity.ts @@ -19,6 +19,12 @@ const CHOICE_PLACEMENT_IDS = [ 'choice-ad-3', 'choice-ad-4', ] +const WAITING_ROOM_PLACEMENT_IDS = [ + 'waiting-room-1', + 'waiting-room-2', + 'waiting-room-3', + 'waiting-room-4', +] type GravityRawAd = { adText: string @@ -105,16 +111,21 @@ export function createGravityProvider(config: { apiKey: string }): AdProvider { fetch, } = input - const variant = getGravityVariant(userId) + const variant = + input.surface === 'waiting_room' ? 'choice' : getGravityVariant(userId) const filteredMessages = prepareGravityMessages(messages) - const placements = - variant === 'choice' - ? CHOICE_PLACEMENT_IDS.map((id) => ({ - placement: 'below_response', - placement_id: id, - })) - : [{ placement: 'below_response', placement_id: BANNER_PLACEMENT_ID }] + const placementIds = + input.surface === 'waiting_room' + ? WAITING_ROOM_PLACEMENT_IDS + : variant === 'choice' + ? CHOICE_PLACEMENT_IDS + : [BANNER_PLACEMENT_ID] + + const placements = placementIds.map((id) => ({ + placement: 'below_response', + placement_id: id, + })) const deviceBody = clientIp ? { diff --git a/web/src/lib/ad-providers/types.ts b/web/src/lib/ad-providers/types.ts index 5b664332b..fb3284e2a 100644 --- a/web/src/lib/ad-providers/types.ts +++ b/web/src/lib/ad-providers/types.ts @@ -41,6 +41,8 @@ export type AdDeviceInfo = { locale?: string } +export type AdSurface = 'waiting_room' + export type FetchAdInput = { userId: string userEmail: string | null @@ -50,6 +52,8 @@ export type FetchAdInput = { /** Browser/CLI useragent string, passed through to upstream. */ userAgent?: string device?: AdDeviceInfo + /** Product surface requesting the ad. Providers may map this to placements. */ + surface?: AdSurface /** Last user + last preceding assistant message, if any. Used by Gravity. */ messages?: AdMessage[] /** Set in non-prod so providers can request test ads. */ From 6dfbb3b28a24d4df5d145b56dd7785df76c69f96 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Sat, 25 Apr 2026 14:46:04 -0700 Subject: [PATCH 2/2] Route Kimi K2.6 requests through CanopyWave (#550) Co-authored-by: Claude Opus 4.7 (1M context) --- scripts/test-canopywave-long.ts | 7 +-- web/src/app/api/v1/chat/completions/_post.ts | 15 ++--- web/src/llm-api/canopywave.ts | 63 +++++++++++++++----- 3 files changed, 58 insertions(+), 27 deletions(-) diff --git a/scripts/test-canopywave-long.ts b/scripts/test-canopywave-long.ts index 827bd4c7f..052ba1c07 100644 --- a/scripts/test-canopywave-long.ts +++ b/scripts/test-canopywave-long.ts @@ -33,11 +33,10 @@ const MODEL_CONFIGS: Record = { outputCostPerToken: 1.20 / 1_000_000, }, kimi: { - // Pricing is approximate — based on public Moonshot k2 rates; CanopyWave may differ. id: 'moonshotai/kimi-k2.6', - inputCostPerToken: 0.60 / 1_000_000, - cachedInputCostPerToken: 0.15 / 1_000_000, - outputCostPerToken: 2.50 / 1_000_000, + inputCostPerToken: 0.95 / 1_000_000, + cachedInputCostPerToken: 0.16 / 1_000_000, + outputCostPerToken: 4.00 / 1_000_000, }, } diff --git a/web/src/app/api/v1/chat/completions/_post.ts b/web/src/app/api/v1/chat/completions/_post.ts index 1f71b7792..13baada65 100644 --- a/web/src/app/api/v1/chat/completions/_post.ts +++ b/web/src/app/api/v1/chat/completions/_post.ts @@ -532,9 +532,10 @@ export async function postChatCompletions(params: { if (bodyStream) { // Streaming request — route to SiliconFlow/CanopyWave/Fireworks for supported models const useSiliconFlow = false // isSiliconFlowModel(typedBody.model) - const useCanopyWave = false // isCanopyWaveModel(typedBody.model) - const useFireworks = isFireworksModel(typedBody.model) - const useOpenAIDirect = !useFireworks && isOpenAIDirectModel(typedBody.model) + const useCanopyWave = isCanopyWaveModel(typedBody.model) + const useFireworks = !useCanopyWave && isFireworksModel(typedBody.model) + const useOpenAIDirect = + !useCanopyWave && !useFireworks && isOpenAIDirectModel(typedBody.model) const stream = useSiliconFlow ? await handleSiliconFlowStream({ body: typedBody, @@ -606,12 +607,12 @@ export async function postChatCompletions(params: { }) } else { // Non-streaming request — route to SiliconFlow/CanopyWave/Fireworks for supported models - // TEMPORARILY DISABLED: route through OpenRouter const model = typedBody.model const useSiliconFlow = false // isSiliconFlowModel(model) - const useCanopyWave = false // isCanopyWaveModel(model) - const useFireworks = isFireworksModel(model) - const shouldUseOpenAIEndpoint = !useFireworks && isOpenAIDirectModel(model) + const useCanopyWave = isCanopyWaveModel(model) + const useFireworks = !useCanopyWave && isFireworksModel(model) + const shouldUseOpenAIEndpoint = + !useCanopyWave && !useFireworks && isOpenAIDirectModel(model) const nonStreamRequest = useSiliconFlow ? handleSiliconFlowNonStream({ diff --git a/web/src/llm-api/canopywave.ts b/web/src/llm-api/canopywave.ts index 0db3e0f9c..9a5b2ba12 100644 --- a/web/src/llm-api/canopywave.ts +++ b/web/src/llm-api/canopywave.ts @@ -26,17 +26,52 @@ const canopywaveAgent = new Agent({ bodyTimeout: 0, }) -/** Map from OpenRouter model IDs to CanopyWave model IDs */ -const CANOPYWAVE_MODEL_MAP: Record = { - 'minimax/minimax-m2.5': 'minimax/minimax-m2.5', +// CanopyWave per-token pricing (dollars per token) +interface CanopyWavePricing { + inputCostPerToken: number + cachedInputCostPerToken: number + outputCostPerToken: number +} + +/** Single source of truth: which OpenRouter model IDs we route through + * CanopyWave, the corresponding CanopyWave model ID, and per-model pricing. + * Kept as one map so adding a model can't drift between routing and billing. */ +const CANOPYWAVE_MODELS: Record< + string, + { canopywaveId: string; pricing: CanopyWavePricing } +> = { + 'minimax/minimax-m2.5': { + canopywaveId: 'minimax/minimax-m2.5', + pricing: { + inputCostPerToken: 0.27 / 1_000_000, + cachedInputCostPerToken: 0.03 / 1_000_000, + outputCostPerToken: 1.08 / 1_000_000, + }, + }, + 'moonshotai/kimi-k2.6': { + canopywaveId: 'moonshotai/kimi-k2.6', + pricing: { + inputCostPerToken: 0.95 / 1_000_000, + cachedInputCostPerToken: 0.16 / 1_000_000, + outputCostPerToken: 4.00 / 1_000_000, + }, + }, } export function isCanopyWaveModel(model: string): boolean { - return model in CANOPYWAVE_MODEL_MAP + return model in CANOPYWAVE_MODELS } function getCanopyWaveModelId(openrouterModel: string): string { - return CANOPYWAVE_MODEL_MAP[openrouterModel] ?? openrouterModel + return CANOPYWAVE_MODELS[openrouterModel]?.canopywaveId ?? openrouterModel +} + +function getCanopyWavePricing(model: string): CanopyWavePricing { + const entry = CANOPYWAVE_MODELS[model] + if (!entry) { + throw new Error(`No CanopyWave pricing found for model: ${model}`) + } + return entry.pricing } type StreamState = { responseText: string; reasoningText: string; ttftMs: number | null; billedAlready: boolean } @@ -85,12 +120,7 @@ function createCanopyWaveRequest(params: { }) } -// CanopyWave per-token pricing (dollars per token) for MiniMax M2.5 -const CANOPYWAVE_INPUT_COST_PER_TOKEN = 0.27 / 1_000_000 -const CANOPYWAVE_CACHED_INPUT_COST_PER_TOKEN = 0.03 / 1_000_000 -const CANOPYWAVE_OUTPUT_COST_PER_TOKEN = 1.08 / 1_000_000 - -function extractUsageAndCost(usage: Record | undefined | null): UsageData { +function extractUsageAndCost(usage: Record | undefined | null, model: string): UsageData { if (!usage) return { inputTokens: 0, outputTokens: 0, cacheReadInputTokens: 0, reasoningTokens: 0, cost: 0 } const promptDetails = usage.prompt_tokens_details as Record | undefined | null const completionDetails = usage.completion_tokens_details as Record | undefined | null @@ -100,11 +130,12 @@ function extractUsageAndCost(usage: Record | undefined | null): const cacheReadInputTokens = typeof promptDetails?.cached_tokens === 'number' ? promptDetails.cached_tokens : 0 const reasoningTokens = typeof completionDetails?.reasoning_tokens === 'number' ? completionDetails.reasoning_tokens : 0 + const pricing = getCanopyWavePricing(model) const nonCachedInputTokens = Math.max(0, inputTokens - cacheReadInputTokens) const cost = - nonCachedInputTokens * CANOPYWAVE_INPUT_COST_PER_TOKEN + - cacheReadInputTokens * CANOPYWAVE_CACHED_INPUT_COST_PER_TOKEN + - outputTokens * CANOPYWAVE_OUTPUT_COST_PER_TOKEN + nonCachedInputTokens * pricing.inputCostPerToken + + cacheReadInputTokens * pricing.cachedInputCostPerToken + + outputTokens * pricing.outputCostPerToken return { inputTokens, outputTokens, cacheReadInputTokens, reasoningTokens, cost } } @@ -139,7 +170,7 @@ export async function handleCanopyWaveNonStream({ const data = await response.json() const content = data.choices?.[0]?.message?.content ?? '' const reasoningText = data.choices?.[0]?.message?.reasoning_content ?? data.choices?.[0]?.message?.reasoning ?? '' - const usageData = extractUsageAndCost(data.usage) + const usageData = extractUsageAndCost(data.usage, originalModel) insertMessageToBigQuery({ messageId: data.id, @@ -453,7 +484,7 @@ async function handleResponse({ return { state } } - const usageData = extractUsageAndCost(data.usage as Record) + const usageData = extractUsageAndCost(data.usage as Record, originalModel) const messageId = typeof data.id === 'string' ? data.id : 'unknown' state.billedAlready = true