Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cli/src/components/waiting-room-screen.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ export const WaitingRoomScreen: React.FC<WaitingRoomScreenProps> = ({
forceStart: true,
provider: 'gravity',
fallbackProvider: 'carbon',
surface: 'waiting_room',
})

useFreebuffCtrlCExit()
Expand Down
7 changes: 6 additions & 1 deletion cli/src/hooks/use-gravity-ad.ts
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ export type AdVariant = 'banner' | 'choice'
* same normalized response shape, so the rest of the hook is provider-agnostic.
*/
export type AdProvider = 'gravity' | 'carbon'
/**
 * Product surface that is requesting an ad. When set, it is included in the
 * ad request payload so the server can map it to placements.
 * `'waiting_room'` is the only surface defined so far.
 */
export type AdSurface = 'waiting_room'

export type AdData =
| { variant: 'banner'; ad: AdResponse }
Expand Down Expand Up @@ -112,11 +113,14 @@ export const useGravityAd = (options?: {
provider?: AdProvider
/** Backup ad network to try when the primary returns no fill or errors. */
fallbackProvider?: AdProvider
/** Product surface requesting the ad. The server maps this to placements. */
surface?: AdSurface
}): GravityAdState => {
const enabled = options?.enabled ?? true
const forceStart = options?.forceStart ?? false
const provider: AdProvider = options?.provider ?? 'gravity'
const fallbackProvider = options?.fallbackProvider
const surface = options?.surface
const [ad, setAd] = useState<AdResponse | null>(null)
const [adData, setAdData] = useState<AdData | null>(null)
const [isLoading, setIsLoading] = useState(false)
Expand Down Expand Up @@ -299,6 +303,7 @@ export const useGravityAd = (options?: {
messages: adMessages,
sessionId: useChatStore.getState().chatSessionId,
device: getDeviceInfo(),
...(surface ? { surface } : {}),
// Carbon requires a real browser-ish useragent for targeting/fraud
// detection. Gravity ignores it. We source one centrally so every
// provider that needs it sees the same value.
Expand Down Expand Up @@ -430,7 +435,7 @@ export const useGravityAd = (options?: {
clearInterval(id)
ctrlRef.current.intervalId = null
}
}, [shouldStart, shouldHideAds, provider, fallbackProvider])
}, [shouldStart, shouldHideAds, provider, fallbackProvider, surface])

// Don't return ad when ads should be hidden
const visible = shouldStart && !shouldHideAds
Expand Down
7 changes: 3 additions & 4 deletions scripts/test-canopywave-long.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,10 @@ const MODEL_CONFIGS: Record<string, ModelConfig> = {
outputCostPerToken: 1.20 / 1_000_000,
},
kimi: {
// Pricing is approximate — based on public Moonshot k2 rates; CanopyWave may differ.
id: 'moonshotai/kimi-k2.6',
inputCostPerToken: 0.60 / 1_000_000,
cachedInputCostPerToken: 0.15 / 1_000_000,
outputCostPerToken: 2.50 / 1_000_000,
inputCostPerToken: 0.95 / 1_000_000,
cachedInputCostPerToken: 0.16 / 1_000_000,
outputCostPerToken: 4.00 / 1_000_000,
},
}

Expand Down
3 changes: 3 additions & 0 deletions web/src/app/api/v1/ads/_post.ts
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,14 @@ const deviceSchema = z.object({
})

// Ad networks a client may ask for; the schema itself defaults to 'gravity'.
const providerSchema = z.enum(['gravity', 'carbon']).default('gravity')
// Product surfaces a client may request an ad for (currently only the waiting room).
const surfaceSchema = z.enum(['waiting_room'])

/** Validation schema for the ads POST request body; every field may be omitted. */
const bodySchema = z.object({
// NOTE(review): `.optional()` wraps a schema that already carries
// `.default('gravity')`. Whether an omitted provider parses to 'gravity' or to
// undefined depends on the zod major version in use — confirm.
provider: providerSchema.optional(),
// Missing `messages` parses to an empty array.
messages: z.array(messageSchema).optional().default([]),
sessionId: z.string().optional(),
device: deviceSchema.optional(),
// Requesting product surface; forwarded unchanged to the ad provider call.
surface: surfaceSchema.optional(),
/** Browser/CLI useragent passed through to providers that require it. */
userAgent: z.string().optional(),
})
Expand Down Expand Up @@ -136,6 +138,7 @@ export async function postAds(params: {
clientIp,
userAgent,
device: parsedBody.device,
surface: parsedBody.surface,
messages: parsedBody.messages,
testMode: serverEnv.CB_ENVIRONMENT !== 'prod',
logger,
Expand Down
15 changes: 8 additions & 7 deletions web/src/app/api/v1/chat/completions/_post.ts
Original file line number Diff line number Diff line change
Expand Up @@ -532,9 +532,10 @@ export async function postChatCompletions(params: {
if (bodyStream) {
// Streaming request — route to SiliconFlow/CanopyWave/Fireworks for supported models
const useSiliconFlow = false // isSiliconFlowModel(typedBody.model)
const useCanopyWave = false // isCanopyWaveModel(typedBody.model)
const useFireworks = isFireworksModel(typedBody.model)
const useOpenAIDirect = !useFireworks && isOpenAIDirectModel(typedBody.model)
const useCanopyWave = isCanopyWaveModel(typedBody.model)
const useFireworks = !useCanopyWave && isFireworksModel(typedBody.model)
const useOpenAIDirect =
!useCanopyWave && !useFireworks && isOpenAIDirectModel(typedBody.model)
const stream = useSiliconFlow
? await handleSiliconFlowStream({
body: typedBody,
Expand Down Expand Up @@ -606,12 +607,12 @@ export async function postChatCompletions(params: {
})
} else {
// Non-streaming request — route to SiliconFlow/CanopyWave/Fireworks for supported models
// TEMPORARILY DISABLED: route through OpenRouter
const model = typedBody.model
const useSiliconFlow = false // isSiliconFlowModel(model)
const useCanopyWave = false // isCanopyWaveModel(model)
const useFireworks = isFireworksModel(model)
const shouldUseOpenAIEndpoint = !useFireworks && isOpenAIDirectModel(model)
const useCanopyWave = isCanopyWaveModel(model)
const useFireworks = !useCanopyWave && isFireworksModel(model)
const shouldUseOpenAIEndpoint =
!useCanopyWave && !useFireworks && isOpenAIDirectModel(model)

const nonStreamRequest = useSiliconFlow
? handleSiliconFlowNonStream({
Expand Down
27 changes: 19 additions & 8 deletions web/src/lib/ad-providers/gravity.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,12 @@ const CHOICE_PLACEMENT_IDS = [
'choice-ad-3',
'choice-ad-4',
]
/**
 * Placement IDs requested when the ad surface is the waiting room:
 * 'waiting-room-1' through 'waiting-room-4'.
 */
const WAITING_ROOM_PLACEMENT_IDS = Array.from(
  { length: 4 },
  (_, index) => `waiting-room-${index + 1}`,
)

type GravityRawAd = {
adText: string
Expand Down Expand Up @@ -105,16 +111,21 @@ export function createGravityProvider(config: { apiKey: string }): AdProvider {
fetch,
} = input

const variant = getGravityVariant(userId)
const variant =
input.surface === 'waiting_room' ? 'choice' : getGravityVariant(userId)
const filteredMessages = prepareGravityMessages(messages)

const placements =
variant === 'choice'
? CHOICE_PLACEMENT_IDS.map((id) => ({
placement: 'below_response',
placement_id: id,
}))
: [{ placement: 'below_response', placement_id: BANNER_PLACEMENT_ID }]
const placementIds =
input.surface === 'waiting_room'
? WAITING_ROOM_PLACEMENT_IDS
: variant === 'choice'
? CHOICE_PLACEMENT_IDS
: [BANNER_PLACEMENT_ID]

const placements = placementIds.map((id) => ({
placement: 'below_response',
placement_id: id,
}))

const deviceBody = clientIp
? {
Expand Down
4 changes: 4 additions & 0 deletions web/src/lib/ad-providers/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ export type AdDeviceInfo = {
locale?: string
}

/**
 * Product surface an ad is requested for. `'waiting_room'` is the only
 * surface defined so far.
 */
export type AdSurface = 'waiting_room'

export type FetchAdInput = {
userId: string
userEmail: string | null
Expand All @@ -50,6 +52,8 @@ export type FetchAdInput = {
/** Browser/CLI useragent string, passed through to upstream. */
userAgent?: string
device?: AdDeviceInfo
/** Product surface requesting the ad. Providers may map this to placements. */
surface?: AdSurface
/** Last user + last preceding assistant message, if any. Used by Gravity. */
messages?: AdMessage[]
/** Set in non-prod so providers can request test ads. */
Expand Down
63 changes: 47 additions & 16 deletions web/src/llm-api/canopywave.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,17 +26,52 @@ const canopywaveAgent = new Agent({
bodyTimeout: 0,
})

/** Map from OpenRouter model IDs to CanopyWave model IDs */
const CANOPYWAVE_MODEL_MAP: Record<string, string> = {
'minimax/minimax-m2.5': 'minimax/minimax-m2.5',
/** CanopyWave per-token pricing, expressed in dollars per single token. */
type CanopyWavePricing = {
  /** Rate applied to each non-cached input token. */
  inputCostPerToken: number
  /** Rate applied to each input token read from cache. */
  cachedInputCostPerToken: number
  /** Rate applied to each output token. */
  outputCostPerToken: number
}

/** Routing target and billing rates for one model served through CanopyWave. */
type CanopyWaveModelEntry = { canopywaveId: string; pricing: CanopyWavePricing }

/** Converts a dollars-per-million-tokens rate into dollars per single token. */
const perMillionTokens = (dollars: number): number => dollars / 1_000_000

/**
 * Registry of the models routed through CanopyWave, keyed by OpenRouter model
 * ID. Every entry pairs the CanopyWave-side model ID with that model's
 * pricing, so routing and billing live in one table and cannot drift apart
 * when a model is added.
 */
const CANOPYWAVE_MODELS: Record<string, CanopyWaveModelEntry> = {
  'minimax/minimax-m2.5': {
    canopywaveId: 'minimax/minimax-m2.5',
    pricing: {
      inputCostPerToken: perMillionTokens(0.27),
      cachedInputCostPerToken: perMillionTokens(0.03),
      outputCostPerToken: perMillionTokens(1.08),
    },
  },
  'moonshotai/kimi-k2.6': {
    canopywaveId: 'moonshotai/kimi-k2.6',
    pricing: {
      inputCostPerToken: perMillionTokens(0.95),
      cachedInputCostPerToken: perMillionTokens(0.16),
      outputCostPerToken: perMillionTokens(4.0),
    },
  },
}

/**
 * Reports whether the given OpenRouter model ID is routed through CanopyWave.
 *
 * Only own keys of `CANOPYWAVE_MODELS` count. The `in` operator would also
 * match inherited property names such as `'constructor'` or `'toString'`,
 * which would mis-route a request whose model string happens to equal one.
 *
 * @param model OpenRouter model ID taken from the request.
 * @returns `true` when the model has an entry in `CANOPYWAVE_MODELS`.
 */
export function isCanopyWaveModel(model: string): boolean {
  return Object.prototype.hasOwnProperty.call(CANOPYWAVE_MODELS, model)
}

/**
 * Translates an OpenRouter model ID into the model ID CanopyWave expects.
 *
 * @param openrouterModel OpenRouter model ID taken from the request.
 * @returns The mapped CanopyWave model ID, or the input unchanged when the
 *   model has no entry in `CANOPYWAVE_MODELS`.
 */
function getCanopyWaveModelId(openrouterModel: string): string {
  return CANOPYWAVE_MODELS[openrouterModel]?.canopywaveId ?? openrouterModel
}

/**
 * Looks up the per-token pricing for a model served through CanopyWave.
 *
 * @param model OpenRouter model ID used as the key into `CANOPYWAVE_MODELS`.
 * @returns The pricing registered for that model.
 * @throws Error when the model has no entry, so an unknown model is never
 *   billed with missing rates.
 */
function getCanopyWavePricing(model: string): CanopyWavePricing {
  // Own-property guard: a bare index lookup also resolves inherited keys such
  // as 'constructor', producing a truthy non-entry whose `pricing` is
  // undefined and whose cost would therefore compute to NaN.
  const entry = Object.prototype.hasOwnProperty.call(CANOPYWAVE_MODELS, model)
    ? CANOPYWAVE_MODELS[model]
    : undefined
  if (!entry) {
    throw new Error(`No CanopyWave pricing found for model: ${model}`)
  }
  return entry.pricing
}

type StreamState = { responseText: string; reasoningText: string; ttftMs: number | null; billedAlready: boolean }
Expand Down Expand Up @@ -85,12 +120,7 @@ function createCanopyWaveRequest(params: {
})
}

// CanopyWave per-token pricing (dollars per token) for MiniMax M2.5
const CANOPYWAVE_INPUT_COST_PER_TOKEN = 0.27 / 1_000_000
const CANOPYWAVE_CACHED_INPUT_COST_PER_TOKEN = 0.03 / 1_000_000
const CANOPYWAVE_OUTPUT_COST_PER_TOKEN = 1.08 / 1_000_000

function extractUsageAndCost(usage: Record<string, unknown> | undefined | null): UsageData {
function extractUsageAndCost(usage: Record<string, unknown> | undefined | null, model: string): UsageData {
if (!usage) return { inputTokens: 0, outputTokens: 0, cacheReadInputTokens: 0, reasoningTokens: 0, cost: 0 }
const promptDetails = usage.prompt_tokens_details as Record<string, unknown> | undefined | null
const completionDetails = usage.completion_tokens_details as Record<string, unknown> | undefined | null
Expand All @@ -100,11 +130,12 @@ function extractUsageAndCost(usage: Record<string, unknown> | undefined | null):
const cacheReadInputTokens = typeof promptDetails?.cached_tokens === 'number' ? promptDetails.cached_tokens : 0
const reasoningTokens = typeof completionDetails?.reasoning_tokens === 'number' ? completionDetails.reasoning_tokens : 0

const pricing = getCanopyWavePricing(model)
const nonCachedInputTokens = Math.max(0, inputTokens - cacheReadInputTokens)
const cost =
nonCachedInputTokens * CANOPYWAVE_INPUT_COST_PER_TOKEN +
cacheReadInputTokens * CANOPYWAVE_CACHED_INPUT_COST_PER_TOKEN +
outputTokens * CANOPYWAVE_OUTPUT_COST_PER_TOKEN
nonCachedInputTokens * pricing.inputCostPerToken +
cacheReadInputTokens * pricing.cachedInputCostPerToken +
outputTokens * pricing.outputCostPerToken

return { inputTokens, outputTokens, cacheReadInputTokens, reasoningTokens, cost }
}
Expand Down Expand Up @@ -139,7 +170,7 @@ export async function handleCanopyWaveNonStream({
const data = await response.json()
const content = data.choices?.[0]?.message?.content ?? ''
const reasoningText = data.choices?.[0]?.message?.reasoning_content ?? data.choices?.[0]?.message?.reasoning ?? ''
const usageData = extractUsageAndCost(data.usage)
const usageData = extractUsageAndCost(data.usage, originalModel)

insertMessageToBigQuery({
messageId: data.id,
Expand Down Expand Up @@ -453,7 +484,7 @@ async function handleResponse({
return { state }
}

const usageData = extractUsageAndCost(data.usage as Record<string, unknown>)
const usageData = extractUsageAndCost(data.usage as Record<string, unknown>, originalModel)
const messageId = typeof data.id === 'string' ? data.id : 'unknown'

state.billedAlready = true
Expand Down
Loading