From 4f907c1ffbf68970f0438e93a73160b31da63c32 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Sat, 25 Apr 2026 12:41:53 -0700 Subject: [PATCH 1/2] canopy wave kimi --- agents/__tests__/editor.test.ts | 22 +-- agents/base2/base2.ts | 2 +- agents/editor/editor-lite.ts | 2 +- agents/editor/editor.ts | 8 +- agents/reviewer/code-reviewer-lite.ts | 2 +- .../components/freebuff-model-selector.tsx | 6 +- common/src/constants/free-agents.ts | 6 +- common/src/constants/freebuff-models.ts | 15 +- web/src/app/api/v1/chat/completions/_post.ts | 15 +- .../session/__tests__/session.test.ts | 13 -- web/src/llm-api/canopywave.ts | 41 ++++-- .../free-session/__tests__/public-api.test.ts | 134 ++++++++---------- .../__tests__/session-view.test.ts | 2 +- web/src/server/free-session/config.ts | 2 +- web/src/server/free-session/public-api.ts | 4 +- 15 files changed, 132 insertions(+), 142 deletions(-) diff --git a/agents/__tests__/editor.test.ts b/agents/__tests__/editor.test.ts index 36d6b75c5c..f9731f3a13 100644 --- a/agents/__tests__/editor.test.ts +++ b/agents/__tests__/editor.test.ts @@ -62,9 +62,9 @@ describe('editor agent', () => { expect(gpt5Editor.model).toBe('openai/gpt-5.1') }) - test('creates glm editor', () => { - const glmEditor = createCodeEditor({ model: 'glm' }) - expect(glmEditor.model).toBe('z-ai/glm-5.1') + test('creates kimi editor', () => { + const kimiEditor = createCodeEditor({ model: 'kimi' }) + expect(kimiEditor.model).toBe('moonshotai/kimi-k2.6') }) test('creates minimax editor', () => { @@ -78,10 +78,10 @@ describe('editor agent', () => { expect(gpt5Editor.instructionsPrompt).not.toContain('') }) - test('glm editor does not include think tags in instructions', () => { - const glmEditor = createCodeEditor({ model: 'glm' }) - expect(glmEditor.instructionsPrompt).not.toContain('') - expect(glmEditor.instructionsPrompt).not.toContain('') + test('kimi editor does not include think tags in instructions', () => { + const kimiEditor = createCodeEditor({ model: 'kimi' }) + expect(kimiEditor.instructionsPrompt).not.toContain('') + expect(kimiEditor.instructionsPrompt).not.toContain('') }) test('minimax editor does not include think tags in instructions', () => { @@ -99,17 +99,17 @@ describe('editor agent', () => { test('all variants have same base properties', () => { const opusEditor = createCodeEditor({ model: 'opus' }) const gpt5Editor = createCodeEditor({ model: 'gpt-5' }) - const glmEditor = createCodeEditor({ model: 'glm' }) + const kimiEditor = createCodeEditor({ model: 'kimi' }) // All should have same basic structure expect(opusEditor.displayName).toBe(gpt5Editor.displayName) - expect(gpt5Editor.displayName).toBe(glmEditor.displayName) + expect(gpt5Editor.displayName).toBe(kimiEditor.displayName) expect(opusEditor.outputMode).toBe(gpt5Editor.outputMode) - expect(gpt5Editor.outputMode).toBe(glmEditor.outputMode) + expect(gpt5Editor.outputMode).toBe(kimiEditor.outputMode) expect(opusEditor.toolNames).toEqual(gpt5Editor.toolNames) - expect(gpt5Editor.toolNames).toEqual(glmEditor.toolNames) + expect(gpt5Editor.toolNames).toEqual(kimiEditor.toolNames) }) }) diff --git a/agents/base2/base2.ts b/agents/base2/base2.ts index 1a81f948bf..b1e24efff6 100644 --- a/agents/base2/base2.ts +++ b/agents/base2/base2.ts @@ -25,7 +25,7 @@ export function createBase2( const isFree = mode === 'free' || mode === 'lite' const isSonnet = false - const model = isFree ? 'z-ai/glm-5.1' : 'anthropic/claude-opus-4.7' + const model = isFree ? 'moonshotai/kimi-k2.6' : 'anthropic/claude-opus-4.7' return { publisher, diff --git a/agents/editor/editor-lite.ts b/agents/editor/editor-lite.ts index 29225f0c29..6dbb4bb3c6 100644 --- a/agents/editor/editor-lite.ts +++ b/agents/editor/editor-lite.ts @@ -3,7 +3,7 @@ import { createCodeEditor } from './editor' import type { AgentDefinition } from '../types/agent-definition' const definition: AgentDefinition = { - ...createCodeEditor({ model: 'glm' }), + ...createCodeEditor({ model: 'kimi' }), id: 'editor-lite', } export default definition diff --git a/agents/editor/editor.ts b/agents/editor/editor.ts index c98544d0f2..34d3382eb4 100644 --- a/agents/editor/editor.ts +++ b/agents/editor/editor.ts @@ -4,7 +4,7 @@ import { publisher } from '../constants' import type { AgentDefinition } from '../types/agent-definition' export const createCodeEditor = (options: { - model: 'gpt-5' | 'opus' | 'glm' | 'minimax' + model: 'gpt-5' | 'opus' | 'kimi' | 'minimax' }): Omit => { const { model } = options return { @@ -14,8 +14,8 @@ export const createCodeEditor = (options: { ? 'openai/gpt-5.1' : options.model === 'minimax' ? 'minimax/minimax-m2.7' - : options.model === 'glm' - ? 'z-ai/glm-5.1' + : options.model === 'kimi' + ? 'moonshotai/kimi-k2.6' : 'anthropic/claude-opus-4.7', ...(options.model === 'opus' && { providerOptions: { @@ -67,7 +67,7 @@ OR for new files or major rewrites: } -${model === 'gpt-5' || model === 'glm' || model === 'minimax' +${model === 'gpt-5' || model === 'kimi' || model === 'minimax' ? '' : `Before you start writing your implementation, you should use tags to think about the best way to implement the changes. diff --git a/agents/reviewer/code-reviewer-lite.ts b/agents/reviewer/code-reviewer-lite.ts index feafb87c45..888cadf4f7 100644 --- a/agents/reviewer/code-reviewer-lite.ts +++ b/agents/reviewer/code-reviewer-lite.ts @@ -5,7 +5,7 @@ import { createReviewer } from './code-reviewer' const definition: SecretAgentDefinition = { id: 'code-reviewer-lite', publisher, - ...createReviewer('z-ai/glm-5.1'), + ...createReviewer('moonshotai/kimi-k2.6'), } export default definition diff --git a/cli/src/components/freebuff-model-selector.tsx b/cli/src/components/freebuff-model-selector.tsx index a453a15389..1bcda80a76 100644 --- a/cli/src/components/freebuff-model-selector.tsx +++ b/cli/src/components/freebuff-model-selector.tsx @@ -5,7 +5,7 @@ import React, { useCallback, useEffect, useMemo, useState } from 'react' import { Button } from './button' import { FALLBACK_FREEBUFF_MODEL_ID, - FREEBUFF_GLM_MODEL_ID, + FREEBUFF_KIMI_MODEL_ID, FREEBUFF_MODELS, getFreebuffDeploymentAvailabilityLabel, isFreebuffModelAvailable, @@ -25,8 +25,8 @@ import { import type { KeyEvent } from '@opentui/core' const FREEBUFF_MODEL_SELECTOR_MODELS = [ - ...FREEBUFF_MODELS.filter((model) => model.id === FREEBUFF_GLM_MODEL_ID), - ...FREEBUFF_MODELS.filter((model) => model.id !== FREEBUFF_GLM_MODEL_ID), + ...FREEBUFF_MODELS.filter((model) => model.id === FREEBUFF_KIMI_MODEL_ID), + ...FREEBUFF_MODELS.filter((model) => model.id !== FREEBUFF_KIMI_MODEL_ID), ] /** diff --git a/common/src/constants/free-agents.ts b/common/src/constants/free-agents.ts index 308e12df6d..4a2a4a147e 100644 --- a/common/src/constants/free-agents.ts +++ b/common/src/constants/free-agents.ts @@ -28,7 +28,7 @@ export const FREE_MODE_AGENT_MODELS: Record> = { // Root orchestrator 'base2-free': new Set([ 'minimax/minimax-m2.7', - 'z-ai/glm-5.1', + 'moonshotai/kimi-k2.6', ]), // File exploration agents @@ -46,13 +46,13 @@ export const FREE_MODE_AGENT_MODELS: Record> = { // Editor for free mode 'editor-lite': new Set([ 'minimax/minimax-m2.7', - 'z-ai/glm-5.1', + 'moonshotai/kimi-k2.6', ]), // Code reviewer for free mode 'code-reviewer-lite': new Set([ 'minimax/minimax-m2.7', - 'z-ai/glm-5.1', + 'moonshotai/kimi-k2.6', ]), } diff --git a/common/src/constants/freebuff-models.ts b/common/src/constants/freebuff-models.ts index 8b3e9d82d9..3f4c91a082 100644 --- a/common/src/constants/freebuff-models.ts +++ b/common/src/constants/freebuff-models.ts @@ -21,7 +21,7 @@ export interface FreebuffModelOption { * the caller's local timezone. The CLI should render * `getFreebuffDeploymentAvailabilityLabel()` instead. */ export const FREEBUFF_DEPLOYMENT_HOURS_LABEL = '9am ET-5pm PT every day' -export const FREEBUFF_GLM_MODEL_ID = 'z-ai/glm-5.1' +export const FREEBUFF_KIMI_MODEL_ID = 'moonshotai/kimi-k2.6' export const FREEBUFF_MINIMAX_MODEL_ID = 'minimax/minimax-m2.7' const FREEBUFF_EASTERN_TIMEZONE = 'America/New_York' const FREEBUFF_PACIFIC_TIMEZONE = 'America/Los_Angeles' @@ -47,20 +47,17 @@ export const FREEBUFF_MODELS = [ availability: 'always', }, { - id: FREEBUFF_GLM_MODEL_ID, - displayName: 'GLM 5.1', + id: FREEBUFF_KIMI_MODEL_ID, + displayName: 'Kimi K2.6', tagline: 'Smartest', - availability: 'deployment_hours', + availability: 'always', }, ] as const satisfies readonly FreebuffModelOption[] export type FreebuffModelId = (typeof FREEBUFF_MODELS)[number]['id'] -/** What new freebuff users see selected in the picker. May not be currently - * available (GLM is closed outside deployment hours); callers that need an - * always-available id for resolution / auto-fallbacks should use - * FALLBACK_FREEBUFF_MODEL_ID instead. */ -export const DEFAULT_FREEBUFF_MODEL_ID: FreebuffModelId = FREEBUFF_GLM_MODEL_ID +/** What new freebuff users see selected in the picker. */ +export const DEFAULT_FREEBUFF_MODEL_ID: FreebuffModelId = FREEBUFF_KIMI_MODEL_ID /** Always-available fallback used when the requested model can't be served * right now (unknown id, deployment hours closed, etc.). Kept distinct from diff --git a/web/src/app/api/v1/chat/completions/_post.ts b/web/src/app/api/v1/chat/completions/_post.ts index 1f71b77922..13baada653 100644 --- a/web/src/app/api/v1/chat/completions/_post.ts +++ b/web/src/app/api/v1/chat/completions/_post.ts @@ -532,9 +532,10 @@ export async function postChatCompletions(params: { if (bodyStream) { // Streaming request — route to SiliconFlow/CanopyWave/Fireworks for supported models const useSiliconFlow = false // isSiliconFlowModel(typedBody.model) - const useCanopyWave = false // isCanopyWaveModel(typedBody.model) - const useFireworks = isFireworksModel(typedBody.model) - const useOpenAIDirect = !useFireworks && isOpenAIDirectModel(typedBody.model) + const useCanopyWave = isCanopyWaveModel(typedBody.model) + const useFireworks = !useCanopyWave && isFireworksModel(typedBody.model) + const useOpenAIDirect = + !useCanopyWave && !useFireworks && isOpenAIDirectModel(typedBody.model) const stream = useSiliconFlow ? await handleSiliconFlowStream({ body: typedBody, @@ -606,12 +607,12 @@ export async function postChatCompletions(params: { }) } else { // Non-streaming request — route to SiliconFlow/CanopyWave/Fireworks for supported models - // TEMPORARILY DISABLED: route through OpenRouter const model = typedBody.model const useSiliconFlow = false // isSiliconFlowModel(model) - const useCanopyWave = false // isCanopyWaveModel(model) - const useFireworks = isFireworksModel(model) - const shouldUseOpenAIEndpoint = !useFireworks && isOpenAIDirectModel(model) + const useCanopyWave = isCanopyWaveModel(model) + const useFireworks = !useCanopyWave && isFireworksModel(model) + const shouldUseOpenAIEndpoint = + !useCanopyWave && !useFireworks && isOpenAIDirectModel(model) const nonStreamRequest = useSiliconFlow ? handleSiliconFlowNonStream({ diff --git a/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts b/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts index 7ed29ec4b5..4839d5348c 100644 --- a/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts +++ b/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts @@ -158,19 +158,6 @@ describe('POST /api/v1/freebuff/session', () => { expect(body.status).toBe('queued') }) - test('returns model_unavailable for GLM outside deployment hours', async () => { - const sessionDeps = makeSessionDeps() - const resp = await postFreebuffSession( - makeReq('ok', { model: 'z-ai/glm-5.1' }), - makeDeps(sessionDeps, 'u1'), - ) - expect(resp.status).toBe(409) - const body = await resp.json() - expect(body.status).toBe('model_unavailable') - expect(body.availableHours).toBe('9am ET-5pm PT every day') - expect(sessionDeps.rows.size).toBe(0) - }) - // Banned bots with valid API keys were POSTing every few seconds and // inflating queueDepth between the 15s admission-tick sweeps. Rejecting at // the HTTP layer with 403 (terminal, like country_blocked) keeps them out diff --git a/web/src/llm-api/canopywave.ts b/web/src/llm-api/canopywave.ts index 0db3e0f9cb..7854953d29 100644 --- a/web/src/llm-api/canopywave.ts +++ b/web/src/llm-api/canopywave.ts @@ -29,6 +29,7 @@ const canopywaveAgent = new Agent({ /** Map from OpenRouter model IDs to CanopyWave model IDs */ const CANOPYWAVE_MODEL_MAP: Record = { 'minimax/minimax-m2.5': 'minimax/minimax-m2.5', + 'moonshotai/kimi-k2.6': 'moonshotai/kimi-k2.6', } export function isCanopyWaveModel(model: string): boolean { @@ -85,12 +86,31 @@ function createCanopyWaveRequest(params: { }) } -// CanopyWave per-token pricing (dollars per token) for MiniMax M2.5 -const CANOPYWAVE_INPUT_COST_PER_TOKEN = 0.27 / 1_000_000 -const CANOPYWAVE_CACHED_INPUT_COST_PER_TOKEN = 0.03 / 1_000_000 -const CANOPYWAVE_OUTPUT_COST_PER_TOKEN = 1.08 / 1_000_000 +// CanopyWave per-token pricing (dollars per token), keyed by OpenRouter model ID +interface CanopyWavePricing { + inputCostPerToken: number + cachedInputCostPerToken: number + outputCostPerToken: number +} + +const CANOPYWAVE_PRICING_MAP: Record = { + 'minimax/minimax-m2.5': { + inputCostPerToken: 0.27 / 1_000_000, + cachedInputCostPerToken: 0.03 / 1_000_000, + outputCostPerToken: 1.08 / 1_000_000, + }, + 'moonshotai/kimi-k2.6': { + inputCostPerToken: 0.60 / 1_000_000, + cachedInputCostPerToken: 0.15 / 1_000_000, + outputCostPerToken: 2.50 / 1_000_000, + }, +} + +function getCanopyWavePricing(model: string): CanopyWavePricing { + return CANOPYWAVE_PRICING_MAP[model] ?? CANOPYWAVE_PRICING_MAP['moonshotai/kimi-k2.6'] +} -function extractUsageAndCost(usage: Record | undefined | null): UsageData { +function extractUsageAndCost(usage: Record | undefined | null, model: string): UsageData { if (!usage) return { inputTokens: 0, outputTokens: 0, cacheReadInputTokens: 0, reasoningTokens: 0, cost: 0 } const promptDetails = usage.prompt_tokens_details as Record | undefined | null const completionDetails = usage.completion_tokens_details as Record | undefined | null @@ -100,11 +120,12 @@ function extractUsageAndCost(usage: Record | undefined | null): const cacheReadInputTokens = typeof promptDetails?.cached_tokens === 'number' ? promptDetails.cached_tokens : 0 const reasoningTokens = typeof completionDetails?.reasoning_tokens === 'number' ? completionDetails.reasoning_tokens : 0 + const pricing = getCanopyWavePricing(model) const nonCachedInputTokens = Math.max(0, inputTokens - cacheReadInputTokens) const cost = - nonCachedInputTokens * CANOPYWAVE_INPUT_COST_PER_TOKEN + - cacheReadInputTokens * CANOPYWAVE_CACHED_INPUT_COST_PER_TOKEN + - outputTokens * CANOPYWAVE_OUTPUT_COST_PER_TOKEN + nonCachedInputTokens * pricing.inputCostPerToken + + cacheReadInputTokens * pricing.cachedInputCostPerToken + + outputTokens * pricing.outputCostPerToken return { inputTokens, outputTokens, cacheReadInputTokens, reasoningTokens, cost } } @@ -139,7 +160,7 @@ export async function handleCanopyWaveNonStream({ const data = await response.json() const content = data.choices?.[0]?.message?.content ?? '' const reasoningText = data.choices?.[0]?.message?.reasoning_content ?? data.choices?.[0]?.message?.reasoning ?? '' - const usageData = extractUsageAndCost(data.usage) + const usageData = extractUsageAndCost(data.usage, originalModel) insertMessageToBigQuery({ messageId: data.id, @@ -453,7 +474,7 @@ async function handleResponse({ return { state } } - const usageData = extractUsageAndCost(data.usage as Record) + const usageData = extractUsageAndCost(data.usage as Record, originalModel) const messageId = typeof data.id === 'string' ? data.id : 'unknown' state.billedAlready = true diff --git a/web/src/server/free-session/__tests__/public-api.test.ts b/web/src/server/free-session/__tests__/public-api.test.ts index 44d516c123..3a4bc36773 100644 --- a/web/src/server/free-session/__tests__/public-api.test.ts +++ b/web/src/server/free-session/__tests__/public-api.test.ts @@ -200,34 +200,20 @@ describe('requestSession', () => { expect(state.instanceId).toBe('inst-1') }) - test('deployment-hours-only model is unavailable outside deployment hours', async () => { - const state = await requestSession({ - userId: 'u1', - model: 'z-ai/glm-5.1', - deps, - }) - expect(state).toEqual({ - status: 'model_unavailable', - requestedModel: 'z-ai/glm-5.1', - availableHours: '9am ET-5pm PT every day', - }) - expect(deps.rows.size).toBe(0) - }) - test('queued response includes a per-model depth snapshot for the selector', async () => { deps._tick(new Date('2026-04-17T16:00:00Z')) - // Seed 2 users in MiniMax + 1 in GLM so the returned map captures both. + // Seed 2 users in MiniMax + 1 in Kimi so the returned map captures both. await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps }) deps._tick(new Date(deps._now().getTime() + 1000)) await requestSession({ userId: 'u2', model: DEFAULT_MODEL, deps }) deps._tick(new Date(deps._now().getTime() + 1000)) - await requestSession({ userId: 'u3', model: 'z-ai/glm-5.1', deps }) + await requestSession({ userId: 'u3', model: 'moonshotai/kimi-k2.6', deps }) const state = await getSessionState({ userId: 'u1', deps }) if (state.status !== 'queued') throw new Error('unreachable') expect(state.queueDepthByModel).toEqual({ [DEFAULT_MODEL]: 2, - 'z-ai/glm-5.1': 1, + 'moonshotai/kimi-k2.6': 1, }) }) @@ -302,7 +288,7 @@ describe('requestSession', () => { }) test('instant-admit: per-model capacities are independent', async () => { - // MiniMax saturated at 1 active, GLM still has room. + // MiniMax saturated at 1 active, Kimi still has room. const admitDeps = makeDeps({ getInstantAdmitCapacity: (model) => model === DEFAULT_MODEL ? 1 : 10, @@ -316,25 +302,23 @@ describe('requestSession', () => { }) const s3 = await requestSession({ userId: 'u3', - model: 'z-ai/glm-5.1', + model: 'moonshotai/kimi-k2.6', deps: admitDeps, }) expect(s2.status).toBe('queued') expect(s3.status).toBe('active') }) - // Per-user rate limit (5 GLM admissions per 20h) — the wire limit is + // Per-user rate limit (5 Kimi admissions per 20h) — the wire limit is // hard-coded in public-api.ts, so tests seed the fake admit log directly - // rather than configuring it. GLM also has deployment-hours gating, so - // these tests bump `now` into the open window (12pm ET on a weekday) - // before issuing the request. - const GLM_MODEL = 'z-ai/glm-5.1' - const GLM_LIMIT = 5 - const GLM_WINDOW_HOURS = 20 - const GLM_OPEN_TIME = new Date('2026-04-17T16:00:00Z') - - test('rate_limited: 5th GLM admit in window blocks the 6th attempt', async () => { - deps._tick(GLM_OPEN_TIME) + // rather than configuring it. + const KIMI_MODEL = 'moonshotai/kimi-k2.6' + const KIMI_LIMIT = 5 + const KIMI_WINDOW_HOURS = 20 + const KIMI_OPEN_TIME = new Date('2026-04-17T16:00:00Z') + + test('rate_limited: 5th Kimi admit in window blocks the 6th attempt', async () => { + deps._tick(KIMI_OPEN_TIME) // Seed 5 admits inside the 20h window, spaced so we can verify retryAfter // points at the oldest one sliding off. const now = deps._now() @@ -343,22 +327,22 @@ describe('requestSession', () => { for (const hoursAgo of ages) { deps.admits.push({ user_id: 'u1', - model: GLM_MODEL, + model: KIMI_MODEL, admitted_at: new Date(now.getTime() - hoursAgo * 60 * 60 * 1000), }) } const state = await requestSession({ userId: 'u1', - model: GLM_MODEL, + model: KIMI_MODEL, deps, }) expect(state.status).toBe('rate_limited') if (state.status !== 'rate_limited') throw new Error('unreachable') - expect(state.model).toBe(GLM_MODEL) - expect(state.limit).toBe(GLM_LIMIT) - expect(state.windowHours).toBe(GLM_WINDOW_HOURS) - expect(state.recentCount).toBe(GLM_LIMIT) + expect(state.model).toBe(KIMI_MODEL) + expect(state.limit).toBe(KIMI_LIMIT) + expect(state.windowHours).toBe(KIMI_WINDOW_HOURS) + expect(state.recentCount).toBe(KIMI_LIMIT) // Oldest admit is 19h ago; slot opens when it hits 20h, i.e. in 1h. expect(state.retryAfterMs).toBe(60 * 60 * 1000) // Blocked before any row is written — the user doesn't take a queue slot. @@ -366,21 +350,21 @@ describe('requestSession', () => { }) test('rate_limited: admits outside the 20h window do not count', async () => { - deps._tick(GLM_OPEN_TIME) + deps._tick(KIMI_OPEN_TIME) // 5 admits, each just over 20h old → all fall off the window. const now = deps._now() for (let i = 0; i < 5; i++) { deps.admits.push({ user_id: 'u1', - model: GLM_MODEL, + model: KIMI_MODEL, admitted_at: new Date( - now.getTime() - (GLM_WINDOW_HOURS * 60 * 60 * 1000 + 60_000 + i), + now.getTime() - (KIMI_WINDOW_HOURS * 60 * 60 * 1000 + 60_000 + i), ), }) } const state = await requestSession({ userId: 'u1', - model: GLM_MODEL, + model: KIMI_MODEL, deps, }) expect(state.status).toBe('queued') @@ -408,41 +392,41 @@ describe('requestSession', () => { expect(state.rateLimit).toBeUndefined() }) - test('queued GLM response carries the current admit count', async () => { - deps._tick(GLM_OPEN_TIME) + test('queued Kimi response carries the current admit count', async () => { + deps._tick(KIMI_OPEN_TIME) const now = deps._now() // 2 admits in the window — under the limit so the user still queues. deps.admits.push({ user_id: 'u1', - model: GLM_MODEL, + model: KIMI_MODEL, admitted_at: new Date(now.getTime() - 60 * 60 * 1000), }) deps.admits.push({ user_id: 'u1', - model: GLM_MODEL, + model: KIMI_MODEL, admitted_at: new Date(now.getTime() - 30 * 60 * 1000), }) const state = await requestSession({ userId: 'u1', - model: GLM_MODEL, + model: KIMI_MODEL, deps, }) if (state.status !== 'queued') throw new Error('unreachable') expect(state.rateLimit).toEqual({ - model: GLM_MODEL, - limit: GLM_LIMIT, - windowHours: GLM_WINDOW_HOURS, + model: KIMI_MODEL, + limit: KIMI_LIMIT, + windowHours: KIMI_WINDOW_HOURS, recentCount: 2, }) }) - test('rate_limited: takeover of an active GLM row is allowed even when at cap', async () => { - // Reclaim path: user has an active+unexpired GLM session and restarts + test('rate_limited: takeover of an active Kimi row is allowed even when at cap', async () => { + // Reclaim path: user has an active+unexpired Kimi session and restarts // the CLI. POST must rotate their instance id (takeover) and NOT reject // with rate_limited — otherwise they'd be stranded with a live session // they can't reconnect to. The 5th admission is already in the log, so // this also exercises "at the cap" rather than "over the cap". - deps._tick(GLM_OPEN_TIME) + deps._tick(KIMI_OPEN_TIME) const now = deps._now() // Seed 5 prior admits (the cap), with the latest one matching the // active row we're about to install. @@ -450,7 +434,7 @@ describe('requestSession', () => { for (const hoursAgo of ages) { deps.admits.push({ user_id: 'u1', - model: GLM_MODEL, + model: KIMI_MODEL, admitted_at: new Date(now.getTime() - hoursAgo * 60 * 60 * 1000), }) } @@ -461,7 +445,7 @@ describe('requestSession', () => { user_id: 'u1', status: 'active', active_instance_id: 'inst-pre', - model: GLM_MODEL, + model: KIMI_MODEL, queued_at: admittedAt, admitted_at: admittedAt, expires_at: new Date(admittedAt.getTime() + SESSION_LEN), @@ -471,27 +455,27 @@ describe('requestSession', () => { const state = await requestSession({ userId: 'u1', - model: GLM_MODEL, + model: KIMI_MODEL, deps, }) expect(state.status).toBe('active') if (state.status !== 'active') throw new Error('unreachable') // Instance id rotated; quota snapshot still reflects the full window. expect(state.instanceId).not.toBe('inst-pre') - expect(state.rateLimit?.recentCount).toBe(GLM_LIMIT) + expect(state.rateLimit?.recentCount).toBe(KIMI_LIMIT) }) - test('rate_limited: reclaim of a queued GLM row is allowed even when at cap', async () => { + test('rate_limited: reclaim of a queued Kimi row is allowed even when at cap', async () => { // Same reclaim exception for queued rows: if a user has already queued // (say they slipped in just before their 5th admit landed), a subsequent // POST from the same CLI must preserve their queue position instead of // flipping to rate_limited. - deps._tick(GLM_OPEN_TIME) + deps._tick(KIMI_OPEN_TIME) const now = deps._now() - for (let i = 0; i < GLM_LIMIT; i++) { + for (let i = 0; i < KIMI_LIMIT; i++) { deps.admits.push({ user_id: 'u1', - model: GLM_MODEL, + model: KIMI_MODEL, admitted_at: new Date(now.getTime() - (i + 1) * 60 * 60 * 1000), }) } @@ -500,7 +484,7 @@ describe('requestSession', () => { user_id: 'u1', status: 'queued', active_instance_id: 'inst-pre', - model: GLM_MODEL, + model: KIMI_MODEL, queued_at: queuedAt, admitted_at: null, expires_at: null, @@ -510,7 +494,7 @@ describe('requestSession', () => { const state = await requestSession({ userId: 'u1', - model: GLM_MODEL, + model: KIMI_MODEL, deps, }) expect(state.status).toBe('queued') @@ -518,20 +502,20 @@ describe('requestSession', () => { // Same position (1) since we preserved queued_at and nobody else is // ahead; the instance id rotated so any prior CLI is superseded. expect(state.instanceId).not.toBe('inst-pre') - expect(state.rateLimit?.recentCount).toBe(GLM_LIMIT) + expect(state.rateLimit?.recentCount).toBe(KIMI_LIMIT) }) - test('rate_limited: expired GLM row is not a reclaim — quota still applies', async () => { + test('rate_limited: expired Kimi row is not a reclaim — quota still applies', async () => { // The stored row's expires_at is in the past, so it doesn't represent // an in-flight session. This POST is effectively a fresh request and // must be blocked by the quota. - deps._tick(GLM_OPEN_TIME) + deps._tick(KIMI_OPEN_TIME) const now = deps._now() const ages = [19, 4, 3, 2, 1] for (const hoursAgo of ages) { deps.admits.push({ user_id: 'u1', - model: GLM_MODEL, + model: KIMI_MODEL, admitted_at: new Date(now.getTime() - hoursAgo * 60 * 60 * 1000), }) } @@ -540,7 +524,7 @@ describe('requestSession', () => { user_id: 'u1', status: 'active', active_instance_id: 'inst-pre', - model: GLM_MODEL, + model: KIMI_MODEL, queued_at: admittedAt, admitted_at: admittedAt, expires_at: new Date(admittedAt.getTime() + SESSION_LEN), @@ -549,7 +533,7 @@ describe('requestSession', () => { }) const state = await requestSession({ userId: 'u1', - model: GLM_MODEL, + model: KIMI_MODEL, deps, }) expect(state.status).toBe('rate_limited') @@ -557,18 +541,18 @@ describe('requestSession', () => { test('instant-admit bumps the quota count for the freshly-written admit row', async () => { const admitDeps = makeDeps({ getInstantAdmitCapacity: () => 3 }) - admitDeps._tick(GLM_OPEN_TIME) + admitDeps._tick(KIMI_OPEN_TIME) // 1 existing admit in the window; this new call should instant-admit and // write a second row, so the response's recentCount reflects 2. const now = admitDeps._now() admitDeps.admits.push({ user_id: 'u1', - model: GLM_MODEL, + model: KIMI_MODEL, admitted_at: new Date(now.getTime() - 30 * 60 * 1000), }) const state = await requestSession({ userId: 'u1', - model: GLM_MODEL, + model: KIMI_MODEL, deps: admitDeps, }) if (state.status !== 'active') throw new Error('unreachable') @@ -636,16 +620,16 @@ describe('getSessionState', () => { // Regression: the POST response attached rateLimit, but GET polls did // not — so the "Sessions N/M used" line flashed once then disappeared on // the next 5s poll. GET must attach the same quota snapshot. Rate - // limits only apply to GLM, so this test uses GLM explicitly (inside - // deployment hours) rather than the Minimax DEFAULT_MODEL. + // limits only apply to Kimi, so this test uses Kimi explicitly rather + // than the Minimax DEFAULT_MODEL. deps._tick(new Date('2026-04-17T16:00:00Z')) const now = deps._now() deps.admits.push({ user_id: 'u1', - model: 'z-ai/glm-5.1', + model: 'moonshotai/kimi-k2.6', admitted_at: new Date(now.getTime() - 60 * 60 * 1000), }) - await requestSession({ userId: 'u1', model: 'z-ai/glm-5.1', deps }) + await requestSession({ userId: 'u1', model: 'moonshotai/kimi-k2.6', deps }) const row = deps.rows.get('u1')! row.status = 'active' row.admitted_at = now @@ -658,7 +642,7 @@ describe('getSessionState', () => { }) if (state.status !== 'active') throw new Error('unreachable') expect(state.rateLimit).toEqual({ - model: 'z-ai/glm-5.1', + model: 'moonshotai/kimi-k2.6', limit: 5, windowHours: 20, recentCount: 1, diff --git a/web/src/server/free-session/__tests__/session-view.test.ts b/web/src/server/free-session/__tests__/session-view.test.ts index 52dc82c12b..215059b841 100644 --- a/web/src/server/free-session/__tests__/session-view.test.ts +++ b/web/src/server/free-session/__tests__/session-view.test.ts @@ -7,7 +7,7 @@ import type { InternalSessionRow } from '../types' const WAIT_PER_SPOT_MS = 24_000 const GRACE_MS = 30 * 60_000 -const TEST_MODEL = 'z-ai/glm-5.1' +const TEST_MODEL = 'moonshotai/kimi-k2.6' function row(overrides: Partial = {}): InternalSessionRow { const now = new Date('2026-04-17T12:00:00Z') diff --git a/web/src/server/free-session/config.ts b/web/src/server/free-session/config.ts index 10071b35fc..6d162c4617 100644 --- a/web/src/server/free-session/config.ts +++ b/web/src/server/free-session/config.ts @@ -48,7 +48,7 @@ export function getSessionGraceMs(): number { * queue). */ const INSTANT_ADMIT_CAPACITY: Record = { - 'z-ai/glm-5.1': 50, + 'moonshotai/kimi-k2.6': 50, 'minimax/minimax-m2.7': 1000, } diff --git a/web/src/server/free-session/public-api.ts b/web/src/server/free-session/public-api.ts index 02c5c05c9f..7c64830777 100644 --- a/web/src/server/free-session/public-api.ts +++ b/web/src/server/free-session/public-api.ts @@ -33,7 +33,7 @@ import type { InternalSessionRow, SessionStateResponse } from './types' /** * Per-model admission rate limits. Keyed by freebuff model id; a model not - * in the map has no rate limit applied. Today only GLM 5.1 is limited + * in the map has no rate limit applied. Today only Kimi K2.6 is limited * (Minimax is cheap enough to leave unlimited). * * Hard-coded rather than env-driven: the values need to be observable in the @@ -41,7 +41,7 @@ import type { InternalSessionRow, SessionStateResponse } from './types' * queued/active responses — changing them is a deliberate, typed edit. */ const RATE_LIMITS: Record = { - 'z-ai/glm-5.1': { limit: 5, windowHours: 20 }, + 'moonshotai/kimi-k2.6': { limit: 5, windowHours: 20 }, } /** Fetch the caller's current quota snapshot for `model`, or undefined if the From c1179757d34380b8f89d21b3e304343481722eff Mon Sep 17 00:00:00 2001 From: James Grugett Date: Sat, 25 Apr 2026 14:03:15 -0700 Subject: [PATCH 2/2] Wire Kimi K2.6 via CanopyWave through to base2-free Co-Authored-By: Claude Opus 4.7 (1M context) --- .../freebuff-model-navigation.test.ts | 22 +++++++------- common/src/constants/freebuff-models.ts | 8 +++-- .../completions/__tests__/completions.test.ts | 29 +++++++------------ 3 files changed, 26 insertions(+), 33 deletions(-) diff --git a/cli/src/utils/__tests__/freebuff-model-navigation.test.ts b/cli/src/utils/__tests__/freebuff-model-navigation.test.ts index 4723245bad..16efef166d 100644 --- a/cli/src/utils/__tests__/freebuff-model-navigation.test.ts +++ b/cli/src/utils/__tests__/freebuff-model-navigation.test.ts @@ -7,40 +7,40 @@ import { describe('nextSelectableFreebuffModelId', () => { test('skips unavailable models when moving forward', () => { - const modelIds = ['glm', 'minimax'] + const modelIds = ['kimi', 'minimax'] expect( nextSelectableFreebuffModelId({ modelIds, focusedId: 'minimax', direction: 'forward', - isSelectable: (id) => id !== 'glm', + isSelectable: (id) => id !== 'kimi', }), ).toBe('minimax') }) test('skips unavailable models when moving backward', () => { - const modelIds = ['glm', 'minimax'] + const modelIds = ['kimi', 'minimax'] expect( nextSelectableFreebuffModelId({ modelIds, focusedId: 'minimax', direction: 'backward', - isSelectable: (id) => id !== 'glm', + isSelectable: (id) => id !== 'kimi', }), ).toBe('minimax') }) test('moves to the next available model when more than one is selectable', () => { - const modelIds = ['glm', 'minimax', 'other'] + const modelIds = ['kimi', 'minimax', 'other'] expect( nextSelectableFreebuffModelId({ modelIds, focusedId: 'minimax', direction: 'forward', - isSelectable: (id) => id !== 'glm', + isSelectable: (id) => id !== 'kimi', }), ).toBe('other') }) @@ -48,8 +48,8 @@ describe('nextSelectableFreebuffModelId', () => { test('returns null when no selectable model exists', () => { expect( nextSelectableFreebuffModelId({ - modelIds: ['glm'], - focusedId: 'glm', + modelIds: ['kimi'], + focusedId: 'kimi', direction: 'forward', isSelectable: () => false, }), @@ -61,10 +61,10 @@ describe('resolveFreebuffModelCommitTarget', () => { test('falls back to the selected model when focus is on a closed model', () => { expect( resolveFreebuffModelCommitTarget({ - focusedId: 'glm', + focusedId: 'kimi', selectedId: 'minimax', committedId: null, - isSelectable: (id) => id !== 'glm', + isSelectable: (id) => id !== 'kimi', }), ).toBe('minimax') }) @@ -73,7 +73,7 @@ describe('resolveFreebuffModelCommitTarget', () => { expect( resolveFreebuffModelCommitTarget({ focusedId: 'minimax', - selectedId: 'glm', + selectedId: 'kimi', committedId: null, isSelectable: (id) => id === 'minimax', }), diff --git a/common/src/constants/freebuff-models.ts b/common/src/constants/freebuff-models.ts index 3f4c91a082..7c29e497cb 100644 --- a/common/src/constants/freebuff-models.ts +++ b/common/src/constants/freebuff-models.ts @@ -39,7 +39,7 @@ interface LocalTimeFormatOptions { timeZone?: string } -export const FREEBUFF_MODELS = [ +export const FREEBUFF_MODELS: readonly FreebuffModelOption[] = [ { id: FREEBUFF_MINIMAX_MODEL_ID, displayName: 'MiniMax M2.7', @@ -52,9 +52,11 @@ export const FREEBUFF_MODELS = [ tagline: 'Smartest', availability: 'always', }, -] as const satisfies readonly FreebuffModelOption[] +] -export type FreebuffModelId = (typeof FREEBUFF_MODELS)[number]['id'] +export type FreebuffModelId = + | typeof FREEBUFF_MINIMAX_MODEL_ID + | typeof FREEBUFF_KIMI_MODEL_ID /** What new freebuff users see selected in the picker. */ export const DEFAULT_FREEBUFF_MODEL_ID: FreebuffModelId = FREEBUFF_KIMI_MODEL_ID diff --git a/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts b/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts index 1aac8800cd..deb324ea6a 100644 --- a/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts +++ b/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts @@ -1,7 +1,6 @@ import { afterEach, beforeEach, describe, expect, mock, it } from 'bun:test' import { NextRequest } from 'next/server' -import { isFreebuffDeploymentHours } from '@codebuff/common/constants/freebuff-models' import { formatQuotaResetCountdown, postChatCompletions } from '../_post' import type { TrackEventFn } from '@codebuff/common/types/contracts/analytics' @@ -556,15 +555,15 @@ describe('/api/v1/chat/completions POST endpoint', () => { expect(response.status).toBe(200) }) - it('lets freebuff use GLM 5.1 through Fireworks availability rules', async () => { + it('lets freebuff use Kimi K2.6 through CanopyWave', async () => { const fetchedBodies: Record[] = [] - const fetchViaFireworks = mock( + const fetchViaCanopyWave = mock( async (_url: string | URL | Request, init?: RequestInit) => { fetchedBodies.push(JSON.parse(init?.body as string)) return new Response( JSON.stringify({ id: 'test-id', - model: 'accounts/james-65d217/deployments/mjb4i7ea', + model: 'moonshotai/kimi-k2.6', choices: [{ message: { content: 'test response' } }], usage: { prompt_tokens: 10, @@ -586,7 +585,7 @@ describe('/api/v1/chat/completions POST endpoint', () => { method: 'POST', headers: { Authorization: 'Bearer test-api-key-new-free' }, body: JSON.stringify({ - model: 'z-ai/glm-5.1', + model: 'moonshotai/kimi-k2.6', stream: false, codebuff_metadata: { run_id: 'run-free', @@ -604,26 +603,18 @@ describe('/api/v1/chat/completions POST endpoint', () => { trackEvent: mockTrackEvent, getUserUsageData: mockGetUserUsageData, getAgentRunFromId: mockGetAgentRunFromId, - fetch: fetchViaFireworks, + fetch: fetchViaCanopyWave, insertMessageBigquery: mockInsertMessageBigquery, loggerWithContext: mockLoggerWithContext, checkSessionAdmissible: mockCheckSessionAdmissibleAllow, }) const body = await response.json() - if (isFreebuffDeploymentHours()) { - expect(response.status).toBe(200) - expect(fetchedBodies).toHaveLength(1) - expect(fetchedBodies[0].model).toBe( - 'accounts/james-65d217/deployments/mjb4i7ea', - ) - expect(body.model).toBe('z-ai/glm-5.1') - expect(body.provider).toBe('Fireworks') - } else { - expect(response.status).toBe(503) - expect(fetchedBodies).toHaveLength(0) - expect(body.error.code).toBe('DEPLOYMENT_OUTSIDE_HOURS') - } + expect(response.status).toBe(200) + expect(fetchedBodies).toHaveLength(1) + expect(fetchedBodies[0].model).toBe('moonshotai/kimi-k2.6') + expect(body.model).toBe('moonshotai/kimi-k2.6') + expect(body.provider).toBe('CanopyWave') }) it('skips credit check when in FREE mode even with 0 credits', async () => {