From 4f907c1ffbf68970f0438e93a73160b31da63c32 Mon Sep 17 00:00:00 2001
From: James Grugett <jahooma@gmail.com>
Date: Sat, 25 Apr 2026 12:41:53 -0700
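Subject: [PATCH 1/2] Replace GLM 5.1 with Kimi K2.6 via CanopyWave in free mode

Swap the free-tier "Smartest" model from z-ai/glm-5.1 to
moonshotai/kimi-k2.6 across the agents, freebuff model constants,
free-session config, and tests. Kimi is marked always available, so the
GLM deployment-hours tests are removed. Re-enable CanopyWave routing in
chat completions (checked before Fireworks and OpenAI direct) and make
CanopyWave cost accounting per-model.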

---
 agents/__tests__/editor.test.ts               |  22 +--
 agents/base2/base2.ts                         |   2 +-
 agents/editor/editor-lite.ts                  |   2 +-
 agents/editor/editor.ts                       |   8 +-
 agents/reviewer/code-reviewer-lite.ts         |   2 +-
 .../components/freebuff-model-selector.tsx    |   6 +-
 common/src/constants/free-agents.ts           |   6 +-
 common/src/constants/freebuff-models.ts       |  15 +-
 web/src/app/api/v1/chat/completions/_post.ts  |  15 +-
 .../session/__tests__/session.test.ts         |  13 --
 web/src/llm-api/canopywave.ts                 |  41 ++++--
 .../free-session/__tests__/public-api.test.ts | 134 ++++++++----------
 .../__tests__/session-view.test.ts            |   2 +-
 web/src/server/free-session/config.ts         |   2 +-
 web/src/server/free-session/public-api.ts     |   4 +-
 15 files changed, 132 insertions(+), 142 deletions(-)
diff --git a/agents/__tests__/editor.test.ts b/agents/__tests__/editor.test.ts
index 36d6b75c5c..f9731f3a13 100644
--- a/agents/__tests__/editor.test.ts
+++ b/agents/__tests__/editor.test.ts
@@ -62,9 +62,9 @@ describe('editor agent', () => {
       expect(gpt5Editor.model).toBe('openai/gpt-5.1')
     })
 
-    test('creates glm editor', () => {
-      const glmEditor = createCodeEditor({ model: 'glm' })
-      expect(glmEditor.model).toBe('z-ai/glm-5.1')
+    test('creates kimi editor', () => {
+      const kimiEditor = createCodeEditor({ model: 'kimi' })
+      expect(kimiEditor.model).toBe('moonshotai/kimi-k2.6')
     })
 
     test('creates minimax editor', () => {
@@ -78,10 +78,10 @@ describe('editor agent', () => {
       expect(gpt5Editor.instructionsPrompt).not.toContain('</think>')
     })
 
-    test('glm editor does not include think tags in instructions', () => {
-      const glmEditor = createCodeEditor({ model: 'glm' })
-      expect(glmEditor.instructionsPrompt).not.toContain('<think>')
-      expect(glmEditor.instructionsPrompt).not.toContain('</think>')
+    test('kimi editor does not include think tags in instructions', () => {
+      const kimiEditor = createCodeEditor({ model: 'kimi' })
+      expect(kimiEditor.instructionsPrompt).not.toContain('<think>')
+      expect(kimiEditor.instructionsPrompt).not.toContain('</think>')
     })
 
     test('minimax editor does not include think tags in instructions', () => {
@@ -99,17 +99,17 @@ describe('editor agent', () => {
     test('all variants have same base properties', () => {
       const opusEditor = createCodeEditor({ model: 'opus' })
       const gpt5Editor = createCodeEditor({ model: 'gpt-5' })
-      const glmEditor = createCodeEditor({ model: 'glm' })
+      const kimiEditor = createCodeEditor({ model: 'kimi' })
 
       // All should have same basic structure
       expect(opusEditor.displayName).toBe(gpt5Editor.displayName)
-      expect(gpt5Editor.displayName).toBe(glmEditor.displayName)
+      expect(gpt5Editor.displayName).toBe(kimiEditor.displayName)
 
       expect(opusEditor.outputMode).toBe(gpt5Editor.outputMode)
-      expect(gpt5Editor.outputMode).toBe(glmEditor.outputMode)
+      expect(gpt5Editor.outputMode).toBe(kimiEditor.outputMode)
 
       expect(opusEditor.toolNames).toEqual(gpt5Editor.toolNames)
-      expect(gpt5Editor.toolNames).toEqual(glmEditor.toolNames)
+      expect(gpt5Editor.toolNames).toEqual(kimiEditor.toolNames)
     })
   })
 
diff --git a/agents/base2/base2.ts b/agents/base2/base2.ts
index 1a81f948bf..b1e24efff6 100644
--- a/agents/base2/base2.ts
+++ b/agents/base2/base2.ts
@@ -25,7 +25,7 @@ export function createBase2(
   const isFree = mode === 'free' || mode === 'lite'
 
   const isSonnet = false
-  const model = isFree ? 'z-ai/glm-5.1' : 'anthropic/claude-opus-4.7'
+  const model = isFree ? 'moonshotai/kimi-k2.6' : 'anthropic/claude-opus-4.7'
 
   return {
     publisher,
diff --git a/agents/editor/editor-lite.ts b/agents/editor/editor-lite.ts
index 29225f0c29..6dbb4bb3c6 100644
--- a/agents/editor/editor-lite.ts
+++ b/agents/editor/editor-lite.ts
@@ -3,7 +3,7 @@ import { createCodeEditor } from './editor'
 import type { AgentDefinition } from '../types/agent-definition'
 
 const definition: AgentDefinition = {
-  ...createCodeEditor({ model: 'glm' }),
+  ...createCodeEditor({ model: 'kimi' }),
   id: 'editor-lite',
 }
 export default definition
diff --git a/agents/editor/editor.ts b/agents/editor/editor.ts
index c98544d0f2..34d3382eb4 100644
--- a/agents/editor/editor.ts
+++ b/agents/editor/editor.ts
@@ -4,7 +4,7 @@ import { publisher } from '../constants'
 import type { AgentDefinition } from '../types/agent-definition'
 
 export const createCodeEditor = (options: {
-  model: 'gpt-5' | 'opus' | 'glm' | 'minimax'
+  model: 'gpt-5' | 'opus' | 'kimi' | 'minimax'
 }): Omit<AgentDefinition, 'id'> => {
   const { model } = options
   return {
@@ -14,8 +14,8 @@ export const createCodeEditor = (options: {
         ? 'openai/gpt-5.1'
         : options.model === 'minimax'
           ? 'minimax/minimax-m2.7'
-        : options.model === 'glm'
-          ? 'z-ai/glm-5.1'
+        : options.model === 'kimi'
+          ? 'moonshotai/kimi-k2.6'
           : 'anthropic/claude-opus-4.7',
     ...(options.model === 'opus' && {
       providerOptions: {
@@ -67,7 +67,7 @@ OR for new files or major rewrites:
 }
 </codebuff_tool_call>
 
-${model === 'gpt-5' || model === 'glm' || model === 'minimax'
+${model === 'gpt-5' || model === 'kimi' || model === 'minimax'
         ? ''
         : `Before you start writing your implementation, you should use <think> tags to think about the best way to implement the changes.
 
diff --git a/agents/reviewer/code-reviewer-lite.ts b/agents/reviewer/code-reviewer-lite.ts
index feafb87c45..888cadf4f7 100644
--- a/agents/reviewer/code-reviewer-lite.ts
+++ b/agents/reviewer/code-reviewer-lite.ts
@@ -5,7 +5,7 @@ import { createReviewer } from './code-reviewer'
 const definition: SecretAgentDefinition = {
   id: 'code-reviewer-lite',
   publisher,
-  ...createReviewer('z-ai/glm-5.1'),
+  ...createReviewer('moonshotai/kimi-k2.6'),
 }
 
 export default definition
diff --git a/cli/src/components/freebuff-model-selector.tsx b/cli/src/components/freebuff-model-selector.tsx
index a453a15389..1bcda80a76 100644
--- a/cli/src/components/freebuff-model-selector.tsx
+++ b/cli/src/components/freebuff-model-selector.tsx
@@ -5,7 +5,7 @@ import React, { useCallback, useEffect, useMemo, useState } from 'react'
 import { Button } from './button'
 import {
   FALLBACK_FREEBUFF_MODEL_ID,
-  FREEBUFF_GLM_MODEL_ID,
+  FREEBUFF_KIMI_MODEL_ID,
   FREEBUFF_MODELS,
   getFreebuffDeploymentAvailabilityLabel,
   isFreebuffModelAvailable,
@@ -25,8 +25,8 @@ import {
 import type { KeyEvent } from '@opentui/core'
 
 const FREEBUFF_MODEL_SELECTOR_MODELS = [
-  ...FREEBUFF_MODELS.filter((model) => model.id === FREEBUFF_GLM_MODEL_ID),
-  ...FREEBUFF_MODELS.filter((model) => model.id !== FREEBUFF_GLM_MODEL_ID),
+  ...FREEBUFF_MODELS.filter((model) => model.id === FREEBUFF_KIMI_MODEL_ID),
+  ...FREEBUFF_MODELS.filter((model) => model.id !== FREEBUFF_KIMI_MODEL_ID),
 ]
 
 /**
diff --git a/common/src/constants/free-agents.ts b/common/src/constants/free-agents.ts
index 308e12df6d..4a2a4a147e 100644
--- a/common/src/constants/free-agents.ts
+++ b/common/src/constants/free-agents.ts
@@ -28,7 +28,7 @@ export const FREE_MODE_AGENT_MODELS: Record<string, Set<string>> = {
   // Root orchestrator
   'base2-free': new Set([
     'minimax/minimax-m2.7',
-    'z-ai/glm-5.1',
+    'moonshotai/kimi-k2.6',
   ]),
 
   // File exploration agents
@@ -46,13 +46,13 @@ export const FREE_MODE_AGENT_MODELS: Record<string, Set<string>> = {
   // Editor for free mode
   'editor-lite': new Set([
     'minimax/minimax-m2.7',
-    'z-ai/glm-5.1',
+    'moonshotai/kimi-k2.6',
   ]),
 
   // Code reviewer for free mode
   'code-reviewer-lite': new Set([
     'minimax/minimax-m2.7',
-    'z-ai/glm-5.1',
+    'moonshotai/kimi-k2.6',
   ]),
 }
 
diff --git a/common/src/constants/freebuff-models.ts b/common/src/constants/freebuff-models.ts
index 8b3e9d82d9..3f4c91a082 100644
--- a/common/src/constants/freebuff-models.ts
+++ b/common/src/constants/freebuff-models.ts
@@ -21,7 +21,7 @@ export interface FreebuffModelOption {
  *  the caller's local timezone. The CLI should render
  *  `getFreebuffDeploymentAvailabilityLabel()` instead. */
 export const FREEBUFF_DEPLOYMENT_HOURS_LABEL = '9am ET-5pm PT every day'
-export const FREEBUFF_GLM_MODEL_ID = 'z-ai/glm-5.1'
+export const FREEBUFF_KIMI_MODEL_ID = 'moonshotai/kimi-k2.6'
 export const FREEBUFF_MINIMAX_MODEL_ID = 'minimax/minimax-m2.7'
 const FREEBUFF_EASTERN_TIMEZONE = 'America/New_York'
 const FREEBUFF_PACIFIC_TIMEZONE = 'America/Los_Angeles'
@@ -47,20 +47,17 @@ export const FREEBUFF_MODELS = [
     availability: 'always',
   },
   {
-    id: FREEBUFF_GLM_MODEL_ID,
-    displayName: 'GLM 5.1',
+    id: FREEBUFF_KIMI_MODEL_ID,
+    displayName: 'Kimi K2.6',
     tagline: 'Smartest',
-    availability: 'deployment_hours',
+    availability: 'always',
   },
 ] as const satisfies readonly FreebuffModelOption[]
 
 export type FreebuffModelId = (typeof FREEBUFF_MODELS)[number]['id']
 
-/** What new freebuff users see selected in the picker. May not be currently
- *  available (GLM is closed outside deployment hours); callers that need an
- *  always-available id for resolution / auto-fallbacks should use
- *  FALLBACK_FREEBUFF_MODEL_ID instead. */
-export const DEFAULT_FREEBUFF_MODEL_ID: FreebuffModelId = FREEBUFF_GLM_MODEL_ID
+/** What new freebuff users see selected in the picker. */
+export const DEFAULT_FREEBUFF_MODEL_ID: FreebuffModelId = FREEBUFF_KIMI_MODEL_ID
 
 /** Always-available fallback used when the requested model can't be served
  *  right now (unknown id, deployment hours closed, etc.). Kept distinct from
diff --git a/web/src/app/api/v1/chat/completions/_post.ts b/web/src/app/api/v1/chat/completions/_post.ts
index 1f71b77922..13baada653 100644
--- a/web/src/app/api/v1/chat/completions/_post.ts
+++ b/web/src/app/api/v1/chat/completions/_post.ts
@@ -532,9 +532,10 @@ export async function postChatCompletions(params: {
       if (bodyStream) {
         // Streaming request — route to SiliconFlow/CanopyWave/Fireworks for supported models
         const useSiliconFlow = false // isSiliconFlowModel(typedBody.model)
-        const useCanopyWave = false // isCanopyWaveModel(typedBody.model)
-        const useFireworks = isFireworksModel(typedBody.model)
-        const useOpenAIDirect = !useFireworks && isOpenAIDirectModel(typedBody.model)
+        const useCanopyWave = isCanopyWaveModel(typedBody.model)
+        const useFireworks = !useCanopyWave && isFireworksModel(typedBody.model)
+        const useOpenAIDirect =
+          !useCanopyWave && !useFireworks && isOpenAIDirectModel(typedBody.model)
         const stream = useSiliconFlow
           ? await handleSiliconFlowStream({
             body: typedBody,
@@ -606,12 +607,12 @@ export async function postChatCompletions(params: {
         })
       } else {
         // Non-streaming request — route to SiliconFlow/CanopyWave/Fireworks for supported models
-        // TEMPORARILY DISABLED: route through OpenRouter
         const model = typedBody.model
         const useSiliconFlow = false // isSiliconFlowModel(model)
-        const useCanopyWave = false // isCanopyWaveModel(model)
-        const useFireworks = isFireworksModel(model)
-        const shouldUseOpenAIEndpoint = !useFireworks && isOpenAIDirectModel(model)
+        const useCanopyWave = isCanopyWaveModel(model)
+        const useFireworks = !useCanopyWave && isFireworksModel(model)
+        const shouldUseOpenAIEndpoint =
+          !useCanopyWave && !useFireworks && isOpenAIDirectModel(model)
 
         const nonStreamRequest = useSiliconFlow
           ? handleSiliconFlowNonStream({
diff --git a/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts b/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts
index 7ed29ec4b5..4839d5348c 100644
--- a/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts
+++ b/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts
@@ -158,19 +158,6 @@ describe('POST /api/v1/freebuff/session', () => {
     expect(body.status).toBe('queued')
   })
 
-  test('returns model_unavailable for GLM outside deployment hours', async () => {
-    const sessionDeps = makeSessionDeps()
-    const resp = await postFreebuffSession(
-      makeReq('ok', { model: 'z-ai/glm-5.1' }),
-      makeDeps(sessionDeps, 'u1'),
-    )
-    expect(resp.status).toBe(409)
-    const body = await resp.json()
-    expect(body.status).toBe('model_unavailable')
-    expect(body.availableHours).toBe('9am ET-5pm PT every day')
-    expect(sessionDeps.rows.size).toBe(0)
-  })
-
   // Banned bots with valid API keys were POSTing every few seconds and
   // inflating queueDepth between the 15s admission-tick sweeps. Rejecting at
   // the HTTP layer with 403 (terminal, like country_blocked) keeps them out
diff --git a/web/src/llm-api/canopywave.ts b/web/src/llm-api/canopywave.ts
index 0db3e0f9cb..7854953d29 100644
--- a/web/src/llm-api/canopywave.ts
+++ b/web/src/llm-api/canopywave.ts
@@ -29,6 +29,7 @@ const canopywaveAgent = new Agent({
 /** Map from OpenRouter model IDs to CanopyWave model IDs */
 const CANOPYWAVE_MODEL_MAP: Record<string, string> = {
   'minimax/minimax-m2.5': 'minimax/minimax-m2.5',
+  'moonshotai/kimi-k2.6': 'moonshotai/kimi-k2.6',
 }
 
 export function isCanopyWaveModel(model: string): boolean {
@@ -85,12 +86,31 @@ function createCanopyWaveRequest(params: {
   })
 }
 
-// CanopyWave per-token pricing (dollars per token) for MiniMax M2.5
-const CANOPYWAVE_INPUT_COST_PER_TOKEN = 0.27 / 1_000_000
-const CANOPYWAVE_CACHED_INPUT_COST_PER_TOKEN = 0.03 / 1_000_000
-const CANOPYWAVE_OUTPUT_COST_PER_TOKEN = 1.08 / 1_000_000
+// CanopyWave per-token pricing (dollars per token), keyed by OpenRouter model ID
+interface CanopyWavePricing {
+  inputCostPerToken: number
+  cachedInputCostPerToken: number
+  outputCostPerToken: number
+}
+
+const CANOPYWAVE_PRICING_MAP: Record<string, CanopyWavePricing> = {
+  'minimax/minimax-m2.5': {
+    inputCostPerToken: 0.27 / 1_000_000,
+    cachedInputCostPerToken: 0.03 / 1_000_000,
+    outputCostPerToken: 1.08 / 1_000_000,
+  },
+  'moonshotai/kimi-k2.6': {
+    inputCostPerToken: 0.60 / 1_000_000,
+    cachedInputCostPerToken: 0.15 / 1_000_000,
+    outputCostPerToken: 2.50 / 1_000_000,
+  },
+}
+
+function getCanopyWavePricing(model: string): CanopyWavePricing {
+  return CANOPYWAVE_PRICING_MAP[model] ?? CANOPYWAVE_PRICING_MAP['moonshotai/kimi-k2.6']
+}
 
-function extractUsageAndCost(usage: Record<string, unknown> | undefined | null): UsageData {
+function extractUsageAndCost(usage: Record<string, unknown> | undefined | null, model: string): UsageData {
   if (!usage) return { inputTokens: 0, outputTokens: 0, cacheReadInputTokens: 0, reasoningTokens: 0, cost: 0 }
   const promptDetails = usage.prompt_tokens_details as Record<string, unknown> | undefined | null
   const completionDetails = usage.completion_tokens_details as Record<string, unknown> | undefined | null
@@ -100,11 +120,12 @@ function extractUsageAndCost(usage: Record<string, unknown> | undefined | null):
   const cacheReadInputTokens = typeof promptDetails?.cached_tokens === 'number' ? promptDetails.cached_tokens : 0
   const reasoningTokens = typeof completionDetails?.reasoning_tokens === 'number' ? completionDetails.reasoning_tokens : 0
 
+  const pricing = getCanopyWavePricing(model)
   const nonCachedInputTokens = Math.max(0, inputTokens - cacheReadInputTokens)
   const cost =
-    nonCachedInputTokens * CANOPYWAVE_INPUT_COST_PER_TOKEN +
-    cacheReadInputTokens * CANOPYWAVE_CACHED_INPUT_COST_PER_TOKEN +
-    outputTokens * CANOPYWAVE_OUTPUT_COST_PER_TOKEN
+    nonCachedInputTokens * pricing.inputCostPerToken +
+    cacheReadInputTokens * pricing.cachedInputCostPerToken +
+    outputTokens * pricing.outputCostPerToken
 
   return { inputTokens, outputTokens, cacheReadInputTokens, reasoningTokens, cost }
 }
@@ -139,7 +160,7 @@ export async function handleCanopyWaveNonStream({
   const data = await response.json()
   const content = data.choices?.[0]?.message?.content ?? ''
   const reasoningText = data.choices?.[0]?.message?.reasoning_content ?? data.choices?.[0]?.message?.reasoning ?? ''
-  const usageData = extractUsageAndCost(data.usage)
+  const usageData = extractUsageAndCost(data.usage, originalModel)
 
   insertMessageToBigQuery({
     messageId: data.id,
@@ -453,7 +474,7 @@ async function handleResponse({
     return { state }
   }
 
-  const usageData = extractUsageAndCost(data.usage as Record<string, unknown>)
+  const usageData = extractUsageAndCost(data.usage as Record<string, unknown>, originalModel)
   const messageId = typeof data.id === 'string' ? data.id : 'unknown'
 
   state.billedAlready = true
diff --git a/web/src/server/free-session/__tests__/public-api.test.ts b/web/src/server/free-session/__tests__/public-api.test.ts
index 44d516c123..3a4bc36773 100644
--- a/web/src/server/free-session/__tests__/public-api.test.ts
+++ b/web/src/server/free-session/__tests__/public-api.test.ts
@@ -200,34 +200,20 @@ describe('requestSession', () => {
     expect(state.instanceId).toBe('inst-1')
   })
 
-  test('deployment-hours-only model is unavailable outside deployment hours', async () => {
-    const state = await requestSession({
-      userId: 'u1',
-      model: 'z-ai/glm-5.1',
-      deps,
-    })
-    expect(state).toEqual({
-      status: 'model_unavailable',
-      requestedModel: 'z-ai/glm-5.1',
-      availableHours: '9am ET-5pm PT every day',
-    })
-    expect(deps.rows.size).toBe(0)
-  })
-
   test('queued response includes a per-model depth snapshot for the selector', async () => {
     deps._tick(new Date('2026-04-17T16:00:00Z'))
-    // Seed 2 users in MiniMax + 1 in GLM so the returned map captures both.
+    // Seed 2 users in MiniMax + 1 in Kimi so the returned map captures both.
     await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps })
     deps._tick(new Date(deps._now().getTime() + 1000))
     await requestSession({ userId: 'u2', model: DEFAULT_MODEL, deps })
     deps._tick(new Date(deps._now().getTime() + 1000))
-    await requestSession({ userId: 'u3', model: 'z-ai/glm-5.1', deps })
+    await requestSession({ userId: 'u3', model: 'moonshotai/kimi-k2.6', deps })
 
     const state = await getSessionState({ userId: 'u1', deps })
     if (state.status !== 'queued') throw new Error('unreachable')
     expect(state.queueDepthByModel).toEqual({
       [DEFAULT_MODEL]: 2,
-      'z-ai/glm-5.1': 1,
+      'moonshotai/kimi-k2.6': 1,
     })
   })
 
@@ -302,7 +288,7 @@ describe('requestSession', () => {
   })
 
   test('instant-admit: per-model capacities are independent', async () => {
-    // MiniMax saturated at 1 active, GLM still has room.
+    // MiniMax saturated at 1 active, Kimi still has room.
     const admitDeps = makeDeps({
       getInstantAdmitCapacity: (model) =>
         model === DEFAULT_MODEL ? 1 : 10,
@@ -316,25 +302,23 @@ describe('requestSession', () => {
     })
     const s3 = await requestSession({
       userId: 'u3',
-      model: 'z-ai/glm-5.1',
+      model: 'moonshotai/kimi-k2.6',
       deps: admitDeps,
     })
     expect(s2.status).toBe('queued')
     expect(s3.status).toBe('active')
   })
 
-  // Per-user rate limit (5 GLM admissions per 20h) — the wire limit is
+  // Per-user rate limit (5 Kimi admissions per 20h) — the wire limit is
   // hard-coded in public-api.ts, so tests seed the fake admit log directly
-  // rather than configuring it. GLM also has deployment-hours gating, so
-  // these tests bump `now` into the open window (12pm ET on a weekday)
-  // before issuing the request.
-  const GLM_MODEL = 'z-ai/glm-5.1'
-  const GLM_LIMIT = 5
-  const GLM_WINDOW_HOURS = 20
-  const GLM_OPEN_TIME = new Date('2026-04-17T16:00:00Z')
-
-  test('rate_limited: 5th GLM admit in window blocks the 6th attempt', async () => {
-    deps._tick(GLM_OPEN_TIME)
+  // rather than configuring it.
+  const KIMI_MODEL = 'moonshotai/kimi-k2.6'
+  const KIMI_LIMIT = 5
+  const KIMI_WINDOW_HOURS = 20
+  const KIMI_OPEN_TIME = new Date('2026-04-17T16:00:00Z')
+
+  test('rate_limited: 5th Kimi admit in window blocks the 6th attempt', async () => {
+    deps._tick(KIMI_OPEN_TIME)
     // Seed 5 admits inside the 20h window, spaced so we can verify retryAfter
     // points at the oldest one sliding off.
     const now = deps._now()
@@ -343,22 +327,22 @@ describe('requestSession', () => {
     for (const hoursAgo of ages) {
       deps.admits.push({
         user_id: 'u1',
-        model: GLM_MODEL,
+        model: KIMI_MODEL,
         admitted_at: new Date(now.getTime() - hoursAgo * 60 * 60 * 1000),
       })
     }
 
     const state = await requestSession({
       userId: 'u1',
-      model: GLM_MODEL,
+      model: KIMI_MODEL,
       deps,
     })
     expect(state.status).toBe('rate_limited')
     if (state.status !== 'rate_limited') throw new Error('unreachable')
-    expect(state.model).toBe(GLM_MODEL)
-    expect(state.limit).toBe(GLM_LIMIT)
-    expect(state.windowHours).toBe(GLM_WINDOW_HOURS)
-    expect(state.recentCount).toBe(GLM_LIMIT)
+    expect(state.model).toBe(KIMI_MODEL)
+    expect(state.limit).toBe(KIMI_LIMIT)
+    expect(state.windowHours).toBe(KIMI_WINDOW_HOURS)
+    expect(state.recentCount).toBe(KIMI_LIMIT)
     // Oldest admit is 19h ago; slot opens when it hits 20h, i.e. in 1h.
     expect(state.retryAfterMs).toBe(60 * 60 * 1000)
     // Blocked before any row is written — the user doesn't take a queue slot.
@@ -366,21 +350,21 @@ describe('requestSession', () => {
   })
 
   test('rate_limited: admits outside the 20h window do not count', async () => {
-    deps._tick(GLM_OPEN_TIME)
+    deps._tick(KIMI_OPEN_TIME)
     // 5 admits, each just over 20h old → all fall off the window.
     const now = deps._now()
     for (let i = 0; i < 5; i++) {
       deps.admits.push({
         user_id: 'u1',
-        model: GLM_MODEL,
+        model: KIMI_MODEL,
         admitted_at: new Date(
-          now.getTime() - (GLM_WINDOW_HOURS * 60 * 60 * 1000 + 60_000 + i),
+          now.getTime() - (KIMI_WINDOW_HOURS * 60 * 60 * 1000 + 60_000 + i),
         ),
       })
     }
     const state = await requestSession({
       userId: 'u1',
-      model: GLM_MODEL,
+      model: KIMI_MODEL,
       deps,
     })
     expect(state.status).toBe('queued')
@@ -408,41 +392,41 @@ describe('requestSession', () => {
     expect(state.rateLimit).toBeUndefined()
   })
 
-  test('queued GLM response carries the current admit count', async () => {
-    deps._tick(GLM_OPEN_TIME)
+  test('queued Kimi response carries the current admit count', async () => {
+    deps._tick(KIMI_OPEN_TIME)
     const now = deps._now()
     // 2 admits in the window — under the limit so the user still queues.
     deps.admits.push({
       user_id: 'u1',
-      model: GLM_MODEL,
+      model: KIMI_MODEL,
       admitted_at: new Date(now.getTime() - 60 * 60 * 1000),
     })
     deps.admits.push({
       user_id: 'u1',
-      model: GLM_MODEL,
+      model: KIMI_MODEL,
       admitted_at: new Date(now.getTime() - 30 * 60 * 1000),
     })
     const state = await requestSession({
       userId: 'u1',
-      model: GLM_MODEL,
+      model: KIMI_MODEL,
       deps,
     })
     if (state.status !== 'queued') throw new Error('unreachable')
     expect(state.rateLimit).toEqual({
-      model: GLM_MODEL,
-      limit: GLM_LIMIT,
-      windowHours: GLM_WINDOW_HOURS,
+      model: KIMI_MODEL,
+      limit: KIMI_LIMIT,
+      windowHours: KIMI_WINDOW_HOURS,
       recentCount: 2,
     })
   })
 
-  test('rate_limited: takeover of an active GLM row is allowed even when at cap', async () => {
-    // Reclaim path: user has an active+unexpired GLM session and restarts
+  test('rate_limited: takeover of an active Kimi row is allowed even when at cap', async () => {
+    // Reclaim path: user has an active+unexpired Kimi session and restarts
     // the CLI. POST must rotate their instance id (takeover) and NOT reject
     // with rate_limited — otherwise they'd be stranded with a live session
     // they can't reconnect to. The 5th admission is already in the log, so
     // this also exercises "at the cap" rather than "over the cap".
-    deps._tick(GLM_OPEN_TIME)
+    deps._tick(KIMI_OPEN_TIME)
     const now = deps._now()
     // Seed 5 prior admits (the cap), with the latest one matching the
     // active row we're about to install.
@@ -450,7 +434,7 @@ describe('requestSession', () => {
     for (const hoursAgo of ages) {
       deps.admits.push({
         user_id: 'u1',
-        model: GLM_MODEL,
+        model: KIMI_MODEL,
         admitted_at: new Date(now.getTime() - hoursAgo * 60 * 60 * 1000),
       })
     }
@@ -461,7 +445,7 @@ describe('requestSession', () => {
       user_id: 'u1',
       status: 'active',
       active_instance_id: 'inst-pre',
-      model: GLM_MODEL,
+      model: KIMI_MODEL,
       queued_at: admittedAt,
       admitted_at: admittedAt,
       expires_at: new Date(admittedAt.getTime() + SESSION_LEN),
@@ -471,27 +455,27 @@ describe('requestSession', () => {
 
     const state = await requestSession({
       userId: 'u1',
-      model: GLM_MODEL,
+      model: KIMI_MODEL,
       deps,
     })
     expect(state.status).toBe('active')
     if (state.status !== 'active') throw new Error('unreachable')
     // Instance id rotated; quota snapshot still reflects the full window.
     expect(state.instanceId).not.toBe('inst-pre')
-    expect(state.rateLimit?.recentCount).toBe(GLM_LIMIT)
+    expect(state.rateLimit?.recentCount).toBe(KIMI_LIMIT)
   })
 
-  test('rate_limited: reclaim of a queued GLM row is allowed even when at cap', async () => {
+  test('rate_limited: reclaim of a queued Kimi row is allowed even when at cap', async () => {
     // Same reclaim exception for queued rows: if a user has already queued
     // (say they slipped in just before their 5th admit landed), a subsequent
     // POST from the same CLI must preserve their queue position instead of
     // flipping to rate_limited.
-    deps._tick(GLM_OPEN_TIME)
+    deps._tick(KIMI_OPEN_TIME)
     const now = deps._now()
-    for (let i = 0; i < GLM_LIMIT; i++) {
+    for (let i = 0; i < KIMI_LIMIT; i++) {
       deps.admits.push({
         user_id: 'u1',
-        model: GLM_MODEL,
+        model: KIMI_MODEL,
         admitted_at: new Date(now.getTime() - (i + 1) * 60 * 60 * 1000),
       })
     }
@@ -500,7 +484,7 @@ describe('requestSession', () => {
       user_id: 'u1',
       status: 'queued',
       active_instance_id: 'inst-pre',
-      model: GLM_MODEL,
+      model: KIMI_MODEL,
       queued_at: queuedAt,
       admitted_at: null,
       expires_at: null,
@@ -510,7 +494,7 @@ describe('requestSession', () => {
 
     const state = await requestSession({
       userId: 'u1',
-      model: GLM_MODEL,
+      model: KIMI_MODEL,
       deps,
     })
     expect(state.status).toBe('queued')
@@ -518,20 +502,20 @@ describe('requestSession', () => {
     // Same position (1) since we preserved queued_at and nobody else is
     // ahead; the instance id rotated so any prior CLI is superseded.
     expect(state.instanceId).not.toBe('inst-pre')
-    expect(state.rateLimit?.recentCount).toBe(GLM_LIMIT)
+    expect(state.rateLimit?.recentCount).toBe(KIMI_LIMIT)
   })
 
-  test('rate_limited: expired GLM row is not a reclaim — quota still applies', async () => {
+  test('rate_limited: expired Kimi row is not a reclaim — quota still applies', async () => {
     // The stored row's expires_at is in the past, so it doesn't represent
     // an in-flight session. This POST is effectively a fresh request and
     // must be blocked by the quota.
-    deps._tick(GLM_OPEN_TIME)
+    deps._tick(KIMI_OPEN_TIME)
     const now = deps._now()
     const ages = [19, 4, 3, 2, 1]
     for (const hoursAgo of ages) {
       deps.admits.push({
         user_id: 'u1',
-        model: GLM_MODEL,
+        model: KIMI_MODEL,
         admitted_at: new Date(now.getTime() - hoursAgo * 60 * 60 * 1000),
       })
     }
@@ -540,7 +524,7 @@ describe('requestSession', () => {
       user_id: 'u1',
       status: 'active',
       active_instance_id: 'inst-pre',
-      model: GLM_MODEL,
+      model: KIMI_MODEL,
       queued_at: admittedAt,
       admitted_at: admittedAt,
       expires_at: new Date(admittedAt.getTime() + SESSION_LEN),
@@ -549,7 +533,7 @@ describe('requestSession', () => {
     })
     const state = await requestSession({
       userId: 'u1',
-      model: GLM_MODEL,
+      model: KIMI_MODEL,
       deps,
     })
     expect(state.status).toBe('rate_limited')
@@ -557,18 +541,18 @@ describe('requestSession', () => {
 
   test('instant-admit bumps the quota count for the freshly-written admit row', async () => {
     const admitDeps = makeDeps({ getInstantAdmitCapacity: () => 3 })
-    admitDeps._tick(GLM_OPEN_TIME)
+    admitDeps._tick(KIMI_OPEN_TIME)
     // 1 existing admit in the window; this new call should instant-admit and
     // write a second row, so the response's recentCount reflects 2.
     const now = admitDeps._now()
     admitDeps.admits.push({
       user_id: 'u1',
-      model: GLM_MODEL,
+      model: KIMI_MODEL,
       admitted_at: new Date(now.getTime() - 30 * 60 * 1000),
     })
     const state = await requestSession({
       userId: 'u1',
-      model: GLM_MODEL,
+      model: KIMI_MODEL,
       deps: admitDeps,
     })
     if (state.status !== 'active') throw new Error('unreachable')
@@ -636,16 +620,16 @@ describe('getSessionState', () => {
     // Regression: the POST response attached rateLimit, but GET polls did
     // not — so the "Sessions N/M used" line flashed once then disappeared on
     // the next 5s poll. GET must attach the same quota snapshot. Rate
-    // limits only apply to GLM, so this test uses GLM explicitly (inside
-    // deployment hours) rather than the Minimax DEFAULT_MODEL.
+    // limits only apply to Kimi, so this test uses Kimi explicitly rather
+    // than the Minimax DEFAULT_MODEL.
     deps._tick(new Date('2026-04-17T16:00:00Z'))
     const now = deps._now()
     deps.admits.push({
       user_id: 'u1',
-      model: 'z-ai/glm-5.1',
+      model: 'moonshotai/kimi-k2.6',
       admitted_at: new Date(now.getTime() - 60 * 60 * 1000),
     })
-    await requestSession({ userId: 'u1', model: 'z-ai/glm-5.1', deps })
+    await requestSession({ userId: 'u1', model: 'moonshotai/kimi-k2.6', deps })
     const row = deps.rows.get('u1')!
     row.status = 'active'
     row.admitted_at = now
@@ -658,7 +642,7 @@ describe('getSessionState', () => {
     })
     if (state.status !== 'active') throw new Error('unreachable')
     expect(state.rateLimit).toEqual({
-      model: 'z-ai/glm-5.1',
+      model: 'moonshotai/kimi-k2.6',
       limit: 5,
       windowHours: 20,
       recentCount: 1,
diff --git a/web/src/server/free-session/__tests__/session-view.test.ts b/web/src/server/free-session/__tests__/session-view.test.ts
index 52dc82c12b..215059b841 100644
--- a/web/src/server/free-session/__tests__/session-view.test.ts
+++ b/web/src/server/free-session/__tests__/session-view.test.ts
@@ -7,7 +7,7 @@ import type { InternalSessionRow } from '../types'
 const WAIT_PER_SPOT_MS = 24_000
 const GRACE_MS = 30 * 60_000
 
-const TEST_MODEL = 'z-ai/glm-5.1'
+const TEST_MODEL = 'moonshotai/kimi-k2.6'
 
 function row(overrides: Partial<InternalSessionRow> = {}): InternalSessionRow {
   const now = new Date('2026-04-17T12:00:00Z')
diff --git a/web/src/server/free-session/config.ts b/web/src/server/free-session/config.ts
index 10071b35fc..6d162c4617 100644
--- a/web/src/server/free-session/config.ts
+++ b/web/src/server/free-session/config.ts
@@ -48,7 +48,7 @@ export function getSessionGraceMs(): number {
  * queue).
  */
 const INSTANT_ADMIT_CAPACITY: Record<string, number> = {
-  'z-ai/glm-5.1': 50,
+  'moonshotai/kimi-k2.6': 50,
   'minimax/minimax-m2.7': 1000,
 }
 
diff --git a/web/src/server/free-session/public-api.ts b/web/src/server/free-session/public-api.ts
index 02c5c05c9f..7c64830777 100644
--- a/web/src/server/free-session/public-api.ts
+++ b/web/src/server/free-session/public-api.ts
@@ -33,7 +33,7 @@ import type { InternalSessionRow, SessionStateResponse } from './types'
 
 /**
  * Per-model admission rate limits. Keyed by freebuff model id; a model not
- * in the map has no rate limit applied. Today only GLM 5.1 is limited
+ * in the map has no rate limit applied. Today only Kimi K2.6 is limited
  * (Minimax is cheap enough to leave unlimited).
  *
  * Hard-coded rather than env-driven: the values need to be observable in the
@@ -41,7 +41,7 @@ import type { InternalSessionRow, SessionStateResponse } from './types'
  * queued/active responses — changing them is a deliberate, typed edit.
  */
 const RATE_LIMITS: Record<string, { limit: number; windowHours: number }> = {
-  'z-ai/glm-5.1': { limit: 5, windowHours: 20 },
+  'moonshotai/kimi-k2.6': { limit: 5, windowHours: 20 },
 }
 
 /** Fetch the caller's current quota snapshot for `model`, or undefined if the

From c1179757d34380b8f89d21b3e304343481722eff Mon Sep 17 00:00:00 2001
From: James Grugett <jahooma@gmail.com>
Date: Sat, 25 Apr 2026 14:03:15 -0700
Subject: [PATCH 2/2] Wire Kimi K2.6 via CanopyWave through to base2-free

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../freebuff-model-navigation.test.ts         | 22 +++++++-------
 common/src/constants/freebuff-models.ts       |  8 +++--
 .../completions/__tests__/completions.test.ts | 29 +++++++------------
 3 files changed, 26 insertions(+), 33 deletions(-)

diff --git a/cli/src/utils/__tests__/freebuff-model-navigation.test.ts b/cli/src/utils/__tests__/freebuff-model-navigation.test.ts
index 4723245bad..16efef166d 100644
--- a/cli/src/utils/__tests__/freebuff-model-navigation.test.ts
+++ b/cli/src/utils/__tests__/freebuff-model-navigation.test.ts
@@ -7,40 +7,40 @@ import {
 
 describe('nextSelectableFreebuffModelId', () => {
   test('skips unavailable models when moving forward', () => {
-    const modelIds = ['glm', 'minimax']
+    const modelIds = ['kimi', 'minimax']
 
     expect(
       nextSelectableFreebuffModelId({
         modelIds,
         focusedId: 'minimax',
         direction: 'forward',
-        isSelectable: (id) => id !== 'glm',
+        isSelectable: (id) => id !== 'kimi',
       }),
     ).toBe('minimax')
   })
 
   test('skips unavailable models when moving backward', () => {
-    const modelIds = ['glm', 'minimax']
+    const modelIds = ['kimi', 'minimax']
 
     expect(
       nextSelectableFreebuffModelId({
         modelIds,
         focusedId: 'minimax',
         direction: 'backward',
-        isSelectable: (id) => id !== 'glm',
+        isSelectable: (id) => id !== 'kimi',
       }),
     ).toBe('minimax')
   })
 
   test('moves to the next available model when more than one is selectable', () => {
-    const modelIds = ['glm', 'minimax', 'other']
+    const modelIds = ['kimi', 'minimax', 'other']
 
     expect(
       nextSelectableFreebuffModelId({
         modelIds,
         focusedId: 'minimax',
         direction: 'forward',
-        isSelectable: (id) => id !== 'glm',
+        isSelectable: (id) => id !== 'kimi',
       }),
     ).toBe('other')
   })
@@ -48,8 +48,8 @@ describe('nextSelectableFreebuffModelId', () => {
   test('returns null when no selectable model exists', () => {
     expect(
       nextSelectableFreebuffModelId({
-        modelIds: ['glm'],
-        focusedId: 'glm',
+        modelIds: ['kimi'],
+        focusedId: 'kimi',
         direction: 'forward',
         isSelectable: () => false,
       }),
@@ -61,10 +61,10 @@ describe('resolveFreebuffModelCommitTarget', () => {
   test('falls back to the selected model when focus is on a closed model', () => {
     expect(
       resolveFreebuffModelCommitTarget({
-        focusedId: 'glm',
+        focusedId: 'kimi',
         selectedId: 'minimax',
         committedId: null,
-        isSelectable: (id) => id !== 'glm',
+        isSelectable: (id) => id !== 'kimi',
       }),
     ).toBe('minimax')
   })
@@ -73,7 +73,7 @@ describe('resolveFreebuffModelCommitTarget', () => {
     expect(
       resolveFreebuffModelCommitTarget({
         focusedId: 'minimax',
-        selectedId: 'glm',
+        selectedId: 'kimi',
         committedId: null,
         isSelectable: (id) => id === 'minimax',
       }),
diff --git a/common/src/constants/freebuff-models.ts b/common/src/constants/freebuff-models.ts
index 3f4c91a082..7c29e497cb 100644
--- a/common/src/constants/freebuff-models.ts
+++ b/common/src/constants/freebuff-models.ts
@@ -39,7 +39,7 @@ interface LocalTimeFormatOptions {
   timeZone?: string
 }
 
-export const FREEBUFF_MODELS = [
+export const FREEBUFF_MODELS: readonly FreebuffModelOption[] = [
   {
     id: FREEBUFF_MINIMAX_MODEL_ID,
     displayName: 'MiniMax M2.7',
@@ -52,9 +52,11 @@ export const FREEBUFF_MODELS = [
     tagline: 'Smartest',
     availability: 'always',
   },
-] as const satisfies readonly FreebuffModelOption[]
+]
 
-export type FreebuffModelId = (typeof FREEBUFF_MODELS)[number]['id']
+export type FreebuffModelId =
+  | typeof FREEBUFF_MINIMAX_MODEL_ID
+  | typeof FREEBUFF_KIMI_MODEL_ID
 
 /** What new freebuff users see selected in the picker. */
 export const DEFAULT_FREEBUFF_MODEL_ID: FreebuffModelId = FREEBUFF_KIMI_MODEL_ID
diff --git a/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts b/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts
index 1aac8800cd..deb324ea6a 100644
--- a/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts
+++ b/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts
@@ -1,7 +1,6 @@
 import { afterEach, beforeEach, describe, expect, mock, it } from 'bun:test'
 import { NextRequest } from 'next/server'
 
-import { isFreebuffDeploymentHours } from '@codebuff/common/constants/freebuff-models'
 import { formatQuotaResetCountdown, postChatCompletions } from '../_post'
 
 import type { TrackEventFn } from '@codebuff/common/types/contracts/analytics'
@@ -556,15 +555,15 @@ describe('/api/v1/chat/completions POST endpoint', () => {
       expect(response.status).toBe(200)
     })
 
-    it('lets freebuff use GLM 5.1 through Fireworks availability rules', async () => {
+    it('lets freebuff use Kimi K2.6 through CanopyWave', async () => {
       const fetchedBodies: Record<string, unknown>[] = []
-      const fetchViaFireworks = mock(
+      const fetchViaCanopyWave = mock(
         async (_url: string | URL | Request, init?: RequestInit) => {
           fetchedBodies.push(JSON.parse(init?.body as string))
           return new Response(
             JSON.stringify({
               id: 'test-id',
-              model: 'accounts/james-65d217/deployments/mjb4i7ea',
+              model: 'moonshotai/kimi-k2.6',
               choices: [{ message: { content: 'test response' } }],
               usage: {
                 prompt_tokens: 10,
@@ -586,7 +585,7 @@ describe('/api/v1/chat/completions POST endpoint', () => {
           method: 'POST',
           headers: { Authorization: 'Bearer test-api-key-new-free' },
           body: JSON.stringify({
-            model: 'z-ai/glm-5.1',
+            model: 'moonshotai/kimi-k2.6',
             stream: false,
             codebuff_metadata: {
               run_id: 'run-free',
@@ -604,26 +603,18 @@ describe('/api/v1/chat/completions POST endpoint', () => {
         trackEvent: mockTrackEvent,
         getUserUsageData: mockGetUserUsageData,
         getAgentRunFromId: mockGetAgentRunFromId,
-        fetch: fetchViaFireworks,
+        fetch: fetchViaCanopyWave,
         insertMessageBigquery: mockInsertMessageBigquery,
         loggerWithContext: mockLoggerWithContext,
         checkSessionAdmissible: mockCheckSessionAdmissibleAllow,
       })
 
       const body = await response.json()
-      if (isFreebuffDeploymentHours()) {
-        expect(response.status).toBe(200)
-        expect(fetchedBodies).toHaveLength(1)
-        expect(fetchedBodies[0].model).toBe(
-          'accounts/james-65d217/deployments/mjb4i7ea',
-        )
-        expect(body.model).toBe('z-ai/glm-5.1')
-        expect(body.provider).toBe('Fireworks')
-      } else {
-        expect(response.status).toBe(503)
-        expect(fetchedBodies).toHaveLength(0)
-        expect(body.error.code).toBe('DEPLOYMENT_OUTSIDE_HOURS')
-      }
+      expect(response.status).toBe(200)
+      expect(fetchedBodies).toHaveLength(1)
+      expect(fetchedBodies[0].model).toBe('moonshotai/kimi-k2.6')
+      expect(body.model).toBe('moonshotai/kimi-k2.6')
+      expect(body.provider).toBe('CanopyWave')
     })
 
     it('skips credit check when in FREE mode even with 0 credits', async () => {