From 350576c56d25982cfece0f5cf88eaedb92a9924c Mon Sep 17 00:00:00 2001 From: James Grugett Date: Thu, 23 Apr 2026 21:01:31 -0700 Subject: [PATCH 1/3] glm 5.1 => kimi k2.5 --- agents/__tests__/editor.test.ts | 11 + agents/base2/base2.ts | 14 +- agents/editor/editor-lite.ts | 2 +- agents/editor/editor.ts | 6 +- agents/reviewer/code-reviewer-lite.ts | 2 +- .../components/freebuff-model-selector.tsx | 53 ++- cli/src/components/waiting-room-screen.tsx | 2 +- cli/src/hooks/use-freebuff-session.ts | 19 +- cli/src/state/freebuff-model-store.ts | 6 +- cli/src/utils/local-agent-registry.ts | 2 +- common/src/constants/free-agents.ts | 15 +- common/src/constants/freebuff-models.ts | 63 ++- common/src/types/freebuff-session.ts | 7 + docs/freebuff-waiting-room.md | 20 +- scripts/test-fireworks-cache-intervals.ts | 17 +- scripts/test-fireworks-long.ts | 31 +- .../completions/__tests__/completions.test.ts | 80 +++- .../session/__tests__/session.test.ts | 19 +- .../app/api/v1/freebuff/session/_handlers.ts | 15 +- .../__tests__/fireworks-deployment.test.ts | 443 ++++++++---------- web/src/llm-api/fireworks-config.ts | 3 +- web/src/llm-api/fireworks.ts | 66 ++- .../free-session/__tests__/config.test.ts | 13 + .../free-session/__tests__/public-api.test.ts | 28 +- web/src/server/free-session/admission.ts | 10 +- web/src/server/free-session/config.ts | 2 +- web/src/server/free-session/public-api.ts | 19 +- 27 files changed, 614 insertions(+), 354 deletions(-) create mode 100644 web/src/server/free-session/__tests__/config.test.ts diff --git a/agents/__tests__/editor.test.ts b/agents/__tests__/editor.test.ts index 030857c8dc..36d6b75c5c 100644 --- a/agents/__tests__/editor.test.ts +++ b/agents/__tests__/editor.test.ts @@ -67,6 +67,11 @@ describe('editor agent', () => { expect(glmEditor.model).toBe('z-ai/glm-5.1') }) + test('creates minimax editor', () => { + const minimaxEditor = createCodeEditor({ model: 'minimax' }) + expect(minimaxEditor.model).toBe('minimax/minimax-m2.7') + }) + 
test('gpt-5 editor does not include think tags in instructions', () => { const gpt5Editor = createCodeEditor({ model: 'gpt-5' }) expect(gpt5Editor.instructionsPrompt).not.toContain('') @@ -79,6 +84,12 @@ describe('editor agent', () => { expect(glmEditor.instructionsPrompt).not.toContain('') }) + test('minimax editor does not include think tags in instructions', () => { + const minimaxEditor = createCodeEditor({ model: 'minimax' }) + expect(minimaxEditor.instructionsPrompt).not.toContain('') + expect(minimaxEditor.instructionsPrompt).not.toContain('') + }) + test('opus editor includes think tags in instructions', () => { const opusEditor = createCodeEditor({ model: 'opus' }) expect(opusEditor.instructionsPrompt).toContain('') diff --git a/agents/base2/base2.ts b/agents/base2/base2.ts index 1a81f948bf..c6f7e15f8a 100644 --- a/agents/base2/base2.ts +++ b/agents/base2/base2.ts @@ -25,16 +25,18 @@ export function createBase2( const isFree = mode === 'free' || mode === 'lite' const isSonnet = false - const model = isFree ? 'z-ai/glm-5.1' : 'anthropic/claude-opus-4.7' + const model = isFree ? 'minimax/minimax-m2.7' : 'anthropic/claude-opus-4.7' return { publisher, model, - providerOptions: isFree ? { - data_collection: 'deny', - } : { - only: ['amazon-bedrock'], - }, + providerOptions: isFree + ? 
{ + data_collection: 'deny', + } + : { + only: ['amazon-bedrock'], + }, displayName: 'Buffy the Orchestrator', spawnerPrompt: 'Advanced base agent that orchestrates planning, editing, and reviewing for complex coding tasks', diff --git a/agents/editor/editor-lite.ts b/agents/editor/editor-lite.ts index 29225f0c29..9cb5675b5e 100644 --- a/agents/editor/editor-lite.ts +++ b/agents/editor/editor-lite.ts @@ -3,7 +3,7 @@ import { createCodeEditor } from './editor' import type { AgentDefinition } from '../types/agent-definition' const definition: AgentDefinition = { - ...createCodeEditor({ model: 'glm' }), + ...createCodeEditor({ model: 'minimax' }), id: 'editor-lite', } export default definition diff --git a/agents/editor/editor.ts b/agents/editor/editor.ts index 3d208aa13a..c98544d0f2 100644 --- a/agents/editor/editor.ts +++ b/agents/editor/editor.ts @@ -4,7 +4,7 @@ import { publisher } from '../constants' import type { AgentDefinition } from '../types/agent-definition' export const createCodeEditor = (options: { - model: 'gpt-5' | 'opus' | 'glm' + model: 'gpt-5' | 'opus' | 'glm' | 'minimax' }): Omit => { const { model } = options return { @@ -12,6 +12,8 @@ export const createCodeEditor = (options: { model: options.model === 'gpt-5' ? 'openai/gpt-5.1' + : options.model === 'minimax' + ? 'minimax/minimax-m2.7' : options.model === 'glm' ? 'z-ai/glm-5.1' : 'anthropic/claude-opus-4.7', @@ -65,7 +67,7 @@ OR for new files or major rewrites: } -${model === 'gpt-5' || model === 'glm' +${model === 'gpt-5' || model === 'glm' || model === 'minimax' ? '' : `Before you start writing your implementation, you should use tags to think about the best way to implement the changes. 
diff --git a/agents/reviewer/code-reviewer-lite.ts b/agents/reviewer/code-reviewer-lite.ts index feafb87c45..ee017c24e6 100644 --- a/agents/reviewer/code-reviewer-lite.ts +++ b/agents/reviewer/code-reviewer-lite.ts @@ -5,7 +5,7 @@ import { createReviewer } from './code-reviewer' const definition: SecretAgentDefinition = { id: 'code-reviewer-lite', publisher, - ...createReviewer('z-ai/glm-5.1'), + ...createReviewer('minimax/minimax-m2.7'), } export default definition diff --git a/cli/src/components/freebuff-model-selector.tsx b/cli/src/components/freebuff-model-selector.tsx index a33d89540a..1ba966fd22 100644 --- a/cli/src/components/freebuff-model-selector.tsx +++ b/cli/src/components/freebuff-model-selector.tsx @@ -3,9 +3,15 @@ import { useKeyboard } from '@opentui/react' import React, { useCallback, useEffect, useMemo, useState } from 'react' import { Button } from './button' -import { FREEBUFF_MODELS } from '@codebuff/common/constants/freebuff-models' +import { + DEFAULT_FREEBUFF_MODEL_ID, + FREEBUFF_DEPLOYMENT_HOURS_LABEL, + FREEBUFF_MODELS, + isFreebuffModelAvailable, +} from '@codebuff/common/constants/freebuff-models' import { joinFreebuffQueue } from '../hooks/use-freebuff-session' +import { useNow } from '../hooks/use-now' import { useFreebuffModelStore } from '../state/freebuff-model-store' import { useFreebuffSessionStore } from '../state/freebuff-session-store' import { useTerminalDimensions } from '../hooks/use-terminal-dimensions' @@ -33,7 +39,9 @@ export const FreebuffModelSelector: React.FC = () => { const theme = useTheme() const { terminalWidth } = useTerminalDimensions() const selectedModel = useFreebuffModelStore((s) => s.selectedModel) + const setSelectedModel = useFreebuffModelStore((s) => s.setSelectedModel) const session = useFreebuffSessionStore((s) => s.session) + const now = useNow(60_000) const [pending, setPending] = useState(null) const [hoveredId, setHoveredId] = useState(null) // Keyboard cursor — separate from the actually-selected 
model so that @@ -45,6 +53,15 @@ export const FreebuffModelSelector: React.FC = () => { setFocusedId(selectedModel) }, [selectedModel]) + useEffect(() => { + if ( + (session?.status === 'none' || !session) && + !isFreebuffModelAvailable(selectedModel, new Date(now)) + ) { + setSelectedModel(DEFAULT_FREEBUFF_MODEL_ID) + } + }, [now, selectedModel, session, setSelectedModel]) + // Landing ('none'): depths come from the server snapshot, no "self" to // subtract. In-queue ('queued'): for the user's queue, "ahead" is // `position - 1` (themselves don't count); for every other queue, switching @@ -85,7 +102,8 @@ export const FreebuffModelSelector: React.FC = () => { ) // Decide row vs column layout based on whether both buttons actually fit - // side-by-side. Each button's inner text is "● {displayName} · {tagline} {hint}", + // side-by-side. Each button's inner text is + // "● {displayName} · {tagline} · {hours} {hint}", // plus 2 cols of border and 2 cols of padding. Buttons are separated by a // gap of 2. If the total exceeds the terminal width, stack vertically. const stackVertically = useMemo(() => { @@ -97,6 +115,9 @@ export const FreebuffModelSelector: React.FC = () => { model.displayName.length + 3 /* " · " */ + model.tagline.length + + (model.availability === 'deployment_hours' + ? 3 + FREEBUFF_DEPLOYMENT_HOURS_LABEL.length + : 0) + 2 /* " " */ + hintWidth return sum + inner + BUTTON_CHROME + (idx > 0 ? 
GAP : 0) @@ -115,10 +136,11 @@ export const FreebuffModelSelector: React.FC = () => { (modelId: string) => { if (pending) return if (modelId === committedModelId) return + if (!isFreebuffModelAvailable(modelId, new Date(now))) return setPending(modelId) joinFreebuffQueue(modelId).finally(() => setPending(null)) }, - [pending, committedModelId], + [pending, committedModelId, now], ) // Tab / Shift+Tab and arrow keys move the focus highlight only; Enter or @@ -136,7 +158,10 @@ export const FreebuffModelSelector: React.FC = () => { const isCommit = name === 'return' || name === 'enter' || name === 'space' if (!isForward && !isBackward && !isCommit) return if (isCommit) { - if (focusedId !== committedModelId) { + if ( + focusedId !== committedModelId && + isFreebuffModelAvailable(focusedId, new Date(now)) + ) { key.preventDefault?.() pick(focusedId) } @@ -154,7 +179,7 @@ export const FreebuffModelSelector: React.FC = () => { setFocusedId(target.id) } }, - [pending, pick, focusedId, committedModelId], + [pending, pick, focusedId, committedModelId, now], ), ) @@ -181,15 +206,22 @@ export const FreebuffModelSelector: React.FC = () => { const isSelected = model.id === selectedModel const isHovered = hoveredId === model.id const isFocused = focusedId === model.id && !isSelected + const isAvailable = isFreebuffModelAvailable(model.id, new Date(now)) const indicator = isSelected ? '●' : '○' const indicatorColor = isSelected ? theme.primary : theme.muted - const labelColor = isSelected ? theme.foreground : theme.muted + const labelColor = isSelected && isAvailable ? theme.foreground : theme.muted // Clickable whenever picking would actually do something — i.e. // anything except re-picking the queue we're already in. - const interactable = !pending && model.id !== committedModelId + const interactable = !pending && isAvailable && model.id !== committedModelId const ahead = aheadByModel?.[model.id] const hint = - ahead === undefined ? '' : ahead === 0 ? 
'No wait' : `${ahead} ahead` + !isAvailable + ? 'Closed' + : ahead === undefined + ? '' + : ahead === 0 + ? 'No wait' + : `${ahead} ahead` const borderColor = isSelected ? theme.primary @@ -202,7 +234,7 @@ export const FreebuffModelSelector: React.FC = () => { key={model.id} onClick={() => { setFocusedId(model.id) - pick(model.id) + if (isAvailable) pick(model.id) }} onMouseOver={() => interactable && setHoveredId(model.id)} onMouseOut={() => setHoveredId((curr) => (curr === model.id ? null : curr))} @@ -223,6 +255,9 @@ export const FreebuffModelSelector: React.FC = () => { {model.displayName} · {model.tagline} + {model.availability === 'deployment_hours' && ( + · {FREEBUFF_DEPLOYMENT_HOURS_LABEL} + )} {hint.padEnd(hintWidth)} diff --git a/cli/src/components/waiting-room-screen.tsx b/cli/src/components/waiting-room-screen.tsx index e67823f7a2..251ca87c0a 100644 --- a/cli/src/components/waiting-room-screen.tsx +++ b/cli/src/components/waiting-room-screen.tsx @@ -253,7 +253,7 @@ export const WaitingRoomScreen: React.FC = ({ ⚠ Account unavailable - This account can't use freebuff. If you think this is a + This account has been suspended and can't use freebuff. If you think this is a mistake, contact support@codebuff.com. Press Ctrl+C to exit. diff --git a/cli/src/hooks/use-freebuff-session.ts b/cli/src/hooks/use-freebuff-session.ts index 79deea1cfb..225eee2b24 100644 --- a/cli/src/hooks/use-freebuff-session.ts +++ b/cli/src/hooks/use-freebuff-session.ts @@ -1,4 +1,5 @@ import { env } from '@codebuff/common/env' +import { DEFAULT_FREEBUFF_MODEL_ID } from '@codebuff/common/constants/freebuff-models' import { useEffect } from 'react' import { @@ -75,14 +76,17 @@ async function callSession( return body } } - // 409 from POST means the user picked a different model than their active - // session is bound to. Surface as a non-throw `model_locked` so the UI can - // show a confirmation prompt (DELETE then re-POST to switch). 
+ // 409 from POST means the selected model cannot be joined right now, either + // because an active session is locked to another model or because a + // deployment-hours-only model is closed. Surface both as non-throw states. if (resp.status === 409 && method === 'POST') { const body = (await resp.json().catch(() => null)) as | FreebuffSessionResponse | null - if (body && body.status === 'model_locked') { + if ( + body && + (body.status === 'model_locked' || body.status === 'model_unavailable') + ) { return body } } @@ -119,6 +123,7 @@ function nextDelayMs(next: FreebuffSessionResponse): number | null { case 'country_blocked': case 'banned': case 'model_locked': + case 'model_unavailable': return null } } @@ -398,6 +403,12 @@ export function useFreebuffSession(): UseFreebuffSessionResult { schedule(0) return } + if (next.status === 'model_unavailable') { + useFreebuffModelStore.getState().setSelectedModel(DEFAULT_FREEBUFF_MODEL_ID) + nextMethod = 'GET' + schedule(0) + return + } // Startup takeover: the initial probe GET saw we already hold a seat // (from a prior CLI instance). POST now to rotate our instance id so diff --git a/cli/src/state/freebuff-model-store.ts b/cli/src/state/freebuff-model-store.ts index 182a38831f..1aa9f2db80 100644 --- a/cli/src/state/freebuff-model-store.ts +++ b/cli/src/state/freebuff-model-store.ts @@ -1,6 +1,6 @@ import { DEFAULT_FREEBUFF_MODEL_ID, - resolveFreebuffModel, + resolveAvailableFreebuffModel, } from '@codebuff/common/constants/freebuff-models' import { create } from 'zustand' @@ -24,11 +24,11 @@ interface FreebuffModelStore { } export const useFreebuffModelStore = create((set) => ({ - selectedModel: resolveFreebuffModel( + selectedModel: resolveAvailableFreebuffModel( loadFreebuffModelPreference() ?? 
DEFAULT_FREEBUFF_MODEL_ID, ), setSelectedModel: (model) => { - const resolved = resolveFreebuffModel(model) + const resolved = resolveAvailableFreebuffModel(model) saveFreebuffModelPreference(resolved) set({ selectedModel: resolved }) }, diff --git a/cli/src/utils/local-agent-registry.ts b/cli/src/utils/local-agent-registry.ts index 59206eb848..6106b3928e 100644 --- a/cli/src/utils/local-agent-registry.ts +++ b/cli/src/utils/local-agent-registry.ts @@ -370,7 +370,7 @@ export const loadAgentDefinitions = (): AgentDefinition[] => { } // Override the model of free-mode agents to match the user's pick from the - // freebuff waiting room. Bundled definitions hardcode glm-5.1; we swap in + // freebuff waiting room. Bundled definitions hardcode a free model; we swap in // whatever the user chose so the chat-completions request body carries the // matching model and the server-side session gate doesn't reject it as a // model mismatch. diff --git a/common/src/constants/free-agents.ts b/common/src/constants/free-agents.ts index e44c74cc65..762202dcca 100644 --- a/common/src/constants/free-agents.ts +++ b/common/src/constants/free-agents.ts @@ -26,7 +26,10 @@ export const FREEBUFF_ROOT_AGENT_IDS = ['base2-free'] as const */ export const FREE_MODE_AGENT_MODELS: Record> = { // Root orchestrator - 'base2-free': new Set(['minimax/minimax-m2.7', 'z-ai/glm-5.1']), + 'base2-free': new Set([ + 'minimax/minimax-m2.7', + 'moonshotai/kimi-k2.5', + ]), // File exploration agents 'file-picker': new Set(['google/gemini-2.5-flash-lite']), @@ -41,10 +44,16 @@ export const FREE_MODE_AGENT_MODELS: Record> = { 'basher': new Set(['google/gemini-3.1-flash-lite-preview']), // Editor for free mode - 'editor-lite': new Set(['minimax/minimax-m2.7', 'z-ai/glm-5.1']), + 'editor-lite': new Set([ + 'minimax/minimax-m2.7', + 'moonshotai/kimi-k2.5', + ]), // Code reviewer for free mode - 'code-reviewer-lite': new Set(['minimax/minimax-m2.7', 'z-ai/glm-5.1']), + 'code-reviewer-lite': new Set([ + 
'minimax/minimax-m2.7', + 'moonshotai/kimi-k2.5', + ]), } /** diff --git a/common/src/constants/freebuff-models.ts b/common/src/constants/freebuff-models.ts index d71ebd619d..d38d187ffc 100644 --- a/common/src/constants/freebuff-models.ts +++ b/common/src/constants/freebuff-models.ts @@ -13,18 +13,25 @@ export interface FreebuffModelOption { displayName: string /** One-line description shown next to the label. */ tagline: string + /** Availability policy for the selector and server-side admission. */ + availability: 'always' | 'deployment_hours' } +export const FREEBUFF_DEPLOYMENT_HOURS_LABEL = '9am ET-5pm PT' +export const FREEBUFF_KIMI_MODEL_ID = 'moonshotai/kimi-k2.5' + export const FREEBUFF_MODELS = [ - { - id: 'z-ai/glm-5.1', - displayName: 'GLM 5.1', - tagline: 'Smartest', - }, { id: 'minimax/minimax-m2.7', displayName: 'MiniMax M2.7', tagline: 'Fastest', + availability: 'always', + }, + { + id: FREEBUFF_KIMI_MODEL_ID, + displayName: 'Kimi K2.5', + tagline: 'Balanced', + availability: 'deployment_hours', }, ] as const satisfies readonly FreebuffModelOption[] @@ -51,3 +58,49 @@ export function getFreebuffModel(id: string): FreebuffModelOption { FREEBUFF_MODELS.find((m) => m.id === DEFAULT_FREEBUFF_MODEL_ID)! ) } + +function getZonedParts( + date: Date, + timeZone: string, +): { weekday: string; minutes: number } { + const parts = new Intl.DateTimeFormat('en-US', { + timeZone, + weekday: 'short', + hour: '2-digit', + minute: '2-digit', + hourCycle: 'h23', + }).formatToParts(date) + const value = (type: string) => parts.find((part) => part.type === type)?.value + const hour = Number(value('hour') ?? 0) + const minute = Number(value('minute') ?? 0) + return { + weekday: value('weekday') ?? 
'', + minutes: hour * 60 + minute, + } +} + +export function isFreebuffDeploymentHours(now: Date = new Date()): boolean { + const eastern = getZonedParts(now, 'America/New_York') + const pacific = getZonedParts(now, 'America/Los_Angeles') + if (eastern.weekday === 'Sat' || eastern.weekday === 'Sun') return false + return eastern.minutes >= 9 * 60 && pacific.minutes < 17 * 60 +} + +export function isFreebuffModelAvailable( + id: string, + now: Date = new Date(), +): boolean { + const model = FREEBUFF_MODELS.find((m) => m.id === id) + if (!model) return false + return model.availability === 'always' || isFreebuffDeploymentHours(now) +} + +export function resolveAvailableFreebuffModel( + id: string | null | undefined, + now: Date = new Date(), +): FreebuffModelId { + const resolved = resolveFreebuffModel(id) + return isFreebuffModelAvailable(resolved, now) + ? resolved + : DEFAULT_FREEBUFF_MODEL_ID +} diff --git a/common/src/types/freebuff-session.ts b/common/src/types/freebuff-session.ts index e42d9f0bee..43cd3eaa25 100644 --- a/common/src/types/freebuff-session.ts +++ b/common/src/types/freebuff-session.ts @@ -92,6 +92,13 @@ export type FreebuffSessionServerResponse = currentModel: string requestedModel: string } + | { + /** Requested model is valid but not selectable right now. Currently + * used for deployment-hours-only models such as Kimi K2.5. */ + status: 'model_unavailable' + requestedModel: string + availableHours: string + } | { /** Account is banned. Returned from every endpoint so banned bots can't * join the queue at all (otherwise they inflate `queueDepth` until the diff --git a/docs/freebuff-waiting-room.md b/docs/freebuff-waiting-room.md index b1384d7b60..73fa779270 100644 --- a/docs/freebuff-waiting-room.md +++ b/docs/freebuff-waiting-room.md @@ -5,7 +5,7 @@ The waiting room is the admission control layer for **free-mode** requests against the freebuff Fireworks deployments. It has three jobs: 1. 
**Drip-admit users per model** — each selectable freebuff model has its own FIFO queue. Admission runs one tick (default `ADMISSION_TICK_MS`, 15s) that tries to admit one user per model, so heavier models can sit cold without starving lighter ones. -2. **Gate on per-deployment health** — a single fleet probe per tick (`getFleetHealth` in `web/src/server/free-session/fireworks-health.ts`) hits the Fireworks metrics endpoint and classifies each dedicated deployment as `healthy | degraded | unhealthy`. Only models whose deployment is `healthy` admit that tick; a degraded minimax-m2.7 no longer stalls glm-5.1 admissions. +2. **Gate on per-deployment health and hours** — a single fleet probe per tick (`getFleetHealth` in `web/src/server/free-session/fireworks-health.ts`) hits the Fireworks metrics endpoint and classifies each dedicated deployment as `healthy | degraded | unhealthy`. Only models whose deployment is `healthy` and currently available admit that tick; Kimi K2.5 is available during 9am ET-5pm PT on weekdays, while MiniMax M2.7 is serverless and always available. 3. **One instance per account** — prevent a single user from running N concurrent freebuff CLIs to get N× throughput. Users who cannot be admitted immediately are placed in the queue for their chosen model and given an estimated wait time. Admitted users get a fixed-length session (default 1h) bound to the model they were admitted on; chat completions use that model for the life of the session. @@ -149,8 +149,8 @@ The final tick result carries a `queueDepthByModel` map and a single `skipped` r | Constant | Location | Default | Purpose | |---|---|---|---| | `ADMISSION_TICK_MS` | `config.ts` | 15000 | How often the ticker fires. Up to one user is admitted per model per tick. | -| `FREEBUFF_MODELS` | `common/src/constants/freebuff-models.ts` | `glm-5.1`, `minimax-m2.7` | Selectable models; each gets its own queue and admission slot. 
| -| `FIREWORKS_DEPLOYMENT_MAP` | `web/src/llm-api/fireworks-config.ts` | glm-5.1 only | Models with dedicated Fireworks deployments. Models not listed are treated as `healthy` (serverless fallback) — drop this default when they migrate to their own deployments. | +| `FREEBUFF_MODELS` | `common/src/constants/freebuff-models.ts` | `minimax-m2.7`, `kimi-k2.5` | Selectable models; each gets its own queue and admission slot. | +| `FIREWORKS_DEPLOYMENT_MAP` | `web/src/llm-api/fireworks-config.ts` | `kimi-k2.5` | Models with dedicated Fireworks deployments. Models not listed are treated as `healthy` (serverless fallback) — drop this default when they migrate to their own deployments. | | `HEALTH_CACHE_TTL_MS` | `fireworks-health.ts` | 25000 | Fleet probe cache TTL. Sits just under the Fireworks 30s exporter cadence and 6 req/min rate limit. | | `FREEBUFF_SESSION_LENGTH_MS` | env | 3_600_000 | Session lifetime | | `FREEBUFF_SESSION_GRACE_MS` | env | 1_800_000 | Drain window after expiry — gate still admits requests so an in-flight agent can finish, but the CLI is expected to block new prompts. Hard cutoff at `expires_at + grace`. | @@ -180,12 +180,12 @@ Response shapes: { "status": "queued", "instanceId": "e47…", - "model": "z-ai/glm-5.1", + "model": "minimax/minimax-m2.7", "position": 17, // 1-indexed within this model's queue "queueDepth": 43, // size of this model's queue "queueDepthByModel": { // snapshot of every model's queue — powers the - "z-ai/glm-5.1": 43, // "N ahead" hint in the selector. Missing - "minimax/minimax-m2.7": 4 // entries should be treated as 0. + "minimax/minimax-m2.7": 43, // "N ahead" hint in the selector. Missing + "moonshotai/kimi-k2.5": 4 // entries should be treated as 0. 
}, "estimatedWaitMs": 384000, "queuedAt": "2026-04-17T12:00:00Z" @@ -195,7 +195,7 @@ Response shapes: { "status": "active", "instanceId": "e47…", - "model": "z-ai/glm-5.1", + "model": "minimax/minimax-m2.7", "admittedAt": "2026-04-17T12:00:00Z", "expiresAt": "2026-04-17T13:00:00Z", "remainingMs": 3600000 @@ -219,7 +219,7 @@ Response shapes: // to actually switch. { "status": "model_locked", - "currentModel": "z-ai/glm-5.1", + "currentModel": "minimax/minimax-m2.7", "requestedModel": "minimax/minimax-m2.7" } ``` @@ -285,7 +285,7 @@ waitMs = (position - 1) * 24_000 - Position 1 → 0 (next tick admits you) - Position 2 → 24s, and so on. -`position` is scoped to this model's queue — a user at position 1 in the `minimax/minimax-m2.7` queue is not affected by the depth of the `z-ai/glm-5.1` queue. The estimate is intentionally decoupled from the admission tick — it's a human-friendly rule-of-thumb for the UI, not a precise projection. Actual wait depends on admission-tick cadence and health-gated pauses (during a per-deployment Fireworks incident only the affected model's queue stalls; healthy models keep draining), so the real wait can be longer or shorter. +`position` is scoped to this model's queue — a user at position 1 in the `minimax/minimax-m2.7` queue is not affected by the depth of the `moonshotai/kimi-k2.5` queue. The estimate is intentionally decoupled from the admission tick — it's a human-friendly rule-of-thumb for the UI, not a precise projection. Actual wait depends on admission-tick cadence, health-gated pauses, and deployment-hours availability (during a Kimi Fireworks incident or outside 9am ET-5pm PT, only Kimi's queue stalls; MiniMax keeps draining), so the real wait can be longer or shorter. ## CLI Integration (frontend-side contract) @@ -324,7 +324,7 @@ The `disabled` response means the server has the waiting room turned off. 
CLI tr | Spamming POST/GET to starve admission tick | Admission uses per-model Postgres advisory locks; DDoS protection is upstream (Next's global rate limits). Consider adding a per-user limiter on `/session` if traffic warrants. | | Repeatedly POSTing different models to get across every queue | Single row per user (PK on `user_id`); switching models moves the row, never clones it. A user holds exactly one queue slot at any time. | | Fireworks metrics endpoint down / slow | `getFleetHealth()` fails closed (timeout, non-OK, or missing API key) → every dedicated-deployment model is flagged `unhealthy` and its queue pauses. | -| One deployment degraded while others are fine | Health is classified per-deployment; only the affected model's queue pauses, so a degraded minimax-m2.7 doesn't block glm-5.1 admissions. | +| One deployment degraded while others are fine | Health is classified per-deployment; only the affected model's queue pauses, so a degraded Kimi deployment doesn't block MiniMax admissions. 
| | Zombie expired sessions holding capacity | Swept on every admission tick, even when upstream is unhealthy | ## Testing diff --git a/scripts/test-fireworks-cache-intervals.ts b/scripts/test-fireworks-cache-intervals.ts index 0ed71193fd..92d7ac49e3 100644 --- a/scripts/test-fireworks-cache-intervals.ts +++ b/scripts/test-fireworks-cache-intervals.ts @@ -25,11 +25,11 @@ * # Default glm-5.1 serverless with default intervals * bun scripts/test-fireworks-cache-intervals.ts * - * # Custom GLM deployment with a faster sweep - * bun scripts/test-fireworks-cache-intervals.ts glm-5.1 --deployment --intervals=30,60,120,300,600 + * # Custom Kimi deployment with a faster sweep + * bun scripts/test-fireworks-cache-intervals.ts kimi-k2.5 --deployment --intervals=30,60,120,300,600 * * # Long sweep up to 1 hour - * bun scripts/test-fireworks-cache-intervals.ts glm-5.1 --deployment --intervals=60,300,600,1200,1800,2700,3600 + * bun scripts/test-fireworks-cache-intervals.ts kimi-k2.5 --deployment --intervals=60,300,600,1200,1800,2700,3600 */ export {} @@ -39,7 +39,7 @@ const FIREWORKS_BASE_URL = 'https://api.fireworks.ai/inference/v1' type ModelConfig = { id: string standardModel: string - deploymentModel: string + deploymentModel?: string inputCostPerToken: number cachedInputCostPerToken: number outputCostPerToken: number @@ -49,7 +49,6 @@ const MODEL_CONFIGS: Record = { 'glm-5.1': { id: 'z-ai/glm-5.1', standardModel: 'accounts/fireworks/models/glm-5p1', - deploymentModel: 'accounts/james-65d217/deployments/mjb4i7ea', inputCostPerToken: 1.4 / 1_000_000, cachedInputCostPerToken: 0.26 / 1_000_000, outputCostPerToken: 4.4 / 1_000_000, @@ -57,7 +56,7 @@ const MODEL_CONFIGS: Record = { 'kimi-k2.5': { id: 'moonshotai/kimi-k2.5', standardModel: 'accounts/fireworks/models/kimi-k2p5', - deploymentModel: 'accounts/james-65d217/deployments/mx8l5rq2', + deploymentModel: 'accounts/james-65d217/deployments/y5b3z17u', inputCostPerToken: 0.6 / 1_000_000, cachedInputCostPerToken: 0.1 / 1_000_000, 
outputCostPerToken: 3.0 / 1_000_000, @@ -117,8 +116,12 @@ function parseArgs(): { const { modelKey, useDeployment: USE_DEPLOYMENT, intervals: INTERVALS_SEC } = parseArgs() const MODEL = MODEL_CONFIGS[modelKey] +if (USE_DEPLOYMENT && !MODEL.deploymentModel) { + console.error(`❌ No custom deployment configured for ${MODEL.id}`) + process.exit(1) +} const FIREWORKS_MODEL = USE_DEPLOYMENT - ? MODEL.deploymentModel + ? MODEL.deploymentModel! : MODEL.standardModel const INPUT_COST_PER_TOKEN = MODEL.inputCostPerToken const CACHED_INPUT_COST_PER_TOKEN = MODEL.cachedInputCostPerToken diff --git a/scripts/test-fireworks-long.ts b/scripts/test-fireworks-long.ts index 67028228da..e506ccf022 100644 --- a/scripts/test-fireworks-long.ts +++ b/scripts/test-fireworks-long.ts @@ -11,13 +11,19 @@ * * Models: * glm-5.1 (default) — z-ai/glm-5.1 + * kimi-k2.5 — moonshotai/kimi-k2.5 * minimax — minimax/minimax-m2.5 + * minimax-m2.7 — minimax/minimax-m2.7 * * Flags: * --deployment Use custom deployment instead of serverless (standard API) * Serverless is the default + * Examples: + * bun scripts/test-fireworks-long.ts kimi-k2.5 --deployment */ +import { FIREWORKS_DEPLOYMENT_MAP } from '../web/src/llm-api/fireworks-config' + export { } const FIREWORKS_BASE_URL = 'https://api.fireworks.ai/inference/v1' @@ -25,7 +31,7 @@ const FIREWORKS_BASE_URL = 'https://api.fireworks.ai/inference/v1' type ModelConfig = { id: string // OpenRouter-style ID (for display) standardModel: string // Fireworks standard API model ID - deploymentModel: string // Fireworks custom deployment model ID + deploymentModel?: string // Fireworks custom deployment model ID inputCostPerToken: number cachedInputCostPerToken: number outputCostPerToken: number @@ -35,7 +41,6 @@ const MODEL_CONFIGS: Record = { 'glm-5.1': { id: 'z-ai/glm-5.1', standardModel: 'accounts/fireworks/models/glm-5p1', - deploymentModel: 'accounts/james-65d217/deployments/mjb4i7ea', inputCostPerToken: 1.40 / 1_000_000, cachedInputCostPerToken: 0.26 / 
1_000_000, outputCostPerToken: 4.40 / 1_000_000, @@ -43,7 +48,7 @@ const MODEL_CONFIGS: Record = { 'kimi-k2.5': { id: 'moonshotai/kimi-k2.5', standardModel: 'accounts/fireworks/models/kimi-k2p5', - deploymentModel: 'accounts/james-65d217/deployments/mx8l5rq2', + deploymentModel: FIREWORKS_DEPLOYMENT_MAP['moonshotai/kimi-k2.5'], inputCostPerToken: 0.60 / 1_000_000, cachedInputCostPerToken: 0.10 / 1_000_000, outputCostPerToken: 3.00 / 1_000_000, @@ -67,9 +72,19 @@ const MODEL_CONFIGS: Record = { } const DEFAULT_MODEL = 'glm-5.1' +const MODEL_ALIASES: Record = { + glm: 'glm-5.1', + 'z-ai/glm-5.1': 'glm-5.1', + kimi: 'kimi-k2.5', + 'kimi-k2': 'kimi-k2.5', + 'moonshotai/kimi-k2.5': 'kimi-k2.5', + 'minimax/minimax-m2.5': 'minimax', + 'minimax/minimax-m2.7': 'minimax-m2.7', +} function getModelConfig(modelArg?: string): ModelConfig { - const key = modelArg ?? DEFAULT_MODEL + const rawKey = modelArg ?? DEFAULT_MODEL + const key = MODEL_ALIASES[rawKey] ?? rawKey const config = MODEL_CONFIGS[key] if (!config) { console.error(`❌ Unknown model: "${key}". Available models: ${Object.keys(MODEL_CONFIGS).join(', ')}`) @@ -83,7 +98,11 @@ const modelArg = process.argv.find((a, i) => i > 1 && !a.startsWith('-') && a != const MODEL = getModelConfig(modelArg) // Default to serverless (standard API); use --deployment for custom deployment -const FIREWORKS_MODEL = USE_DEPLOYMENT ? MODEL.deploymentModel : MODEL.standardModel +if (USE_DEPLOYMENT && !MODEL.deploymentModel) { + console.error(`❌ No custom deployment configured for ${MODEL.id}`) + process.exit(1) +} +const FIREWORKS_MODEL = USE_DEPLOYMENT ? MODEL.deploymentModel! 
: MODEL.standardModel const INPUT_COST_PER_TOKEN = MODEL.inputCostPerToken const CACHED_INPUT_COST_PER_TOKEN = MODEL.cachedInputCostPerToken const OUTPUT_COST_PER_TOKEN = MODEL.outputCostPerToken @@ -455,4 +474,4 @@ async function main() { console.log('Done!') } -main() \ No newline at end of file +main() diff --git a/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts b/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts index 51a3eb46be..04f9b570cd 100644 --- a/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts +++ b/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts @@ -1,6 +1,8 @@ import { afterEach, beforeEach, describe, expect, mock, it } from 'bun:test' import { NextRequest } from 'next/server' +import { isFreebuffDeploymentHours } from '@codebuff/common/constants/freebuff-models' + import { formatQuotaResetCountdown, postChatCompletions } from '../_post' import type { TrackEventFn } from '@codebuff/common/types/contracts/analytics' @@ -528,7 +530,7 @@ describe('/api/v1/chat/completions POST endpoint', () => { method: 'POST', headers: { Authorization: 'Bearer test-api-key-new-free' }, body: JSON.stringify({ - model: 'z-ai/glm-5.1', + model: 'minimax/minimax-m2.7', stream: false, codebuff_metadata: { run_id: 'run-free', @@ -555,6 +557,76 @@ describe('/api/v1/chat/completions POST endpoint', () => { expect(response.status).toBe(200) }) + it('lets freebuff use Kimi K2.5 through Fireworks availability rules', async () => { + const fetchedBodies: Record[] = [] + const fetchViaFireworks = mock( + async (_url: string | URL | Request, init?: RequestInit) => { + fetchedBodies.push(JSON.parse(init?.body as string)) + return new Response( + JSON.stringify({ + id: 'test-id', + model: 'accounts/james-65d217/deployments/y5b3z17u', + choices: [{ message: { content: 'test response' } }], + usage: { + prompt_tokens: 10, + completion_tokens: 20, + total_tokens: 30, + }, + }), + { + status: 200, + headers: { 
'Content-Type': 'application/json' }, + }, + ) + }, + ) as unknown as typeof globalThis.fetch + + const req = new NextRequest( + 'http://localhost:3000/api/v1/chat/completions', + { + method: 'POST', + headers: { Authorization: 'Bearer test-api-key-new-free' }, + body: JSON.stringify({ + model: 'moonshotai/kimi-k2.5', + stream: false, + codebuff_metadata: { + run_id: 'run-free', + client_id: 'test-client-id-123', + cost_mode: 'free', + }, + }), + }, + ) + + const response = await postChatCompletions({ + req, + getUserInfoFromApiKey: mockGetUserInfoFromApiKey, + logger: mockLogger, + trackEvent: mockTrackEvent, + getUserUsageData: mockGetUserUsageData, + getAgentRunFromId: mockGetAgentRunFromId, + fetch: fetchViaFireworks, + insertMessageBigquery: mockInsertMessageBigquery, + loggerWithContext: mockLoggerWithContext, + checkSessionAdmissible: mockCheckSessionAdmissibleAllow, + }) + + const body = await response.json() + if (isFreebuffDeploymentHours()) { + expect(response.status).toBe(200) + expect(fetchedBodies).toHaveLength(1) + expect(fetchedBodies[0].model).toBe( + 'accounts/james-65d217/deployments/y5b3z17u', + ) + expect(body.model).toBe('moonshotai/kimi-k2.5') + expect(body.provider).toBe('Fireworks') + } else { + expect(response.status).toBe(503) + expect(fetchedBodies).toHaveLength(0) + expect(body.error.code).toBe('DEPLOYMENT_OUTSIDE_HOURS') + } + }) + it('skips credit check when in FREE mode even with 0 credits', async () => { const req = new NextRequest( 'http://localhost:3000/api/v1/chat/completions', @@ -562,7 +634,7 @@ describe('/api/v1/chat/completions POST endpoint', () => { method: 'POST', headers: { Authorization: 'Bearer test-api-key-no-credits' }, body: JSON.stringify({ - model: 'z-ai/glm-5.1', + model: 'minimax/minimax-m2.7', stream: false, codebuff_metadata: { run_id: 'run-free', @@ -671,7 +743,7 @@ describe('/api/v1/chat/completions POST endpoint', () => { method: 'POST', headers: { Authorization: 'Bearer test-api-key-new-free' }, body: 
JSON.stringify({ - model: 'z-ai/glm-5.1', + model: 'minimax/minimax-m2.7', stream: true, codebuff_metadata: { run_id: 'run-123', @@ -853,7 +925,7 @@ describe('/api/v1/chat/completions POST endpoint', () => { method: 'POST', headers: { Authorization: 'Bearer test-api-key-123' }, body: JSON.stringify({ - model: 'z-ai/glm-5.1', + model: 'minimax/minimax-m2.7', stream: false, codebuff_metadata: { run_id: 'run-free', diff --git a/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts b/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts index 657c17f6da..2d33a1ae09 100644 --- a/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts +++ b/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts @@ -3,6 +3,7 @@ import { describe, expect, test } from 'bun:test' import { deleteFreebuffSession, FREEBUFF_INSTANCE_HEADER, + FREEBUFF_MODEL_HEADER, getFreebuffSession, postFreebuffSession, } from '../_handlers' @@ -12,16 +13,17 @@ import type { SessionDeps } from '@/server/free-session/public-api' import type { InternalSessionRow } from '@/server/free-session/types' import type { NextRequest } from 'next/server' -const DEFAULT_MODEL = 'z-ai/glm-5.1' +const DEFAULT_MODEL = 'minimax/minimax-m2.7' function makeReq( apiKey: string | null, - opts: { instanceId?: string; cfCountry?: string } = {}, + opts: { instanceId?: string; cfCountry?: string; model?: string } = {}, ): NextRequest { const headers = new Headers() if (apiKey) headers.set('Authorization', `Bearer ${apiKey}`) if (opts.instanceId) headers.set(FREEBUFF_INSTANCE_HEADER, opts.instanceId) if (opts.cfCountry) headers.set('cf-ipcountry', opts.cfCountry) + if (opts.model) headers.set(FREEBUFF_MODEL_HEADER, opts.model) return { headers, } as unknown as NextRequest @@ -153,6 +155,19 @@ describe('POST /api/v1/freebuff/session', () => { expect(body.status).toBe('queued') }) + test('returns model_unavailable for Kimi outside deployment hours', async () => { + const sessionDeps = makeSessionDeps() + 
const resp = await postFreebuffSession( + makeReq('ok', { model: 'moonshotai/kimi-k2.5' }), + makeDeps(sessionDeps, 'u1'), + ) + expect(resp.status).toBe(409) + const body = await resp.json() + expect(body.status).toBe('model_unavailable') + expect(body.availableHours).toBe('9am ET-5pm PT') + expect(sessionDeps.rows.size).toBe(0) + }) + // Banned bots with valid API keys were POSTing every few seconds and // inflating queueDepth between the 15s admission-tick sweeps. Rejecting at // the HTTP layer with 403 (terminal, like country_blocked) keeps them out diff --git a/web/src/app/api/v1/freebuff/session/_handlers.ts b/web/src/app/api/v1/freebuff/session/_handlers.ts index ec17568a33..6f93e92825 100644 --- a/web/src/app/api/v1/freebuff/session/_handlers.ts +++ b/web/src/app/api/v1/freebuff/session/_handlers.ts @@ -138,12 +138,17 @@ export async function postFreebuffSession( model: requestedModel, deps: deps.sessionDeps, }) - // model_locked is a 409 so it's distinguishable from a normal queued/active - // response on the client. banned is a 403 (terminal, mirrors country_blocked) - // so older CLIs that don't know the status fall into their `!resp.ok` error - // path and back off instead of tight-polling on the unrecognized 200 body. + // model_locked / model_unavailable are 409 so they're distinguishable from + // normal queued/active responses on the client. banned is a 403 (terminal, + // mirrors country_blocked) so older CLIs that don't know the status fall + // into their `!resp.ok` error path and back off instead of tight-polling + // on the unrecognized 200 body. const status = - state.status === 'model_locked' ? 409 : state.status === 'banned' ? 403 : 200 + state.status === 'model_locked' || state.status === 'model_unavailable' + ? 409 + : state.status === 'banned' + ? 
403 + : 200 return NextResponse.json(state, { status }) } catch (error) { return serverError(deps, 'POST', auth.userId, error) diff --git a/web/src/llm-api/__tests__/fireworks-deployment.test.ts b/web/src/llm-api/__tests__/fireworks-deployment.test.ts index 9ed91fd0a6..99078f5284 100644 --- a/web/src/llm-api/__tests__/fireworks-deployment.test.ts +++ b/web/src/llm-api/__tests__/fireworks-deployment.test.ts @@ -3,7 +3,7 @@ import { afterEach, beforeEach, describe, expect, it, mock } from 'bun:test' import { createFireworksRequestWithFallback, DEPLOYMENT_COOLDOWN_MS, - FireworksError, + isDeploymentHours, isDeploymentCoolingDown, markDeploymentScalingUp, resetDeploymentCooldown, @@ -11,8 +11,12 @@ import { import type { Logger } from '@codebuff/common/types/contracts/logger' -const STANDARD_MODEL_ID = 'accounts/fireworks/models/glm-5p1' -const DEPLOYMENT_MODEL_ID = 'accounts/james-65d217/deployments/mjb4i7ea' +const STANDARD_MODEL_ID = 'accounts/fireworks/models/kimi-k2p5' +const DEPLOYMENT_MODEL_ID = 'accounts/james-65d217/deployments/y5b3z17u' +const IN_DEPLOYMENT_HOURS = new Date('2026-04-17T16:00:00Z') // Friday, 12pm ET / 9am PT +const BEFORE_DEPLOYMENT_HOURS = new Date('2026-04-17T12:59:00Z') // Friday, 8:59am ET +const AFTER_DEPLOYMENT_HOURS = new Date('2026-04-18T00:00:00Z') // Friday, 5pm PT +const WEEKEND_DEPLOYMENT_HOURS = new Date('2026-04-18T16:00:00Z') // Saturday function createMockLogger(): Logger { return { @@ -23,18 +27,19 @@ function createMockLogger(): Logger { } } -// Helper: create a Date at a specific ET hour using a known EDT date (June 2025, UTC-4) -function dateAtEtHour(hour: number): Date { - // June 15, 2025 is EDT (UTC-4), so ET hour H = UTC hour H+4 - const utcHour = hour + 4 - if (utcHour < 24) { - return new Date(`2025-06-15T${String(utcHour).padStart(2, '0')}:30:00Z`) - } - // Wraps to next day - return new Date(`2025-06-16T${String(utcHour - 24).padStart(2, '0')}:30:00Z`) -} - describe('Fireworks deployment routing', () => { + 
describe('deployment hours', () => { + it('is active from 9am ET until before 5pm PT on weekdays', () => { + expect(isDeploymentHours(BEFORE_DEPLOYMENT_HOURS)).toBe(false) + expect(isDeploymentHours(IN_DEPLOYMENT_HOURS)).toBe(true) + expect(isDeploymentHours(AFTER_DEPLOYMENT_HOURS)).toBe(false) + }) + + it('is inactive on weekends', () => { + expect(isDeploymentHours(WEEKEND_DEPLOYMENT_HOURS)).toBe(false) + }) + }) + describe('deployment cooldown', () => { beforeEach(() => { resetDeploymentCooldown() @@ -78,32 +83,10 @@ describe('Fireworks deployment routing', () => { }) const minimalBody = { - model: 'z-ai/glm-5.1', + model: 'moonshotai/kimi-k2.5', messages: [{ role: 'user' as const, content: 'test' }], } - function spyDeploymentHours(inHours: boolean) { - // Control isDeploymentHours by mocking Date.prototype.toLocaleString - // When called with the ET timezone options, return an hour inside or outside the window - const original = Date.prototype.toLocaleString - const spy = { - restore: () => { - Date.prototype.toLocaleString = original - }, - } - Date.prototype.toLocaleString = function ( - this: Date, - ...args: Parameters - ) { - const options = args[1] as Intl.DateTimeFormatOptions | undefined - if (options?.timeZone === 'America/New_York' && options?.hour === 'numeric') { - return inHours ? 
'14' : '3' - } - return original.apply(this, args) - } - return spy - } - it('uses standard API when custom deployment is disabled', async () => { const fetchCalls: string[] = [] @@ -115,7 +98,7 @@ describe('Fireworks deployment routing', () => { const response = await createFireworksRequestWithFallback({ body: minimalBody as never, - originalModel: 'z-ai/glm-5.1', + originalModel: 'moonshotai/kimi-k2.5', fetch: mockFetch, logger, useCustomDeployment: false, @@ -128,7 +111,6 @@ describe('Fireworks deployment routing', () => { }) it('tries custom deployment during deployment hours', async () => { - const spy = spyDeploymentHours(true) const fetchCalls: string[] = [] const mockFetch = mock(async (_url: string | URL | Request, init?: RequestInit) => { @@ -137,160 +119,115 @@ describe('Fireworks deployment routing', () => { return new Response(JSON.stringify({ ok: true }), { status: 200 }) }) as unknown as typeof globalThis.fetch - try { - const response = await createFireworksRequestWithFallback({ - body: minimalBody as never, - originalModel: 'z-ai/glm-5.1', - fetch: mockFetch, - logger, - useCustomDeployment: true, - sessionId: 'test-user-id', - }) - - expect(response.status).toBe(200) - expect(fetchCalls).toHaveLength(1) - expect(fetchCalls[0]).toBe(DEPLOYMENT_MODEL_ID) - } finally { - spy.restore() - } + const response = await createFireworksRequestWithFallback({ + body: minimalBody as never, + originalModel: 'moonshotai/kimi-k2.5', + fetch: mockFetch, + logger, + useCustomDeployment: true, + sessionId: 'test-user-id', + now: IN_DEPLOYMENT_HOURS, + }) + + expect(response.status).toBe(200) + expect(fetchCalls).toHaveLength(1) + expect(fetchCalls[0]).toBe(DEPLOYMENT_MODEL_ID) }) - it('falls back to standard API on 503 DEPLOYMENT_SCALING_UP', async () => { - const spy = spyDeploymentHours(true) + it('returns deployment 503 on DEPLOYMENT_SCALING_UP without serverless fallback', async () => { const fetchCalls: string[] = [] - let callCount = 0 const mockFetch = 
mock(async (_url: string | URL | Request, init?: RequestInit) => { const body = JSON.parse(init?.body as string) fetchCalls.push(body.model) - callCount++ - - if (callCount === 1) { - return new Response( - JSON.stringify({ - error: { - message: 'Deployment is currently scaled to zero and is scaling up. Please retry your request in a few minutes.', - code: 'DEPLOYMENT_SCALING_UP', - type: 'error', - }, - }), - { status: 503, statusText: 'Service Unavailable' }, - ) - } - - return new Response(JSON.stringify({ ok: true }), { status: 200 }) + return new Response( + JSON.stringify({ + error: { + message: 'Deployment is currently scaled to zero and is scaling up. Please retry your request in a few minutes.', + code: 'DEPLOYMENT_SCALING_UP', + type: 'error', + }, + }), + { status: 503, statusText: 'Service Unavailable' }, + ) }) as unknown as typeof globalThis.fetch - try { - const response = await createFireworksRequestWithFallback({ - body: minimalBody as never, - originalModel: 'z-ai/glm-5.1', - fetch: mockFetch, - logger, - useCustomDeployment: true, - sessionId: 'test-user-id', - }) - - expect(response.status).toBe(200) - expect(fetchCalls).toHaveLength(2) - expect(fetchCalls[0]).toBe(DEPLOYMENT_MODEL_ID) - expect(fetchCalls[1]).toBe(STANDARD_MODEL_ID) - // Verify cooldown was activated - expect(isDeploymentCoolingDown()).toBe(true) - } finally { - spy.restore() - } + const response = await createFireworksRequestWithFallback({ + body: minimalBody as never, + originalModel: 'moonshotai/kimi-k2.5', + fetch: mockFetch, + logger, + useCustomDeployment: true, + sessionId: 'test-user-id', + now: IN_DEPLOYMENT_HOURS, + }) + + expect(response.status).toBe(503) + expect(fetchCalls).toEqual([DEPLOYMENT_MODEL_ID]) + expect(isDeploymentCoolingDown()).toBe(true) }) - it('falls back to standard API on non-scaling 503 from deployment', async () => { - const spy = spyDeploymentHours(true) + it('returns non-scaling deployment 503 without serverless fallback', async () => { const 
fetchCalls: string[] = [] - let callCount = 0 const mockFetch = mock(async (_url: string | URL | Request, init?: RequestInit) => { const body = JSON.parse(init?.body as string) fetchCalls.push(body.model) - callCount++ - - if (callCount === 1) { - return new Response( - JSON.stringify({ - error: { - message: 'Service temporarily unavailable', - code: 'SERVICE_UNAVAILABLE', - type: 'error', - }, - }), - { status: 503, statusText: 'Service Unavailable' }, - ) - } - - return new Response(JSON.stringify({ ok: true }), { status: 200 }) + return new Response( + JSON.stringify({ + error: { + message: 'Service temporarily unavailable', + code: 'SERVICE_UNAVAILABLE', + type: 'error', + }, + }), + { status: 503, statusText: 'Service Unavailable' }, + ) }) as unknown as typeof globalThis.fetch - try { - const response = await createFireworksRequestWithFallback({ - body: minimalBody as never, - originalModel: 'z-ai/glm-5.1', - fetch: mockFetch, - logger, - useCustomDeployment: true, - sessionId: 'test-user-id', - }) - - expect(response.status).toBe(200) - expect(fetchCalls).toHaveLength(2) - expect(fetchCalls[0]).toBe(DEPLOYMENT_MODEL_ID) - expect(fetchCalls[1]).toBe(STANDARD_MODEL_ID) - // Non-scaling 503 should NOT activate the cooldown - expect(isDeploymentCoolingDown()).toBe(false) - } finally { - spy.restore() - } + const response = await createFireworksRequestWithFallback({ + body: minimalBody as never, + originalModel: 'moonshotai/kimi-k2.5', + fetch: mockFetch, + logger, + useCustomDeployment: true, + sessionId: 'test-user-id', + now: IN_DEPLOYMENT_HOURS, + }) + + expect(response.status).toBe(503) + expect(fetchCalls).toEqual([DEPLOYMENT_MODEL_ID]) + expect(isDeploymentCoolingDown()).toBe(false) }) - it('falls back to standard API on 500 Internal Error from deployment', async () => { - const spy = spyDeploymentHours(true) + it('returns 500 Internal Error from deployment without serverless fallback', async () => { const fetchCalls: string[] = [] - let callCount = 0 
const mockFetch = mock(async (_url: string | URL | Request, init?: RequestInit) => { const body = JSON.parse(init?.body as string) fetchCalls.push(body.model) - callCount++ - - if (callCount === 1) { - return new Response( - JSON.stringify({ error: 'Internal error' }), - { status: 500, statusText: 'Internal Server Error' }, - ) - } - - return new Response(JSON.stringify({ ok: true }), { status: 200 }) + return new Response( + JSON.stringify({ error: 'Internal error' }), + { status: 500, statusText: 'Internal Server Error' }, + ) }) as unknown as typeof globalThis.fetch - try { - const response = await createFireworksRequestWithFallback({ - body: minimalBody as never, - originalModel: 'z-ai/glm-5.1', - fetch: mockFetch, - logger, - useCustomDeployment: true, - sessionId: 'test-user-id', - }) - - expect(response.status).toBe(200) - expect(fetchCalls).toHaveLength(2) - expect(fetchCalls[0]).toBe(DEPLOYMENT_MODEL_ID) - expect(fetchCalls[1]).toBe(STANDARD_MODEL_ID) - expect(isDeploymentCoolingDown()).toBe(false) - } finally { - spy.restore() - } + const response = await createFireworksRequestWithFallback({ + body: minimalBody as never, + originalModel: 'moonshotai/kimi-k2.5', + fetch: mockFetch, + logger, + useCustomDeployment: true, + sessionId: 'test-user-id', + now: IN_DEPLOYMENT_HOURS, + }) + + expect(response.status).toBe(500) + expect(fetchCalls).toEqual([DEPLOYMENT_MODEL_ID]) + expect(isDeploymentCoolingDown()).toBe(false) }) - it('skips deployment during cooldown and goes straight to standard API', async () => { - const spy = spyDeploymentHours(true) + it('returns cooldown error without serverless fallback', async () => { markDeploymentScalingUp() const fetchCalls: string[] = [] @@ -300,26 +237,21 @@ describe('Fireworks deployment routing', () => { return new Response(JSON.stringify({ ok: true }), { status: 200 }) }) as unknown as typeof globalThis.fetch - try { - const response = await createFireworksRequestWithFallback({ - body: minimalBody as never, - 
originalModel: 'z-ai/glm-5.1', - fetch: mockFetch, - logger, - useCustomDeployment: true, - sessionId: 'test-user-id', - }) - - expect(response.status).toBe(200) - expect(fetchCalls).toHaveLength(1) - expect(fetchCalls[0]).toBe(STANDARD_MODEL_ID) - } finally { - spy.restore() - } + const response = await createFireworksRequestWithFallback({ + body: minimalBody as never, + originalModel: 'moonshotai/kimi-k2.5', + fetch: mockFetch, + logger, + useCustomDeployment: true, + sessionId: 'test-user-id', + now: IN_DEPLOYMENT_HOURS, + }) + + expect(response.status).toBe(503) + expect(fetchCalls).toHaveLength(0) }) it('uses standard API for models without a custom deployment', async () => { - const spy = spyDeploymentHours(true) const fetchCalls: string[] = [] const mockFetch = mock(async (_url: string | URL | Request, init?: RequestInit) => { @@ -328,27 +260,43 @@ describe('Fireworks deployment routing', () => { return new Response(JSON.stringify({ ok: true }), { status: 200 }) }) as unknown as typeof globalThis.fetch - try { - const response = await createFireworksRequestWithFallback({ - body: { ...minimalBody, model: 'some-other/model' } as never, - originalModel: 'some-other/model', - fetch: mockFetch, - logger, - useCustomDeployment: true, - sessionId: 'test-user-id', - }) - - expect(response.status).toBe(200) - expect(fetchCalls).toHaveLength(1) - // Model without mapping falls through to the original model - expect(fetchCalls[0]).toBe('some-other/model') - } finally { - spy.restore() - } + const response = await createFireworksRequestWithFallback({ + body: { ...minimalBody, model: 'some-other/model' } as never, + originalModel: 'some-other/model', + fetch: mockFetch, + logger, + useCustomDeployment: true, + sessionId: 'test-user-id', + now: BEFORE_DEPLOYMENT_HOURS, + }) + + expect(response.status).toBe(200) + expect(fetchCalls).toHaveLength(1) + // Model without mapping falls through to the original model + expect(fetchCalls[0]).toBe('some-other/model') + }) + + 
it('returns an availability error for deployment models outside hours', async () => { + const mockFetch = mock(async () => { + throw new Error('should not fetch outside deployment hours') + }) as unknown as typeof globalThis.fetch + + const response = await createFireworksRequestWithFallback({ + body: minimalBody as never, + originalModel: 'moonshotai/kimi-k2.5', + fetch: mockFetch, + logger, + useCustomDeployment: true, + sessionId: 'test-user-id', + now: BEFORE_DEPLOYMENT_HOURS, + }) + + expect(response.status).toBe(503) + const body = await response.json() + expect(body.error.code).toBe('DEPLOYMENT_OUTSIDE_HOURS') }) it('returns non-5xx responses from deployment without fallback (e.g. 429)', async () => { - const spy = spyDeploymentHours(true) const fetchCalls: string[] = [] const mockFetch = mock(async (_url: string | URL | Request, init?: RequestInit) => { @@ -360,23 +308,20 @@ describe('Fireworks deployment routing', () => { ) }) as unknown as typeof globalThis.fetch - try { - const response = await createFireworksRequestWithFallback({ - body: minimalBody as never, - originalModel: 'z-ai/glm-5.1', - fetch: mockFetch, - logger, - useCustomDeployment: true, - sessionId: 'test-user-id', - }) - - // Non-5xx errors from deployment are returned as-is (caller handles them) - expect(response.status).toBe(429) - expect(fetchCalls).toHaveLength(1) - expect(fetchCalls[0]).toBe(DEPLOYMENT_MODEL_ID) - } finally { - spy.restore() - } + const response = await createFireworksRequestWithFallback({ + body: minimalBody as never, + originalModel: 'moonshotai/kimi-k2.5', + fetch: mockFetch, + logger, + useCustomDeployment: true, + sessionId: 'test-user-id', + now: IN_DEPLOYMENT_HOURS, + }) + + // Non-5xx errors from deployment are returned as-is (caller handles them) + expect(response.status).toBe(429) + expect(fetchCalls).toHaveLength(1) + expect(fetchCalls[0]).toBe(DEPLOYMENT_MODEL_ID) }) it('transforms reasoning to reasoning_effort (defaults to medium)', async () => { @@ 
-393,7 +338,7 @@ describe('Fireworks deployment routing', () => { ...minimalBody, reasoning: { enabled: true }, } as never, - originalModel: 'z-ai/glm-5.1', + originalModel: 'moonshotai/kimi-k2.5', fetch: mockFetch, logger, useCustomDeployment: false, @@ -419,7 +364,7 @@ describe('Fireworks deployment routing', () => { ...minimalBody, reasoning: { effort: 'high' }, } as never, - originalModel: 'z-ai/glm-5.1', + originalModel: 'moonshotai/kimi-k2.5', fetch: mockFetch, logger, useCustomDeployment: false, @@ -445,7 +390,7 @@ describe('Fireworks deployment routing', () => { ...minimalBody, reasoning: { enabled: false, effort: 'high' }, } as never, - originalModel: 'z-ai/glm-5.1', + originalModel: 'moonshotai/kimi-k2.5', fetch: mockFetch, logger, useCustomDeployment: false, @@ -472,7 +417,7 @@ describe('Fireworks deployment routing', () => { reasoning: { effort: 'high' }, tools: [{ type: 'function', function: { name: 'test', arguments: '{}' } }], } as never, - originalModel: 'z-ai/glm-5.1', + originalModel: 'moonshotai/kimi-k2.5', fetch: mockFetch, logger, useCustomDeployment: false, @@ -498,7 +443,7 @@ describe('Fireworks deployment routing', () => { ...minimalBody, reasoning_effort: 'low', } as never, - originalModel: 'z-ai/glm-5.1', + originalModel: 'moonshotai/kimi-k2.5', fetch: mockFetch, logger, useCustomDeployment: false, @@ -524,7 +469,7 @@ describe('Fireworks deployment routing', () => { reasoning_effort: 'low', tools: [{ type: 'function', function: { name: 'test', arguments: '{}' } }], } as never, - originalModel: 'z-ai/glm-5.1', + originalModel: 'moonshotai/kimi-k2.5', fetch: mockFetch, logger, useCustomDeployment: false, @@ -535,41 +480,31 @@ describe('Fireworks deployment routing', () => { expect(fetchedBodies[0].reasoning_effort).toBe('low') }) - it('logs when trying deployment and when falling back on 5xx', async () => { - const spy = spyDeploymentHours(true) - let callCount = 0 - + it('logs when trying deployment and when deployment returns 5xx', async 
() => { const mockFetch = mock(async () => { - callCount++ - if (callCount === 1) { - return new Response( - JSON.stringify({ - error: { - message: 'Scaling up', - code: 'DEPLOYMENT_SCALING_UP', - type: 'error', - }, - }), - { status: 503, statusText: 'Service Unavailable' }, - ) - } - return new Response(JSON.stringify({ ok: true }), { status: 200 }) + return new Response( + JSON.stringify({ + error: { + message: 'Scaling up', + code: 'DEPLOYMENT_SCALING_UP', + type: 'error', + }, + }), + { status: 503, statusText: 'Service Unavailable' }, + ) }) as unknown as typeof globalThis.fetch - try { - await createFireworksRequestWithFallback({ - body: minimalBody as never, - originalModel: 'z-ai/glm-5.1', - fetch: mockFetch, - logger, - useCustomDeployment: true, - sessionId: 'test-user-id', - }) - - expect(logger.info).toHaveBeenCalledTimes(2) - } finally { - spy.restore() - } + await createFireworksRequestWithFallback({ + body: minimalBody as never, + originalModel: 'moonshotai/kimi-k2.5', + fetch: mockFetch, + logger, + useCustomDeployment: true, + sessionId: 'test-user-id', + now: IN_DEPLOYMENT_HOURS, + }) + + expect(logger.info).toHaveBeenCalledTimes(2) }) }) }) diff --git a/web/src/llm-api/fireworks-config.ts b/web/src/llm-api/fireworks-config.ts index fb6d595801..6856f3f347 100644 --- a/web/src/llm-api/fireworks-config.ts +++ b/web/src/llm-api/fireworks-config.ts @@ -10,7 +10,6 @@ export const FIREWORKS_ACCOUNT_ID = 'james-65d217' export const FIREWORKS_DEPLOYMENT_MAP: Record = { // 'minimax/minimax-m2.5': 'accounts/james-65d217/deployments/lnfid5h9', - // 'moonshotai/kimi-k2.5': 'accounts/james-65d217/deployments/mx8l5rq2', + 'moonshotai/kimi-k2.5': 'accounts/james-65d217/deployments/y5b3z17u', // 'minimax/minimax-m2.7': 'accounts/james-65d217/deployments/nrdudqxd', - 'z-ai/glm-5.1': 'accounts/james-65d217/deployments/mjb4i7ea', } diff --git a/web/src/llm-api/fireworks.ts b/web/src/llm-api/fireworks.ts index 6e304638d7..138671c8aa 100644 --- 
a/web/src/llm-api/fireworks.ts +++ b/web/src/llm-api/fireworks.ts @@ -1,5 +1,9 @@ import { Agent } from 'undici' +import { + FREEBUFF_DEPLOYMENT_HOURS_LABEL, + isFreebuffDeploymentHours, +} from '@codebuff/common/constants/freebuff-models' import { PROFIT_MARGIN } from '@codebuff/common/constants/limits' import { getErrorObject } from '@codebuff/common/util/error' import { env } from '@codebuff/internal/env' @@ -38,9 +42,9 @@ const FIREWORKS_MODEL_MAP: Record = { /** Flag to enable custom Fireworks deployments (set to false to use global API only) */ const FIREWORKS_USE_CUSTOM_DEPLOYMENT = true -/** Check if current time is within deployment hours (always enabled) */ -export function isDeploymentHours(_now: Date = new Date()): boolean { - return true +/** Check if current time is within deployment hours: Mon-Fri, 9am ET to 5pm PT. */ +export function isDeploymentHours(now: Date = new Date()): boolean { + return isFreebuffDeploymentHours(now) } /** @@ -173,7 +177,7 @@ const FIREWORKS_PRICING_MAP: Record = { } function getFireworksPricing(model: string): FireworksPricing { - return FIREWORKS_PRICING_MAP[model] ?? FIREWORKS_MODEL_MAP['z-ai/glm-5.1'] + return FIREWORKS_PRICING_MAP[model] ?? FIREWORKS_PRICING_MAP['z-ai/glm-5.1'] } function extractUsageAndCost(usage: Record | undefined | null, model: string): UsageData { @@ -708,9 +712,10 @@ async function parseFireworksError(response: Response): Promise } /** - * Tries the custom Fireworks deployment during business hours (10am–8pm ET), - * falling back to the standard API if the deployment returns 503 DEPLOYMENT_SCALING_UP. - * Outside deployment hours or during cooldown, goes straight to the standard API. + * Uses custom Fireworks deployments only during deployment hours. Deployment + * mapped models never fall back to the serverless API outside hours, during + * cooldown, or after deployment 5xxs; those states surface as provider errors + * so freebuff can offer MiniMax as the always-on option. 
*/ export async function createFireworksRequestWithFallback(params: { body: ChatCompletionRequestBody @@ -719,17 +724,41 @@ export async function createFireworksRequestWithFallback(params: { logger: Logger useCustomDeployment?: boolean sessionId: string + now?: Date }): Promise { const { body, originalModel, fetch, logger, sessionId } = params + const now = params.now ?? new Date() const useCustomDeployment = params.useCustomDeployment ?? FIREWORKS_USE_CUSTOM_DEPLOYMENT const deploymentModelId = FIREWORKS_DEPLOYMENT_MAP[originalModel] - const shouldTryDeployment = - useCustomDeployment && - deploymentModelId && - isDeploymentHours() && - !isDeploymentCoolingDown() + const hasDeployment = useCustomDeployment && Boolean(deploymentModelId) + + if (hasDeployment && !isDeploymentHours(now)) { + return new Response( + JSON.stringify({ + error: { + message: `${originalModel} is only available during ${FREEBUFF_DEPLOYMENT_HOURS_LABEL}. Use minimax/minimax-m2.7 outside those hours.`, + code: 'DEPLOYMENT_OUTSIDE_HOURS', + type: 'availability_error', + }, + }), + { status: 503, statusText: 'Service Unavailable' }, + ) + } - if (shouldTryDeployment) { + if (hasDeployment && isDeploymentCoolingDown()) { + return new Response( + JSON.stringify({ + error: { + message: `${originalModel} deployment is temporarily unavailable. 
Use minimax/minimax-m2.7 while it recovers.`, + code: 'DEPLOYMENT_COOLDOWN', + type: 'availability_error', + }, + }), + { status: 503, statusText: 'Service Unavailable' }, + ) + } + + if (hasDeployment && deploymentModelId) { logger.info( { model: originalModel, deploymentModel: deploymentModelId }, 'Trying Fireworks custom deployment', @@ -746,15 +775,18 @@ export async function createFireworksRequestWithFallback(params: { const errorText = await response.text() logger.info( { model: originalModel, status: response.status, errorText: errorText.slice(0, 200) }, - 'Fireworks custom deployment returned 5xx, falling back to standard API', + 'Fireworks custom deployment returned 5xx', ) if (errorText.includes('DEPLOYMENT_SCALING_UP')) { markDeploymentScalingUp() } - // Fall through to standard API request below - } else { - return response + return new Response(errorText, { + status: response.status, + statusText: response.statusText, + headers: response.headers, + }) } + return response } return createFireworksRequest({ body, originalModel, fetch, sessionId }) diff --git a/web/src/server/free-session/__tests__/config.test.ts b/web/src/server/free-session/__tests__/config.test.ts new file mode 100644 index 0000000000..93f5fdcf04 --- /dev/null +++ b/web/src/server/free-session/__tests__/config.test.ts @@ -0,0 +1,13 @@ +import { describe, expect, test } from 'bun:test' + +import { FREEBUFF_MODELS } from '@codebuff/common/constants/freebuff-models' + +import { getInstantAdmitCapacity } from '../config' + +describe('free session config', () => { + test('every selectable freebuff model has instant-admit capacity', () => { + for (const model of FREEBUFF_MODELS) { + expect(getInstantAdmitCapacity(model.id)).toBeGreaterThan(0) + } + }) +}) diff --git a/web/src/server/free-session/__tests__/public-api.test.ts b/web/src/server/free-session/__tests__/public-api.test.ts index a824f6d22b..e0e0aa956b 100644 --- a/web/src/server/free-session/__tests__/public-api.test.ts +++ 
b/web/src/server/free-session/__tests__/public-api.test.ts @@ -13,7 +13,7 @@ import type { InternalSessionRow } from '../types' const SESSION_LEN = 60 * 60 * 1000 const GRACE_MS = 30 * 60 * 1000 -const DEFAULT_MODEL = 'z-ai/glm-5.1' +const DEFAULT_MODEL = 'minimax/minimax-m2.7' function makeDeps(overrides: Partial = {}): SessionDeps & { rows: Map @@ -177,19 +177,34 @@ describe('requestSession', () => { expect(state.instanceId).toBe('inst-1') }) + test('deployment-hours-only model is unavailable outside deployment hours', async () => { + const state = await requestSession({ + userId: 'u1', + model: 'moonshotai/kimi-k2.5', + deps, + }) + expect(state).toEqual({ + status: 'model_unavailable', + requestedModel: 'moonshotai/kimi-k2.5', + availableHours: '9am ET-5pm PT', + }) + expect(deps.rows.size).toBe(0) + }) + test('queued response includes a per-model depth snapshot for the selector', async () => { - // Seed 2 users in glm + 1 in minimax so the returned map captures both. + deps._tick(new Date('2026-04-17T16:00:00Z')) + // Seed 2 users in MiniMax + 1 in Kimi so the returned map captures both. await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps }) deps._tick(new Date(deps._now().getTime() + 1000)) await requestSession({ userId: 'u2', model: DEFAULT_MODEL, deps }) deps._tick(new Date(deps._now().getTime() + 1000)) - await requestSession({ userId: 'u3', model: 'minimax/minimax-m2.7', deps }) + await requestSession({ userId: 'u3', model: 'moonshotai/kimi-k2.5', deps }) const state = await getSessionState({ userId: 'u1', deps }) if (state.status !== 'queued') throw new Error('unreachable') expect(state.queueDepthByModel).toEqual({ [DEFAULT_MODEL]: 2, - 'minimax/minimax-m2.7': 1, + 'moonshotai/kimi-k2.5': 1, }) }) @@ -264,11 +279,12 @@ describe('requestSession', () => { }) test('instant-admit: per-model capacities are independent', async () => { - // GLM saturated at 1 active, MiniMax still has room. + // MiniMax saturated at 1 active, Kimi still has room. 
const admitDeps = makeDeps({ getInstantAdmitCapacity: (model) => model === DEFAULT_MODEL ? 1 : 10, }) + admitDeps._tick(new Date('2026-04-17T16:00:00Z')) await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps: admitDeps }) const s2 = await requestSession({ userId: 'u2', @@ -277,7 +293,7 @@ describe('requestSession', () => { }) const s3 = await requestSession({ userId: 'u3', - model: 'minimax/minimax-m2.7', + model: 'moonshotai/kimi-k2.5', deps: admitDeps, }) expect(s2.status).toBe('queued') diff --git a/web/src/server/free-session/admission.ts b/web/src/server/free-session/admission.ts index 3f3c051d2a..9f0b74c9f9 100644 --- a/web/src/server/free-session/admission.ts +++ b/web/src/server/free-session/admission.ts @@ -1,4 +1,7 @@ -import { FREEBUFF_MODELS } from '@codebuff/common/constants/freebuff-models' +import { + FREEBUFF_MODELS, + isFreebuffModelAvailable, +} from '@codebuff/common/constants/freebuff-models' import { ADMISSION_TICK_MS, @@ -111,7 +114,10 @@ export async function runAdmissionTick( // advisory locks and a single update each. const perModel = await Promise.all( models.map(async (model) => { - const health = fleet[model] ?? 'healthy' + const isRegisteredModel = FREEBUFF_MODELS.some((m) => m.id === model) + const health = !isRegisteredModel || isFreebuffModelAvailable(model, now) + ? fleet[model] ?? 'healthy' + : 'unhealthy' const { admitted, skipped } = await deps.admitFromQueue({ model, sessionLengthMs: deps.sessionLengthMs, diff --git a/web/src/server/free-session/config.ts b/web/src/server/free-session/config.ts index 85bba7fa6f..c0b4d84c66 100644 --- a/web/src/server/free-session/config.ts +++ b/web/src/server/free-session/config.ts @@ -48,7 +48,7 @@ export function getSessionGraceMs(): number { * queue). 
*/ const INSTANT_ADMIT_CAPACITY: Record = { - 'z-ai/glm-5.1': 50, + 'moonshotai/kimi-k2.5': 100, 'minimax/minimax-m2.7': 200, } diff --git a/web/src/server/free-session/public-api.ts b/web/src/server/free-session/public-api.ts index 4505404436..7ea85f2e48 100644 --- a/web/src/server/free-session/public-api.ts +++ b/web/src/server/free-session/public-api.ts @@ -1,4 +1,6 @@ import { + FREEBUFF_DEPLOYMENT_HOURS_LABEL, + isFreebuffModelAvailable, isFreebuffModelId as isSelectableFreebuffModel, resolveFreebuffModel, } from '@codebuff/common/constants/freebuff-models' @@ -122,6 +124,11 @@ export type RequestSessionResult = currentModel: string requestedModel: string } + | { + status: 'model_unavailable' + requestedModel: string + availableHours: string + } /** * Client calls this on CLI startup with the model they want to use. @@ -152,6 +159,7 @@ export async function requestSession(params: { }): Promise { const deps = params.deps ?? defaultDeps const model = resolveFreebuffModel(params.model) + const now = nowOf(deps) if (params.userBanned) { return { status: 'banned' } } @@ -161,13 +169,20 @@ export async function requestSession(params: { ) { return { status: 'disabled' } } + if (!isFreebuffModelAvailable(model, now)) { + return { + status: 'model_unavailable', + requestedModel: model, + availableHours: FREEBUFF_DEPLOYMENT_HOURS_LABEL, + } + } let row: InternalSessionRow try { row = await deps.joinOrTakeOver({ userId: params.userId, model, - now: nowOf(deps), + now, }) } catch (err) { if (err instanceof FreeSessionModelLockedError) { @@ -199,7 +214,7 @@ export async function requestSession(params: { userId: params.userId, model, sessionLengthMs: deps.sessionLengthMs, - now: nowOf(deps), + now, }) if (promoted) row = promoted } From 6043ee25a8afd965ec85f3ae41c6c9412385546c Mon Sep 17 00:00:00 2001 From: James Grugett Date: Fri, 24 Apr 2026 12:23:31 -0700 Subject: [PATCH 2/3] switch to kimi k2.6, 9am ET to 5pm PT --- agents/base2/base2.ts | 14 +++---- 
agents/reviewer/code-reviewer-lite.ts | 2 +- agents/types/agent-definition.ts | 4 +- cli/src/hooks/use-freebuff-session.ts | 3 +- common/src/constants/free-agents.ts | 6 +-- common/src/constants/freebuff-models.ts | 6 +-- .../types/agent-definition.ts | 4 +- common/src/types/freebuff-session.ts | 3 +- docs/freebuff-waiting-room.md | 10 ++--- scripts/test-fireworks-cache-intervals.ts | 14 +++---- scripts/test-fireworks-long.ts | 18 ++++----- .../completions/__tests__/completions.test.ts | 28 +++++--------- .../session/__tests__/session.test.ts | 2 +- .../__tests__/fireworks-deployment.test.ts | 38 ++++++++++--------- web/src/llm-api/fireworks-config.ts | 2 +- web/src/llm-api/fireworks.ts | 4 +- .../free-session/__tests__/public-api.test.ts | 10 ++--- web/src/server/free-session/config.ts | 2 +- 18 files changed, 81 insertions(+), 89 deletions(-) diff --git a/agents/base2/base2.ts b/agents/base2/base2.ts index c6f7e15f8a..b1e24efff6 100644 --- a/agents/base2/base2.ts +++ b/agents/base2/base2.ts @@ -25,18 +25,16 @@ export function createBase2( const isFree = mode === 'free' || mode === 'lite' const isSonnet = false - const model = isFree ? 'minimax/minimax-m2.7' : 'anthropic/claude-opus-4.7' + const model = isFree ? 'moonshotai/kimi-k2.6' : 'anthropic/claude-opus-4.7' return { publisher, model, - providerOptions: isFree - ? { - data_collection: 'deny', - } - : { - only: ['amazon-bedrock'], - }, + providerOptions: isFree ? 
{
+    data_collection: 'deny',
+  } : {
+    only: ['amazon-bedrock'],
+  },
   displayName: 'Buffy the Orchestrator',
   spawnerPrompt:
     'Advanced base agent that orchestrates planning, editing, and reviewing for complex coding tasks',
diff --git a/agents/reviewer/code-reviewer-lite.ts b/agents/reviewer/code-reviewer-lite.ts
index ee017c24e6..888cadf4f7 100644
--- a/agents/reviewer/code-reviewer-lite.ts
+++ b/agents/reviewer/code-reviewer-lite.ts
@@ -5,7 +5,7 @@ import { createReviewer } from './code-reviewer'
 const definition: SecretAgentDefinition = {
   id: 'code-reviewer-lite',
   publisher,
-  ...createReviewer('minimax/minimax-m2.7'),
+  ...createReviewer('moonshotai/kimi-k2.6'),
 }
 
 export default definition
diff --git a/agents/types/agent-definition.ts b/agents/types/agent-definition.ts
index b28a77c311..2fbfed0a49 100644
--- a/agents/types/agent-definition.ts
+++ b/agents/types/agent-definition.ts
@@ -423,8 +423,8 @@ export type ModelName =
   // Other open source models
   | 'moonshotai/kimi-k2'
   | 'moonshotai/kimi-k2:nitro'
-  | 'moonshotai/kimi-k2.5'
-  | 'moonshotai/kimi-k2.5:nitro'
+  | 'moonshotai/kimi-k2.6'
+  | 'moonshotai/kimi-k2.6:nitro'
   | 'z-ai/glm-5'
   | 'z-ai/glm-5.1'
   | 'z-ai/glm-4.6'
diff --git a/cli/src/hooks/use-freebuff-session.ts b/cli/src/hooks/use-freebuff-session.ts
index 225eee2b24..f24fba7b30 100644
--- a/cli/src/hooks/use-freebuff-session.ts
+++ b/cli/src/hooks/use-freebuff-session.ts
@@ -78,7 +78,8 @@ async function callSession(
   }
   // 409 from POST means the selected model cannot be joined right now, either
   // because an active session is locked to another model or because a
-  // deployment-hours-only model is closed. Surface both as non-throw states.
+  // deployment-hours-only model is temporarily closed. Surface model-switch
+  // conflicts and availability closures as non-throw states.
if (resp.status === 409 && method === 'POST') { const body = (await resp.json().catch(() => null)) as | FreebuffSessionResponse diff --git a/common/src/constants/free-agents.ts b/common/src/constants/free-agents.ts index 762202dcca..4a2a4a147e 100644 --- a/common/src/constants/free-agents.ts +++ b/common/src/constants/free-agents.ts @@ -28,7 +28,7 @@ export const FREE_MODE_AGENT_MODELS: Record> = { // Root orchestrator 'base2-free': new Set([ 'minimax/minimax-m2.7', - 'moonshotai/kimi-k2.5', + 'moonshotai/kimi-k2.6', ]), // File exploration agents @@ -46,13 +46,13 @@ export const FREE_MODE_AGENT_MODELS: Record> = { // Editor for free mode 'editor-lite': new Set([ 'minimax/minimax-m2.7', - 'moonshotai/kimi-k2.5', + 'moonshotai/kimi-k2.6', ]), // Code reviewer for free mode 'code-reviewer-lite': new Set([ 'minimax/minimax-m2.7', - 'moonshotai/kimi-k2.5', + 'moonshotai/kimi-k2.6', ]), } diff --git a/common/src/constants/freebuff-models.ts b/common/src/constants/freebuff-models.ts index d38d187ffc..2f6da2ce0b 100644 --- a/common/src/constants/freebuff-models.ts +++ b/common/src/constants/freebuff-models.ts @@ -18,7 +18,7 @@ export interface FreebuffModelOption { } export const FREEBUFF_DEPLOYMENT_HOURS_LABEL = '9am ET-5pm PT' -export const FREEBUFF_KIMI_MODEL_ID = 'moonshotai/kimi-k2.5' +export const FREEBUFF_KIMI_MODEL_ID = 'moonshotai/kimi-k2.6' export const FREEBUFF_MODELS = [ { @@ -29,7 +29,7 @@ export const FREEBUFF_MODELS = [ }, { id: FREEBUFF_KIMI_MODEL_ID, - displayName: 'Kimi K2.5', + displayName: 'Kimi K2.6', tagline: 'Balanced', availability: 'deployment_hours', }, @@ -83,7 +83,7 @@ export function isFreebuffDeploymentHours(now: Date = new Date()): boolean { const eastern = getZonedParts(now, 'America/New_York') const pacific = getZonedParts(now, 'America/Los_Angeles') if (eastern.weekday === 'Sat' || eastern.weekday === 'Sun') return false - return eastern.minutes >= 9 * 60 && pacific.minutes < 24 * 60 + return eastern.minutes >= 9 * 60 && pacific.minutes < 
17 * 60 } export function isFreebuffModelAvailable( diff --git a/common/src/templates/initial-agents-dir/types/agent-definition.ts b/common/src/templates/initial-agents-dir/types/agent-definition.ts index b28a77c311..2fbfed0a49 100644 --- a/common/src/templates/initial-agents-dir/types/agent-definition.ts +++ b/common/src/templates/initial-agents-dir/types/agent-definition.ts @@ -423,8 +423,8 @@ export type ModelName = // Other open source models | 'moonshotai/kimi-k2' | 'moonshotai/kimi-k2:nitro' - | 'moonshotai/kimi-k2.5' - | 'moonshotai/kimi-k2.5:nitro' + | 'moonshotai/kimi-k2.6' + | 'moonshotai/kimi-k2.6:nitro' | 'z-ai/glm-5' | 'z-ai/glm-5.1' | 'z-ai/glm-4.6' diff --git a/common/src/types/freebuff-session.ts b/common/src/types/freebuff-session.ts index 43cd3eaa25..d141000a40 100644 --- a/common/src/types/freebuff-session.ts +++ b/common/src/types/freebuff-session.ts @@ -93,8 +93,7 @@ export type FreebuffSessionServerResponse = requestedModel: string } | { - /** Requested model is valid but not selectable right now. Currently - * used for deployment-hours-only models such as Kimi K2.5. */ + /** Requested model is valid but not selectable right now. */ status: 'model_unavailable' requestedModel: string availableHours: string diff --git a/docs/freebuff-waiting-room.md b/docs/freebuff-waiting-room.md index 73fa779270..153487897a 100644 --- a/docs/freebuff-waiting-room.md +++ b/docs/freebuff-waiting-room.md @@ -5,7 +5,7 @@ The waiting room is the admission control layer for **free-mode** requests against the freebuff Fireworks deployments. It has three jobs: 1. **Drip-admit users per model** — each selectable freebuff model has its own FIFO queue. Admission runs one tick (default `ADMISSION_TICK_MS`, 15s) that tries to admit one user per model, so heavier models can sit cold without starving lighter ones. -2. 
**Gate on per-deployment health and hours** — a single fleet probe per tick (`getFleetHealth` in `web/src/server/free-session/fireworks-health.ts`) hits the Fireworks metrics endpoint and classifies each dedicated deployment as `healthy | degraded | unhealthy`. Only models whose deployment is `healthy` and currently available admit that tick; Kimi K2.5 is available during 9am ET-5pm PT on weekdays, while MiniMax M2.7 is serverless and always available. +2. **Gate on per-deployment health and hours** — a single fleet probe per tick (`getFleetHealth` in `web/src/server/free-session/fireworks-health.ts`) hits the Fireworks metrics endpoint and classifies each dedicated deployment as `healthy | degraded | unhealthy`. Only models whose deployment is `healthy` and currently available admit that tick; Kimi K2.6 is available during 9am ET-5pm PT on weekdays, while MiniMax M2.7 is serverless and always available. 3. **One instance per account** — prevent a single user from running N concurrent freebuff CLIs to get N× throughput. Users who cannot be admitted immediately are placed in the queue for their chosen model and given an estimated wait time. Admitted users get a fixed-length session (default 1h) bound to the model they were admitted on; chat completions use that model for the life of the session. @@ -149,8 +149,8 @@ The final tick result carries a `queueDepthByModel` map and a single `skipped` r | Constant | Location | Default | Purpose | |---|---|---|---| | `ADMISSION_TICK_MS` | `config.ts` | 15000 | How often the ticker fires. Up to one user is admitted per model per tick. | -| `FREEBUFF_MODELS` | `common/src/constants/freebuff-models.ts` | `minimax-m2.7`, `kimi-k2.5` | Selectable models; each gets its own queue and admission slot. | -| `FIREWORKS_DEPLOYMENT_MAP` | `web/src/llm-api/fireworks-config.ts` | `kimi-k2.5` | Models with dedicated Fireworks deployments. 
Models not listed are treated as `healthy` (serverless fallback) — drop this default when they migrate to their own deployments. | +| `FREEBUFF_MODELS` | `common/src/constants/freebuff-models.ts` | `minimax-m2.7`, `kimi-k2.6` | Selectable models; each gets its own queue and admission slot. | +| `FIREWORKS_DEPLOYMENT_MAP` | `web/src/llm-api/fireworks-config.ts` | `kimi-k2.6` | Models with dedicated Fireworks deployments. Models not listed are treated as `healthy` (serverless fallback) — drop this default when they migrate to their own deployments. | | `HEALTH_CACHE_TTL_MS` | `fireworks-health.ts` | 25000 | Fleet probe cache TTL. Sits just under the Fireworks 30s exporter cadence and 6 req/min rate limit. | | `FREEBUFF_SESSION_LENGTH_MS` | env | 3_600_000 | Session lifetime | | `FREEBUFF_SESSION_GRACE_MS` | env | 1_800_000 | Drain window after expiry — gate still admits requests so an in-flight agent can finish, but the CLI is expected to block new prompts. Hard cutoff at `expires_at + grace`. | @@ -185,7 +185,7 @@ Response shapes: "queueDepth": 43, // size of this model's queue "queueDepthByModel": { // snapshot of every model's queue — powers the "minimax/minimax-m2.7": 43, // "N ahead" hint in the selector. Missing - "moonshotai/kimi-k2.5": 4 // entries should be treated as 0. + "moonshotai/kimi-k2.6": 4 // entries should be treated as 0. }, "estimatedWaitMs": 384000, "queuedAt": "2026-04-17T12:00:00Z" @@ -285,7 +285,7 @@ waitMs = (position - 1) * 24_000 - Position 1 → 0 (next tick admits you) - Position 2 → 24s, and so on. -`position` is scoped to this model's queue — a user at position 1 in the `minimax/minimax-m2.7` queue is not affected by the depth of the `moonshotai/kimi-k2.5` queue. The estimate is intentionally decoupled from the admission tick — it's a human-friendly rule-of-thumb for the UI, not a precise projection. 
Actual wait depends on admission-tick cadence, health-gated pauses, and deployment-hours availability (during a Kimi Fireworks incident or outside 9am ET-5pm PT, only Kimi's queue stalls; MiniMax keeps draining), so the real wait can be longer or shorter. +`position` is scoped to this model's queue — a user at position 1 in the `minimax/minimax-m2.7` queue is not affected by the depth of the `moonshotai/kimi-k2.6` queue. The estimate is intentionally decoupled from the admission tick — it's a human-friendly rule-of-thumb for the UI, not a precise projection. Actual wait depends on admission-tick cadence, health-gated pauses, and deployment-hours availability (during a Kimi Fireworks incident or outside 9am ET-5pm PT, only Kimi's queue stalls; MiniMax keeps draining), so the real wait can be longer or shorter. ## CLI Integration (frontend-side contract) diff --git a/scripts/test-fireworks-cache-intervals.ts b/scripts/test-fireworks-cache-intervals.ts index 92d7ac49e3..44bffd4b75 100644 --- a/scripts/test-fireworks-cache-intervals.ts +++ b/scripts/test-fireworks-cache-intervals.ts @@ -13,7 +13,7 @@ * * Models: * glm-5.1 (default) — z-ai/glm-5.1 - * kimi-k2.5 — moonshotai/kimi-k2.5 + * kimi-k2.6 — moonshotai/kimi-k2.6 * minimax — minimax/minimax-m2.5 * * Flags: @@ -26,10 +26,10 @@ * bun scripts/test-fireworks-cache-intervals.ts * * # Custom Kimi deployment with a faster sweep - * bun scripts/test-fireworks-cache-intervals.ts kimi-k2.5 --deployment --intervals=30,60,120,300,600 + * bun scripts/test-fireworks-cache-intervals.ts kimi-k2.6 --deployment --intervals=30,60,120,300,600 * * # Long sweep up to 1 hour - * bun scripts/test-fireworks-cache-intervals.ts kimi-k2.5 --deployment --intervals=60,300,600,1200,1800,2700,3600 + * bun scripts/test-fireworks-cache-intervals.ts kimi-k2.6 --deployment --intervals=60,300,600,1200,1800,2700,3600 */ export {} @@ -53,10 +53,10 @@ const MODEL_CONFIGS: Record = { cachedInputCostPerToken: 0.26 / 1_000_000, outputCostPerToken: 4.4 / 
1_000_000, }, - 'kimi-k2.5': { - id: 'moonshotai/kimi-k2.5', - standardModel: 'accounts/fireworks/models/kimi-k2p5', - deploymentModel: 'accounts/james-65d217/deployments/y5b3z17u', + 'kimi-k2.6': { + id: 'moonshotai/kimi-k2.6', + standardModel: 'accounts/fireworks/models/kimi-k2p6', + deploymentModel: 'accounts/james-65d217/deployments/j8ar2x0y', inputCostPerToken: 0.6 / 1_000_000, cachedInputCostPerToken: 0.1 / 1_000_000, outputCostPerToken: 3.0 / 1_000_000, diff --git a/scripts/test-fireworks-long.ts b/scripts/test-fireworks-long.ts index e506ccf022..45561fbc42 100644 --- a/scripts/test-fireworks-long.ts +++ b/scripts/test-fireworks-long.ts @@ -11,7 +11,7 @@ * * Models: * glm-5.1 (default) — z-ai/glm-5.1 - * kimi-k2.5 — moonshotai/kimi-k2.5 + * kimi-k2.6 — moonshotai/kimi-k2.6 * minimax — minimax/minimax-m2.5 * minimax-m2.7 — minimax/minimax-m2.7 * @@ -19,7 +19,7 @@ * --deployment Use custom deployment instead of serverless (standard API) * Serverless is the default * Examples: - * bun scripts/test-fireworks-long.ts kimi-k2.5 --deployment + * bun scripts/test-fireworks-long.ts kimi-k2.6 --deployment */ import { FIREWORKS_DEPLOYMENT_MAP } from '../web/src/llm-api/fireworks-config' @@ -45,10 +45,10 @@ const MODEL_CONFIGS: Record = { cachedInputCostPerToken: 0.26 / 1_000_000, outputCostPerToken: 4.40 / 1_000_000, }, - 'kimi-k2.5': { - id: 'moonshotai/kimi-k2.5', - standardModel: 'accounts/fireworks/models/kimi-k2p5', - deploymentModel: FIREWORKS_DEPLOYMENT_MAP['moonshotai/kimi-k2.5'], + 'kimi-k2.6': { + id: 'moonshotai/kimi-k2.6', + standardModel: 'accounts/fireworks/models/kimi-k2p6', + deploymentModel: FIREWORKS_DEPLOYMENT_MAP['moonshotai/kimi-k2.6'], inputCostPerToken: 0.60 / 1_000_000, cachedInputCostPerToken: 0.10 / 1_000_000, outputCostPerToken: 3.00 / 1_000_000, @@ -75,9 +75,9 @@ const DEFAULT_MODEL = 'glm-5.1' const MODEL_ALIASES: Record = { glm: 'glm-5.1', 'z-ai/glm-5.1': 'glm-5.1', - kimi: 'kimi-k2.5', - 'kimi-k2': 'kimi-k2.5', - 'moonshotai/kimi-k2.5': 
'kimi-k2.5', + kimi: 'kimi-k2.6', + 'kimi-k2': 'kimi-k2.6', + 'moonshotai/kimi-k2.6': 'kimi-k2.6', 'minimax/minimax-m2.5': 'minimax', 'minimax/minimax-m2.7': 'minimax-m2.7', } diff --git a/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts b/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts index 04f9b570cd..5f4490ff2a 100644 --- a/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts +++ b/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts @@ -1,8 +1,6 @@ import { afterEach, beforeEach, describe, expect, mock, it } from 'bun:test' import { NextRequest } from 'next/server' -import { isFreebuffDeploymentHours } from '@codebuff/common/constants/freebuff-models' - import { formatQuotaResetCountdown, postChatCompletions } from '../_post' import type { TrackEventFn } from '@codebuff/common/types/contracts/analytics' @@ -557,7 +555,7 @@ describe('/api/v1/chat/completions POST endpoint', () => { expect(response.status).toBe(200) }) - it('lets freebuff use Kimi K2.5 through Fireworks availability rules', async () => { + it('lets freebuff use Kimi K2.6 through Fireworks availability rules', async () => { const fetchedBodies: Record[] = [] const fetchViaFireworks = mock( async (_url: string | URL | Request, init?: RequestInit) => { @@ -565,7 +563,7 @@ describe('/api/v1/chat/completions POST endpoint', () => { return new Response( JSON.stringify({ id: 'test-id', - model: 'accounts/james-65d217/deployments/y5b3z17u', + model: 'accounts/james-65d217/deployments/j8ar2x0y', choices: [{ message: { content: 'test response' } }], usage: { prompt_tokens: 10, @@ -587,7 +585,7 @@ describe('/api/v1/chat/completions POST endpoint', () => { method: 'POST', headers: { Authorization: 'Bearer test-api-key-new-free' }, body: JSON.stringify({ - model: 'moonshotai/kimi-k2.5', + model: 'moonshotai/kimi-k2.6', stream: false, codebuff_metadata: { run_id: 'run-free', @@ -612,19 +610,13 @@ describe('/api/v1/chat/completions POST endpoint', () 
=> { }) const body = await response.json() - if (isFreebuffDeploymentHours()) { - expect(response.status).toBe(200) - expect(fetchedBodies).toHaveLength(1) - expect(fetchedBodies[0].model).toBe( - 'accounts/james-65d217/deployments/y5b3z17u', - ) - expect(body.model).toBe('moonshotai/kimi-k2.5') - expect(body.provider).toBe('Fireworks') - } else { - expect(response.status).toBe(503) - expect(fetchedBodies).toHaveLength(0) - expect(body.error.code).toBe('DEPLOYMENT_OUTSIDE_HOURS') - } + expect(response.status).toBe(200) + expect(fetchedBodies).toHaveLength(1) + expect(fetchedBodies[0].model).toBe( + 'accounts/james-65d217/deployments/j8ar2x0y', + ) + expect(body.model).toBe('moonshotai/kimi-k2.6') + expect(body.provider).toBe('Fireworks') }) it('skips credit check when in FREE mode even with 0 credits', async () => { diff --git a/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts b/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts index 2d33a1ae09..bbe31b64e0 100644 --- a/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts +++ b/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts @@ -158,7 +158,7 @@ describe('POST /api/v1/freebuff/session', () => { test('returns model_unavailable for Kimi outside deployment hours', async () => { const sessionDeps = makeSessionDeps() const resp = await postFreebuffSession( - makeReq('ok', { model: 'moonshotai/kimi-k2.5' }), + makeReq('ok', { model: 'moonshotai/kimi-k2.6' }), makeDeps(sessionDeps, 'u1'), ) expect(resp.status).toBe(409) diff --git a/web/src/llm-api/__tests__/fireworks-deployment.test.ts b/web/src/llm-api/__tests__/fireworks-deployment.test.ts index 99078f5284..7e213e9e66 100644 --- a/web/src/llm-api/__tests__/fireworks-deployment.test.ts +++ b/web/src/llm-api/__tests__/fireworks-deployment.test.ts @@ -11,11 +11,12 @@ import { import type { Logger } from '@codebuff/common/types/contracts/logger' -const STANDARD_MODEL_ID = 'accounts/fireworks/models/kimi-k2p5' -const 
DEPLOYMENT_MODEL_ID = 'accounts/james-65d217/deployments/y5b3z17u' +const STANDARD_MODEL_ID = 'accounts/fireworks/models/kimi-k2p6' +const DEPLOYMENT_MODEL_ID = 'accounts/james-65d217/deployments/j8ar2x0y' const IN_DEPLOYMENT_HOURS = new Date('2026-04-17T16:00:00Z') // Friday, 12pm ET / 9am PT const BEFORE_DEPLOYMENT_HOURS = new Date('2026-04-17T12:59:00Z') // Friday, 8:59am ET const AFTER_DEPLOYMENT_HOURS = new Date('2026-04-18T00:00:00Z') // Friday, 5pm PT +const WEEKDAY_AFTER_DEPLOYMENT_HOURS = new Date('2026-04-21T00:01:00Z') // Monday, 5:01pm PT const WEEKEND_DEPLOYMENT_HOURS = new Date('2026-04-18T16:00:00Z') // Saturday function createMockLogger(): Logger { @@ -33,6 +34,7 @@ describe('Fireworks deployment routing', () => { expect(isDeploymentHours(BEFORE_DEPLOYMENT_HOURS)).toBe(false) expect(isDeploymentHours(IN_DEPLOYMENT_HOURS)).toBe(true) expect(isDeploymentHours(AFTER_DEPLOYMENT_HOURS)).toBe(false) + expect(isDeploymentHours(WEEKDAY_AFTER_DEPLOYMENT_HOURS)).toBe(false) }) it('is inactive on weekends', () => { @@ -83,7 +85,7 @@ describe('Fireworks deployment routing', () => { }) const minimalBody = { - model: 'moonshotai/kimi-k2.5', + model: 'moonshotai/kimi-k2.6', messages: [{ role: 'user' as const, content: 'test' }], } @@ -98,7 +100,7 @@ describe('Fireworks deployment routing', () => { const response = await createFireworksRequestWithFallback({ body: minimalBody as never, - originalModel: 'moonshotai/kimi-k2.5', + originalModel: 'moonshotai/kimi-k2.6', fetch: mockFetch, logger, useCustomDeployment: false, @@ -121,7 +123,7 @@ describe('Fireworks deployment routing', () => { const response = await createFireworksRequestWithFallback({ body: minimalBody as never, - originalModel: 'moonshotai/kimi-k2.5', + originalModel: 'moonshotai/kimi-k2.6', fetch: mockFetch, logger, useCustomDeployment: true, @@ -154,7 +156,7 @@ describe('Fireworks deployment routing', () => { const response = await createFireworksRequestWithFallback({ body: minimalBody as never, - 
originalModel: 'moonshotai/kimi-k2.5', + originalModel: 'moonshotai/kimi-k2.6', fetch: mockFetch, logger, useCustomDeployment: true, @@ -187,7 +189,7 @@ describe('Fireworks deployment routing', () => { const response = await createFireworksRequestWithFallback({ body: minimalBody as never, - originalModel: 'moonshotai/kimi-k2.5', + originalModel: 'moonshotai/kimi-k2.6', fetch: mockFetch, logger, useCustomDeployment: true, @@ -214,7 +216,7 @@ describe('Fireworks deployment routing', () => { const response = await createFireworksRequestWithFallback({ body: minimalBody as never, - originalModel: 'moonshotai/kimi-k2.5', + originalModel: 'moonshotai/kimi-k2.6', fetch: mockFetch, logger, useCustomDeployment: true, @@ -239,7 +241,7 @@ describe('Fireworks deployment routing', () => { const response = await createFireworksRequestWithFallback({ body: minimalBody as never, - originalModel: 'moonshotai/kimi-k2.5', + originalModel: 'moonshotai/kimi-k2.6', fetch: mockFetch, logger, useCustomDeployment: true, @@ -283,7 +285,7 @@ describe('Fireworks deployment routing', () => { const response = await createFireworksRequestWithFallback({ body: minimalBody as never, - originalModel: 'moonshotai/kimi-k2.5', + originalModel: 'moonshotai/kimi-k2.6', fetch: mockFetch, logger, useCustomDeployment: true, @@ -310,7 +312,7 @@ describe('Fireworks deployment routing', () => { const response = await createFireworksRequestWithFallback({ body: minimalBody as never, - originalModel: 'moonshotai/kimi-k2.5', + originalModel: 'moonshotai/kimi-k2.6', fetch: mockFetch, logger, useCustomDeployment: true, @@ -338,7 +340,7 @@ describe('Fireworks deployment routing', () => { ...minimalBody, reasoning: { enabled: true }, } as never, - originalModel: 'moonshotai/kimi-k2.5', + originalModel: 'moonshotai/kimi-k2.6', fetch: mockFetch, logger, useCustomDeployment: false, @@ -364,7 +366,7 @@ describe('Fireworks deployment routing', () => { ...minimalBody, reasoning: { effort: 'high' }, } as never, - 
originalModel: 'moonshotai/kimi-k2.5', + originalModel: 'moonshotai/kimi-k2.6', fetch: mockFetch, logger, useCustomDeployment: false, @@ -390,7 +392,7 @@ describe('Fireworks deployment routing', () => { ...minimalBody, reasoning: { enabled: false, effort: 'high' }, } as never, - originalModel: 'moonshotai/kimi-k2.5', + originalModel: 'moonshotai/kimi-k2.6', fetch: mockFetch, logger, useCustomDeployment: false, @@ -417,7 +419,7 @@ describe('Fireworks deployment routing', () => { reasoning: { effort: 'high' }, tools: [{ type: 'function', function: { name: 'test', arguments: '{}' } }], } as never, - originalModel: 'moonshotai/kimi-k2.5', + originalModel: 'moonshotai/kimi-k2.6', fetch: mockFetch, logger, useCustomDeployment: false, @@ -443,7 +445,7 @@ describe('Fireworks deployment routing', () => { ...minimalBody, reasoning_effort: 'low', } as never, - originalModel: 'moonshotai/kimi-k2.5', + originalModel: 'moonshotai/kimi-k2.6', fetch: mockFetch, logger, useCustomDeployment: false, @@ -469,7 +471,7 @@ describe('Fireworks deployment routing', () => { reasoning_effort: 'low', tools: [{ type: 'function', function: { name: 'test', arguments: '{}' } }], } as never, - originalModel: 'moonshotai/kimi-k2.5', + originalModel: 'moonshotai/kimi-k2.6', fetch: mockFetch, logger, useCustomDeployment: false, @@ -496,7 +498,7 @@ describe('Fireworks deployment routing', () => { await createFireworksRequestWithFallback({ body: minimalBody as never, - originalModel: 'moonshotai/kimi-k2.5', + originalModel: 'moonshotai/kimi-k2.6', fetch: mockFetch, logger, useCustomDeployment: true, diff --git a/web/src/llm-api/fireworks-config.ts b/web/src/llm-api/fireworks-config.ts index 6856f3f347..ff08822426 100644 --- a/web/src/llm-api/fireworks-config.ts +++ b/web/src/llm-api/fireworks-config.ts @@ -10,6 +10,6 @@ export const FIREWORKS_ACCOUNT_ID = 'james-65d217' export const FIREWORKS_DEPLOYMENT_MAP: Record = { // 'minimax/minimax-m2.5': 'accounts/james-65d217/deployments/lnfid5h9', - 
'moonshotai/kimi-k2.5': 'accounts/james-65d217/deployments/y5b3z17u', + 'moonshotai/kimi-k2.6': 'accounts/james-65d217/deployments/j8ar2x0y', // 'minimax/minimax-m2.7': 'accounts/james-65d217/deployments/nrdudqxd', } diff --git a/web/src/llm-api/fireworks.ts b/web/src/llm-api/fireworks.ts index 138671c8aa..96d3510917 100644 --- a/web/src/llm-api/fireworks.ts +++ b/web/src/llm-api/fireworks.ts @@ -36,7 +36,7 @@ const FIREWORKS_MODEL_MAP: Record = { 'minimax/minimax-m2.5': 'accounts/fireworks/models/minimax-m2p5', 'minimax/minimax-m2.7': 'accounts/fireworks/models/minimax-m2p7', 'z-ai/glm-5.1': 'accounts/fireworks/models/glm-5p1', - 'moonshotai/kimi-k2.5': 'accounts/fireworks/models/kimi-k2p5', + 'moonshotai/kimi-k2.6': 'accounts/fireworks/models/kimi-k2p6', } /** Flag to enable custom Fireworks deployments (set to false to use global API only) */ @@ -169,7 +169,7 @@ const FIREWORKS_PRICING_MAP: Record = { cachedInputCostPerToken: 0.26 / 1_000_000, outputCostPerToken: 4.40 / 1_000_000, }, - 'moonshotai/kimi-k2.5': { + 'moonshotai/kimi-k2.6': { inputCostPerToken: 0.60 / 1_000_000, cachedInputCostPerToken: 0.10 / 1_000_000, outputCostPerToken: 3.00 / 1_000_000, diff --git a/web/src/server/free-session/__tests__/public-api.test.ts b/web/src/server/free-session/__tests__/public-api.test.ts index e0e0aa956b..0a8b0744b9 100644 --- a/web/src/server/free-session/__tests__/public-api.test.ts +++ b/web/src/server/free-session/__tests__/public-api.test.ts @@ -180,12 +180,12 @@ describe('requestSession', () => { test('deployment-hours-only model is unavailable outside deployment hours', async () => { const state = await requestSession({ userId: 'u1', - model: 'moonshotai/kimi-k2.5', + model: 'moonshotai/kimi-k2.6', deps, }) expect(state).toEqual({ status: 'model_unavailable', - requestedModel: 'moonshotai/kimi-k2.5', + requestedModel: 'moonshotai/kimi-k2.6', availableHours: '9am ET-5pm PT', }) expect(deps.rows.size).toBe(0) @@ -198,13 +198,13 @@ describe('requestSession', () => 
{ deps._tick(new Date(deps._now().getTime() + 1000)) await requestSession({ userId: 'u2', model: DEFAULT_MODEL, deps }) deps._tick(new Date(deps._now().getTime() + 1000)) - await requestSession({ userId: 'u3', model: 'moonshotai/kimi-k2.5', deps }) + await requestSession({ userId: 'u3', model: 'moonshotai/kimi-k2.6', deps }) const state = await getSessionState({ userId: 'u1', deps }) if (state.status !== 'queued') throw new Error('unreachable') expect(state.queueDepthByModel).toEqual({ [DEFAULT_MODEL]: 2, - 'moonshotai/kimi-k2.5': 1, + 'moonshotai/kimi-k2.6': 1, }) }) @@ -293,7 +293,7 @@ describe('requestSession', () => { }) const s3 = await requestSession({ userId: 'u3', - model: 'moonshotai/kimi-k2.5', + model: 'moonshotai/kimi-k2.6', deps: admitDeps, }) expect(s2.status).toBe('queued') diff --git a/web/src/server/free-session/config.ts b/web/src/server/free-session/config.ts index c0b4d84c66..7d1c16c1f1 100644 --- a/web/src/server/free-session/config.ts +++ b/web/src/server/free-session/config.ts @@ -48,7 +48,7 @@ export function getSessionGraceMs(): number { * queue). 
*/ const INSTANT_ADMIT_CAPACITY: Record = { - 'moonshotai/kimi-k2.5': 100, + 'moonshotai/kimi-k2.6': 100, 'minimax/minimax-m2.7': 200, } From 8a2ae6bd2eeb959d22e02a6cb2c5729568231a75 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Fri, 24 Apr 2026 14:58:18 -0700 Subject: [PATCH 3/3] feat: replace Kimi K2.6 with GLM 5.1 as freebuff deployment-hours model MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Switch base2-free, editor-lite, code-reviewer-lite agents from kimi-k2.6 to z-ai/glm-5.1 - Update FREEBUFF_KIMI_MODEL_ID → FREEBUFF_GLM_MODEL_ID constant - Update Fireworks deployment map (mjb4i7ea), model map, and pricing - Remove moonshotai/kimi-k2.6 and kimi-k2.6:nitro from ModelName type - Update freebuff model selector to show GLM first with 'Smartest' tagline - Update all test files with new model IDs and deployment IDs - Update docs and scripts to reference GLM instead of Kimi --- agents/base2/base2.ts | 2 +- agents/editor/editor-lite.ts | 2 +- agents/reviewer/code-reviewer-lite.ts | 2 +- agents/types/agent-definition.ts | 2 -- .../components/freebuff-model-selector.tsx | 18 +++++++--- common/src/constants/free-agents.ts | 6 ++-- common/src/constants/freebuff-models.ts | 8 ++--- .../types/agent-definition.ts | 2 -- docs/freebuff-waiting-room.md | 12 +++---- scripts/test-fireworks-cache-intervals.ts | 16 +++------ scripts/test-fireworks-long.ts | 15 ++------ .../completions/__tests__/completions.test.ts | 27 ++++++++------ .../session/__tests__/session.test.ts | 4 +-- .../__tests__/fireworks-deployment.test.ts | 36 +++++++++---------- web/src/llm-api/fireworks-config.ts | 2 +- web/src/llm-api/fireworks.ts | 8 +---- .../free-session/__tests__/public-api.test.ts | 14 ++++---- web/src/server/free-session/config.ts | 2 +- 18 files changed, 82 insertions(+), 96 deletions(-) diff --git a/agents/base2/base2.ts b/agents/base2/base2.ts index b1e24efff6..1a81f948bf 100644 --- a/agents/base2/base2.ts +++ b/agents/base2/base2.ts 
@@ -25,7 +25,7 @@ export function createBase2( const isFree = mode === 'free' || mode === 'lite' const isSonnet = false - const model = isFree ? 'moonshotai/kimi-k2.6' : 'anthropic/claude-opus-4.7' + const model = isFree ? 'z-ai/glm-5.1' : 'anthropic/claude-opus-4.7' return { publisher, diff --git a/agents/editor/editor-lite.ts b/agents/editor/editor-lite.ts index 9cb5675b5e..29225f0c29 100644 --- a/agents/editor/editor-lite.ts +++ b/agents/editor/editor-lite.ts @@ -3,7 +3,7 @@ import { createCodeEditor } from './editor' import type { AgentDefinition } from '../types/agent-definition' const definition: AgentDefinition = { - ...createCodeEditor({ model: 'minimax' }), + ...createCodeEditor({ model: 'glm' }), id: 'editor-lite', } export default definition diff --git a/agents/reviewer/code-reviewer-lite.ts b/agents/reviewer/code-reviewer-lite.ts index 888cadf4f7..feafb87c45 100644 --- a/agents/reviewer/code-reviewer-lite.ts +++ b/agents/reviewer/code-reviewer-lite.ts @@ -5,7 +5,7 @@ import { createReviewer } from './code-reviewer' const definition: SecretAgentDefinition = { id: 'code-reviewer-lite', publisher, - ...createReviewer('moonshotai/kimi-k2.6'), + ...createReviewer('z-ai/glm-5.1'), } export default definition diff --git a/agents/types/agent-definition.ts b/agents/types/agent-definition.ts index 2fbfed0a49..3608f36315 100644 --- a/agents/types/agent-definition.ts +++ b/agents/types/agent-definition.ts @@ -423,8 +423,6 @@ export type ModelName = // Other open source models | 'moonshotai/kimi-k2' | 'moonshotai/kimi-k2:nitro' - | 'moonshotai/kimi-k2.6' - | 'moonshotai/kimi-k2.6:nitro' | 'z-ai/glm-5' | 'z-ai/glm-5.1' | 'z-ai/glm-4.6' diff --git a/cli/src/components/freebuff-model-selector.tsx b/cli/src/components/freebuff-model-selector.tsx index 1ba966fd22..5abaac2724 100644 --- a/cli/src/components/freebuff-model-selector.tsx +++ b/cli/src/components/freebuff-model-selector.tsx @@ -6,6 +6,7 @@ import { Button } from './button' import { DEFAULT_FREEBUFF_MODEL_ID, 
FREEBUFF_DEPLOYMENT_HOURS_LABEL, + FREEBUFF_GLM_MODEL_ID, FREEBUFF_MODELS, isFreebuffModelAvailable, } from '@codebuff/common/constants/freebuff-models' @@ -19,6 +20,11 @@ import { useTheme } from '../hooks/use-theme' import type { KeyEvent } from '@opentui/core' +const FREEBUFF_MODEL_SELECTOR_MODELS = [ + ...FREEBUFF_MODELS.filter((model) => model.id === FREEBUFF_GLM_MODEL_ID), + ...FREEBUFF_MODELS.filter((model) => model.id !== FREEBUFF_GLM_MODEL_ID), +] + /** * Dual-purpose model picker: * - Pre-chat landing (session 'none'): user hasn't joined any queue. Picking @@ -109,7 +115,7 @@ export const FreebuffModelSelector: React.FC = () => { const stackVertically = useMemo(() => { const BUTTON_CHROME = 4 // 2 border + 2 padding const GAP = 2 - const total = FREEBUFF_MODELS.reduce((sum, model, idx) => { + const total = FREEBUFF_MODEL_SELECTOR_MODELS.reduce((sum, model, idx) => { const inner = 2 /* indicator + space */ + model.displayName.length + @@ -167,13 +173,15 @@ export const FreebuffModelSelector: React.FC = () => { } return } - const currentIdx = FREEBUFF_MODELS.findIndex((m) => m.id === focusedId) + const currentIdx = FREEBUFF_MODEL_SELECTOR_MODELS.findIndex( + (m) => m.id === focusedId, + ) if (currentIdx === -1) return - const len = FREEBUFF_MODELS.length + const len = FREEBUFF_MODEL_SELECTOR_MODELS.length const nextIdx = isForward ? (currentIdx + 1) % len : (currentIdx - 1 + len) % len - const target = FREEBUFF_MODELS[nextIdx] + const target = FREEBUFF_MODEL_SELECTOR_MODELS[nextIdx] if (target) { key.preventDefault?.() setFocusedId(target.id) @@ -198,7 +206,7 @@ export const FreebuffModelSelector: React.FC = () => { alignItems: 'flex-start', }} > - {FREEBUFF_MODELS.map((model) => { + {FREEBUFF_MODEL_SELECTOR_MODELS.map((model) => { // 'Selected' means the dot is filled and the label is bold. On the // landing screen ('none') this tracks the pre-focused pick; on the // queued screen it tracks the model the server has us on. 
Either diff --git a/common/src/constants/free-agents.ts b/common/src/constants/free-agents.ts index 4a2a4a147e..308e12df6d 100644 --- a/common/src/constants/free-agents.ts +++ b/common/src/constants/free-agents.ts @@ -28,7 +28,7 @@ export const FREE_MODE_AGENT_MODELS: Record> = { // Root orchestrator 'base2-free': new Set([ 'minimax/minimax-m2.7', - 'moonshotai/kimi-k2.6', + 'z-ai/glm-5.1', ]), // File exploration agents @@ -46,13 +46,13 @@ export const FREE_MODE_AGENT_MODELS: Record> = { // Editor for free mode 'editor-lite': new Set([ 'minimax/minimax-m2.7', - 'moonshotai/kimi-k2.6', + 'z-ai/glm-5.1', ]), // Code reviewer for free mode 'code-reviewer-lite': new Set([ 'minimax/minimax-m2.7', - 'moonshotai/kimi-k2.6', + 'z-ai/glm-5.1', ]), } diff --git a/common/src/constants/freebuff-models.ts b/common/src/constants/freebuff-models.ts index 2f6da2ce0b..f1019c6fbf 100644 --- a/common/src/constants/freebuff-models.ts +++ b/common/src/constants/freebuff-models.ts @@ -18,7 +18,7 @@ export interface FreebuffModelOption { } export const FREEBUFF_DEPLOYMENT_HOURS_LABEL = '9am ET-5pm PT' -export const FREEBUFF_KIMI_MODEL_ID = 'moonshotai/kimi-k2.6' +export const FREEBUFF_GLM_MODEL_ID = 'z-ai/glm-5.1' export const FREEBUFF_MODELS = [ { @@ -28,9 +28,9 @@ export const FREEBUFF_MODELS = [ availability: 'always', }, { - id: FREEBUFF_KIMI_MODEL_ID, - displayName: 'Kimi K2.6', - tagline: 'Balanced', + id: FREEBUFF_GLM_MODEL_ID, + displayName: 'GLM 5.1', + tagline: 'Smartest', availability: 'deployment_hours', }, ] as const satisfies readonly FreebuffModelOption[] diff --git a/common/src/templates/initial-agents-dir/types/agent-definition.ts b/common/src/templates/initial-agents-dir/types/agent-definition.ts index 2fbfed0a49..3608f36315 100644 --- a/common/src/templates/initial-agents-dir/types/agent-definition.ts +++ b/common/src/templates/initial-agents-dir/types/agent-definition.ts @@ -423,8 +423,6 @@ export type ModelName = // Other open source models | 'moonshotai/kimi-k2' | 
'moonshotai/kimi-k2:nitro' - | 'moonshotai/kimi-k2.6' - | 'moonshotai/kimi-k2.6:nitro' | 'z-ai/glm-5' | 'z-ai/glm-5.1' | 'z-ai/glm-4.6' diff --git a/docs/freebuff-waiting-room.md b/docs/freebuff-waiting-room.md index 153487897a..353bfb046b 100644 --- a/docs/freebuff-waiting-room.md +++ b/docs/freebuff-waiting-room.md @@ -5,7 +5,7 @@ The waiting room is the admission control layer for **free-mode** requests against the freebuff Fireworks deployments. It has three jobs: 1. **Drip-admit users per model** — each selectable freebuff model has its own FIFO queue. Admission runs one tick (default `ADMISSION_TICK_MS`, 15s) that tries to admit one user per model, so heavier models can sit cold without starving lighter ones. -2. **Gate on per-deployment health and hours** — a single fleet probe per tick (`getFleetHealth` in `web/src/server/free-session/fireworks-health.ts`) hits the Fireworks metrics endpoint and classifies each dedicated deployment as `healthy | degraded | unhealthy`. Only models whose deployment is `healthy` and currently available admit that tick; Kimi K2.6 is available during 9am ET-5pm PT on weekdays, while MiniMax M2.7 is serverless and always available. +2. **Gate on per-deployment health and hours** — a single fleet probe per tick (`getFleetHealth` in `web/src/server/free-session/fireworks-health.ts`) hits the Fireworks metrics endpoint and classifies each dedicated deployment as `healthy | degraded | unhealthy`. Only models whose deployment is `healthy` and currently available admit that tick; GLM 5.1 is available during 9am ET-5pm PT on weekdays, while MiniMax M2.7 is serverless and always available. 3. **One instance per account** — prevent a single user from running N concurrent freebuff CLIs to get N× throughput. Users who cannot be admitted immediately are placed in the queue for their chosen model and given an estimated wait time. 
Admitted users get a fixed-length session (default 1h) bound to the model they were admitted on; chat completions use that model for the life of the session. @@ -149,8 +149,8 @@ The final tick result carries a `queueDepthByModel` map and a single `skipped` r | Constant | Location | Default | Purpose | |---|---|---|---| | `ADMISSION_TICK_MS` | `config.ts` | 15000 | How often the ticker fires. Up to one user is admitted per model per tick. | -| `FREEBUFF_MODELS` | `common/src/constants/freebuff-models.ts` | `minimax-m2.7`, `kimi-k2.6` | Selectable models; each gets its own queue and admission slot. | -| `FIREWORKS_DEPLOYMENT_MAP` | `web/src/llm-api/fireworks-config.ts` | `kimi-k2.6` | Models with dedicated Fireworks deployments. Models not listed are treated as `healthy` (serverless fallback) — drop this default when they migrate to their own deployments. | +| `FREEBUFF_MODELS` | `common/src/constants/freebuff-models.ts` | `minimax-m2.7`, `glm-5.1` | Selectable models; each gets its own queue and admission slot. | +| `FIREWORKS_DEPLOYMENT_MAP` | `web/src/llm-api/fireworks-config.ts` | `glm-5.1` | Models with dedicated Fireworks deployments. Models not listed are treated as `healthy` (serverless fallback) — drop this default when they migrate to their own deployments. | | `HEALTH_CACHE_TTL_MS` | `fireworks-health.ts` | 25000 | Fleet probe cache TTL. Sits just under the Fireworks 30s exporter cadence and 6 req/min rate limit. | | `FREEBUFF_SESSION_LENGTH_MS` | env | 3_600_000 | Session lifetime | | `FREEBUFF_SESSION_GRACE_MS` | env | 1_800_000 | Drain window after expiry — gate still admits requests so an in-flight agent can finish, but the CLI is expected to block new prompts. Hard cutoff at `expires_at + grace`. | @@ -185,7 +185,7 @@ Response shapes: "queueDepth": 43, // size of this model's queue "queueDepthByModel": { // snapshot of every model's queue — powers the "minimax/minimax-m2.7": 43, // "N ahead" hint in the selector. 
Missing - "moonshotai/kimi-k2.6": 4 // entries should be treated as 0. + "z-ai/glm-5.1": 4 // entries should be treated as 0. }, "estimatedWaitMs": 384000, "queuedAt": "2026-04-17T12:00:00Z" @@ -285,7 +285,7 @@ waitMs = (position - 1) * 24_000 - Position 1 → 0 (next tick admits you) - Position 2 → 24s, and so on. -`position` is scoped to this model's queue — a user at position 1 in the `minimax/minimax-m2.7` queue is not affected by the depth of the `moonshotai/kimi-k2.6` queue. The estimate is intentionally decoupled from the admission tick — it's a human-friendly rule-of-thumb for the UI, not a precise projection. Actual wait depends on admission-tick cadence, health-gated pauses, and deployment-hours availability (during a Kimi Fireworks incident or outside 9am ET-5pm PT, only Kimi's queue stalls; MiniMax keeps draining), so the real wait can be longer or shorter. +`position` is scoped to this model's queue — a user at position 1 in the `minimax/minimax-m2.7` queue is not affected by the depth of the `z-ai/glm-5.1` queue. The estimate is intentionally decoupled from the admission tick — it's a human-friendly rule-of-thumb for the UI, not a precise projection. Actual wait depends on admission-tick cadence, health-gated pauses, and deployment-hours availability (during a GLM Fireworks incident or outside 9am ET-5pm PT, only GLM's queue stalls; MiniMax keeps draining), so the real wait can be longer or shorter. ## CLI Integration (frontend-side contract) @@ -324,7 +324,7 @@ The `disabled` response means the server has the waiting room turned off. CLI tr | Spamming POST/GET to starve admission tick | Admission uses per-model Postgres advisory locks; DDoS protection is upstream (Next's global rate limits). Consider adding a per-user limiter on `/session` if traffic warrants. | | Repeatedly POSTing different models to get across every queue | Single row per user (PK on `user_id`); switching models moves the row, never clones it. 
A user holds exactly one queue slot at any time. | | Fireworks metrics endpoint down / slow | `getFleetHealth()` fails closed (timeout, non-OK, or missing API key) → every dedicated-deployment model is flagged `unhealthy` and its queue pauses. | -| One deployment degraded while others are fine | Health is classified per-deployment; only the affected model's queue pauses, so a degraded Kimi deployment doesn't block MiniMax admissions. | +| One deployment degraded while others are fine | Health is classified per-deployment; only the affected model's queue pauses, so a degraded GLM deployment doesn't block MiniMax admissions. | | Zombie expired sessions holding capacity | Swept on every admission tick, even when upstream is unhealthy | ## Testing diff --git a/scripts/test-fireworks-cache-intervals.ts b/scripts/test-fireworks-cache-intervals.ts index 44bffd4b75..8d4e867406 100644 --- a/scripts/test-fireworks-cache-intervals.ts +++ b/scripts/test-fireworks-cache-intervals.ts @@ -13,7 +13,6 @@ * * Models: * glm-5.1 (default) — z-ai/glm-5.1 - * kimi-k2.6 — moonshotai/kimi-k2.6 * minimax — minimax/minimax-m2.5 * * Flags: @@ -25,11 +24,11 @@ * # Default glm-5.1 serverless with default intervals * bun scripts/test-fireworks-cache-intervals.ts * - * # Custom Kimi deployment with a faster sweep - * bun scripts/test-fireworks-cache-intervals.ts kimi-k2.6 --deployment --intervals=30,60,120,300,600 + * # Custom GLM deployment with a faster sweep + * bun scripts/test-fireworks-cache-intervals.ts glm-5.1 --deployment --intervals=30,60,120,300,600 * * # Long sweep up to 1 hour - * bun scripts/test-fireworks-cache-intervals.ts kimi-k2.6 --deployment --intervals=60,300,600,1200,1800,2700,3600 + * bun scripts/test-fireworks-cache-intervals.ts glm-5.1 --deployment --intervals=60,300,600,1200,1800,2700,3600 */ export {} @@ -49,18 +48,11 @@ const MODEL_CONFIGS: Record = { 'glm-5.1': { id: 'z-ai/glm-5.1', standardModel: 'accounts/fireworks/models/glm-5p1', + deploymentModel: 
'accounts/james-65d217/deployments/mjb4i7ea', inputCostPerToken: 1.4 / 1_000_000, cachedInputCostPerToken: 0.26 / 1_000_000, outputCostPerToken: 4.4 / 1_000_000, }, - 'kimi-k2.6': { - id: 'moonshotai/kimi-k2.6', - standardModel: 'accounts/fireworks/models/kimi-k2p6', - deploymentModel: 'accounts/james-65d217/deployments/j8ar2x0y', - inputCostPerToken: 0.6 / 1_000_000, - cachedInputCostPerToken: 0.1 / 1_000_000, - outputCostPerToken: 3.0 / 1_000_000, - }, minimax: { id: 'minimax/minimax-m2.5', standardModel: 'accounts/fireworks/models/minimax-m2p5', diff --git a/scripts/test-fireworks-long.ts b/scripts/test-fireworks-long.ts index 45561fbc42..a1e4950f8f 100644 --- a/scripts/test-fireworks-long.ts +++ b/scripts/test-fireworks-long.ts @@ -11,7 +11,6 @@ * * Models: * glm-5.1 (default) — z-ai/glm-5.1 - * kimi-k2.6 — moonshotai/kimi-k2.6 * minimax — minimax/minimax-m2.5 * minimax-m2.7 — minimax/minimax-m2.7 * @@ -19,7 +18,7 @@ * --deployment Use custom deployment instead of serverless (standard API) * Serverless is the default * Examples: - * bun scripts/test-fireworks-long.ts kimi-k2.6 --deployment + * bun scripts/test-fireworks-long.ts glm-5.1 --deployment */ import { FIREWORKS_DEPLOYMENT_MAP } from '../web/src/llm-api/fireworks-config' @@ -41,18 +40,11 @@ const MODEL_CONFIGS: Record = { 'glm-5.1': { id: 'z-ai/glm-5.1', standardModel: 'accounts/fireworks/models/glm-5p1', + deploymentModel: FIREWORKS_DEPLOYMENT_MAP['z-ai/glm-5.1'], inputCostPerToken: 1.40 / 1_000_000, cachedInputCostPerToken: 0.26 / 1_000_000, outputCostPerToken: 4.40 / 1_000_000, }, - 'kimi-k2.6': { - id: 'moonshotai/kimi-k2.6', - standardModel: 'accounts/fireworks/models/kimi-k2p6', - deploymentModel: FIREWORKS_DEPLOYMENT_MAP['moonshotai/kimi-k2.6'], - inputCostPerToken: 0.60 / 1_000_000, - cachedInputCostPerToken: 0.10 / 1_000_000, - outputCostPerToken: 3.00 / 1_000_000, - }, minimax: { id: 'minimax/minimax-m2.5', standardModel: 'accounts/fireworks/models/minimax-m2p5', @@ -75,9 +67,6 @@ const 
DEFAULT_MODEL = 'glm-5.1' const MODEL_ALIASES: Record = { glm: 'glm-5.1', 'z-ai/glm-5.1': 'glm-5.1', - kimi: 'kimi-k2.6', - 'kimi-k2': 'kimi-k2.6', - 'moonshotai/kimi-k2.6': 'kimi-k2.6', 'minimax/minimax-m2.5': 'minimax', 'minimax/minimax-m2.7': 'minimax-m2.7', } diff --git a/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts b/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts index 5f4490ff2a..1aac8800cd 100644 --- a/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts +++ b/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts @@ -1,6 +1,7 @@ import { afterEach, beforeEach, describe, expect, mock, it } from 'bun:test' import { NextRequest } from 'next/server' +import { isFreebuffDeploymentHours } from '@codebuff/common/constants/freebuff-models' import { formatQuotaResetCountdown, postChatCompletions } from '../_post' import type { TrackEventFn } from '@codebuff/common/types/contracts/analytics' @@ -555,7 +556,7 @@ describe('/api/v1/chat/completions POST endpoint', () => { expect(response.status).toBe(200) }) - it('lets freebuff use Kimi K2.6 through Fireworks availability rules', async () => { + it('lets freebuff use GLM 5.1 through Fireworks availability rules', async () => { const fetchedBodies: Record[] = [] const fetchViaFireworks = mock( async (_url: string | URL | Request, init?: RequestInit) => { @@ -563,7 +564,7 @@ describe('/api/v1/chat/completions POST endpoint', () => { return new Response( JSON.stringify({ id: 'test-id', - model: 'accounts/james-65d217/deployments/j8ar2x0y', + model: 'accounts/james-65d217/deployments/mjb4i7ea', choices: [{ message: { content: 'test response' } }], usage: { prompt_tokens: 10, @@ -585,7 +586,7 @@ describe('/api/v1/chat/completions POST endpoint', () => { method: 'POST', headers: { Authorization: 'Bearer test-api-key-new-free' }, body: JSON.stringify({ - model: 'moonshotai/kimi-k2.6', + model: 'z-ai/glm-5.1', stream: false, codebuff_metadata: { run_id: 'run-free', 
@@ -610,13 +611,19 @@ describe('/api/v1/chat/completions POST endpoint', () => { }) const body = await response.json() - expect(response.status).toBe(200) - expect(fetchedBodies).toHaveLength(1) - expect(fetchedBodies[0].model).toBe( - 'accounts/james-65d217/deployments/j8ar2x0y', - ) - expect(body.model).toBe('moonshotai/kimi-k2.6') - expect(body.provider).toBe('Fireworks') + if (isFreebuffDeploymentHours()) { + expect(response.status).toBe(200) + expect(fetchedBodies).toHaveLength(1) + expect(fetchedBodies[0].model).toBe( + 'accounts/james-65d217/deployments/mjb4i7ea', + ) + expect(body.model).toBe('z-ai/glm-5.1') + expect(body.provider).toBe('Fireworks') + } else { + expect(response.status).toBe(503) + expect(fetchedBodies).toHaveLength(0) + expect(body.error.code).toBe('DEPLOYMENT_OUTSIDE_HOURS') + } }) it('skips credit check when in FREE mode even with 0 credits', async () => { diff --git a/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts b/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts index bbe31b64e0..ffcb8fd364 100644 --- a/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts +++ b/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts @@ -155,10 +155,10 @@ describe('POST /api/v1/freebuff/session', () => { expect(body.status).toBe('queued') }) - test('returns model_unavailable for Kimi outside deployment hours', async () => { + test('returns model_unavailable for GLM outside deployment hours', async () => { const sessionDeps = makeSessionDeps() const resp = await postFreebuffSession( - makeReq('ok', { model: 'moonshotai/kimi-k2.6' }), + makeReq('ok', { model: 'z-ai/glm-5.1' }), makeDeps(sessionDeps, 'u1'), ) expect(resp.status).toBe(409) diff --git a/web/src/llm-api/__tests__/fireworks-deployment.test.ts b/web/src/llm-api/__tests__/fireworks-deployment.test.ts index 7e213e9e66..58863c6742 100644 --- a/web/src/llm-api/__tests__/fireworks-deployment.test.ts +++ 
b/web/src/llm-api/__tests__/fireworks-deployment.test.ts @@ -11,8 +11,8 @@ import { import type { Logger } from '@codebuff/common/types/contracts/logger' -const STANDARD_MODEL_ID = 'accounts/fireworks/models/kimi-k2p6' -const DEPLOYMENT_MODEL_ID = 'accounts/james-65d217/deployments/j8ar2x0y' +const STANDARD_MODEL_ID = 'accounts/fireworks/models/glm-5p1' +const DEPLOYMENT_MODEL_ID = 'accounts/james-65d217/deployments/mjb4i7ea' const IN_DEPLOYMENT_HOURS = new Date('2026-04-17T16:00:00Z') // Friday, 12pm ET / 9am PT const BEFORE_DEPLOYMENT_HOURS = new Date('2026-04-17T12:59:00Z') // Friday, 8:59am ET const AFTER_DEPLOYMENT_HOURS = new Date('2026-04-18T00:00:00Z') // Friday, 5pm PT @@ -85,7 +85,7 @@ describe('Fireworks deployment routing', () => { }) const minimalBody = { - model: 'moonshotai/kimi-k2.6', + model: 'z-ai/glm-5.1', messages: [{ role: 'user' as const, content: 'test' }], } @@ -100,7 +100,7 @@ describe('Fireworks deployment routing', () => { const response = await createFireworksRequestWithFallback({ body: minimalBody as never, - originalModel: 'moonshotai/kimi-k2.6', + originalModel: 'z-ai/glm-5.1', fetch: mockFetch, logger, useCustomDeployment: false, @@ -123,7 +123,7 @@ describe('Fireworks deployment routing', () => { const response = await createFireworksRequestWithFallback({ body: minimalBody as never, - originalModel: 'moonshotai/kimi-k2.6', + originalModel: 'z-ai/glm-5.1', fetch: mockFetch, logger, useCustomDeployment: true, @@ -156,7 +156,7 @@ describe('Fireworks deployment routing', () => { const response = await createFireworksRequestWithFallback({ body: minimalBody as never, - originalModel: 'moonshotai/kimi-k2.6', + originalModel: 'z-ai/glm-5.1', fetch: mockFetch, logger, useCustomDeployment: true, @@ -189,7 +189,7 @@ describe('Fireworks deployment routing', () => { const response = await createFireworksRequestWithFallback({ body: minimalBody as never, - originalModel: 'moonshotai/kimi-k2.6', + originalModel: 'z-ai/glm-5.1', fetch: mockFetch, 
logger, useCustomDeployment: true, @@ -216,7 +216,7 @@ describe('Fireworks deployment routing', () => { const response = await createFireworksRequestWithFallback({ body: minimalBody as never, - originalModel: 'moonshotai/kimi-k2.6', + originalModel: 'z-ai/glm-5.1', fetch: mockFetch, logger, useCustomDeployment: true, @@ -241,7 +241,7 @@ describe('Fireworks deployment routing', () => { const response = await createFireworksRequestWithFallback({ body: minimalBody as never, - originalModel: 'moonshotai/kimi-k2.6', + originalModel: 'z-ai/glm-5.1', fetch: mockFetch, logger, useCustomDeployment: true, @@ -285,7 +285,7 @@ describe('Fireworks deployment routing', () => { const response = await createFireworksRequestWithFallback({ body: minimalBody as never, - originalModel: 'moonshotai/kimi-k2.6', + originalModel: 'z-ai/glm-5.1', fetch: mockFetch, logger, useCustomDeployment: true, @@ -312,7 +312,7 @@ describe('Fireworks deployment routing', () => { const response = await createFireworksRequestWithFallback({ body: minimalBody as never, - originalModel: 'moonshotai/kimi-k2.6', + originalModel: 'z-ai/glm-5.1', fetch: mockFetch, logger, useCustomDeployment: true, @@ -340,7 +340,7 @@ describe('Fireworks deployment routing', () => { ...minimalBody, reasoning: { enabled: true }, } as never, - originalModel: 'moonshotai/kimi-k2.6', + originalModel: 'z-ai/glm-5.1', fetch: mockFetch, logger, useCustomDeployment: false, @@ -366,7 +366,7 @@ describe('Fireworks deployment routing', () => { ...minimalBody, reasoning: { effort: 'high' }, } as never, - originalModel: 'moonshotai/kimi-k2.6', + originalModel: 'z-ai/glm-5.1', fetch: mockFetch, logger, useCustomDeployment: false, @@ -392,7 +392,7 @@ describe('Fireworks deployment routing', () => { ...minimalBody, reasoning: { enabled: false, effort: 'high' }, } as never, - originalModel: 'moonshotai/kimi-k2.6', + originalModel: 'z-ai/glm-5.1', fetch: mockFetch, logger, useCustomDeployment: false, @@ -419,7 +419,7 @@ describe('Fireworks 
deployment routing', () => { reasoning: { effort: 'high' }, tools: [{ type: 'function', function: { name: 'test', arguments: '{}' } }], } as never, - originalModel: 'moonshotai/kimi-k2.6', + originalModel: 'z-ai/glm-5.1', fetch: mockFetch, logger, useCustomDeployment: false, @@ -445,7 +445,7 @@ describe('Fireworks deployment routing', () => { ...minimalBody, reasoning_effort: 'low', } as never, - originalModel: 'moonshotai/kimi-k2.6', + originalModel: 'z-ai/glm-5.1', fetch: mockFetch, logger, useCustomDeployment: false, @@ -471,7 +471,7 @@ describe('Fireworks deployment routing', () => { reasoning_effort: 'low', tools: [{ type: 'function', function: { name: 'test', arguments: '{}' } }], } as never, - originalModel: 'moonshotai/kimi-k2.6', + originalModel: 'z-ai/glm-5.1', fetch: mockFetch, logger, useCustomDeployment: false, @@ -498,7 +498,7 @@ describe('Fireworks deployment routing', () => { await createFireworksRequestWithFallback({ body: minimalBody as never, - originalModel: 'moonshotai/kimi-k2.6', + originalModel: 'z-ai/glm-5.1', fetch: mockFetch, logger, useCustomDeployment: true, diff --git a/web/src/llm-api/fireworks-config.ts b/web/src/llm-api/fireworks-config.ts index ff08822426..5667282505 100644 --- a/web/src/llm-api/fireworks-config.ts +++ b/web/src/llm-api/fireworks-config.ts @@ -10,6 +10,6 @@ export const FIREWORKS_ACCOUNT_ID = 'james-65d217' export const FIREWORKS_DEPLOYMENT_MAP: Record = { // 'minimax/minimax-m2.5': 'accounts/james-65d217/deployments/lnfid5h9', - 'moonshotai/kimi-k2.6': 'accounts/james-65d217/deployments/j8ar2x0y', + 'z-ai/glm-5.1': 'accounts/james-65d217/deployments/mjb4i7ea', // 'minimax/minimax-m2.7': 'accounts/james-65d217/deployments/nrdudqxd', } diff --git a/web/src/llm-api/fireworks.ts b/web/src/llm-api/fireworks.ts index 96d3510917..028ad42228 100644 --- a/web/src/llm-api/fireworks.ts +++ b/web/src/llm-api/fireworks.ts @@ -36,7 +36,6 @@ const FIREWORKS_MODEL_MAP: Record = { 'minimax/minimax-m2.5': 
'accounts/fireworks/models/minimax-m2p5', 'minimax/minimax-m2.7': 'accounts/fireworks/models/minimax-m2p7', 'z-ai/glm-5.1': 'accounts/fireworks/models/glm-5p1', - 'moonshotai/kimi-k2.6': 'accounts/fireworks/models/kimi-k2p6', } /** Flag to enable custom Fireworks deployments (set to false to use global API only) */ @@ -97,7 +96,7 @@ function createFireworksRequest(params: { // Transform OpenRouter-style `reasoning` object into Fireworks' `reasoning_effort`. // Unlike OpenAI, Fireworks supports reasoning_effort together with function tools - // (e.g. GLM-4.5/5.1 and Kimi K2 are designed for interleaved reasoning + tool use). + // (e.g. GLM-4.5/5.1 are designed for interleaved reasoning + tool use). if (fireworksBody.reasoning && typeof fireworksBody.reasoning === 'object') { const reasoning = fireworksBody.reasoning as { enabled?: boolean @@ -169,11 +168,6 @@ const FIREWORKS_PRICING_MAP: Record = { cachedInputCostPerToken: 0.26 / 1_000_000, outputCostPerToken: 4.40 / 1_000_000, }, - 'moonshotai/kimi-k2.6': { - inputCostPerToken: 0.60 / 1_000_000, - cachedInputCostPerToken: 0.10 / 1_000_000, - outputCostPerToken: 3.00 / 1_000_000, - }, } function getFireworksPricing(model: string): FireworksPricing { diff --git a/web/src/server/free-session/__tests__/public-api.test.ts b/web/src/server/free-session/__tests__/public-api.test.ts index 0a8b0744b9..a90bc800d4 100644 --- a/web/src/server/free-session/__tests__/public-api.test.ts +++ b/web/src/server/free-session/__tests__/public-api.test.ts @@ -180,12 +180,12 @@ describe('requestSession', () => { test('deployment-hours-only model is unavailable outside deployment hours', async () => { const state = await requestSession({ userId: 'u1', - model: 'moonshotai/kimi-k2.6', + model: 'z-ai/glm-5.1', deps, }) expect(state).toEqual({ status: 'model_unavailable', - requestedModel: 'moonshotai/kimi-k2.6', + requestedModel: 'z-ai/glm-5.1', availableHours: '9am ET-5pm PT', }) expect(deps.rows.size).toBe(0) @@ -193,18 +193,18 @@ 
describe('requestSession', () => { test('queued response includes a per-model depth snapshot for the selector', async () => { deps._tick(new Date('2026-04-17T16:00:00Z')) - // Seed 2 users in MiniMax + 1 in Kimi so the returned map captures both. + // Seed 2 users in MiniMax + 1 in GLM so the returned map captures both. await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps }) deps._tick(new Date(deps._now().getTime() + 1000)) await requestSession({ userId: 'u2', model: DEFAULT_MODEL, deps }) deps._tick(new Date(deps._now().getTime() + 1000)) - await requestSession({ userId: 'u3', model: 'moonshotai/kimi-k2.6', deps }) + await requestSession({ userId: 'u3', model: 'z-ai/glm-5.1', deps }) const state = await getSessionState({ userId: 'u1', deps }) if (state.status !== 'queued') throw new Error('unreachable') expect(state.queueDepthByModel).toEqual({ [DEFAULT_MODEL]: 2, - 'moonshotai/kimi-k2.6': 1, + 'z-ai/glm-5.1': 1, }) }) @@ -279,7 +279,7 @@ describe('requestSession', () => { }) test('instant-admit: per-model capacities are independent', async () => { - // MiniMax saturated at 1 active, Kimi still has room. + // MiniMax saturated at 1 active, GLM still has room. const admitDeps = makeDeps({ getInstantAdmitCapacity: (model) => model === DEFAULT_MODEL ? 1 : 10, @@ -293,7 +293,7 @@ describe('requestSession', () => { }) const s3 = await requestSession({ userId: 'u3', - model: 'moonshotai/kimi-k2.6', + model: 'z-ai/glm-5.1', deps: admitDeps, }) expect(s2.status).toBe('queued') diff --git a/web/src/server/free-session/config.ts b/web/src/server/free-session/config.ts index 7d1c16c1f1..85bba7fa6f 100644 --- a/web/src/server/free-session/config.ts +++ b/web/src/server/free-session/config.ts @@ -48,7 +48,7 @@ export function getSessionGraceMs(): number { * queue). */ const INSTANT_ADMIT_CAPACITY: Record = { - 'moonshotai/kimi-k2.6': 100, + 'z-ai/glm-5.1': 50, 'minimax/minimax-m2.7': 200, }