diff --git a/cli/src/commands/command-registry.ts b/cli/src/commands/command-registry.ts
index 8b6c431ba..5c7b63928 100644
--- a/cli/src/commands/command-registry.ts
+++ b/cli/src/commands/command-registry.ts
@@ -3,13 +3,14 @@ import { CLAUDE_OAUTH_ENABLED } from '@codebuff/common/constants/claude-oauth'
 import { safeOpen } from '../utils/open-url'
 
 import { handleAdsEnable, handleAdsDisable } from './ads'
-import { buildInterviewPrompt, buildPlanPrompt, buildReviewPromptFromArgs } from './prompt-builders'
-import { useThemeStore } from '../hooks/use-theme'
 import { handleHelpCommand } from './help'
 import { handleImageCommand } from './image'
 import { handleInitializationFlowLocally } from './init'
+import { buildInterviewPrompt, buildPlanPrompt, buildReviewPromptFromArgs } from './prompt-builders'
 import { runBashCommand } from './router'
 import { handleUsageCommand } from './usage'
+import { endAndRejoinFreebuffSession } from '../hooks/use-freebuff-session'
+import { useThemeStore } from '../hooks/use-theme'
 import { WEBSITE_URL } from '../login/constants'
 import { useChatStore } from '../state/chat-store'
 import { useFeedbackStore } from '../state/feedback-store'
@@ -178,6 +179,7 @@ const FREEBUFF_REMOVED_COMMANDS = new Set([
 const FREEBUFF_ONLY_COMMANDS = new Set([
   'connect',
   'plan',
+  'end-session',
 ])
 
 const ALL_COMMANDS: CommandDefinition[] = [
@@ -611,6 +613,25 @@ const ALL_COMMANDS: CommandDefinition[] = [
       clearInput(params)
     },
   }),
+  // /end-session (freebuff-only) — end the active session early and re-queue. The
+  // hook flips status from 'active' → 'queued', which unmounts <Chat> and
+  // mounts <WaitingRoomScreen>, where the user can pick a different model.
+  defineCommand({
+    name: 'end-session',
+    handler: (params) => {
+      params.setMessages((prev) => [
+        ...prev,
+        getUserMessage(params.inputValue.trim()),
+        getSystemMessage('Ending session and returning to the waiting room…'),
+      ])
+      params.saveToHistory(params.inputValue.trim())
+      clearInput(params)
+      endAndRejoinFreebuffSession().catch(() => {
+        // The hook surfaces poll errors via the session store; nothing to do
+        // here beyond letting the chat history reflect the attempt.
+      })
+    },
+  }),
 ]
 
 export const COMMAND_REGISTRY: CommandDefinition[] = IS_FREEBUFF
diff --git a/cli/src/components/freebuff-model-selector.tsx b/cli/src/components/freebuff-model-selector.tsx
new file mode 100644
index 000000000..63099ec1f
--- /dev/null
+++ b/cli/src/components/freebuff-model-selector.tsx
@@ -0,0 +1,130 @@
+import { TextAttributes } from '@opentui/core'
+import { useKeyboard } from '@opentui/react'
+import React, { useCallback, useMemo, useState } from 'react'
+
+import { Button } from './button'
+import { FREEBUFF_MODELS } from '@codebuff/common/constants/freebuff-models'
+
+import { switchFreebuffModel } from '../hooks/use-freebuff-session'
+import { useFreebuffModelStore } from '../state/freebuff-model-store'
+import { useFreebuffSessionStore } from '../state/freebuff-session-store'
+import { useTheme } from '../hooks/use-theme'
+
+import type { KeyEvent } from '@opentui/core'
+
+/**
+ * Lets the user pick which model's queue they're in. Tapping (or pressing the
+ * row's number key) on a different model triggers a re-POST: the server moves
+ * them to the back of the new model's queue.
+ *
+ * Each row shows a live "N ahead" count sourced from the server's
+ * `queueDepthByModel` snapshot so the choice is informed (e.g. "3 ahead" vs
+ * "12 ahead") rather than a blind preference toggle.
+ */
+export const FreebuffModelSelector: React.FC = () => {
+  const theme = useTheme()
+  const selectedModel = useFreebuffModelStore((s) => s.selectedModel)
+  const session = useFreebuffSessionStore((s) => s.session)
+  const [pending, setPending] = useState<string | null>(null)
+  const [hoveredId, setHoveredId] = useState<string | null>(null)
+
+  // For the user's current queue, "ahead" is `position - 1` (themselves don't
+  // count). For every other queue, switching would land them at the back, so
+  // it's that queue's full depth. Null before the first queued snapshot so
+  // the UI doesn't flash misleading zeros.
+  const aheadByModel = useMemo<Record<string, number> | null>(() => {
+    if (session?.status !== 'queued') return null
+    const depths = session.queueDepthByModel ?? {}
+    const out: Record<string, number> = {}
+    for (const { id } of FREEBUFF_MODELS) {
+      out[id] =
+        id === session.model ? Math.max(0, session.position - 1) : depths[id] ?? 0
+    }
+    return out
+  }, [session])
+
+  const pick = useCallback(
+    (modelId: string) => {
+      if (pending) return
+      if (modelId === selectedModel) return
+      setPending(modelId)
+      // Catch so a failed switch can't become an unhandled promise rejection.
+      switchFreebuffModel(modelId)
+        .catch(() => {})
+        .finally(() => setPending(null))
+    },
+    [pending, selectedModel],
+  )
+
+  // Number-key shortcuts (1-9) so keyboard-only users can switch without
+  // hunting for a clickable region. Digits beyond the list match no row.
+  useKeyboard(
+    useCallback(
+      (key: KeyEvent) => {
+        if (pending) return
+        const name = key.name ?? ''
+        if (!/^[1-9]$/.test(name)) return
+        const digit = Number(name)
+        const target = FREEBUFF_MODELS[digit - 1]
+        if (target && target.id !== selectedModel) {
+          key.preventDefault?.()
+          pick(target.id)
+        }
+      },
+      [pending, pick, selectedModel],
+    ),
+  )
+
+  return (
+    <box
+      style={{
+        flexDirection: 'column',
+        alignItems: 'flex-start',
+        gap: 0,
+      }}
+    >
+      <text style={{ fg: theme.muted, marginBottom: 1 }}>
+        Model — tap or press 1-{FREEBUFF_MODELS.length} to switch
+      </text>
+      {FREEBUFF_MODELS.map((model, idx) => {
+        const isSelected = model.id === selectedModel
+        const isPending = pending === model.id
+        const isHovered = hoveredId === model.id
+        const indicator = isSelected ? '●' : '○'
+        const indicatorColor = isSelected ? theme.primary : theme.muted
+        const labelColor = isSelected ? theme.foreground : theme.muted
+        const interactable = !pending && !isSelected
+        const ahead = aheadByModel?.[model.id]
+        const hint =
+          ahead === undefined
+            ? model.tagline
+            : ahead === 0
+              ? 'No wait'
+              : `${ahead} ahead`
+        return (
+          <Button
+            key={model.id}
+            onClick={() => pick(model.id)}
+            onMouseOver={() => interactable && setHoveredId(model.id)}
+            onMouseOut={() => setHoveredId((curr) => (curr === model.id ? null : curr))}
+            style={{ paddingLeft: 0, paddingRight: 1 }}
+          >
+            <text>
+              <span fg={indicatorColor}>{indicator} </span>
+              <span fg={theme.muted}>{idx + 1}. </span>
+              <span
+                fg={labelColor}
+                attributes={isSelected ? TextAttributes.BOLD : TextAttributes.NONE}
+              >
+                {model.displayName}
+              </span>
+              <span fg={theme.muted}>  {hint}</span>
+              {isPending && <span fg={theme.muted}>  switching…</span>}
+              {isHovered && interactable && <span fg={theme.muted}>  ↵</span>}
+            </text>
+          </Button>
+        )
+      })}
+    </box>
+  )
+}
diff --git a/cli/src/components/status-bar.tsx b/cli/src/components/status-bar.tsx
index 857854b85..e8f29fe26 100644
--- a/cli/src/components/status-bar.tsx
+++ b/cli/src/components/status-bar.tsx
@@ -1,3 +1,4 @@
+import { getFreebuffModel } from '@codebuff/common/constants/freebuff-models'
 import { TextAttributes } from '@opentui/core'
 import React, { useEffect, useState } from 'react'
 
@@ -143,9 +144,14 @@ export const StatusBar = ({
       case 'idle':
         if (sessionProgress !== null) {
           const isUrgent = sessionProgress.remainingMs < COUNTDOWN_VISIBLE_MS
+          const modelName =
+            freebuffSession?.status === 'active'
+              ? getFreebuffModel(freebuffSession.model).displayName
+              : null
           return (
             <span fg={isUrgent ? theme.warning : theme.secondary}>
-              Free session · {formatSessionRemaining(sessionProgress.remainingMs)}
+              {modelName ? `${modelName} · ` : ''}Free session ·{' '}
+              {formatSessionRemaining(sessionProgress.remainingMs)}
             </span>
           )
         }
diff --git a/cli/src/components/waiting-room-screen.tsx b/cli/src/components/waiting-room-screen.tsx
index 08e967d28..5ee240299 100644
--- a/cli/src/components/waiting-room-screen.tsx
+++ b/cli/src/components/waiting-room-screen.tsx
@@ -5,6 +5,7 @@ import React, { useMemo, useState } from 'react'
 import { AdBanner } from './ad-banner'
 import { Button } from './button'
 import { ChoiceAdBanner } from './choice-ad-banner'
+import { FreebuffModelSelector } from './freebuff-model-selector'
 import { ShimmerText } from './shimmer-text'
 import { useFreebuffCtrlCExit } from '../hooks/use-freebuff-ctrl-c-exit'
 import { useGravityAd } from '../hooks/use-gravity-ad'
@@ -200,6 +201,10 @@ export const WaitingRoomScreen: React.FC<WaitingRoomScreenProps> = ({
                   {formatElapsed(elapsedMs)}
                 </text>
               </box>
+
+              <box style={{ marginTop: 1 }}>
+                <FreebuffModelSelector />
+              </box>
             </>
           )}
 
diff --git a/cli/src/data/slash-commands.ts b/cli/src/data/slash-commands.ts
index bd67811d3..fd2454087 100644
--- a/cli/src/data/slash-commands.ts
+++ b/cli/src/data/slash-commands.ts
@@ -47,6 +47,7 @@ const FREEBUFF_REMOVED_COMMAND_IDS = new Set([
 const FREEBUFF_ONLY_COMMAND_IDS = new Set([
   'connect',
   'plan',
+  'end-session',
 ])
 
 const ALL_SLASH_COMMANDS: SlashCommand[] = [
@@ -184,6 +185,11 @@ const ALL_SLASH_COMMANDS: SlashCommand[] = [
     label: 'theme:toggle',
     description: 'Toggle between light and dark mode',
   },
+  {
+    id: 'end-session',
+    label: 'end-session',
+    description: 'End your free session and return to the waiting room (lets you switch model)',
+  },
   {
     id: 'logout',
     label: 'logout',
diff --git a/cli/src/hooks/use-freebuff-session.ts b/cli/src/hooks/use-freebuff-session.ts
index 06db946be..077382009 100644
--- a/cli/src/hooks/use-freebuff-session.ts
+++ b/cli/src/hooks/use-freebuff-session.ts
@@ -1,6 +1,10 @@
 import { env } from '@codebuff/common/env'
 import { useEffect } from 'react'
 
+import {
+  getSelectedFreebuffModel,
+  useFreebuffModelStore,
+} from '../state/freebuff-model-store'
 import { useFreebuffSessionStore } from '../state/freebuff-session-store'
 import { getAuthTokenDetails } from '../utils/auth'
 import { IS_FREEBUFF } from '../utils/constants'
@@ -16,6 +20,9 @@ const POLL_INTERVAL_ERROR_MS = 10_000
  *  account has rotated the id and respond with `{ status: 'superseded' }`. */
 const FREEBUFF_INSTANCE_HEADER = 'x-freebuff-instance-id'
 
+/** Header sent on POST telling the server which model's queue to join. */
+const FREEBUFF_MODEL_HEADER = 'x-freebuff-model'
+
 /** Play the terminal bell so users get an audible notification on admission. */
 const playAdmissionSound = () => {
   try {
@@ -33,12 +40,15 @@ const sessionEndpoint = (): string => {
 async function callSession(
   method: 'POST' | 'GET' | 'DELETE',
   token: string,
-  opts: { instanceId?: string; signal?: AbortSignal } = {},
+  opts: { instanceId?: string; model?: string; signal?: AbortSignal } = {},
 ): Promise<FreebuffSessionResponse> {
   const headers: Record<string, string> = { Authorization: `Bearer ${token}` }
   if (method === 'GET' && opts.instanceId) {
     headers[FREEBUFF_INSTANCE_HEADER] = opts.instanceId
   }
+  if (method === 'POST' && opts.model) {
+    headers[FREEBUFF_MODEL_HEADER] = opts.model
+  }
   const resp = await fetch(sessionEndpoint(), {
     method,
     headers,
@@ -64,6 +74,17 @@ async function callSession(
       return body
     }
   }
+  // 409 from POST means the user picked a different model than their active
+  // session is bound to. Surface as a non-throw `model_locked` so the tick
+  // loop can revert the local selection instead of treating it as an error.
+  if (resp.status === 409 && method === 'POST') {
+    const body = (await resp.json().catch(() => null)) as
+      | FreebuffSessionResponse
+      | null
+    if (body && body.status === 'model_locked') {
+      return body
+    }
+  }
   if (!resp.ok) {
     const text = await resp.text().catch(() => '')
     throw new Error(
@@ -95,6 +116,7 @@ function nextDelayMs(next: FreebuffSessionResponse): number | null {
     case 'disabled':
     case 'superseded':
     case 'country_blocked':
+    case 'model_locked':
       return null
   }
 }
@@ -145,6 +167,41 @@ export async function refreshFreebuffSession(opts: { resetChat?: boolean } = {})
   await controller?.refresh()
 }
 
+/**
+ * Waiting-room model switch: record `model` as the selected model, then ask
+ * the session controller to refresh so the server can move the user to the
+ * back of that model's queue. If the server already admitted them on another
+ * model it replies `model_locked`; the tick loop then quietly restores the
+ * locked model as the local selection, leaving the active session intact.
+ * Users who really want to switch can run /end-session. No-op off freebuff.
+ */
+export async function switchFreebuffModel(model: string): Promise<void> {
+  if (IS_FREEBUFF) {
+    useFreebuffModelStore.getState().setSelectedModel(model)
+    await controller?.refresh()
+  }
+}
+
+/**
+ * End the current session and immediately rejoin the queue: best-effort
+ * DELETE, reset the chat store, then refresh the session controller. Used by
+ * the /end-session command to let the user leave an active session early.
+ */
+export async function endAndRejoinFreebuffSession(): Promise<void> {
+  if (!IS_FREEBUFF) return
+  const { token } = getAuthTokenDetails()
+  if (!token) return
+  try {
+    await callSession('DELETE', token)
+  } catch {
+    // Best-effort — even if DELETE fails the re-POST below will eventually
+    // succeed once the server-side sweep catches up.
+  }
+  const { useChatStore } = await import('../state/chat-store')
+  useChatStore.getState().reset()
+  await controller?.refresh()
+}
+
 export function markFreebuffSessionSuperseded(): void {
   if (!IS_FREEBUFF) return
   controller?.abort()
@@ -159,6 +216,21 @@ export function markFreebuffSessionEnded(): void {
   controller?.apply({ status: 'ended' })
 }
 
+/**
+ * Reports whether `current` represents a server-side slot the caller still
+ * holds: a queued or active row, or an ended row that still carries an
+ * instance id (the post-expiry grace window). Only then is a DELETE worth
+ * sending; in any other state the server has nothing to release.
+ */
+function shouldReleaseSlot(
+  current: FreebuffSessionResponse | null,
+): boolean {
+  if (!current) return false
+  const { status } = current
+  if (status === 'queued' || status === 'active') return true
+  return current.status === 'ended' && Boolean(current.instanceId)
+}
+
 /**
  * Best-effort DELETE of the caller's session row. Used by exit paths that
  * skip React unmount (process.exit on Ctrl+C) so the seat frees up quickly
@@ -167,13 +239,7 @@ export function markFreebuffSessionEnded(): void {
 export async function endFreebuffSessionBestEffort(): Promise<void> {
   if (!IS_FREEBUFF) return
   const current = useFreebuffSessionStore.getState().session
-  if (!current) return
-  // Only fire DELETE if we actually held a slot.
-  const heldSlot =
-    current.status === 'queued' ||
-    current.status === 'active' ||
-    (current.status === 'ended' && Boolean(current.instanceId))
-  if (!heldSlot) return
+  if (!shouldReleaseSlot(current)) return
   const { token } = getAuthTokenDetails()
   if (!token) return
   try {
@@ -250,14 +316,27 @@ export function useFreebuffSession(): UseFreebuffSessionResult {
       // re-POST out from under an in-flight agent.
       const method: 'POST' | 'GET' = hasPosted ? 'GET' : 'POST'
       const instanceId = getFreebuffInstanceId()
+      const model = getSelectedFreebuffModel()
       try {
         const next = await callSession(method, token, {
           signal: abortController.signal,
           instanceId,
+          model,
         })
         if (cancelled) return
         hasPosted = true
 
+        // Race recovery: user picked a different model in the waiting room at
+        // the exact moment the server admitted them with the original model.
+        // Silently revert the local selection and re-tick so the next call
+        // (a GET) lands the actual active session. Users who really want to
+        // switch can /end-session deliberately.
+        if (next.status === 'model_locked') {
+          useFreebuffModelStore.getState().setSelectedModel(next.currentModel)
+          schedule(0)
+          return
+        }
+
         if (previousStatus === 'queued' && next.status === 'active') {
           playAdmissionSound()
         }
@@ -319,12 +398,7 @@ export function useFreebuffSession(): UseFreebuffSessionResult {
 
       // Fire-and-forget DELETE. Only release if we actually held a slot so
       // we don't generate spurious DELETEs (e.g. HMR before POST completes).
-      if (
-        current &&
-        (current.status === 'queued' ||
-          current.status === 'active' ||
-          (current.status === 'ended' && current.instanceId))
-      ) {
+      if (shouldReleaseSlot(current)) {
         callSession('DELETE', token).catch(() => {})
       }
       setSession(null)
diff --git a/cli/src/state/freebuff-model-store.ts b/cli/src/state/freebuff-model-store.ts
new file mode 100644
index 000000000..182a38831
--- /dev/null
+++ b/cli/src/state/freebuff-model-store.ts
@@ -0,0 +1,41 @@
+import {
+  DEFAULT_FREEBUFF_MODEL_ID,
+  resolveFreebuffModel,
+} from '@codebuff/common/constants/freebuff-models'
+import { create } from 'zustand'
+
+import {
+  loadFreebuffModelPreference,
+  saveFreebuffModelPreference,
+} from '../utils/settings'
+
+/**
+ * Store for the freebuff model the user currently has selected. The initial
+ * value comes from the persisted settings file (falling back to the default
+ * model), and every `setSelectedModel` call writes the resolved id back to
+ * disk, so the next launch starts on the last pick without a separate save.
+ * The waiting-room picker reads this to highlight the current row; the
+ * session hook reads it to decide which queue to join.
+ */
+interface FreebuffModelStore {
+  selectedModel: string
+  setSelectedModel: (model: string) => void
+}
+
+export const useFreebuffModelStore = create<FreebuffModelStore>((set) => {
+  const initialModel = resolveFreebuffModel(
+    loadFreebuffModelPreference() ?? DEFAULT_FREEBUFF_MODEL_ID,
+  )
+  const setSelectedModel = (model: string): void => {
+    const selectedModel = resolveFreebuffModel(model)
+    saveFreebuffModelPreference(selectedModel)
+    set({ selectedModel })
+  }
+  return { selectedModel: initialModel, setSelectedModel }
+})
+
+/** Non-hook read of the selected model id, for callers outside React. */
+export function getSelectedFreebuffModel(): string {
+  const { selectedModel } = useFreebuffModelStore.getState()
+  return selectedModel
+}
diff --git a/cli/src/utils/local-agent-registry.ts b/cli/src/utils/local-agent-registry.ts
index 203a9f7a9..59206eb84 100644
--- a/cli/src/utils/local-agent-registry.ts
+++ b/cli/src/utils/local-agent-registry.ts
@@ -7,11 +7,26 @@ import { loadLocalAgents as sdkLoadLocalAgents, loadMCPConfigSync } from '@codeb
 
 import type { MCPConfig } from '@codebuff/common/types/mcp'
 
+import { FREE_MODE_AGENT_MODELS } from '@codebuff/common/constants/free-agents'
+import { FREEBUFF_MODELS } from '@codebuff/common/constants/freebuff-models'
+
+import { getSelectedFreebuffModel } from '../state/freebuff-model-store'
 import { getProjectRoot } from '../project-files'
-import { AGENT_MODE_TO_ID, type AgentMode } from './constants'
+import { AGENT_MODE_TO_ID, IS_FREEBUFF, type AgentMode } from './constants'
 import { logger } from './logger'
 import * as bundledAgentsModule from '../agents/bundled-agents.generated'
 
+/** Ids of agents whose hardcoded model may be replaced with the freebuff
+ *  model the user selected. Computed from `FREE_MODE_AGENT_MODELS`: an agent
+ *  qualifies only when its allowlist contains every entry in
+ *  `FREEBUFF_MODELS`, so retargeting it client-side cannot trip the server's
+ *  `free_mode_invalid_agent_model` rejection. */
+const FREEBUFF_MODEL_OVERRIDABLE_AGENT_IDS: ReadonlySet<string> = new Set(
+  Object.entries(FREE_MODE_AGENT_MODELS).flatMap(([agentId, allowed]) =>
+    FREEBUFF_MODELS.every((model) => allowed.has(model.id)) ? [agentId] : [],
+  ),
+)
+
 import type { AgentDefinition } from '@codebuff/common/templates/initial-agents-dir/types/agent-definition'
 
 // ============================================================================
@@ -354,6 +369,20 @@ export const loadAgentDefinitions = (): AgentDefinition[] => {
     }
   }
 
+  // Override the model of free-mode agents to match the user's pick from the
+  // freebuff waiting room. Bundled definitions hardcode glm-5.1; we swap in
+  // whatever the user chose so the chat-completions request body carries the
+  // matching model and the server-side session gate doesn't reject it as a
+  // model mismatch.
+  if (IS_FREEBUFF) {
+    const selectedModel = getSelectedFreebuffModel()
+    for (const def of definitions) {
+      if (FREEBUFF_MODEL_OVERRIDABLE_AGENT_IDS.has(def.id)) {
+        def.model = selectedModel
+      }
+    }
+  }
+
   return definitions
 }
 
diff --git a/cli/src/utils/settings.ts b/cli/src/utils/settings.ts
index c469ae273..5dc901e69 100644
--- a/cli/src/utils/settings.ts
+++ b/cli/src/utils/settings.ts
@@ -1,6 +1,8 @@
 import fs from 'fs'
 import path from 'path'
 
+import { isFreebuffModelId } from '@codebuff/common/constants/freebuff-models'
+
 import { getConfigDir } from './auth'
 import { AGENT_MODES } from './constants'
 import { logger } from './logger'
@@ -20,6 +22,10 @@ const DEFAULT_SETTINGS: Settings = {
 export interface Settings {
   mode?: AgentMode
   adsEnabled?: boolean
+  /** Last model the user picked in the freebuff model selector. Restored on
+   *  next freebuff launch so users land in the queue for their preferred
+   *  model without re-picking. Persisted as the canonical model id. */
+  freebuffModel?: string
   /** @deprecated Use server-side fallbackToALaCarte setting instead */
   alwaysUseALaCarte?: boolean
   /** @deprecated Use server-side fallbackToALaCarte setting instead */
@@ -96,6 +102,12 @@ const validateSettings = (parsed: unknown): Settings => {
     settings.adsEnabled = obj.adsEnabled
   }
 
+  // Validate freebuffModel — drop unknown ids so a removed model doesn't
+  // strand the user on a non-existent queue.
+  if (typeof obj.freebuffModel === 'string' && isFreebuffModelId(obj.freebuffModel)) {
+    settings.freebuffModel = obj.freebuffModel
+  }
+
   // Validate alwaysUseALaCarte (legacy)
   if (typeof obj.alwaysUseALaCarte === 'boolean') {
     settings.alwaysUseALaCarte = obj.alwaysUseALaCarte
@@ -149,3 +161,19 @@ export const saveModePreference = (mode: AgentMode): void => {
   saveSettings({ mode })
 }
 
+/**
+ * Read the persisted freebuff model preference from settings. Yields
+ * undefined when nothing has been saved yet, in which case callers should
+ * fall back to DEFAULT_FREEBUFF_MODEL_ID.
+ */
+export const loadFreebuffModelPreference = (): string | undefined =>
+  loadSettings().freebuffModel
+
+/**
+ * Persist `model` as the freebuff model preference. The model store calls
+ * this on every selection change so the next launch defaults to it.
+ */
+export const saveFreebuffModelPreference = (model: string): void => {
+  saveSettings({ freebuffModel: model })
+}
+
diff --git a/common/src/constants/freebuff-models.ts b/common/src/constants/freebuff-models.ts
new file mode 100644
index 000000000..d71ebd619
--- /dev/null
+++ b/common/src/constants/freebuff-models.ts
@@ -0,0 +1,53 @@
+/**
+ * Models a freebuff user can pick between in the waiting-room model selector.
+ *
+ * Each model has its own queue (server keys queue position by `model`), so the
+ * list here is effectively the set of separate waiting lines. Order is the
+ * order shown in the UI.
+ */
+export interface FreebuffModelOption {
+  /** Stable ID used in the wire protocol and DB. Matches the model id passed
+   *  to the chat-completions endpoint. */
+  id: string
+  /** Short label for the selector UI. */
+  displayName: string
+  /** One-line description shown next to the label. */
+  tagline: string
+}
+
+export const FREEBUFF_MODELS = [
+  {
+    id: 'z-ai/glm-5.1',
+    displayName: 'GLM 5.1',
+    tagline: 'Smartest',
+  },
+  {
+    id: 'minimax/minimax-m2.7',
+    displayName: 'MiniMax M2.7',
+    tagline: 'Fastest',
+  },
+] as const satisfies readonly FreebuffModelOption[]
+
+export type FreebuffModelId = (typeof FREEBUFF_MODELS)[number]['id']
+
+export const DEFAULT_FREEBUFF_MODEL_ID: FreebuffModelId = FREEBUFF_MODELS[0].id
+
+/** Type guard: true only when `id` is the id of a FREEBUFF_MODELS entry. */
+export function isFreebuffModelId(
+  id: string | null | undefined,
+): id is FreebuffModelId {
+  return Boolean(id) && FREEBUFF_MODELS.some((model) => model.id === id)
+}
+
+export function resolveFreebuffModel(
+  id: string | null | undefined,
+): FreebuffModelId {
+  return isFreebuffModelId(id) ? id : DEFAULT_FREEBUFF_MODEL_ID
+}
+
+/** Look up a model's display metadata; unknown ids resolve to the default. */
+export function getFreebuffModel(id: string): FreebuffModelOption {
+  // FREEBUFF_MODELS[0] is the row DEFAULT_FREEBUFF_MODEL_ID is derived from,
+  // so this is the default entry without a second search or a `!` assertion.
+  return FREEBUFF_MODELS.find((m) => m.id === id) ?? FREEBUFF_MODELS[0]
+}
diff --git a/common/src/types/freebuff-session.ts b/common/src/types/freebuff-session.ts
index b2a6dabff..bb8936b41 100644
--- a/common/src/types/freebuff-session.ts
+++ b/common/src/types/freebuff-session.ts
@@ -21,15 +21,24 @@ export type FreebuffSessionServerResponse =
   | {
       status: 'queued'
       instanceId: string
-      /** 1-indexed position in the FIFO queue. */
+      /** Model the user is queued for. Each model has its own queue. */
+      model: string
+      /** 1-indexed position in the queue for `model`. */
       position: number
       queueDepth: number
+      /** Current depth of every model's queue, so the CLI can show a live
+       *  "N ahead" hint on each row of the model selector. Models with no
+       *  queued rows at snapshot time may be absent; the CLI should treat a
+       *  missing entry as 0. */
+      queueDepthByModel: Record<string, number>
       estimatedWaitMs: number
       queuedAt: string
     }
   | {
       status: 'active'
       instanceId: string
+      /** Model the active session is bound to — cannot change mid-session. */
+      model: string
       admittedAt: string
       expiresAt: string
       remainingMs: number
@@ -68,3 +77,13 @@ export type FreebuffSessionServerResponse =
       status: 'country_blocked'
       countryCode: string
     }
+  | {
+      /** User has an active session bound to a different model. Returned
+       *  from POST /session when they pick a new model without ending their
+       *  current session first. The CLI reverts its local selection to
+       *  `currentModel` and keeps the active session; switching requires
+       *  ending it first (/end-session), which DELETEs and then re-queues. */
+      status: 'model_locked'
+      currentModel: string
+      requestedModel: string
+    }
diff --git a/docs/freebuff-waiting-room.md b/docs/freebuff-waiting-room.md
index 604046715..b1384d7b6 100644
--- a/docs/freebuff-waiting-room.md
+++ b/docs/freebuff-waiting-room.md
@@ -2,13 +2,13 @@
 
 ## Overview
 
-The waiting room is the admission control layer for **free-mode** requests against the freebuff Fireworks deployment. It has three jobs:
+The waiting room is the admission control layer for **free-mode** requests against the freebuff Fireworks deployments. It has three jobs:
 
-1. **Drip-admit users** — admit at a steady trickle (default 1 per `ADMISSION_TICK_MS`, currently 15s) so load ramps up gradually rather than stampeding the deployment when the queue is long.
-2. **Gate on upstream health** — before each admission tick, probe the Fireworks metrics endpoint with a short timeout (`isFireworksAdmissible` in `web/src/server/free-session/admission.ts`). If it doesn't respond OK, admission halts until it does — this is the primary concurrency control, not a static cap.
+1. **Drip-admit users per model** — each selectable freebuff model has its own FIFO queue. Admission runs on one shared tick (every `ADMISSION_TICK_MS`, default 15s) that tries to admit one user per model, so heavier models can sit cold without starving lighter ones.
+2. **Gate on per-deployment health** — a single fleet probe per tick (`getFleetHealth` in `web/src/server/free-session/fireworks-health.ts`) hits the Fireworks metrics endpoint and classifies each dedicated deployment as `healthy | degraded | unhealthy`. Only models whose deployment is `healthy` admit that tick; a degraded minimax-m2.7 no longer stalls glm-5.1 admissions.
 3. **One instance per account** — prevent a single user from running N concurrent freebuff CLIs to get N× throughput.
 
-Users who cannot be admitted immediately are placed in a FIFO queue and given an estimated wait time. Admitted users get a fixed-length session (default 1h) during which they can make free-mode requests subject to the existing per-user rate limits.
+Users who cannot be admitted immediately are placed in the queue for their chosen model and given an estimated wait time. Admitted users get a fixed-length session (default 1h) bound to the model they were admitted on; chat completions use that model for the life of the session.
 
 The entire system is gated by the env flag `FREEBUFF_WAITING_ROOM_ENABLED`. When `false`, the gate is a no-op and the admission ticker does not start; free-mode traffic flows through unchanged.
 
@@ -33,28 +33,30 @@ flowchart LR
     SessionAPI["/api/v1/freebuff/session<br/>(GET, POST, DELETE)"]
     ChatAPI["/api/v1/chat/completions"]
     Gate[checkSessionAdmissible]
-    Ticker[Admission Ticker<br/>every 5s, 1 pod]
+    Ticker["Admission Ticker<br/>every ADMISSION_TICK_MS<br/>(all pods, per-model locks)"]
     Store[(free_session<br/>Postgres)]
-    Probe[isFireworksAdmissible<br/>Fireworks metrics GET]
+    Probe["getFleetHealth<br/>Fireworks metrics GET<br/>(cached ~25s)"]
 
-    CLI -- "POST on startup<br/>(gets instance_id)" --> SessionAPI
+    CLI -- "POST on startup<br/>(model + gets instance_id)" --> SessionAPI
     CLI -- "GET to poll state" --> SessionAPI
     CLI -- "chat requests<br/>include instance_id" --> ChatAPI
     SessionAPI --> Store
     ChatAPI --> Gate
     Gate --> Store
-    Ticker --> Store
+    Ticker -- "per-model admit" --> Store
     Ticker --> Probe
 ```
 
 ### Components
 
-- **`free_session` table** (Postgres) — single source of truth for queue + active-session state. One row per user (PK on `user_id`).
-- **Public API** (`web/src/server/free-session/public-api.ts`) — `requestSession`, `getSessionState`, `endUserSession`, `checkSessionAdmissible`. Pure business logic; DI-friendly.
-- **Store** (`web/src/server/free-session/store.ts`) — all DB ops. Transaction boundaries and advisory locks live here.
-- **Admission ticker** (`web/src/server/free-session/admission.ts`) — self-scheduling timer that runs every 5s, sweeps expired rows, and admits queued users up to capacity.
+- **`free_session` table** (Postgres) — single source of truth for queue + active-session state. One row per user (PK on `user_id`), with a `model` column recording which queue the row belongs to.
+- **Model registry** (`common/src/constants/freebuff-models.ts`) — `FREEBUFF_MODELS` is the authoritative list of selectable models. Adding a new freebuff model means adding an entry here; the admission ticker iterates this list every tick.
+- **Public API** (`web/src/server/free-session/public-api.ts`) — `requestSession`, `getSessionState`, `endUserSession`, `checkSessionAdmissible`. Pure business logic; DI-friendly. `requestSession` accepts the user's chosen `model` and can return `model_locked` when a session is already active on a different model.
+- **Store** (`web/src/server/free-session/store.ts`) — all DB ops. Transaction boundaries and per-model advisory locks live here.
+- **Fleet health probe** (`web/src/server/free-session/fireworks-health.ts`) — `getFleetHealth()` does a single HTTP GET against the Fireworks metrics endpoint and returns a `Record<modelId, 'healthy' | 'degraded' | 'unhealthy'>`. Cached ~25s (under the Fireworks 30s exporter cadence and 6 req/min rate limit). Models without a dedicated deployment in `FIREWORKS_DEPLOYMENT_MAP` (e.g. serverless) are absent from the map and treated as `healthy` at call sites.
+- **Admission ticker** (`web/src/server/free-session/admission.ts`) — self-scheduling timer that runs every `ADMISSION_TICK_MS`. Each tick sweeps expired rows once, resolves fleet health once, then admits one queued user per model in parallel (each guarded by a model-keyed advisory lock).
 - **HTTP routes** (`web/src/app/api/v1/freebuff/session/`) — thin wrappers that resolve the API key → `userId` and delegate to the public API.
-- **Chat-completions gate** (`web/src/app/api/v1/chat/completions/_post.ts`) — for free-mode requests, calls `checkSessionAdmissible(userId, claimedInstanceId)` after the rate-limit check and rejects non-admissible requests with a structured error.
+- **Chat-completions gate** (`web/src/app/api/v1/chat/completions/_post.ts`) — for free-mode requests, calls `checkSessionAdmissible(userId, claimedInstanceId)` after the rate-limit check and rejects non-admissible requests with a structured error. The admitted session's `model` is what gets sent to the upstream.
 
 ## Database Schema
 
@@ -65,6 +67,7 @@ CREATE TABLE free_session (
   user_id             text PRIMARY KEY REFERENCES "user"(id) ON DELETE CASCADE,
   status              free_session_status NOT NULL,
   active_instance_id  text NOT NULL,
+  model               text NOT NULL,
   queued_at           timestamptz NOT NULL DEFAULT now(),
   admitted_at         timestamptz,
   expires_at          timestamptz,
@@ -72,16 +75,18 @@ CREATE TABLE free_session (
   updated_at          timestamptz NOT NULL DEFAULT now()
 );
 
-CREATE INDEX idx_free_session_queue  ON free_session (status, queued_at);
+-- Per-model dequeue: WHERE status='queued' AND model=$1 ORDER BY queued_at
+CREATE INDEX idx_free_session_queue  ON free_session (status, model, queued_at);
 CREATE INDEX idx_free_session_expiry ON free_session (expires_at);
 ```
 
-Migration: `packages/internal/src/db/migrations/0043_vengeful_boomer.sql`.
+Migrations: `packages/internal/src/db/migrations/0043_vengeful_boomer.sql` (initial table) and `0044_violet_stingray.sql` (added the `model` column and rebuilt the queue index).
 
 **Design notes**
 
 - **PK on `user_id`** is the structural enforcement of "one session per account". No app-logic race can produce two rows for one user.
 - **`active_instance_id`** rotates on every `POST /session` call. This is how we enforce one-CLI-at-a-time (see [Single-instance enforcement](#single-instance-enforcement)).
+- **`model` column.** Populated by the POST handler; determines which queue the row belongs to while queued and is fixed for the life of an active session. Switching models while an active session is live is rejected (`model_locked`, 409).
 - **All timestamps server-supplied.** The client never sends `queued_at`, `admitted_at`, or `expires_at` — they are either `DEFAULT now()` or computed server-side during admission.
 - **FK CASCADE on user delete** keeps the table clean without a background job.
 
@@ -127,18 +132,26 @@ The rotation is important: it happens even if the caller is already in the `acti
 
 ## Admission Loop
 
-One pod runs the admission loop at a time, coordinated via Postgres advisory lock. All pods start a ticker on boot, but each tick acquires `pg_try_advisory_xact_lock(FREEBUFF_ADMISSION_LOCK_ID)` inside a transaction; if already held, the tick is a no-op on that pod. The lock is automatically released when the transaction commits.
+All pods start a ticker on boot. Coordination is by **per-model** Postgres advisory locks: the lock id is `FREEBUFF_ADMISSION_LOCK_ID + hashStringToInt32(model)`, so different models can admit concurrently across pods while a single model is still serialized. Each per-model attempt takes the lock inside a transaction via `pg_try_advisory_xact_lock`; if the lock is held by another pod, that model is a no-op on this pod for this tick. The lock is released automatically when the transaction commits.
 
 Each tick does (in order):
 
-1. **Sweep expired.** `DELETE FROM free_session WHERE status='active' AND expires_at < now() - grace`. Runs regardless of upstream health so zombie sessions are cleaned up even during an outage.
-2. **Admit.** `admitFromQueue()` first calls `isFireworksAdmissible()` (short-timeout GET against the Fireworks metrics endpoint). If the probe fails, returns `{ skipped: 'health' }` — admission pauses and the queue grows until recovery. Otherwise opens a transaction, takes `pg_try_advisory_xact_lock(FREEBUFF_ADMISSION_LOCK_ID)`, and `SELECT ... WHERE status='queued' ORDER BY queued_at, user_id LIMIT 1 FOR UPDATE SKIP LOCKED` → `UPDATE` the row to `status='active'` with `admitted_at=now()`, `expires_at=now()+sessionLength`. One admit per tick keeps Fireworks from a thundering herd of newly-admitted CLIs.
+1. **Sweep expired.** `DELETE FROM free_session WHERE status='active' AND expires_at < now() - grace`. Runs once per tick regardless of upstream health so zombie sessions are cleaned up even during an outage.
+2. **Fleet health probe.** `getFleetHealth()` returns a `Record<modelId, 'healthy' | 'degraded' | 'unhealthy'>`. At most one HTTP call per tick (the result is cached ~25s per pod) covers every model. A model absent from the fleet map (e.g. serverless, with no dedicated deployment) defaults to `healthy` at the call site.
+3. **Admit per model, in parallel.** For each model in `FREEBUFF_MODELS`, call `admitFromQueue({ model, health, sessionLengthMs, now })`:
+   - If `health !== 'healthy'`, returns `{ admitted: [], skipped: health }` without touching Postgres — the model's queue pauses and grows until recovery.
+   - Otherwise opens a transaction, takes the per-model advisory lock, and `SELECT ... WHERE status='queued' AND model=$1 ORDER BY queued_at, user_id LIMIT 1 FOR UPDATE SKIP LOCKED` → `UPDATE` the row to `status='active'` with `admitted_at=now()`, `expires_at=now()+sessionLength`. One admit per model per tick keeps Fireworks from a thundering herd of newly-admitted CLIs.
+
+The final tick result carries a `queueDepthByModel` map and a single `skipped` reason (the first non-null skip across models) for observability.
 
 ### Tunables
 
 | Constant | Location | Default | Purpose |
 |---|---|---|---|
-| `ADMISSION_TICK_MS` | `config.ts` | 15000 | How often the ticker fires. One user is admitted per tick. |
+| `ADMISSION_TICK_MS` | `config.ts` | 15000 | How often the ticker fires. Up to one user is admitted per model per tick. |
+| `FREEBUFF_MODELS` | `common/src/constants/freebuff-models.ts` | `glm-5.1`, `minimax-m2.7` | Selectable models; each gets its own queue and admission slot. |
+| `FIREWORKS_DEPLOYMENT_MAP` | `web/src/llm-api/fireworks-config.ts` | glm-5.1 only | Models with dedicated Fireworks deployments. Models not listed are treated as `healthy` (serverless fallback) — drop this default when they migrate to their own deployments. |
+| `HEALTH_CACHE_TTL_MS` | `fireworks-health.ts` | 25000 | Fleet probe cache TTL. Sits just under the Fireworks 30s exporter cadence and 6 req/min rate limit. |
 | `FREEBUFF_SESSION_LENGTH_MS` | env | 3_600_000 | Session lifetime |
 | `FREEBUFF_SESSION_GRACE_MS` | env | 1_800_000 | Drain window after expiry — gate still admits requests so an in-flight agent can finish, but the CLI is expected to block new prompts. Hard cutoff at `expires_at + grace`. |
 
@@ -148,12 +161,14 @@ All endpoints authenticate via the standard `Authorization: Bearer <api-key>` or
 
 ### `POST /api/v1/freebuff/session`
 
-**Called by the CLI on startup.** Idempotent. Semantics:
+**Called by the CLI on startup and whenever the user picks a different model in the waiting room.** Body: `{ "model": "<freebuff model id>" }` (optional; falls back to the default model if omitted or unknown). Idempotent. Semantics:
 
-- No existing row → create with `status='queued'`, fresh `active_instance_id`, `queued_at=now()`.
-- Existing queued row → rotate `active_instance_id`, preserve `queued_at` (no queue jump).
-- Existing active+unexpired row → rotate `active_instance_id`, preserve `status`/`admitted_at`/`expires_at`.
-- Existing active+expired row → reset to queued with fresh `queued_at` (re-queue at back).
+- No existing row → create with `status='queued'`, `model` = requested, fresh `active_instance_id`, `queued_at=now()`.
+- Existing queued row, **same model** → rotate `active_instance_id`, preserve `queued_at` (no queue jump).
+- Existing queued row, **different model** → rotate `active_instance_id`, switch `model`, and reset `queued_at=now()` (move to back of the new model's queue).
+- Existing active+unexpired row, **same model** → rotate `active_instance_id`, preserve `status`/`admitted_at`/`expires_at`.
+- Existing active+unexpired row, **different model** → reject with `model_locked` (HTTP 409); `active_instance_id` is **not** rotated so the other CLI stays valid. Client must DELETE the session before switching.
+- Existing active+expired row → reset to queued with fresh `queued_at` and the requested `model` (re-queue at back).
 
 Response shapes:
 
@@ -165,9 +180,14 @@ Response shapes:
 {
   "status": "queued",
   "instanceId": "e47…",
-  "position": 17,          // 1-indexed
-  "queueDepth": 43,
-  "estimatedWaitMs": 3600000,
+  "model": "z-ai/glm-5.1",
+  "position": 17,          // 1-indexed within this model's queue
+  "queueDepth": 43,        // size of this model's queue
+  "queueDepthByModel": {   // snapshot of every model's queue — powers the
+    "z-ai/glm-5.1": 43,    //  "N ahead" hint in the selector. Missing
+    "minimax/minimax-m2.7": 4  //  entries should be treated as 0.
+  },
+  "estimatedWaitMs": 384000,
   "queuedAt": "2026-04-17T12:00:00Z"
 }
 
@@ -175,6 +195,7 @@ Response shapes:
 {
   "status": "active",
   "instanceId": "e47…",
+  "model": "z-ai/glm-5.1",
   "admittedAt": "2026-04-17T12:00:00Z",
   "expiresAt":  "2026-04-17T13:00:00Z",
   "remainingMs": 3600000
@@ -192,6 +213,15 @@ Response shapes:
   "gracePeriodEndsAt": "2026-04-17T13:30:00Z",
   "gracePeriodRemainingMs": 1800000
 }
+
+// POST only: user asked for a different model while an active session is
+// bound to `currentModel`. HTTP 409. CLI must DELETE /session and re-POST
+// to actually switch.
+{
+  "status": "model_locked",
+  "currentModel": "z-ai/glm-5.1",
+  "requestedModel": "minimax/minimax-m2.7"
+}
 ```
 
 ### `GET /api/v1/freebuff/session`
@@ -246,29 +276,30 @@ This is a **trust-the-client** design: the server still admits requests during t
 
 ## Estimated Wait Time
 
-Computed in `session-view.ts` as a rough one-minute-per-spot-ahead estimate:
+Computed in `session-view.ts` (`WAIT_MS_PER_SPOT_AHEAD = 24_000`) as a rough per-spot estimate within the user's own model queue:
 
 ```
-waitMs = (position - 1) * 60_000
+waitMs = (position - 1) * 24_000
 ```
 
 - Position 1 → 0 (next tick admits you)
-- Position 2 → one minute, and so on.
+- Position 2 → 24s, and so on.
 
-This estimate is intentionally decoupled from the admission tick — it's a human-friendly rule-of-thumb for the UI, not a precise projection. Actual wait depends on admission-tick cadence and health-gated pauses (during a Fireworks incident admission halts entirely), so the real wait can be longer or shorter.
+`position` is scoped to this model's queue — a user at position 1 in the `minimax/minimax-m2.7` queue is not affected by the depth of the `z-ai/glm-5.1` queue. The estimate is intentionally decoupled from the admission tick — it's a human-friendly rule-of-thumb for the UI, not a precise projection. Actual wait depends on admission-tick cadence and health-gated pauses (during a per-deployment Fireworks incident only the affected model's queue stalls; healthy models keep draining), so the real wait can be longer or shorter.
 
 ## CLI Integration (frontend-side contract)
 
 The CLI:
 
-1. **On startup**, calls `POST /api/v1/freebuff/session`. Stores `instanceId` in memory (not on disk — startup must re-admit).
-2. **Loops while `status === 'queued'`:** polls `GET /api/v1/freebuff/session` (with `X-Freebuff-Instance-Id`) every ~5s and renders `position / queueDepth / estimatedWaitMs`.
-3. **When `status === 'active'`**, renders `remainingMs` as a countdown. Re-polls GET every ~30s to stay honest with server-side state.
-4. **When `status === 'ended'`** (the server-side draining/grace shape, with `instanceId`), hides the input and shows the Enter-to-rejoin banner while still forwarding the instance id on outgoing chat requests so in-flight agent work can finish.
-5. **When `status === 'superseded'`**, stops polling and shows the "close the other CLI" screen.
-6. **On every chat request**, includes `codebuff_metadata.freebuff_instance_id: <stored id>`.
-7. **Handles chat-gate errors:** the same statuses are reachable via the gate's 409/410/428/429 for fast in-flight feedback, and the CLI calls the matching `markFreebuff*` helper to flip local state without waiting for the next poll.
-8. **On clean exit**, calls `DELETE /api/v1/freebuff/session` so the next user can be admitted sooner.
+1. **On startup**, calls `POST /api/v1/freebuff/session` with the user's persisted model choice. Stores `instanceId` in memory (not on disk — startup must re-admit).
+2. **Loops while `status === 'queued'`:** polls `GET /api/v1/freebuff/session` (with `X-Freebuff-Instance-Id`) every ~5s and renders `position / queueDepth / estimatedWaitMs` alongside the selected model.
+3. **Model switch from the waiting room** → re-POSTs with the new model id. Server moves the row to the back of the new model's queue. If the server responds `model_locked` (we already got admitted on the old model in the meantime), the tick loop silently reverts the local selection to the locked model rather than interrupting the active session — users who really want to switch can `/end-session` deliberately.
+4. **When `status === 'active'`**, renders `remainingMs` as a countdown. Re-polls GET every ~30s to stay honest with server-side state. Chat completions use the admitted session's model for the rest of the session.
+5. **When `status === 'ended'`** (the server-side draining/grace shape, with `instanceId`), hides the input and shows the Enter-to-rejoin banner while still forwarding the instance id on outgoing chat requests so in-flight agent work can finish.
+6. **When `status === 'superseded'`**, stops polling and shows the "close the other CLI" screen.
+7. **On every chat request**, includes `codebuff_metadata.freebuff_instance_id: <stored id>`.
+8. **Handles chat-gate errors:** the same statuses are reachable via the gate's 409/410/428/429 for fast in-flight feedback, and the CLI calls the matching `markFreebuff*` helper to flip local state without waiting for the next poll.
+9. **On clean exit**, calls `DELETE /api/v1/freebuff/session` so the next user can be admitted sooner.
 
 The `disabled` response means the server has the waiting room turned off. CLI treats it identically to `active` with infinite remaining time — no countdown, and chat requests can omit `freebuff_instance_id` entirely.
 
@@ -276,7 +307,8 @@ The `disabled` response means the server has the waiting room turned off. CLI tr
 
 - **`/api/v1/freebuff/session` routes** are stateless per pod; all state lives in Postgres. Any pod can serve any request.
 - **Chat completions gate** is a single `SELECT` per free-mode request. At high QPS this is the hottest path — the `user_id` PK lookup is O(1). If it ever becomes a problem, the obvious fix is to cache the session row for ~1s per pod.
-- **Admission loop** runs on every pod but is serialized by `pg_try_advisory_xact_lock`. At any given tick, exactly one pod actually admits; the rest early-return.
+- **Admission loop** runs on every pod. Per-model advisory locks serialize admission *within* each model while allowing different models to admit on different pods concurrently. At any given moment, at most one pod is admitting for a given model; any other pod whose tick overlaps early-returns on that model's lock.
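+- **Fleet health probe** is cached per-pod (`HEALTH_CACHE_TTL_MS`, 25s). Each pod hits the Fireworks metrics endpoint at most ~2.4 times/min, which on its own is well under the 6 req/min account rate limit. Assuming that limit is shared across all pods on the account, the aggregate rate grows with pod count (three or more pods polling at the maximum rate would exceed it), so revisit the cache TTL or share the cache when scaling out.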
 
 ## Abuse Resistance Summary
 
@@ -288,9 +320,11 @@ The `disabled` response means the server has the waiting room turned off. CLI tr
 | Client-forged timestamps | All timestamps server-supplied (`DEFAULT now()` or explicit) |
 | Queue jumping via timestamp manipulation | `queued_at` is server-supplied; FIFO order is server-determined |
 | Repeatedly calling POST to reset queue position | POST preserves `queued_at` for already-queued users |
-| Two pods admitting the same user | `SELECT ... FOR UPDATE SKIP LOCKED` + advisory xact lock |
-| Spamming POST/GET to starve admission tick | Admission uses Postgres advisory lock; DDoS protection is upstream (Next's global rate limits). Consider adding a per-user limiter on `/session` if traffic warrants. |
-| Fireworks metrics endpoint down / slow | `isFireworksAdmissible()` fails closed (timeout or non-OK) → admission pauses, queue grows |
+| Two pods admitting the same user | Per-model `SELECT ... FOR UPDATE SKIP LOCKED` + per-model advisory xact lock |
+| Spamming POST/GET to starve admission tick | Admission uses per-model Postgres advisory locks; DDoS protection is upstream (Next's global rate limits). Consider adding a per-user limiter on `/session` if traffic warrants. |
+| Repeatedly POSTing different models to get across every queue | Single row per user (PK on `user_id`); switching models moves the row, never clones it. A user holds exactly one queue slot at any time. |
+| Fireworks metrics endpoint down / slow | `getFleetHealth()` fails closed (timeout, non-OK, or missing API key) → every dedicated-deployment model is flagged `unhealthy` and its queue pauses. |
+| One deployment degraded while others are fine | Health is classified per-deployment; only the affected model's queue pauses, so a degraded glm-5.1 deployment doesn't block minimax-m2.7 admissions. |
 | Zombie expired sessions holding capacity | Swept on every admission tick, even when upstream is unhealthy |
 
 ## Testing
@@ -298,8 +332,9 @@ The `disabled` response means the server has the waiting room turned off. CLI tr
 Pure logic covered by `web/src/server/free-session/__tests__/*.test.ts`:
 
 - `session-view.test.ts` — wait-time estimation, row→response mapping
-- `public-api.test.ts` — all status transitions via in-memory DI store
-- `admission.test.ts` — tick behaviour with mocked store + health checks
+- `public-api.test.ts` — all status transitions via in-memory DI store (including `model_locked` and cross-model switching)
+- `admission.test.ts` — tick behaviour with mocked store + per-model health (healthy/degraded/unhealthy, absent-entry-defaults-to-healthy for serverless models)
+- `fireworks-health.test.ts` — `classifyOne` decision table: KV-blocks thresholds, 5xx fraction, prefill queue p90 histogram, per-deployment independence
 
 Handler tests in `web/src/app/api/v1/freebuff/session/__tests__/session.test.ts` cover auth + request routing with a mocked `SessionDeps`.
 
diff --git a/packages/internal/src/db/migrations/0044_violet_stingray.sql b/packages/internal/src/db/migrations/0044_violet_stingray.sql
new file mode 100644
index 000000000..e6942d1d9
--- /dev/null
+++ b/packages/internal/src/db/migrations/0044_violet_stingray.sql
@@ -0,0 +1,7 @@
+DROP INDEX "idx_free_session_queue";--> statement-breakpoint
+-- Backfill any in-flight rows with the previous sole free-mode model. The
+-- column is supposed to be required going forward, so we set a temporary
+-- default to ride out the migration and drop it immediately after.
+ALTER TABLE "free_session" ADD COLUMN "model" text NOT NULL DEFAULT 'z-ai/glm-5.1';--> statement-breakpoint
+ALTER TABLE "free_session" ALTER COLUMN "model" DROP DEFAULT;--> statement-breakpoint
+CREATE INDEX "idx_free_session_queue" ON "free_session" USING btree ("status","model","queued_at");
\ No newline at end of file
diff --git a/packages/internal/src/db/migrations/meta/0044_snapshot.json b/packages/internal/src/db/migrations/meta/0044_snapshot.json
new file mode 100644
index 000000000..847f32bba
--- /dev/null
+++ b/packages/internal/src/db/migrations/meta/0044_snapshot.json
@@ -0,0 +1,3214 @@
+{
+  "id": "108f2bd2-7ddc-4c15-b351-28f2b55d5348",
+  "prevId": "7c9172ed-5f73-4bf8-93cc-2c7e6d82a9ad",
+  "version": "7",
+  "dialect": "postgresql",
+  "tables": {
+    "public.account": {
+      "name": "account",
+      "schema": "",
+      "columns": {
+        "userId": {
+          "name": "userId",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "type": {
+          "name": "type",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "provider": {
+          "name": "provider",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "providerAccountId": {
+          "name": "providerAccountId",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "refresh_token": {
+          "name": "refresh_token",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "access_token": {
+          "name": "access_token",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "expires_at": {
+          "name": "expires_at",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "token_type": {
+          "name": "token_type",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "scope": {
+          "name": "scope",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "id_token": {
+          "name": "id_token",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "session_state": {
+          "name": "session_state",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        }
+      },
+      "indexes": {},
+      "foreignKeys": {
+        "account_userId_user_id_fk": {
+          "name": "account_userId_user_id_fk",
+          "tableFrom": "account",
+          "tableTo": "user",
+          "columnsFrom": [
+            "userId"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "cascade",
+          "onUpdate": "no action"
+        }
+      },
+      "compositePrimaryKeys": {
+        "account_provider_providerAccountId_pk": {
+          "name": "account_provider_providerAccountId_pk",
+          "columns": [
+            "provider",
+            "providerAccountId"
+          ]
+        }
+      },
+      "uniqueConstraints": {},
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.ad_impression": {
+      "name": "ad_impression",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "text",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "user_id": {
+          "name": "user_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "ad_text": {
+          "name": "ad_text",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "title": {
+          "name": "title",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "cta": {
+          "name": "cta",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "''"
+        },
+        "url": {
+          "name": "url",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "favicon": {
+          "name": "favicon",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "click_url": {
+          "name": "click_url",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "imp_url": {
+          "name": "imp_url",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "payout": {
+          "name": "payout",
+          "type": "numeric(10, 6)",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "credits_granted": {
+          "name": "credits_granted",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "grant_operation_id": {
+          "name": "grant_operation_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "served_at": {
+          "name": "served_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        },
+        "impression_fired_at": {
+          "name": "impression_fired_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "clicked_at": {
+          "name": "clicked_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": false
+        }
+      },
+      "indexes": {
+        "idx_ad_impression_user": {
+          "name": "idx_ad_impression_user",
+          "columns": [
+            {
+              "expression": "user_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "served_at",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_ad_impression_imp_url": {
+          "name": "idx_ad_impression_imp_url",
+          "columns": [
+            {
+              "expression": "imp_url",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        }
+      },
+      "foreignKeys": {
+        "ad_impression_user_id_user_id_fk": {
+          "name": "ad_impression_user_id_user_id_fk",
+          "tableFrom": "ad_impression",
+          "tableTo": "user",
+          "columnsFrom": [
+            "user_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "cascade",
+          "onUpdate": "no action"
+        }
+      },
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {
+        "ad_impression_imp_url_unique": {
+          "name": "ad_impression_imp_url_unique",
+          "nullsNotDistinct": false,
+          "columns": [
+            "imp_url"
+          ]
+        }
+      },
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.agent_config": {
+      "name": "agent_config",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "version": {
+          "name": "version",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "publisher_id": {
+          "name": "publisher_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "major": {
+          "name": "major",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": false,
+          "generated": {
+            "as": "CAST(SPLIT_PART(\"agent_config\".\"version\", '.', 1) AS INTEGER)",
+            "type": "stored"
+          }
+        },
+        "minor": {
+          "name": "minor",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": false,
+          "generated": {
+            "as": "CAST(SPLIT_PART(\"agent_config\".\"version\", '.', 2) AS INTEGER)",
+            "type": "stored"
+          }
+        },
+        "patch": {
+          "name": "patch",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": false,
+          "generated": {
+            "as": "CAST(SPLIT_PART(\"agent_config\".\"version\", '.', 3) AS INTEGER)",
+            "type": "stored"
+          }
+        },
+        "data": {
+          "name": "data",
+          "type": "jsonb",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        },
+        "updated_at": {
+          "name": "updated_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        }
+      },
+      "indexes": {
+        "idx_agent_config_publisher": {
+          "name": "idx_agent_config_publisher",
+          "columns": [
+            {
+              "expression": "publisher_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        }
+      },
+      "foreignKeys": {
+        "agent_config_publisher_id_publisher_id_fk": {
+          "name": "agent_config_publisher_id_publisher_id_fk",
+          "tableFrom": "agent_config",
+          "tableTo": "publisher",
+          "columnsFrom": [
+            "publisher_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "no action",
+          "onUpdate": "no action"
+        }
+      },
+      "compositePrimaryKeys": {
+        "agent_config_publisher_id_id_version_pk": {
+          "name": "agent_config_publisher_id_id_version_pk",
+          "columns": [
+            "publisher_id",
+            "id",
+            "version"
+          ]
+        }
+      },
+      "uniqueConstraints": {},
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.agent_run": {
+      "name": "agent_run",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "text",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "user_id": {
+          "name": "user_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "agent_id": {
+          "name": "agent_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "publisher_id": {
+          "name": "publisher_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false,
+          "generated": {
+            "as": "CASE\n             WHEN agent_id ~ '^[^/@]+/[^/@]+@[^/@]+$'\n               THEN split_part(agent_id, '/', 1)\n             ELSE NULL\n           END",
+            "type": "stored"
+          }
+        },
+        "agent_name": {
+          "name": "agent_name",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false,
+          "generated": {
+            "as": "CASE\n             WHEN agent_id ~ '^[^/@]+/[^/@]+@[^/@]+$'\n               THEN split_part(split_part(agent_id, '/', 2), '@', 1)\n             ELSE agent_id\n           END",
+            "type": "stored"
+          }
+        },
+        "agent_version": {
+          "name": "agent_version",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false,
+          "generated": {
+            "as": "CASE\n             WHEN agent_id ~ '^[^/@]+/[^/@]+@[^/@]+$'\n               THEN split_part(agent_id, '@', 2)\n             ELSE NULL\n           END",
+            "type": "stored"
+          }
+        },
+        "ancestor_run_ids": {
+          "name": "ancestor_run_ids",
+          "type": "text[]",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "root_run_id": {
+          "name": "root_run_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false,
+          "generated": {
+            "as": "CASE WHEN array_length(ancestor_run_ids, 1) >= 1 THEN ancestor_run_ids[1] ELSE id END",
+            "type": "stored"
+          }
+        },
+        "parent_run_id": {
+          "name": "parent_run_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false,
+          "generated": {
+            "as": "CASE WHEN array_length(ancestor_run_ids, 1) >= 1 THEN ancestor_run_ids[array_length(ancestor_run_ids, 1)] ELSE NULL END",
+            "type": "stored"
+          }
+        },
+        "depth": {
+          "name": "depth",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": false,
+          "generated": {
+            "as": "COALESCE(array_length(ancestor_run_ids, 1), 1)",
+            "type": "stored"
+          }
+        },
+        "duration_ms": {
+          "name": "duration_ms",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": false,
+          "generated": {
+            "as": "CASE WHEN completed_at IS NOT NULL THEN EXTRACT(EPOCH FROM (completed_at - created_at)) * 1000 ELSE NULL END::integer",
+            "type": "stored"
+          }
+        },
+        "total_steps": {
+          "name": "total_steps",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": false,
+          "default": 0
+        },
+        "direct_credits": {
+          "name": "direct_credits",
+          "type": "numeric(10, 6)",
+          "primaryKey": false,
+          "notNull": false,
+          "default": "'0'"
+        },
+        "total_credits": {
+          "name": "total_credits",
+          "type": "numeric(10, 6)",
+          "primaryKey": false,
+          "notNull": false,
+          "default": "'0'"
+        },
+        "status": {
+          "name": "status",
+          "type": "agent_run_status",
+          "typeSchema": "public",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "'running'"
+        },
+        "error_message": {
+          "name": "error_message",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        },
+        "completed_at": {
+          "name": "completed_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": false
+        }
+      },
+      "indexes": {
+        "idx_agent_run_user_id": {
+          "name": "idx_agent_run_user_id",
+          "columns": [
+            {
+              "expression": "user_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "created_at",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_agent_run_parent": {
+          "name": "idx_agent_run_parent",
+          "columns": [
+            {
+              "expression": "parent_run_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_agent_run_root": {
+          "name": "idx_agent_run_root",
+          "columns": [
+            {
+              "expression": "root_run_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_agent_run_agent_id": {
+          "name": "idx_agent_run_agent_id",
+          "columns": [
+            {
+              "expression": "agent_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "created_at",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_agent_run_publisher": {
+          "name": "idx_agent_run_publisher",
+          "columns": [
+            {
+              "expression": "publisher_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "created_at",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_agent_run_status": {
+          "name": "idx_agent_run_status",
+          "columns": [
+            {
+              "expression": "status",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "where": "\"agent_run\".\"status\" = 'running'",
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_agent_run_ancestors_gin": {
+          "name": "idx_agent_run_ancestors_gin",
+          "columns": [
+            {
+              "expression": "ancestor_run_ids",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "gin",
+          "with": {}
+        },
+        "idx_agent_run_completed_publisher_agent": {
+          "name": "idx_agent_run_completed_publisher_agent",
+          "columns": [
+            {
+              "expression": "publisher_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "agent_name",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "where": "\"agent_run\".\"status\" = 'completed'",
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_agent_run_completed_recent": {
+          "name": "idx_agent_run_completed_recent",
+          "columns": [
+            {
+              "expression": "created_at",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "publisher_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "agent_name",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "where": "\"agent_run\".\"status\" = 'completed'",
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_agent_run_completed_version": {
+          "name": "idx_agent_run_completed_version",
+          "columns": [
+            {
+              "expression": "publisher_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "agent_name",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "agent_version",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "created_at",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "where": "\"agent_run\".\"status\" = 'completed'",
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_agent_run_completed_user": {
+          "name": "idx_agent_run_completed_user",
+          "columns": [
+            {
+              "expression": "user_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "where": "\"agent_run\".\"status\" = 'completed'",
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        }
+      },
+      "foreignKeys": {
+        "agent_run_user_id_user_id_fk": {
+          "name": "agent_run_user_id_user_id_fk",
+          "tableFrom": "agent_run",
+          "tableTo": "user",
+          "columnsFrom": [
+            "user_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "cascade",
+          "onUpdate": "no action"
+        }
+      },
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {},
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.agent_step": {
+      "name": "agent_step",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "text",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "agent_run_id": {
+          "name": "agent_run_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "step_number": {
+          "name": "step_number",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "duration_ms": {
+          "name": "duration_ms",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": false,
+          "generated": {
+            "as": "CASE WHEN completed_at IS NOT NULL THEN EXTRACT(EPOCH FROM (completed_at - created_at)) * 1000 ELSE NULL END::integer",
+            "type": "stored"
+          }
+        },
+        "credits": {
+          "name": "credits",
+          "type": "numeric(10, 6)",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "'0'"
+        },
+        "child_run_ids": {
+          "name": "child_run_ids",
+          "type": "text[]",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "spawned_count": {
+          "name": "spawned_count",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": false,
+          "generated": {
+            "as": "array_length(child_run_ids, 1)",
+            "type": "stored"
+          }
+        },
+        "message_id": {
+          "name": "message_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "status": {
+          "name": "status",
+          "type": "agent_step_status",
+          "typeSchema": "public",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "'completed'"
+        },
+        "error_message": {
+          "name": "error_message",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        },
+        "completed_at": {
+          "name": "completed_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        }
+      },
+      "indexes": {
+        "unique_step_number_per_run": {
+          "name": "unique_step_number_per_run",
+          "columns": [
+            {
+              "expression": "agent_run_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "step_number",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": true,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_agent_step_run_id": {
+          "name": "idx_agent_step_run_id",
+          "columns": [
+            {
+              "expression": "agent_run_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_agent_step_children_gin": {
+          "name": "idx_agent_step_children_gin",
+          "columns": [
+            {
+              "expression": "child_run_ids",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "gin",
+          "with": {}
+        }
+      },
+      "foreignKeys": {
+        "agent_step_agent_run_id_agent_run_id_fk": {
+          "name": "agent_step_agent_run_id_agent_run_id_fk",
+          "tableFrom": "agent_step",
+          "tableTo": "agent_run",
+          "columnsFrom": [
+            "agent_run_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "cascade",
+          "onUpdate": "no action"
+        }
+      },
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {},
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.credit_ledger": {
+      "name": "credit_ledger",
+      "schema": "",
+      "columns": {
+        "operation_id": {
+          "name": "operation_id",
+          "type": "text",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "user_id": {
+          "name": "user_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "principal": {
+          "name": "principal",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "balance": {
+          "name": "balance",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "type": {
+          "name": "type",
+          "type": "grant_type",
+          "typeSchema": "public",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "description": {
+          "name": "description",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "priority": {
+          "name": "priority",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "expires_at": {
+          "name": "expires_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        },
+        "org_id": {
+          "name": "org_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "stripe_subscription_id": {
+          "name": "stripe_subscription_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        }
+      },
+      "indexes": {
+        "idx_credit_ledger_active_balance": {
+          "name": "idx_credit_ledger_active_balance",
+          "columns": [
+            {
+              "expression": "user_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "balance",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "expires_at",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "priority",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "created_at",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "where": "\"credit_ledger\".\"balance\" != 0 AND \"credit_ledger\".\"expires_at\" IS NULL",
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_credit_ledger_org": {
+          "name": "idx_credit_ledger_org",
+          "columns": [
+            {
+              "expression": "org_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_credit_ledger_subscription": {
+          "name": "idx_credit_ledger_subscription",
+          "columns": [
+            {
+              "expression": "user_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "type",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "created_at",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        }
+      },
+      "foreignKeys": {
+        "credit_ledger_user_id_user_id_fk": {
+          "name": "credit_ledger_user_id_user_id_fk",
+          "tableFrom": "credit_ledger",
+          "tableTo": "user",
+          "columnsFrom": [
+            "user_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "cascade",
+          "onUpdate": "no action"
+        },
+        "credit_ledger_org_id_org_id_fk": {
+          "name": "credit_ledger_org_id_org_id_fk",
+          "tableFrom": "credit_ledger",
+          "tableTo": "org",
+          "columnsFrom": [
+            "org_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "cascade",
+          "onUpdate": "no action"
+        }
+      },
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {},
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.encrypted_api_keys": {
+      "name": "encrypted_api_keys",
+      "schema": "",
+      "columns": {
+        "user_id": {
+          "name": "user_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "type": {
+          "name": "type",
+          "type": "api_key_type",
+          "typeSchema": "public",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "api_key": {
+          "name": "api_key",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        }
+      },
+      "indexes": {},
+      "foreignKeys": {
+        "encrypted_api_keys_user_id_user_id_fk": {
+          "name": "encrypted_api_keys_user_id_user_id_fk",
+          "tableFrom": "encrypted_api_keys",
+          "tableTo": "user",
+          "columnsFrom": [
+            "user_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "cascade",
+          "onUpdate": "no action"
+        }
+      },
+      "compositePrimaryKeys": {
+        "encrypted_api_keys_user_id_type_pk": {
+          "name": "encrypted_api_keys_user_id_type_pk",
+          "columns": [
+            "user_id",
+            "type"
+          ]
+        }
+      },
+      "uniqueConstraints": {},
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.fingerprint": {
+      "name": "fingerprint",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "text",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "sig_hash": {
+          "name": "sig_hash",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        }
+      },
+      "indexes": {},
+      "foreignKeys": {},
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {},
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.free_session": {
+      "name": "free_session",
+      "schema": "",
+      "columns": {
+        "user_id": {
+          "name": "user_id",
+          "type": "text",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "status": {
+          "name": "status",
+          "type": "free_session_status",
+          "typeSchema": "public",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "active_instance_id": {
+          "name": "active_instance_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "model": {
+          "name": "model",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "queued_at": {
+          "name": "queued_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        },
+        "admitted_at": {
+          "name": "admitted_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "expires_at": {
+          "name": "expires_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        },
+        "updated_at": {
+          "name": "updated_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        }
+      },
+      "indexes": {
+        "idx_free_session_queue": {
+          "name": "idx_free_session_queue",
+          "columns": [
+            {
+              "expression": "status",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "model",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "queued_at",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_free_session_expiry": {
+          "name": "idx_free_session_expiry",
+          "columns": [
+            {
+              "expression": "expires_at",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        }
+      },
+      "foreignKeys": {
+        "free_session_user_id_user_id_fk": {
+          "name": "free_session_user_id_user_id_fk",
+          "tableFrom": "free_session",
+          "tableTo": "user",
+          "columnsFrom": [
+            "user_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "cascade",
+          "onUpdate": "no action"
+        }
+      },
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {},
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.git_eval_results": {
+      "name": "git_eval_results",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "text",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "cost_mode": {
+          "name": "cost_mode",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "reasoner_model": {
+          "name": "reasoner_model",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "agent_model": {
+          "name": "agent_model",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "metadata": {
+          "name": "metadata",
+          "type": "jsonb",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "cost": {
+          "name": "cost",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true,
+          "default": 0
+        },
+        "is_public": {
+          "name": "is_public",
+          "type": "boolean",
+          "primaryKey": false,
+          "notNull": true,
+          "default": false
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        }
+      },
+      "indexes": {},
+      "foreignKeys": {},
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {},
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.limit_override": {
+      "name": "limit_override",
+      "schema": "",
+      "columns": {
+        "user_id": {
+          "name": "user_id",
+          "type": "text",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "credits_per_block": {
+          "name": "credits_per_block",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "block_duration_hours": {
+          "name": "block_duration_hours",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "weekly_credit_limit": {
+          "name": "weekly_credit_limit",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        },
+        "updated_at": {
+          "name": "updated_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        }
+      },
+      "indexes": {},
+      "foreignKeys": {
+        "limit_override_user_id_user_id_fk": {
+          "name": "limit_override_user_id_user_id_fk",
+          "tableFrom": "limit_override",
+          "tableTo": "user",
+          "columnsFrom": [
+            "user_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "cascade",
+          "onUpdate": "no action"
+        }
+      },
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {},
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.message": {
+      "name": "message",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "text",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "finished_at": {
+          "name": "finished_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "client_id": {
+          "name": "client_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "client_request_id": {
+          "name": "client_request_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "model": {
+          "name": "model",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "agent_id": {
+          "name": "agent_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "request": {
+          "name": "request",
+          "type": "jsonb",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "last_message": {
+          "name": "last_message",
+          "type": "jsonb",
+          "primaryKey": false,
+          "notNull": false,
+          "generated": {
+            "as": "\"message\".\"request\" -> -1",
+            "type": "stored"
+          }
+        },
+        "reasoning_text": {
+          "name": "reasoning_text",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "response": {
+          "name": "response",
+          "type": "jsonb",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "input_tokens": {
+          "name": "input_tokens",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true,
+          "default": 0
+        },
+        "cache_creation_input_tokens": {
+          "name": "cache_creation_input_tokens",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "cache_read_input_tokens": {
+          "name": "cache_read_input_tokens",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true,
+          "default": 0
+        },
+        "reasoning_tokens": {
+          "name": "reasoning_tokens",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "output_tokens": {
+          "name": "output_tokens",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "cost": {
+          "name": "cost",
+          "type": "numeric(100, 20)",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "credits": {
+          "name": "credits",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "byok": {
+          "name": "byok",
+          "type": "boolean",
+          "primaryKey": false,
+          "notNull": true,
+          "default": false
+        },
+        "latency_ms": {
+          "name": "latency_ms",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "ttft_ms": {
+          "name": "ttft_ms",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "user_id": {
+          "name": "user_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "org_id": {
+          "name": "org_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "repo_url": {
+          "name": "repo_url",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        }
+      },
+      "indexes": {
+        "message_user_id_idx": {
+          "name": "message_user_id_idx",
+          "columns": [
+            {
+              "expression": "user_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "message_finished_at_user_id_idx": {
+          "name": "message_finished_at_user_id_idx",
+          "columns": [
+            {
+              "expression": "finished_at",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "user_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "message_org_id_idx": {
+          "name": "message_org_id_idx",
+          "columns": [
+            {
+              "expression": "org_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "message_org_id_finished_at_idx": {
+          "name": "message_org_id_finished_at_idx",
+          "columns": [
+            {
+              "expression": "org_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "finished_at",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        }
+      },
+      "foreignKeys": {
+        "message_user_id_user_id_fk": {
+          "name": "message_user_id_user_id_fk",
+          "tableFrom": "message",
+          "tableTo": "user",
+          "columnsFrom": [
+            "user_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "cascade",
+          "onUpdate": "no action"
+        },
+        "message_org_id_org_id_fk": {
+          "name": "message_org_id_org_id_fk",
+          "tableFrom": "message",
+          "tableTo": "org",
+          "columnsFrom": [
+            "org_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "cascade",
+          "onUpdate": "no action"
+        }
+      },
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {},
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.org": {
+      "name": "org",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "text",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "name": {
+          "name": "name",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "slug": {
+          "name": "slug",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "description": {
+          "name": "description",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "owner_id": {
+          "name": "owner_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "stripe_customer_id": {
+          "name": "stripe_customer_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "stripe_subscription_id": {
+          "name": "stripe_subscription_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "current_period_start": {
+          "name": "current_period_start",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "current_period_end": {
+          "name": "current_period_end",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "auto_topup_enabled": {
+          "name": "auto_topup_enabled",
+          "type": "boolean",
+          "primaryKey": false,
+          "notNull": true,
+          "default": false
+        },
+        "auto_topup_threshold": {
+          "name": "auto_topup_threshold",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "auto_topup_amount": {
+          "name": "auto_topup_amount",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "credit_limit": {
+          "name": "credit_limit",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "billing_alerts": {
+          "name": "billing_alerts",
+          "type": "boolean",
+          "primaryKey": false,
+          "notNull": true,
+          "default": true
+        },
+        "usage_alerts": {
+          "name": "usage_alerts",
+          "type": "boolean",
+          "primaryKey": false,
+          "notNull": true,
+          "default": true
+        },
+        "weekly_reports": {
+          "name": "weekly_reports",
+          "type": "boolean",
+          "primaryKey": false,
+          "notNull": true,
+          "default": false
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        },
+        "updated_at": {
+          "name": "updated_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        }
+      },
+      "indexes": {},
+      "foreignKeys": {
+        "org_owner_id_user_id_fk": {
+          "name": "org_owner_id_user_id_fk",
+          "tableFrom": "org",
+          "tableTo": "user",
+          "columnsFrom": [
+            "owner_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "cascade",
+          "onUpdate": "no action"
+        }
+      },
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {
+        "org_slug_unique": {
+          "name": "org_slug_unique",
+          "nullsNotDistinct": false,
+          "columns": [
+            "slug"
+          ]
+        },
+        "org_stripe_customer_id_unique": {
+          "name": "org_stripe_customer_id_unique",
+          "nullsNotDistinct": false,
+          "columns": [
+            "stripe_customer_id"
+          ]
+        }
+      },
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.org_feature": {
+      "name": "org_feature",
+      "schema": "",
+      "columns": {
+        "org_id": {
+          "name": "org_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "feature": {
+          "name": "feature",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "config": {
+          "name": "config",
+          "type": "jsonb",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "is_active": {
+          "name": "is_active",
+          "type": "boolean",
+          "primaryKey": false,
+          "notNull": true,
+          "default": true
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        },
+        "updated_at": {
+          "name": "updated_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        }
+      },
+      "indexes": {
+        "idx_org_feature_active": {
+          "name": "idx_org_feature_active",
+          "columns": [
+            {
+              "expression": "org_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "is_active",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        }
+      },
+      "foreignKeys": {
+        "org_feature_org_id_org_id_fk": {
+          "name": "org_feature_org_id_org_id_fk",
+          "tableFrom": "org_feature",
+          "tableTo": "org",
+          "columnsFrom": [
+            "org_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "cascade",
+          "onUpdate": "no action"
+        }
+      },
+      "compositePrimaryKeys": {
+        "org_feature_org_id_feature_pk": {
+          "name": "org_feature_org_id_feature_pk",
+          "columns": [
+            "org_id",
+            "feature"
+          ]
+        }
+      },
+      "uniqueConstraints": {},
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.org_invite": {
+      "name": "org_invite",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "text",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "org_id": {
+          "name": "org_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "email": {
+          "name": "email",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "role": {
+          "name": "role",
+          "type": "org_role",
+          "typeSchema": "public",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "token": {
+          "name": "token",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "invited_by": {
+          "name": "invited_by",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "expires_at": {
+          "name": "expires_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        },
+        "accepted_at": {
+          "name": "accepted_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "accepted_by": {
+          "name": "accepted_by",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        }
+      },
+      "indexes": {
+        "idx_org_invite_token": {
+          "name": "idx_org_invite_token",
+          "columns": [
+            {
+              "expression": "token",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_org_invite_email": {
+          "name": "idx_org_invite_email",
+          "columns": [
+            {
+              "expression": "org_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "email",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_org_invite_expires": {
+          "name": "idx_org_invite_expires",
+          "columns": [
+            {
+              "expression": "expires_at",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        }
+      },
+      "foreignKeys": {
+        "org_invite_org_id_org_id_fk": {
+          "name": "org_invite_org_id_org_id_fk",
+          "tableFrom": "org_invite",
+          "tableTo": "org",
+          "columnsFrom": [
+            "org_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "cascade",
+          "onUpdate": "no action"
+        },
+        "org_invite_invited_by_user_id_fk": {
+          "name": "org_invite_invited_by_user_id_fk",
+          "tableFrom": "org_invite",
+          "tableTo": "user",
+          "columnsFrom": [
+            "invited_by"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "no action",
+          "onUpdate": "no action"
+        },
+        "org_invite_accepted_by_user_id_fk": {
+          "name": "org_invite_accepted_by_user_id_fk",
+          "tableFrom": "org_invite",
+          "tableTo": "user",
+          "columnsFrom": [
+            "accepted_by"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "no action",
+          "onUpdate": "no action"
+        }
+      },
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {
+        "org_invite_token_unique": {
+          "name": "org_invite_token_unique",
+          "nullsNotDistinct": false,
+          "columns": [
+            "token"
+          ]
+        }
+      },
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.org_member": {
+      "name": "org_member",
+      "schema": "",
+      "columns": {
+        "org_id": {
+          "name": "org_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "user_id": {
+          "name": "user_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "role": {
+          "name": "role",
+          "type": "org_role",
+          "typeSchema": "public",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "joined_at": {
+          "name": "joined_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        }
+      },
+      "indexes": {},
+      "foreignKeys": {
+        "org_member_org_id_org_id_fk": {
+          "name": "org_member_org_id_org_id_fk",
+          "tableFrom": "org_member",
+          "tableTo": "org",
+          "columnsFrom": [
+            "org_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "cascade",
+          "onUpdate": "no action"
+        },
+        "org_member_user_id_user_id_fk": {
+          "name": "org_member_user_id_user_id_fk",
+          "tableFrom": "org_member",
+          "tableTo": "user",
+          "columnsFrom": [
+            "user_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "cascade",
+          "onUpdate": "no action"
+        }
+      },
+      "compositePrimaryKeys": {
+        "org_member_org_id_user_id_pk": {
+          "name": "org_member_org_id_user_id_pk",
+          "columns": [
+            "org_id",
+            "user_id"
+          ]
+        }
+      },
+      "uniqueConstraints": {},
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.org_repo": {
+      "name": "org_repo",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "text",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "org_id": {
+          "name": "org_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "repo_url": {
+          "name": "repo_url",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "repo_name": {
+          "name": "repo_name",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "repo_owner": {
+          "name": "repo_owner",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "approved_by": {
+          "name": "approved_by",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "approved_at": {
+          "name": "approved_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        },
+        "is_active": {
+          "name": "is_active",
+          "type": "boolean",
+          "primaryKey": false,
+          "notNull": true,
+          "default": true
+        }
+      },
+      "indexes": {
+        "idx_org_repo_active": {
+          "name": "idx_org_repo_active",
+          "columns": [
+            {
+              "expression": "org_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "is_active",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_org_repo_unique": {
+          "name": "idx_org_repo_unique",
+          "columns": [
+            {
+              "expression": "org_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "repo_url",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        }
+      },
+      "foreignKeys": {
+        "org_repo_org_id_org_id_fk": {
+          "name": "org_repo_org_id_org_id_fk",
+          "tableFrom": "org_repo",
+          "tableTo": "org",
+          "columnsFrom": [
+            "org_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "cascade",
+          "onUpdate": "no action"
+        },
+        "org_repo_approved_by_user_id_fk": {
+          "name": "org_repo_approved_by_user_id_fk",
+          "tableFrom": "org_repo",
+          "tableTo": "user",
+          "columnsFrom": [
+            "approved_by"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "no action",
+          "onUpdate": "no action"
+        }
+      },
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {},
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.publisher": {
+      "name": "publisher",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "text",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "name": {
+          "name": "name",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "email": {
+          "name": "email",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "verified": {
+          "name": "verified",
+          "type": "boolean",
+          "primaryKey": false,
+          "notNull": true,
+          "default": false
+        },
+        "bio": {
+          "name": "bio",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "avatar_url": {
+          "name": "avatar_url",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "user_id": {
+          "name": "user_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "org_id": {
+          "name": "org_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "created_by": {
+          "name": "created_by",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        },
+        "updated_at": {
+          "name": "updated_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        }
+      },
+      "indexes": {},
+      "foreignKeys": {
+        "publisher_user_id_user_id_fk": {
+          "name": "publisher_user_id_user_id_fk",
+          "tableFrom": "publisher",
+          "tableTo": "user",
+          "columnsFrom": [
+            "user_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "no action",
+          "onUpdate": "no action"
+        },
+        "publisher_org_id_org_id_fk": {
+          "name": "publisher_org_id_org_id_fk",
+          "tableFrom": "publisher",
+          "tableTo": "org",
+          "columnsFrom": [
+            "org_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "no action",
+          "onUpdate": "no action"
+        },
+        "publisher_created_by_user_id_fk": {
+          "name": "publisher_created_by_user_id_fk",
+          "tableFrom": "publisher",
+          "tableTo": "user",
+          "columnsFrom": [
+            "created_by"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "no action",
+          "onUpdate": "no action"
+        }
+      },
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {},
+      "policies": {},
+      "checkConstraints": {
+        "publisher_single_owner": {
+          "name": "publisher_single_owner",
+          "value": "(\"publisher\".\"user_id\" IS NOT NULL AND \"publisher\".\"org_id\" IS NULL) OR\n    (\"publisher\".\"user_id\" IS NULL AND \"publisher\".\"org_id\" IS NOT NULL)"
+        }
+      },
+      "isRLSEnabled": false
+    },
+    "public.referral": {
+      "name": "referral",
+      "schema": "",
+      "columns": {
+        "referrer_id": {
+          "name": "referrer_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "referred_id": {
+          "name": "referred_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "status": {
+          "name": "status",
+          "type": "referral_status",
+          "typeSchema": "public",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "'pending'"
+        },
+        "credits": {
+          "name": "credits",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "is_legacy": {
+          "name": "is_legacy",
+          "type": "boolean",
+          "primaryKey": false,
+          "notNull": true,
+          "default": false
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        },
+        "completed_at": {
+          "name": "completed_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": false
+        }
+      },
+      "indexes": {},
+      "foreignKeys": {
+        "referral_referrer_id_user_id_fk": {
+          "name": "referral_referrer_id_user_id_fk",
+          "tableFrom": "referral",
+          "tableTo": "user",
+          "columnsFrom": [
+            "referrer_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "no action",
+          "onUpdate": "no action"
+        },
+        "referral_referred_id_user_id_fk": {
+          "name": "referral_referred_id_user_id_fk",
+          "tableFrom": "referral",
+          "tableTo": "user",
+          "columnsFrom": [
+            "referred_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "no action",
+          "onUpdate": "no action"
+        }
+      },
+      "compositePrimaryKeys": {
+        "referral_referrer_id_referred_id_pk": {
+          "name": "referral_referrer_id_referred_id_pk",
+          "columns": [
+            "referrer_id",
+            "referred_id"
+          ]
+        }
+      },
+      "uniqueConstraints": {},
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.session": {
+      "name": "session",
+      "schema": "",
+      "columns": {
+        "sessionToken": {
+          "name": "sessionToken",
+          "type": "text",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "userId": {
+          "name": "userId",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "expires": {
+          "name": "expires",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "fingerprint_id": {
+          "name": "fingerprint_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "type": {
+          "name": "type",
+          "type": "session_type",
+          "typeSchema": "public",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "'web'"
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        }
+      },
+      "indexes": {},
+      "foreignKeys": {
+        "session_userId_user_id_fk": {
+          "name": "session_userId_user_id_fk",
+          "tableFrom": "session",
+          "tableTo": "user",
+          "columnsFrom": [
+            "userId"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "cascade",
+          "onUpdate": "no action"
+        },
+        "session_fingerprint_id_fingerprint_id_fk": {
+          "name": "session_fingerprint_id_fingerprint_id_fk",
+          "tableFrom": "session",
+          "tableTo": "fingerprint",
+          "columnsFrom": [
+            "fingerprint_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "no action",
+          "onUpdate": "no action"
+        }
+      },
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {},
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.subscription": {
+      "name": "subscription",
+      "schema": "",
+      "columns": {
+        "stripe_subscription_id": {
+          "name": "stripe_subscription_id",
+          "type": "text",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "stripe_customer_id": {
+          "name": "stripe_customer_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "user_id": {
+          "name": "user_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "stripe_price_id": {
+          "name": "stripe_price_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "tier": {
+          "name": "tier",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "scheduled_tier": {
+          "name": "scheduled_tier",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "status": {
+          "name": "status",
+          "type": "subscription_status",
+          "typeSchema": "public",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "'active'"
+        },
+        "billing_period_start": {
+          "name": "billing_period_start",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "billing_period_end": {
+          "name": "billing_period_end",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "cancel_at_period_end": {
+          "name": "cancel_at_period_end",
+          "type": "boolean",
+          "primaryKey": false,
+          "notNull": true,
+          "default": false
+        },
+        "canceled_at": {
+          "name": "canceled_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        },
+        "updated_at": {
+          "name": "updated_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        }
+      },
+      "indexes": {
+        "idx_subscription_customer": {
+          "name": "idx_subscription_customer",
+          "columns": [
+            {
+              "expression": "stripe_customer_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_subscription_user": {
+          "name": "idx_subscription_user",
+          "columns": [
+            {
+              "expression": "user_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_subscription_status": {
+          "name": "idx_subscription_status",
+          "columns": [
+            {
+              "expression": "status",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "where": "\"subscription\".\"status\" = 'active'",
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        }
+      },
+      "foreignKeys": {
+        "subscription_user_id_user_id_fk": {
+          "name": "subscription_user_id_user_id_fk",
+          "tableFrom": "subscription",
+          "tableTo": "user",
+          "columnsFrom": [
+            "user_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "cascade",
+          "onUpdate": "no action"
+        }
+      },
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {},
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.sync_failure": {
+      "name": "sync_failure",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "text",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "provider": {
+          "name": "provider",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        },
+        "last_attempt_at": {
+          "name": "last_attempt_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        },
+        "retry_count": {
+          "name": "retry_count",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true,
+          "default": 1
+        },
+        "last_error": {
+          "name": "last_error",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        }
+      },
+      "indexes": {
+        "idx_sync_failure_retry": {
+          "name": "idx_sync_failure_retry",
+          "columns": [
+            {
+              "expression": "retry_count",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "last_attempt_at",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "where": "\"sync_failure\".\"retry_count\" < 5",
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        }
+      },
+      "foreignKeys": {},
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {},
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.user": {
+      "name": "user",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "text",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "name": {
+          "name": "name",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "email": {
+          "name": "email",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "password": {
+          "name": "password",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "emailVerified": {
+          "name": "emailVerified",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "image": {
+          "name": "image",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "stripe_customer_id": {
+          "name": "stripe_customer_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "next_quota_reset": {
+          "name": "next_quota_reset",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": false,
+          "default": "now() + INTERVAL '1 month'"
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        },
+        "referral_code": {
+          "name": "referral_code",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false,
+          "default": "'ref-' || gen_random_uuid()"
+        },
+        "referral_limit": {
+          "name": "referral_limit",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true,
+          "default": 5
+        },
+        "discord_id": {
+          "name": "discord_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "handle": {
+          "name": "handle",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "auto_topup_enabled": {
+          "name": "auto_topup_enabled",
+          "type": "boolean",
+          "primaryKey": false,
+          "notNull": true,
+          "default": false
+        },
+        "auto_topup_threshold": {
+          "name": "auto_topup_threshold",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "auto_topup_amount": {
+          "name": "auto_topup_amount",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "banned": {
+          "name": "banned",
+          "type": "boolean",
+          "primaryKey": false,
+          "notNull": true,
+          "default": false
+        },
+        "fallback_to_a_la_carte": {
+          "name": "fallback_to_a_la_carte",
+          "type": "boolean",
+          "primaryKey": false,
+          "notNull": true,
+          "default": false
+        }
+      },
+      "indexes": {},
+      "foreignKeys": {},
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {
+        "user_email_unique": {
+          "name": "user_email_unique",
+          "nullsNotDistinct": false,
+          "columns": [
+            "email"
+          ]
+        },
+        "user_stripe_customer_id_unique": {
+          "name": "user_stripe_customer_id_unique",
+          "nullsNotDistinct": false,
+          "columns": [
+            "stripe_customer_id"
+          ]
+        },
+        "user_referral_code_unique": {
+          "name": "user_referral_code_unique",
+          "nullsNotDistinct": false,
+          "columns": [
+            "referral_code"
+          ]
+        },
+        "user_discord_id_unique": {
+          "name": "user_discord_id_unique",
+          "nullsNotDistinct": false,
+          "columns": [
+            "discord_id"
+          ]
+        },
+        "user_handle_unique": {
+          "name": "user_handle_unique",
+          "nullsNotDistinct": false,
+          "columns": [
+            "handle"
+          ]
+        }
+      },
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.verificationToken": {
+      "name": "verificationToken",
+      "schema": "",
+      "columns": {
+        "identifier": {
+          "name": "identifier",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "token": {
+          "name": "token",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "expires": {
+          "name": "expires",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true
+        }
+      },
+      "indexes": {},
+      "foreignKeys": {},
+      "compositePrimaryKeys": {
+        "verificationToken_identifier_token_pk": {
+          "name": "verificationToken_identifier_token_pk",
+          "columns": [
+            "identifier",
+            "token"
+          ]
+        }
+      },
+      "uniqueConstraints": {},
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    }
+  },
+  "enums": {
+    "public.referral_status": {
+      "name": "referral_status",
+      "schema": "public",
+      "values": [
+        "pending",
+        "completed"
+      ]
+    },
+    "public.agent_run_status": {
+      "name": "agent_run_status",
+      "schema": "public",
+      "values": [
+        "running",
+        "completed",
+        "failed",
+        "cancelled"
+      ]
+    },
+    "public.agent_step_status": {
+      "name": "agent_step_status",
+      "schema": "public",
+      "values": [
+        "running",
+        "completed",
+        "skipped"
+      ]
+    },
+    "public.api_key_type": {
+      "name": "api_key_type",
+      "schema": "public",
+      "values": [
+        "anthropic",
+        "gemini",
+        "openai"
+      ]
+    },
+    "public.free_session_status": {
+      "name": "free_session_status",
+      "schema": "public",
+      "values": [
+        "queued",
+        "active"
+      ]
+    },
+    "public.grant_type": {
+      "name": "grant_type",
+      "schema": "public",
+      "values": [
+        "free",
+        "referral",
+        "referral_legacy",
+        "subscription",
+        "purchase",
+        "admin",
+        "organization",
+        "ad"
+      ]
+    },
+    "public.org_role": {
+      "name": "org_role",
+      "schema": "public",
+      "values": [
+        "owner",
+        "admin",
+        "member"
+      ]
+    },
+    "public.session_type": {
+      "name": "session_type",
+      "schema": "public",
+      "values": [
+        "web",
+        "pat",
+        "cli"
+      ]
+    },
+    "public.subscription_status": {
+      "name": "subscription_status",
+      "schema": "public",
+      "values": [
+        "incomplete",
+        "incomplete_expired",
+        "trialing",
+        "active",
+        "past_due",
+        "canceled",
+        "unpaid",
+        "paused"
+      ]
+    }
+  },
+  "schemas": {},
+  "sequences": {},
+  "roles": {},
+  "policies": {},
+  "views": {},
+  "_meta": {
+    "columns": {},
+    "schemas": {},
+    "tables": {}
+  }
+}
\ No newline at end of file
diff --git a/packages/internal/src/db/migrations/meta/_journal.json b/packages/internal/src/db/migrations/meta/_journal.json
index 137086659..bba4ab5ed 100644
--- a/packages/internal/src/db/migrations/meta/_journal.json
+++ b/packages/internal/src/db/migrations/meta/_journal.json
@@ -309,6 +309,13 @@
       "when": 1776461642346,
       "tag": "0043_vengeful_boomer",
       "breakpoints": true
+    },
+    {
+      "idx": 44,
+      "version": "7",
+      "when": 1776719872222,
+      "tag": "0044_violet_stingray",
+      "breakpoints": true
     }
   ]
 }
\ No newline at end of file
diff --git a/packages/internal/src/db/schema.ts b/packages/internal/src/db/schema.ts
index cd7762eee..ba481c89a 100644
--- a/packages/internal/src/db/schema.ts
+++ b/packages/internal/src/db/schema.ts
@@ -823,6 +823,10 @@ export const freeSession = pgTable(
       .references(() => user.id, { onDelete: 'cascade' }),
     status: freeSessionStatusEnum('status').notNull(),
     active_instance_id: text('active_instance_id').notNull(),
+    /** Which freebuff model this row is queued for / locked to. Each model has
+     *  its own queue (admission picks one queued user per model per tick) and
+     *  the model is fixed for the life of an active session. */
+    model: text('model').notNull(),
     queued_at: timestamp('queued_at', {
       mode: 'date',
       withTimezone: true,
@@ -851,8 +855,8 @@ export const freeSession = pgTable(
       .defaultNow(),
   },
   (table) => [
-    // Dequeue: SELECT ... WHERE status='queued' ORDER BY queued_at LIMIT N
-    index('idx_free_session_queue').on(table.status, table.queued_at),
+    // Per-model dequeue: WHERE status='queued' AND model=$1 ORDER BY queued_at
+    index('idx_free_session_queue').on(table.status, table.model, table.queued_at),
     // Expiry sweep: SELECT ... WHERE status='active' AND expires_at < now()
     index('idx_free_session_expiry').on(table.expires_at),
   ],
diff --git a/web/src/app/api/v1/chat/completions/_post.ts b/web/src/app/api/v1/chat/completions/_post.ts
index f3640f4a3..8809697f3 100644
--- a/web/src/app/api/v1/chat/completions/_post.ts
+++ b/web/src/app/api/v1/chat/completions/_post.ts
@@ -124,6 +124,7 @@ const STATUS_BY_GATE_CODE = {
   waiting_room_queued: 429,
   session_superseded: 409,
   session_expired: 410,
+  session_model_mismatch: 409,
   freebuff_update_required: 426,
 } satisfies Record<GateRejectCode, number>
 
@@ -394,6 +395,7 @@ export async function postChatCompletions(params: {
         userId,
         userEmail: userInfo.email,
         claimedInstanceId,
+        requestedModel: typedBody.model,
       })
       if (!gate.ok) {
         trackEvent({
diff --git a/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts b/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts
index eef464fee..3b9db7a49 100644
--- a/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts
+++ b/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts
@@ -12,6 +12,8 @@ import type { SessionDeps } from '@/server/free-session/public-api'
 import type { InternalSessionRow } from '@/server/free-session/types'
 import type { NextRequest } from 'next/server'
 
+const DEFAULT_MODEL = 'z-ai/glm-5.1'
+
 function makeReq(
   apiKey: string | null,
   opts: { instanceId?: string; cfCountry?: string } = {},
@@ -37,16 +39,24 @@ function makeSessionDeps(overrides: Partial<SessionDeps> = {}): SessionDeps & {
     graceMs: 30 * 60 * 1000,
     now: () => now,
     getSessionRow: async (userId) => rows.get(userId) ?? null,
-    queueDepth: async () => [...rows.values()].filter((r) => r.status === 'queued').length,
+    queueDepthsByModel: async () => {
+      const out: Record<string, number> = {}
+      for (const r of rows.values()) {
+        if (r.status !== 'queued') continue
+        out[r.model] = (out[r.model] ?? 0) + 1
+      }
+      return out
+    },
     queuePositionFor: async () => 1,
     endSession: async (userId) => {
       rows.delete(userId)
     },
-    joinOrTakeOver: async ({ userId, now }) => {
+    joinOrTakeOver: async ({ userId, model, now }) => {
       const r: InternalSessionRow = {
         user_id: userId,
         status: 'queued',
         active_instance_id: `inst-${++instanceCounter}`,
+        model,
         queued_at: now,
         admitted_at: null,
         expires_at: null,
@@ -157,6 +167,7 @@ describe('GET /api/v1/freebuff/session', () => {
       user_id: 'u1',
       status: 'active',
       active_instance_id: 'real-id',
+      model: DEFAULT_MODEL,
       queued_at: new Date(),
       admitted_at: new Date(),
       expires_at: new Date(Date.now() + 60_000),
@@ -180,6 +191,7 @@ describe('DELETE /api/v1/freebuff/session', () => {
       user_id: 'u1',
       status: 'active',
       active_instance_id: 'x',
+      model: DEFAULT_MODEL,
       queued_at: new Date(),
       admitted_at: new Date(),
       expires_at: new Date(Date.now() + 60_000),
diff --git a/web/src/app/api/v1/freebuff/session/_handlers.ts b/web/src/app/api/v1/freebuff/session/_handlers.ts
index 6f1ae0664..073e7522f 100644
--- a/web/src/app/api/v1/freebuff/session/_handlers.ts
+++ b/web/src/app/api/v1/freebuff/session/_handlers.ts
@@ -39,6 +39,8 @@ function countryBlockedResponse(req: NextRequest): NextResponse | null {
 /** Header the CLI uses to identify which instance is polling. Used by GET to
  *  detect when another CLI on the same account has rotated the id. */
 export const FREEBUFF_INSTANCE_HEADER = 'x-freebuff-instance-id'
+/** Header the CLI sends on POST to pick which model's queue to join. */
+export const FREEBUFF_MODEL_HEADER = 'x-freebuff-model'
 
 export interface FreebuffSessionDeps {
   getUserInfoFromApiKey: GetUserInfoFromApiKeyFn
@@ -122,13 +124,20 @@ export async function postFreebuffSession(
   const blocked = countryBlockedResponse(req)
   if (blocked) return blocked
 
+  const requestedModel = req.headers.get(FREEBUFF_MODEL_HEADER) ?? ''
+
   try {
     const state = await requestSession({
       userId: auth.userId,
       userEmail: auth.userEmail,
+      model: requestedModel,
       deps: deps.sessionDeps,
     })
-    return NextResponse.json(state, { status: 200 })
+    // model_locked is a 409 so it's distinguishable from a normal queued/active
+    // response on the client. The CLI translates it into a "switch model?"
+    // confirmation prompt.
+    const status = state.status === 'model_locked' ? 409 : 200
+    return NextResponse.json(state, { status })
   } catch (error) {
     return serverError(deps, 'POST', auth.userId, error)
   }
diff --git a/web/src/server/free-session/__tests__/admission.test.ts b/web/src/server/free-session/__tests__/admission.test.ts
index a10a29713..43fe11a4c 100644
--- a/web/src/server/free-session/__tests__/admission.test.ts
+++ b/web/src/server/free-session/__tests__/admission.test.ts
@@ -3,9 +3,10 @@ import { describe, expect, test } from 'bun:test'
 import { runAdmissionTick } from '../admission'
 
 import type { AdmissionDeps } from '../admission'
-import type { FireworksHealth } from '../fireworks-health'
+import type { FireworksHealth, FleetHealth } from '../fireworks-health'
 
 const NOW = new Date('2026-04-17T12:00:00Z')
+const TEST_MODEL = 'test-model'
 
 function makeAdmissionDeps(overrides: Partial<AdmissionDeps> = {}): AdmissionDeps & {
   calls: { admit: number }
@@ -16,10 +17,9 @@ function makeAdmissionDeps(overrides: Partial<AdmissionDeps> = {}): AdmissionDep
     sweepExpired: async () => 0,
     queueDepth: async () => 0,
     activeCount: async () => 0,
-    getFireworksHealth: async () => 'healthy',
-    admitFromQueue: async ({ getFireworksHealth }) => {
+    getFleetHealth: async () => ({}),
+    admitFromQueue: async ({ health }) => {
       calls.admit += 1
-      const health = await getFireworksHealth()
       if (health !== 'healthy') {
         return { admitted: [], skipped: health }
       }
@@ -28,11 +28,18 @@ function makeAdmissionDeps(overrides: Partial<AdmissionDeps> = {}): AdmissionDep
     sessionLengthMs: 60 * 60 * 1000,
     graceMs: 30 * 60 * 1000,
     now: () => NOW,
+    // Default to a single model so per-tick assertions (admitted: 1) stay
+    // crisp regardless of how many production models are registered.
+    models: [TEST_MODEL],
     ...overrides,
   }
   return deps
 }
 
+function fleet(health: FireworksHealth, model: string = TEST_MODEL): FleetHealth {
+  return { [model]: health }
+}
+
 describe('runAdmissionTick', () => {
   test('admits one user per tick when healthy', async () => {
     const deps = makeAdmissionDeps()
@@ -41,18 +48,18 @@ describe('runAdmissionTick', () => {
     expect(result.skipped).toBeNull()
   })
 
-  test('skips admission when Fireworks is degraded', async () => {
+  test('skips admission when the model deployment is degraded', async () => {
     const deps = makeAdmissionDeps({
-      getFireworksHealth: async () => 'degraded' as FireworksHealth,
+      getFleetHealth: async () => fleet('degraded'),
     })
     const result = await runAdmissionTick(deps)
     expect(result.admitted).toBe(0)
     expect(result.skipped).toBe('degraded')
   })
 
-  test('skips admission when Fireworks is unhealthy', async () => {
+  test('skips admission when the model deployment is unhealthy', async () => {
     const deps = makeAdmissionDeps({
-      getFireworksHealth: async () => 'unhealthy' as FireworksHealth,
+      getFleetHealth: async () => fleet('unhealthy'),
     })
     const result = await runAdmissionTick(deps)
     expect(result.admitted).toBe(0)
@@ -66,13 +73,38 @@ describe('runAdmissionTick', () => {
         swept = 3
         return 3
       },
-      getFireworksHealth: async () => 'unhealthy' as FireworksHealth,
+      getFleetHealth: async () => fleet('unhealthy'),
     })
     const result = await runAdmissionTick(deps)
     expect(swept).toBe(3)
     expect(result.expired).toBe(3)
   })
 
+  test('admits per-model based on per-deployment health', async () => {
+    // Two models: 'good' is healthy, 'bad' is degraded. A single tick should
+    // admit 1 from 'good' and skip 'bad', surfacing the worst skip reason.
+    const deps = makeAdmissionDeps({
+      models: ['good', 'bad'],
+      getFleetHealth: async () => ({ good: 'healthy', bad: 'degraded' }),
+    })
+    const result = await runAdmissionTick(deps)
+    expect(result.admitted).toBe(1)
+    expect(result.skipped).toBe('degraded')
+  })
+
+  test('absent fleet entry defaults to healthy (serverless model)', async () => {
+    // Model isn't in the fleet map (e.g. served via Fireworks serverless).
+    // Admission should proceed rather than stall waiting for a probe that
+    // will never include this deployment.
+    const deps = makeAdmissionDeps({
+      models: ['serverless-model'],
+      getFleetHealth: async () => ({}),
+    })
+    const result = await runAdmissionTick(deps)
+    expect(result.admitted).toBe(1)
+    expect(result.skipped).toBeNull()
+  })
+
   test('propagates expiry count and admit count together', async () => {
     const deps = makeAdmissionDeps({
       sweepExpired: async () => 2,
diff --git a/web/src/server/free-session/__tests__/fireworks-health.test.ts b/web/src/server/free-session/__tests__/fireworks-health.test.ts
index 3475769cd..b05fe8df9 100644
--- a/web/src/server/free-session/__tests__/fireworks-health.test.ts
+++ b/web/src/server/free-session/__tests__/fireworks-health.test.ts
@@ -4,7 +4,7 @@ import {
   KV_BLOCKS_DEGRADED_FRACTION,
   KV_BLOCKS_UNHEALTHY_FRACTION,
   PREFILL_QUEUE_P90_DEGRADED_MS,
-  classify,
+  classifyOne,
 } from '../fireworks-health'
 
 type PromSample = { name: string; labels: Record<string, string>; value: number }
@@ -57,7 +57,7 @@ function errors(code: string, rate: number): PromSample {
 describe('fireworks health classifier', () => {
   test('healthy when queue well under the threshold', () => {
     const samples: PromSample[] = [kvBlocks(0.5), ...prefillQueueBuckets(150)]
-    expect(classify(samples, [DEPLOY])).toBe('healthy')
+    expect(classifyOne(samples, DEPLOY)).toBe('healthy')
   })
 
   test('degraded when prefill queue p90 exceeds the threshold', () => {
@@ -65,7 +65,7 @@ describe('fireworks health classifier', () => {
       kvBlocks(0.5),
       ...prefillQueueBuckets(PREFILL_QUEUE_P90_DEGRADED_MS + 500),
     ]
-    expect(classify(samples, [DEPLOY])).toBe('degraded')
+    expect(classifyOne(samples, DEPLOY)).toBe('degraded')
   })
 
   test('degraded when KV blocks cross the soft threshold (leading indicator)', () => {
@@ -73,7 +73,7 @@ describe('fireworks health classifier', () => {
       kvBlocks(KV_BLOCKS_DEGRADED_FRACTION + 0.01),
       ...prefillQueueBuckets(300),
     ]
-    expect(classify(samples, [DEPLOY])).toBe('degraded')
+    expect(classifyOne(samples, DEPLOY)).toBe('degraded')
   })
 
   test('unhealthy when KV blocks exceed the backstop', () => {
@@ -81,7 +81,7 @@ describe('fireworks health classifier', () => {
       kvBlocks(KV_BLOCKS_UNHEALTHY_FRACTION + 0.005),
       ...prefillQueueBuckets(300),
     ]
-    expect(classify(samples, [DEPLOY])).toBe('unhealthy')
+    expect(classifyOne(samples, DEPLOY)).toBe('unhealthy')
   })
 
   test('unhealthy when 5xx error fraction exceeds the threshold', () => {
@@ -91,7 +91,7 @@ describe('fireworks health classifier', () => {
       requests(1),
       errors('500', 0.2),
     ]
-    expect(classify(samples, [DEPLOY])).toBe('unhealthy')
+    expect(classifyOne(samples, DEPLOY)).toBe('unhealthy')
   })
 
   test('ignores high error fraction when traffic is too low to be meaningful', () => {
@@ -101,14 +101,17 @@ describe('fireworks health classifier', () => {
       requests(0.05),
       errors('500', 0.05),
     ]
-    expect(classify(samples, [DEPLOY])).toBe('healthy')
+    expect(classifyOne(samples, DEPLOY)).toBe('healthy')
   })
 
   test('healthy with no data yet (new deployment, no events)', () => {
-    expect(classify([], [DEPLOY])).toBe('healthy')
+    expect(classifyOne([], DEPLOY)).toBe('healthy')
   })
 
-  test('worst-of across multiple deployments — unhealthy wins over degraded', () => {
+  test('classifies deployments independently — one bad deployment does not affect another', () => {
+    // The fleet probe builds the result by classifying each deployment
+    // separately, so a saturated 'other' deployment leaves DEPLOY's
+    // (only-degraded) verdict intact.
     const other = 'other123'
     const samples: PromSample[] = [
       kvBlocks(0.5),
@@ -119,6 +122,7 @@ describe('fireworks health classifier', () => {
         value: KV_BLOCKS_UNHEALTHY_FRACTION + 0.005,
       },
     ]
-    expect(classify(samples, [DEPLOY, other])).toBe('unhealthy')
+    expect(classifyOne(samples, DEPLOY)).toBe('degraded')
+    expect(classifyOne(samples, other)).toBe('unhealthy')
   })
 })
diff --git a/web/src/server/free-session/__tests__/public-api.test.ts b/web/src/server/free-session/__tests__/public-api.test.ts
index b19f24ea0..7585d8927 100644
--- a/web/src/server/free-session/__tests__/public-api.test.ts
+++ b/web/src/server/free-session/__tests__/public-api.test.ts
@@ -6,12 +6,14 @@ import {
   getSessionState,
   requestSession,
 } from '../public-api'
+import { FreeSessionModelLockedError } from '../store'
 
 import type { SessionDeps } from '../public-api'
 import type { InternalSessionRow } from '../types'
 
 const SESSION_LEN = 60 * 60 * 1000
 const GRACE_MS = 30 * 60 * 1000
+const DEFAULT_MODEL = 'z-ai/glm-5.1'
 
 function makeDeps(overrides: Partial<SessionDeps> = {}): SessionDeps & {
   rows: Map<string, InternalSessionRow>
@@ -41,15 +43,18 @@ function makeDeps(overrides: Partial<SessionDeps> = {}): SessionDeps & {
     endSession: async (userId) => {
       rows.delete(userId)
     },
-    queueDepth: async () => {
-      let n = 0
-      for (const r of rows.values()) if (r.status === 'queued') n++
-      return n
+    queueDepthsByModel: async () => {
+      const out: Record<string, number> = {}
+      for (const r of rows.values()) {
+        if (r.status !== 'queued') continue
+        out[r.model] = (out[r.model] ?? 0) + 1
+      }
+      return out
     },
-    queuePositionFor: async ({ userId, queuedAt }) => {
+    queuePositionFor: async ({ userId, model, queuedAt }) => {
       let pos = 0
       for (const r of rows.values()) {
-        if (r.status !== 'queued') continue
+        if (r.status !== 'queued' || r.model !== model) continue
         if (
           r.queued_at.getTime() < queuedAt.getTime() ||
           (r.queued_at.getTime() === queuedAt.getTime() && r.user_id <= userId)
@@ -59,7 +64,7 @@ function makeDeps(overrides: Partial<SessionDeps> = {}): SessionDeps & {
       }
       return pos
     },
-    joinOrTakeOver: async ({ userId, now }) => {
+    joinOrTakeOver: async ({ userId, model, now }) => {
       const existing = rows.get(userId)
       const nextInstance = newInstanceId()
       if (!existing) {
@@ -67,6 +72,7 @@ function makeDeps(overrides: Partial<SessionDeps> = {}): SessionDeps & {
           user_id: userId,
           status: 'queued',
           active_instance_id: nextInstance,
+          model,
           queued_at: now,
           admitted_at: null,
           expires_at: null,
@@ -81,17 +87,25 @@ function makeDeps(overrides: Partial<SessionDeps> = {}): SessionDeps & {
         existing.expires_at &&
         existing.expires_at.getTime() > now.getTime()
       ) {
+        if (existing.model !== model) {
+          throw new FreeSessionModelLockedError(existing.model)
+        }
         existing.active_instance_id = nextInstance
         existing.updated_at = now
         return existing
       }
       if (existing.status === 'queued') {
         existing.active_instance_id = nextInstance
+        if (existing.model !== model) {
+          existing.model = model
+          existing.queued_at = now
+        }
         existing.updated_at = now
         return existing
       }
       existing.status = 'queued'
       existing.active_instance_id = nextInstance
+      existing.model = model
       existing.queued_at = now
       existing.admitted_at = null
       existing.expires_at = null
@@ -111,13 +125,17 @@ describe('requestSession', () => {
 
   test('disabled flag returns { status: disabled } and does not touch DB', async () => {
     const offDeps = makeDeps({ isWaitingRoomEnabled: () => false })
-    const state = await requestSession({ userId: 'u1', deps: offDeps })
+    const state = await requestSession({
+      userId: 'u1',
+      model: DEFAULT_MODEL,
+      deps: offDeps,
+    })
     expect(state).toEqual({ status: 'disabled' })
     expect(offDeps.rows.size).toBe(0)
   })
 
   test('first call puts user in queue at position 1', async () => {
-    const state = await requestSession({ userId: 'u1', deps })
+    const state = await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps })
     expect(state.status).toBe('queued')
     if (state.status !== 'queued') throw new Error('unreachable')
     expect(state.position).toBe(1)
@@ -125,18 +143,34 @@ describe('requestSession', () => {
     expect(state.instanceId).toBe('inst-1')
   })
 
+  test('queued response includes a per-model depth snapshot for the selector', async () => {
+    // Seed 2 users in glm + 1 in minimax so the returned map captures both.
+    await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps })
+    deps._tick(new Date(deps._now().getTime() + 1000))
+    await requestSession({ userId: 'u2', model: DEFAULT_MODEL, deps })
+    deps._tick(new Date(deps._now().getTime() + 1000))
+    await requestSession({ userId: 'u3', model: 'minimax/minimax-m2.7', deps })
+
+    const state = await getSessionState({ userId: 'u1', deps })
+    if (state.status !== 'queued') throw new Error('unreachable')
+    expect(state.queueDepthByModel).toEqual({
+      [DEFAULT_MODEL]: 2,
+      'minimax/minimax-m2.7': 1,
+    })
+  })
+
   test('second call from same user rotates instance id, keeps queue position', async () => {
-    await requestSession({ userId: 'u1', deps })
-    const second = await requestSession({ userId: 'u1', deps })
+    await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps })
+    const second = await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps })
     if (second.status !== 'queued') throw new Error('unreachable')
     expect(second.position).toBe(1)
     expect(second.instanceId).toBe('inst-2')
   })
 
   test('multiple users queue in FIFO order', async () => {
-    await requestSession({ userId: 'u1', deps })
+    await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps })
     deps._tick(new Date(deps._now().getTime() + 1000))
-    await requestSession({ userId: 'u2', deps })
+    await requestSession({ userId: 'u2', model: DEFAULT_MODEL, deps })
 
     const s1 = await getSessionState({ userId: 'u1', deps })
     const s2 = await getSessionState({ userId: 'u2', deps })
@@ -147,13 +181,13 @@ describe('requestSession', () => {
 
   test('active unexpired session → rotate instance id, preserve active state', async () => {
     // Prime a user into active state manually.
-    await requestSession({ userId: 'u1', deps })
+    await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps })
     const row = deps.rows.get('u1')!
     row.status = 'active'
     row.admitted_at = deps._now()
     row.expires_at = new Date(deps._now().getTime() + SESSION_LEN)
 
-    const second = await requestSession({ userId: 'u1', deps })
+    const second = await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps })
     expect(second.status).toBe('active')
     if (second.status !== 'active') throw new Error('unreachable')
     expect(second.instanceId).not.toBe('inst-1') // rotated
@@ -178,7 +212,7 @@ describe('getSessionState', () => {
   })
 
   test('active session with matching instance id returns active', async () => {
-    await requestSession({ userId: 'u1', deps })
+    await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps })
     const row = deps.rows.get('u1')!
     row.status = 'active'
     row.admitted_at = deps._now()
@@ -193,7 +227,7 @@ describe('getSessionState', () => {
   })
 
   test('active session with mismatched instance id returns superseded', async () => {
-    await requestSession({ userId: 'u1', deps })
+    await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps })
     const row = deps.rows.get('u1')!
     row.status = 'active'
     row.admitted_at = deps._now()
@@ -210,7 +244,7 @@ describe('getSessionState', () => {
   test('omitted claimedInstanceId on active session returns active (read-only)', async () => {
     // Polling without an id (e.g. very first GET before POST has resolved)
     // must not be classified as superseded — only an explicit mismatch is.
-    await requestSession({ userId: 'u1', deps })
+    await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps })
     const row = deps.rows.get('u1')!
     row.status = 'active'
     row.admitted_at = deps._now()
@@ -221,7 +255,7 @@ describe('getSessionState', () => {
   })
 
   test('row inside grace window returns ended (with instanceId)', async () => {
-    await requestSession({ userId: 'u1', deps })
+    await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps })
     const row = deps.rows.get('u1')!
     row.status = 'active'
     row.admitted_at = new Date(deps._now().getTime() - SESSION_LEN - 60_000)
@@ -239,7 +273,7 @@ describe('getSessionState', () => {
   })
 
   test('row past grace window returns none', async () => {
-    await requestSession({ userId: 'u1', deps })
+    await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps })
     const row = deps.rows.get('u1')!
     row.status = 'active'
     row.admitted_at = new Date(deps._now().getTime() - 2 * SESSION_LEN)
@@ -305,7 +339,7 @@ describe('checkSessionAdmissible', () => {
   })
 
   test('queued session → waiting_room_queued', async () => {
-    await requestSession({ userId: 'u1', deps })
+    await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps })
     const result = await checkSessionAdmissible({
       userId: 'u1',
       claimedInstanceId: 'inst-1',
@@ -316,7 +350,7 @@ describe('checkSessionAdmissible', () => {
   })
 
   test('active + matching instance id → ok', async () => {
-    await requestSession({ userId: 'u1', deps })
+    await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps })
     const row = deps.rows.get('u1')!
     row.status = 'active'
     row.admitted_at = deps._now()
@@ -333,7 +367,7 @@ describe('checkSessionAdmissible', () => {
   })
 
   test('active + wrong instance id → session_superseded', async () => {
-    await requestSession({ userId: 'u1', deps })
+    await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps })
     const row = deps.rows.get('u1')!
     row.status = 'active'
     row.admitted_at = deps._now()
@@ -351,7 +385,7 @@ describe('checkSessionAdmissible', () => {
   test('missing instance id → freebuff_update_required (pre-waiting-room CLI)', async () => {
     // Classified up front regardless of row state: old clients never send an
     // id, so we surface a distinct code that maps to 426 Upgrade Required.
-    await requestSession({ userId: 'u1', deps })
+    await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps })
     const row = deps.rows.get('u1')!
     row.status = 'active'
     row.admitted_at = deps._now()
@@ -367,7 +401,7 @@ describe('checkSessionAdmissible', () => {
   })
 
   test('active inside grace window → ok with reason=draining', async () => {
-    await requestSession({ userId: 'u1', deps })
+    await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps })
     const row = deps.rows.get('u1')!
     row.status = 'active'
     row.admitted_at = new Date(deps._now().getTime() - SESSION_LEN - 60_000)
@@ -385,7 +419,7 @@ describe('checkSessionAdmissible', () => {
   })
 
   test('active past the grace window → session_expired', async () => {
-    await requestSession({ userId: 'u1', deps })
+    await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps })
     const row = deps.rows.get('u1')!
     row.status = 'active'
     row.admitted_at = new Date(deps._now().getTime() - 2 * SESSION_LEN)
@@ -401,7 +435,7 @@ describe('checkSessionAdmissible', () => {
   })
 
   test('draining + wrong instance id still rejects with session_superseded', async () => {
-    await requestSession({ userId: 'u1', deps })
+    await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps })
     const row = deps.rows.get('u1')!
     row.status = 'active'
     row.admitted_at = new Date(deps._now().getTime() - SESSION_LEN - 60_000)
@@ -420,7 +454,7 @@ describe('checkSessionAdmissible', () => {
 describe('endUserSession', () => {
   test('removes row', async () => {
     const deps = makeDeps()
-    await requestSession({ userId: 'u1', deps })
+    await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps })
     expect(deps.rows.has('u1')).toBe(true)
     await endUserSession({ userId: 'u1', deps })
     expect(deps.rows.has('u1')).toBe(false)
@@ -432,6 +466,7 @@ describe('endUserSession', () => {
       user_id: 'u1',
       status: 'active',
       active_instance_id: 'x',
+      model: DEFAULT_MODEL,
       queued_at: new Date(),
       admitted_at: null,
       expires_at: null,
diff --git a/web/src/server/free-session/__tests__/session-view.test.ts b/web/src/server/free-session/__tests__/session-view.test.ts
index 681072b30..52dc82c12 100644
--- a/web/src/server/free-session/__tests__/session-view.test.ts
+++ b/web/src/server/free-session/__tests__/session-view.test.ts
@@ -7,12 +7,15 @@ import type { InternalSessionRow } from '../types'
 const WAIT_PER_SPOT_MS = 24_000
 const GRACE_MS = 30 * 60_000
 
+const TEST_MODEL = 'z-ai/glm-5.1'
+
 function row(overrides: Partial<InternalSessionRow> = {}): InternalSessionRow {
   const now = new Date('2026-04-17T12:00:00Z')
   return {
     user_id: 'u1',
     status: 'queued',
     active_instance_id: 'inst-1',
+    model: TEST_MODEL,
     queued_at: now,
     admitted_at: null,
     expires_at: null,
@@ -41,13 +44,13 @@ describe('toSessionStateResponse', () => {
   const now = new Date('2026-04-17T12:00:00Z')
   const baseArgs = {
     graceMs: GRACE_MS,
+    queueDepthByModel: {},
   }
 
   test('returns null when row is null', () => {
     const view = toSessionStateResponse({
       row: null,
       position: 0,
-      queueDepth: 0,
       ...baseArgs,
       now,
     })
@@ -58,15 +61,17 @@ describe('toSessionStateResponse', () => {
     const view = toSessionStateResponse({
       row: row({ status: 'queued' }),
       position: 3,
-      queueDepth: 10,
       ...baseArgs,
+      queueDepthByModel: { [TEST_MODEL]: 10, 'minimax/minimax-m2.7': 4 },
       now,
     })
     expect(view).toEqual({
       status: 'queued',
       instanceId: 'inst-1',
+      model: TEST_MODEL,
       position: 3,
       queueDepth: 10,
+      queueDepthByModel: { [TEST_MODEL]: 10, 'minimax/minimax-m2.7': 4 },
       estimatedWaitMs: 2 * WAIT_PER_SPOT_MS,
       queuedAt: now.toISOString(),
     })
@@ -78,13 +83,13 @@ describe('toSessionStateResponse', () => {
     const view = toSessionStateResponse({
       row: row({ status: 'active', admitted_at: admittedAt, expires_at: expiresAt }),
       position: 0,
-      queueDepth: 0,
       ...baseArgs,
       now,
     })
     expect(view).toEqual({
       status: 'active',
       instanceId: 'inst-1',
+      model: TEST_MODEL,
       admittedAt: admittedAt.toISOString(),
       expiresAt: expiresAt.toISOString(),
       remainingMs: 50 * 60_000,
@@ -97,7 +102,6 @@ describe('toSessionStateResponse', () => {
     const view = toSessionStateResponse({
       row: row({ status: 'active', admitted_at: admittedAt, expires_at: expiresAt }),
       position: 0,
-      queueDepth: 0,
       ...baseArgs,
       now,
     })
@@ -119,7 +123,6 @@ describe('toSessionStateResponse', () => {
         expires_at: new Date(now.getTime() - GRACE_MS - 1),
       }),
       position: 0,
-      queueDepth: 0,
       ...baseArgs,
       now,
     })
diff --git a/web/src/server/free-session/admission.ts b/web/src/server/free-session/admission.ts
index 7c0097c70..4ec532daf 100644
--- a/web/src/server/free-session/admission.ts
+++ b/web/src/server/free-session/admission.ts
@@ -1,29 +1,34 @@
+import { FREEBUFF_MODELS } from '@codebuff/common/constants/freebuff-models'
+
 import {
   ADMISSION_TICK_MS,
   getSessionGraceMs,
   getSessionLengthMs,
   isWaitingRoomEnabled,
 } from './config'
-import { getFireworksHealth } from './fireworks-health'
+import { getFleetHealth } from './fireworks-health'
 import { activeCount, admitFromQueue, queueDepth, sweepExpired } from './store'
 
-import type { FireworksHealth } from './fireworks-health'
+import type { FireworksHealth, FleetHealth } from './fireworks-health'
 
 import { logger } from '@/util/logger'
 
 export interface AdmissionDeps {
   sweepExpired: (now: Date, graceMs: number) => Promise<number>
-  queueDepth: () => Promise<number>
+  queueDepth: (params: { model: string }) => Promise<number>
   activeCount: () => Promise<number>
   admitFromQueue: (params: {
+    model: string
     sessionLengthMs: number
     now: Date
-    getFireworksHealth: () => Promise<FireworksHealth>
+    health: FireworksHealth
   }) => Promise<{ admitted: { user_id: string }[]; skipped: FireworksHealth | null }>
-  getFireworksHealth: () => Promise<FireworksHealth>
+  getFleetHealth: () => Promise<FleetHealth>
   /** Plain values, not thunks — these never change at runtime. */
   sessionLengthMs: number
   graceMs: number
+  /** Models to run admission ticks for. Defaults to the full model registry. */
+  models?: readonly string[]
   now?: () => Date
 }
 
@@ -33,11 +38,13 @@ const defaultDeps: AdmissionDeps = {
   activeCount,
   admitFromQueue,
   // FREEBUFF_DEV_FORCE_ADMIT lets local `dev:freebuff` drive the full
-  // waiting-room → admitted → ended flow without a real upstream.
-  getFireworksHealth:
+  // waiting-room → admitted → ended flow without a real upstream. Returning
+  // an empty fleet means every model resolves to the absence-default of
+  // 'healthy' below.
+  getFleetHealth:
     process.env.FREEBUFF_DEV_FORCE_ADMIT === 'true'
-      ? async () => 'healthy'
-      : getFireworksHealth,
+      ? async () => ({})
+      : getFleetHealth,
   get sessionLengthMs() {
     return getSessionLengthMs()
   },
@@ -49,7 +56,8 @@ const defaultDeps: AdmissionDeps = {
 export interface AdmissionTickResult {
   expired: number
   admitted: number
-  queueDepth: number
+  /** Per-model queue depth at the end of the tick. */
+  queueDepthByModel: Record<string, number>
   activeCount: number
   skipped: FireworksHealth | null
 }
@@ -57,16 +65,15 @@ export interface AdmissionTickResult {
 /**
  * Run a single admission tick:
  *   1. Expire sessions past their expires_at + grace.
- *   2. Attempt to admit one queued user. Admission proceeds only when the
- *      upstream health probe reports `healthy`; `degraded` and `unhealthy`
- *      both pause admission so the deployment can catch up.
+ *   2. For each model, attempt to admit one queued user. Admission proceeds
+ *      only when that model's health (absent from the probe → `healthy`) is
+ *      `healthy`; `degraded`/`unhealthy` pause it so the deployment catches up.
  *
- * Admission drips at (1 / ADMISSION_TICK_MS), which drives utilization up
- * slowly; once the probe stops returning `healthy`, step 2 halts admission
- * until the upstream recovers.
+ * Per-model admission means heavier models can sit cold without starving
+ * lighter ones. Admission still drips at (1 / ADMISSION_TICK_MS) per model.
  *
  * Returns counts for observability. Safe to call concurrently across pods —
- * admitFromQueue takes an advisory xact lock.
+ * admitFromQueue takes a per-model advisory xact lock.
  */
 export async function runAdmissionTick(
   deps: AdmissionDeps = defaultDeps,
@@ -74,20 +81,42 @@ export async function runAdmissionTick(
   const now = (deps.now ?? (() => new Date()))()
   const expired = await deps.sweepExpired(now, deps.graceMs)
 
-  const { admitted, skipped } = await deps.admitFromQueue({
-    sessionLengthMs: deps.sessionLengthMs,
-    now,
-    getFireworksHealth: deps.getFireworksHealth,
-  })
+  const models = deps.models ?? FREEBUFF_MODELS.map((m) => m.id)
+
+  // One probe per tick covers every model — the Fireworks metrics endpoint
+  // returns all deployments in a single response. Models without a dedicated
+  // deployment (e.g. serverless) aren't in the map; treat their absence as
+  // 'healthy' so admission continues. TODO: when those models move to their
+  // own deployments, drop the absence-default and require an explicit entry.
+  const fleet = await deps.getFleetHealth()
+
+  // Run per-model admission in parallel — they only contend on independent
+  // advisory locks and a single update each.
+  const perModel = await Promise.all(
+    models.map(async (model) => {
+      const health = fleet[model] ?? 'healthy'
+      const { admitted, skipped } = await deps.admitFromQueue({
+        model,
+        sessionLengthMs: deps.sessionLengthMs,
+        now,
+        health,
+      })
+      const depth = await deps.queueDepth({ model })
+      return { model, admittedCount: admitted.length, depth, skipped }
+    }),
+  )
+
+  const active = await deps.activeCount()
+  const totalAdmitted = perModel.reduce((s, r) => s + r.admittedCount, 0)
+  const queueDepthByModel = Object.fromEntries(
+    perModel.map((r) => [r.model, r.depth]),
+  )
+  const skipped = perModel.find((r) => r.skipped)?.skipped ?? null
 
-  const [depth, active] = await Promise.all([
-    deps.queueDepth(),
-    deps.activeCount(),
-  ])
   return {
     expired,
-    admitted: admitted.length,
-    queueDepth: depth,
+    admitted: totalAdmitted,
+    queueDepthByModel,
     activeCount: active,
     skipped,
   }
@@ -109,7 +138,7 @@ function runTick() {
           metric: 'freebuff_waiting_room',
           admitted: result.admitted,
           expired: result.expired,
-          queueDepth: result.queueDepth,
+          queueDepthByModel: result.queueDepthByModel,
           activeCount: result.activeCount,
           skipped: result.skipped,
         },
diff --git a/web/src/server/free-session/fireworks-health.ts b/web/src/server/free-session/fireworks-health.ts
index cef6be01c..15f1bb124 100644
--- a/web/src/server/free-session/fireworks-health.ts
+++ b/web/src/server/free-session/fireworks-health.ts
@@ -52,25 +52,35 @@ const HEALTH_CHECK_TIMEOUT_MS = 5_000
  *  pod hits the endpoint at most ~2.4/min. */
 const HEALTH_CACHE_TTL_MS = 25_000
 
-type CacheEntry = { expiresAt: number; health: FireworksHealth }
+/** Map of model id → FireworksHealth. Only includes models that have a
+ *  dedicated Fireworks deployment in `FIREWORKS_DEPLOYMENT_MAP`. Models served
+ *  via the Fireworks serverless API (no deployment id) are not present —
+ *  callers should treat their absence as 'healthy' for now.
+ *  TODO: when serverless models move to dedicated deployments, drop the
+ *        absence-means-healthy fallback at the call site. */
+export type FleetHealth = Record<string, FireworksHealth>
+
+type CacheEntry = { expiresAt: number; fleet: FleetHealth }
 let cache: CacheEntry | null = null
 
 export function __resetFireworksHealthCacheForTests(): void {
   cache = null
 }
 
-export async function getFireworksHealth(): Promise<FireworksHealth> {
+export async function getFleetHealth(): Promise<FleetHealth> {
   const now = Date.now()
-  if (cache && cache.expiresAt > now) return cache.health
+  if (cache && cache.expiresAt > now) return cache.fleet
 
-  const health = await probe()
-  cache = { expiresAt: now + HEALTH_CACHE_TTL_MS, health }
-  return health
+  const fleet = await probe()
+  cache = { expiresAt: now + HEALTH_CACHE_TTL_MS, fleet }
+  return fleet
 }
 
-async function probe(): Promise<FireworksHealth> {
+async function probe(): Promise<FleetHealth> {
   const apiKey = env.FIREWORKS_API_KEY
-  if (!apiKey) return 'unhealthy'
+  // Mark every deployment-mapped model unhealthy when we can't authenticate
+  // the probe. Serverless models (absent from the map) keep their default.
+  if (!apiKey) return allDeploymentsAt('unhealthy')
 
   const controller = new AbortController()
   const timeout = setTimeout(() => controller.abort(), HEALTH_CHECK_TIMEOUT_MS)
@@ -81,18 +91,15 @@ async function probe(): Promise<FireworksHealth> {
       headers: { Authorization: `Bearer ${apiKey}` },
       signal: controller.signal,
     })
-    if (!response.ok) return 'unhealthy'
+    if (!response.ok) return allDeploymentsAt('unhealthy')
     body = await response.text()
   } catch {
-    return 'unhealthy'
+    return allDeploymentsAt('unhealthy')
   } finally {
     clearTimeout(timeout)
   }
 
-  const deploymentIds = Object.values(FIREWORKS_DEPLOYMENT_MAP).map(
-    (name) => name.split('/').pop()!,
-  )
-  if (deploymentIds.length === 0) return 'healthy'
+  if (Object.keys(FIREWORKS_DEPLOYMENT_MAP).length === 0) return {}
 
   const { samples, newestTimestampMs } = parsePrometheus(body)
 
@@ -104,27 +111,26 @@ async function probe(): Promise<FireworksHealth> {
       { ageMs: Date.now() - newestTimestampMs },
       '[FireworksHealth] unhealthy: metrics snapshot is stale',
     )
-    return 'unhealthy'
+    return allDeploymentsAt('unhealthy')
   }
 
-  return classify(samples, deploymentIds)
+  const fleet: FleetHealth = {}
+  for (const [modelId, deploymentName] of Object.entries(FIREWORKS_DEPLOYMENT_MAP)) {
+    const deploymentId = deploymentName.split('/').pop()!
+    fleet[modelId] = classifyOne(samples, deploymentId)
+  }
+  return fleet
 }
 
-/** Treat the whole fleet as degraded/unhealthy if any single deployment is. */
-export function classify(
-  samples: PromSample[],
-  deploymentIds: string[],
-): FireworksHealth {
-  let worst: FireworksHealth = 'healthy'
-  for (const deploymentId of deploymentIds) {
-    const h = classifyOne(samples, deploymentId)
-    if (h === 'unhealthy') return 'unhealthy'
-    if (h === 'degraded') worst = 'degraded'
+function allDeploymentsAt(health: FireworksHealth): FleetHealth {
+  const out: FleetHealth = {}
+  for (const modelId of Object.keys(FIREWORKS_DEPLOYMENT_MAP)) {
+    out[modelId] = health
   }
-  return worst
+  return out
 }
 
-function classifyOne(samples: PromSample[], deploymentId: string): FireworksHealth {
+export function classifyOne(samples: PromSample[], deploymentId: string): FireworksHealth {
   const kvBlocks = scalarFor(
     samples,
     'generator_kv_blocks_fraction:avg_by_deployment',
diff --git a/web/src/server/free-session/public-api.ts b/web/src/server/free-session/public-api.ts
index 74af009cc..be4506eb1 100644
--- a/web/src/server/free-session/public-api.ts
+++ b/web/src/server/free-session/public-api.ts
@@ -1,3 +1,8 @@
+import {
+  isFreebuffModelId as isSelectableFreebuffModel,
+  resolveFreebuffModel,
+} from '@codebuff/common/constants/freebuff-models'
+
 import {
   getSessionGraceMs,
   isWaitingRoomBypassedForEmail,
@@ -5,9 +10,10 @@ import {
 } from './config'
 import {
   endSession,
+  FreeSessionModelLockedError,
   getSessionRow,
   joinOrTakeOver,
-  queueDepth,
+  queueDepthsByModel,
   queuePositionFor,
 } from './store'
 import { toSessionStateResponse } from './session-view'
@@ -17,10 +23,18 @@ import type { InternalSessionRow, SessionStateResponse } from './types'
 
 export interface SessionDeps {
   getSessionRow: (userId: string) => Promise<InternalSessionRow | null>
-  joinOrTakeOver: (params: { userId: string; now: Date }) => Promise<InternalSessionRow>
+  joinOrTakeOver: (params: {
+    userId: string
+    model: string
+    now: Date
+  }) => Promise<InternalSessionRow>
   endSession: (userId: string) => Promise<void>
-  queueDepth: () => Promise<number>
-  queuePositionFor: (params: { userId: string; queuedAt: Date }) => Promise<number>
+  queueDepthsByModel: () => Promise<Record<string, number>>
+  queuePositionFor: (params: {
+    userId: string
+    model: string
+    queuedAt: Date
+  }) => Promise<number>
   isWaitingRoomEnabled: () => boolean
   /** Plain values, not getters: these never change at runtime. The deps
    *  interface uses values rather than thunks so tests can pass numbers
@@ -33,7 +47,7 @@ const defaultDeps: SessionDeps = {
   getSessionRow,
   joinOrTakeOver,
   endSession,
-  queueDepth,
+  queueDepthsByModel,
   queuePositionFor,
   isWaitingRoomEnabled,
   get graceMs() {
@@ -51,39 +65,62 @@ async function viewForRow(
   deps: SessionDeps,
   row: InternalSessionRow,
 ): Promise<SessionStateResponse | null> {
-  const [position, depth] =
+  const [position, depthsByModel] =
     row.status === 'queued'
       ? await Promise.all([
-          deps.queuePositionFor({ userId, queuedAt: row.queued_at }),
-          deps.queueDepth(),
+          deps.queuePositionFor({
+            userId,
+            model: row.model,
+            queuedAt: row.queued_at,
+          }),
+          deps.queueDepthsByModel(),
         ])
-      : [0, 0]
+      : [0, {}]
   return toSessionStateResponse({
     row,
     position,
-    queueDepth: depth,
+    queueDepthByModel: depthsByModel,
     graceMs: deps.graceMs,
     now: nowOf(deps),
   })
 }
 
+export type RequestSessionResult =
+  | SessionStateResponse
+  | {
+      /** User asked to queue/switch to a different model while their active
+       *  session is still bound to another. The CLI must end the existing
+       *  session first (DELETE /session) before re-queueing. */
+      status: 'model_locked'
+      currentModel: string
+      requestedModel: string
+    }
+
 /**
- * Client calls this on CLI startup. Semantics:
- *   - Waiting room disabled → { status: 'disabled' }
- *   - No existing session → create queued row, fresh instance_id
- *   - Existing active (unexpired) → rotate instance_id (takeover), preserve state
- *   - Existing queued → rotate instance_id, preserve queue position
- *   - Existing expired → re-queue at the back with fresh instance_id
+ * Client calls this on CLI startup with the model they want to use.
+ * Semantics:
+ *   - Waiting room disabled → { status: 'disabled' } (model still respected
+ *     downstream by chat-completions)
+ *   - No existing session → create queued row for `model`, fresh instance_id
+ *   - Existing active (unexpired), same model → rotate instance_id (takeover)
+ *   - Existing active (unexpired), different model → { status: 'model_locked' }
+ *   - Existing queued, same model → rotate instance_id, preserve position
+ *   - Existing queued, different model → switch to new model and join the
+ *     back of that model's queue
+ *   - Existing expired → re-queue at the back of `model`'s queue with fresh
+ *     instance_id
  *
- * `joinOrTakeOver` always returns a row that maps to a non-null view (queued
- * or active-unexpired), so the cast below is sound.
+ * `joinOrTakeOver` (when it doesn't throw) always returns a row that maps to
+ * a non-null view (queued or active-unexpired), so the cast below is sound.
  */
 export async function requestSession(params: {
   userId: string
+  model: string
   userEmail?: string | null | undefined
   deps?: SessionDeps
-}): Promise<SessionStateResponse> {
+}): Promise<RequestSessionResult> {
   const deps = params.deps ?? defaultDeps
+  const model = resolveFreebuffModel(params.model)
   if (
     !deps.isWaitingRoomEnabled() ||
     isWaitingRoomBypassedForEmail(params.userEmail)
@@ -91,7 +128,23 @@ export async function requestSession(params: {
     return { status: 'disabled' }
   }
 
-  const row = await deps.joinOrTakeOver({ userId: params.userId, now: nowOf(deps) })
+  let row: InternalSessionRow
+  try {
+    row = await deps.joinOrTakeOver({
+      userId: params.userId,
+      model,
+      now: nowOf(deps),
+    })
+  } catch (err) {
+    if (err instanceof FreeSessionModelLockedError) {
+      return {
+        status: 'model_locked',
+        currentModel: err.currentModel,
+        requestedModel: model,
+      }
+    }
+    throw err
+  }
   const view = await viewForRow(params.userId, deps, row)
   if (!view) {
     throw new Error(
@@ -171,6 +224,9 @@ export type SessionGateResult =
   | { ok: false; code: 'waiting_room_queued'; message: string }
   | { ok: false; code: 'session_superseded'; message: string }
   | { ok: false; code: 'session_expired'; message: string }
+  /** Active session locked to a different model than the one requested. The
+   *  CLI should restart its session (DELETE then POST) to switch models. */
+  | { ok: false; code: 'session_model_mismatch'; message: string }
   /** Pre-waiting-room CLI that never sends an instance id. Surfaced as a
    *  distinct code so the caller can prompt the user to restart. */
   | { ok: false; code: 'freebuff_update_required'; message: string }
@@ -190,6 +246,10 @@ export async function checkSessionAdmissible(params: {
   userId: string
   userEmail?: string | null | undefined
   claimedInstanceId: string | null | undefined
+  /** Model the chat-completions request is for. When provided, the gate
+   *  rejects requests whose model doesn't match the active session's model
+   *  so a stale CLI tab can't slip a request through under the wrong model. */
+  requestedModel?: string | null | undefined
   deps?: SessionDeps
 }): Promise<SessionGateResult> {
   const deps = params.deps ?? defaultDeps
@@ -254,6 +314,23 @@ export async function checkSessionAdmissible(params: {
     }
   }
 
+  // Reject requests for a model the session isn't bound to. Sub-agents may
+  // legitimately use other models (Gemini Flash etc.) so we only enforce this
+  // when the caller provides a requestedModel — and only against the set of
+  // selectable freebuff models (ids isSelectableFreebuffModel rejects skip
+  // this check; they are not coerced to the default model here).
+  if (
+    params.requestedModel &&
+    isSelectableFreebuffModel(params.requestedModel) &&
+    params.requestedModel !== row.model
+  ) {
+    return {
+      ok: false,
+      code: 'session_model_mismatch',
+      message: `This session is bound to ${row.model}; restart freebuff to switch models.`,
+    }
+  }
+
   if (expiresAtMs > nowMs) {
     return {
       ok: true,
diff --git a/web/src/server/free-session/session-view.ts b/web/src/server/free-session/session-view.ts
index 582e78814..599b44911 100644
--- a/web/src/server/free-session/session-view.ts
+++ b/web/src/server/free-session/session-view.ts
@@ -12,11 +12,13 @@ import type { InternalSessionRow, SessionStateResponse } from './types'
 export function toSessionStateResponse(params: {
   row: InternalSessionRow | null
   position: number
-  queueDepth: number
+  /** Snapshot of every model's queue depth at response time. Only consumed
+   *  by the `queued` variant — active/ended don't need the selector. */
+  queueDepthByModel: Record<string, number>
   graceMs: number
   now: Date
 }): SessionStateResponse | null {
-  const { row, position, queueDepth, graceMs, now } = params
+  const { row, position, queueDepthByModel, graceMs, now } = params
   if (!row) return null
 
   if (row.status === 'active' && row.expires_at) {
@@ -26,6 +28,7 @@ export function toSessionStateResponse(params: {
       return {
         status: 'active',
         instanceId: row.active_instance_id,
+        model: row.model,
         admittedAt: (row.admitted_at ?? row.created_at).toISOString(),
         expiresAt: row.expires_at.toISOString(),
         remainingMs: expiresAtMs - nowMs,
@@ -48,8 +51,10 @@ export function toSessionStateResponse(params: {
     return {
       status: 'queued',
       instanceId: row.active_instance_id,
+      model: row.model,
       position,
-      queueDepth,
+      queueDepth: queueDepthByModel[row.model] ?? 0,
+      queueDepthByModel,
       estimatedWaitMs: estimateWaitMs({ position }),
       queuedAt: row.queued_at.toISOString(),
     }
diff --git a/web/src/server/free-session/store.ts b/web/src/server/free-session/store.ts
index 34f4ad712..b0cd22b97 100644
--- a/web/src/server/free-session/store.ts
+++ b/web/src/server/free-session/store.ts
@@ -26,21 +26,37 @@ export async function getSessionRow(
  * Join the queue (or take over an existing row with a new instance_id).
  *
  * Semantics:
- *   - If no row exists: insert status=queued, fresh instance_id, queued_at=now.
- *   - If row exists and active+unexpired: rotate instance_id (takeover),
- *     preserve status/admitted_at/expires_at.
- *   - If row exists and expired: reset to queued with fresh instance_id
- *     and fresh queued_at — effectively re-queue at the back.
- *   - If row exists and already queued: rotate instance_id, preserve
- *     queued_at so user keeps their place in line.
+ *   - If no row exists: insert status=queued for `model`, fresh instance_id,
+ *     queued_at=now.
+ *   - If row exists and active+unexpired and model matches: rotate
+ *     instance_id (takeover), preserve status/admitted_at/expires_at.
+ *   - If row exists and active+unexpired but the user picked a different
+ *     model: throw `FreeSessionModelLockedError` — the active session is bound
+ *     to the model it was admitted with. The CLI should end the session first.
+ *   - If row exists and expired: reset to queued with fresh instance_id,
+ *     fresh queued_at, and the requested model — effectively re-queue at
+ *     the back of the new model's queue.
+ *   - If row exists and already queued: if model matches, rotate
+ *     instance_id and preserve queued_at; if model differs, switch model
+ *     and reset queued_at to now (move to back of the new queue).
  *
  * Never trusts client-supplied timestamps or instance ids.
  */
+export class FreeSessionModelLockedError extends Error {
+  constructor(public readonly currentModel: string) {
+    super(
+      `Active session is locked to model ${currentModel}; end the session before switching.`,
+    )
+    this.name = 'FreeSessionModelLockedError'
+  }
+}
+
 export async function joinOrTakeOver(params: {
   userId: string
+  model: string
   now: Date
 }): Promise<InternalSessionRow> {
-  const { userId, now } = params
+  const { userId, model, now } = params
   const nextInstanceId = newInstanceId()
 
   // postgres-js does NOT coerce raw JS Date values when they're interpolated
@@ -54,12 +70,21 @@ export async function joinOrTakeOver(params: {
   // column references resolve to the existing row.
   //
   // Decision table (pre-update state → post-update state):
-  //   no row                     → INSERT: status=queued, queued_at=now
-  //   active & expires_at > now  → rotate instance_id only (takeover)
-  //   queued                     → rotate instance_id, preserve queued_at
+  //   no row                     → INSERT: status=queued, queued_at=now,
+  //                                model=$model
+  //   active & expires_at > now  →
+  //     same model: rotate instance_id only (takeover)
+  //     diff model: throw FreeSessionModelLockedError post-upsert (we can't
+  //       easily express the reject-without-update branch in a single UPSERT;
+  //       see below)
+  //   queued, same model         → rotate instance_id, preserve queued_at
+  //   queued, diff model         → switch model, reset queued_at=now
+  //                                (move to back of new queue)
   //   active & expired           → re-queue at back: status=queued,
-  //                                queued_at=now, admitted_at/expires_at=null
+  //                                queued_at=now, model=$model,
+  //                                admitted_at/expires_at=null
   const activeUnexpired = sql`${schema.freeSession.status} = 'active' AND ${schema.freeSession.expires_at} > ${nowIso}`
+  const sameModel = sql`${schema.freeSession.model} = ${model}`
 
   const [row] = await db
     .insert(schema.freeSession)
@@ -67,6 +92,7 @@ export async function joinOrTakeOver(params: {
       user_id: userId,
       status: 'queued',
       active_instance_id: nextInstanceId,
+      model,
       queued_at: now,
       created_at: now,
       updated_at: now,
@@ -74,12 +100,24 @@ export async function joinOrTakeOver(params: {
     .onConflictDoUpdate({
       target: schema.freeSession.user_id,
       set: {
-        active_instance_id: nextInstanceId,
+        // For active+unexpired rows the instance_id only rotates if the model
+        // matches; otherwise we keep the existing id so the active session
+        // stays valid for the other CLI/tab. We then detect the mismatch
+        // post-update and throw, so the caller can return a clean error.
+        active_instance_id: sql`CASE
+          WHEN ${activeUnexpired} AND NOT (${sameModel}) THEN ${schema.freeSession.active_instance_id}
+          ELSE ${nextInstanceId}
+        END`,
         updated_at: now,
         status: sql`CASE WHEN ${activeUnexpired} THEN 'active'::free_session_status ELSE 'queued'::free_session_status END`,
+        // Keep model when active+unexpired (locked); switch otherwise.
+        model: sql`CASE
+          WHEN ${activeUnexpired} THEN ${schema.freeSession.model}
+          ELSE ${model}
+        END`,
         queued_at: sql`CASE
-          WHEN ${schema.freeSession.status} = 'queued' THEN ${schema.freeSession.queued_at}
           WHEN ${activeUnexpired} THEN ${schema.freeSession.queued_at}
+          WHEN ${schema.freeSession.status} = 'queued' AND ${sameModel} THEN ${schema.freeSession.queued_at}
           ELSE ${nowIso}
         END`,
         admitted_at: sql`CASE WHEN ${activeUnexpired} THEN ${schema.freeSession.admitted_at} ELSE NULL END`,
@@ -91,6 +129,13 @@ export async function joinOrTakeOver(params: {
   if (!row) {
     throw new Error(`joinOrTakeOver returned no row for user=${userId}`)
   }
+
+  // Active sessions are locked to their original model — surface a typed
+  // error so the public API can translate it into a structured response.
+  if (row.status === 'active' && row.model !== model) {
+    throw new FreeSessionModelLockedError(row.model)
+  }
+
   return row as InternalSessionRow
 }
 
@@ -100,14 +145,37 @@ export async function endSession(userId: string): Promise<void> {
     .where(eq(schema.freeSession.user_id, userId))
 }
 
-export async function queueDepth(): Promise<number> {
+export async function queueDepth(params: { model: string }): Promise<number> {
   const rows = await db
     .select({ n: count() })
     .from(schema.freeSession)
-    .where(eq(schema.freeSession.status, 'queued'))
+    .where(
+      and(
+        eq(schema.freeSession.status, 'queued'),
+        eq(schema.freeSession.model, params.model),
+      ),
+    )
   return Number(rows[0]?.n ?? 0)
 }
 
+/**
+ * Counts `queued` rows per model with one grouped query, so a single
+ * round-trip covers every model's queue depth. Powers the per-model "N ahead"
+ * hint in the waiting-room model selector, keeping the UI cheap to refresh.
+ *
+ * @returns Map of model id → queued-row count. Models with no queued rows are absent from the map; callers should default missing keys to 0.
+ */
+export async function queueDepthsByModel(): Promise<Record<string, number>> {
+  const rows = await db
+    .select({ model: schema.freeSession.model, n: count() })
+    .from(schema.freeSession)
+    .where(eq(schema.freeSession.status, 'queued'))
+    .groupBy(schema.freeSession.model)
+  const out: Record<string, number> = {}
+  for (const row of rows) out[row.model] = Number(row.n)
+  return out
+}
+
 export async function activeCount(): Promise<number> {
   const rows = await db
     .select({ n: count() })
@@ -118,6 +186,7 @@ export async function activeCount(): Promise<number> {
 
 export async function queuePositionFor(params: {
   userId: string
+  model: string
   queuedAt: Date
 }): Promise<number> {
   const rows = await db
@@ -126,6 +195,7 @@ export async function queuePositionFor(params: {
     .where(
       and(
         eq(schema.freeSession.status, 'queued'),
+        eq(schema.freeSession.model, params.model),
         sql`(${schema.freeSession.queued_at}, ${schema.freeSession.user_id}) <= (${params.queuedAt.toISOString()}::timestamptz, ${params.userId})`,
       ),
     )
@@ -152,34 +222,42 @@ export async function sweepExpired(now: Date, graceMs: number): Promise<number>
 }
 
 /**
- * Atomically admit one queued user, gated by the upstream health probe and
- * guarded by an advisory xact lock so only one pod admits per tick.
+ * Atomically admit one queued user for a specific model, gated by the
+ * upstream health for that model's deployment and guarded by an advisory
+ * xact lock so only one pod admits per tick (per model).
+ *
+ * Each model has its own queue; this admits the longest-waiting user from
+ * the given model's queue. Health is passed in (resolved by the caller from
+ * a single fleet probe) rather than fetched here, so a slow probe doesn't
+ * hold a Postgres connection open.
  *
  * Return semantics:
  *   - `{ admitted: [row], skipped: null }` — admitted one user
  *   - `{ admitted: [], skipped: null }` — empty queue or another pod held the lock
- *   - `{ admitted: [], skipped: 'degraded' | 'unhealthy' }` — probe blocked admission
+ *   - `{ admitted: [], skipped: 'degraded' | 'unhealthy' }` — health blocked admission
  *
  * Only `healthy` admits; `degraded` and `unhealthy` both pause admission (the
  * distinction is for observability — degraded means "upstream loaded",
- * unhealthy means "upstream unreachable or saturated"). The probe runs before
- * the transaction so a slow probe doesn't hold a Postgres connection open.
+ * unhealthy means "upstream unreachable or saturated").
  */
 export async function admitFromQueue(params: {
+  model: string
   sessionLengthMs: number
   now: Date
-  getFireworksHealth: () => Promise<FireworksHealth>
+  health: FireworksHealth
 }): Promise<{ admitted: InternalSessionRow[]; skipped: FireworksHealth | null }> {
-  const { sessionLengthMs, now, getFireworksHealth } = params
+  const { model, sessionLengthMs, now, health } = params
 
-  const health = await getFireworksHealth()
   if (health !== 'healthy') {
     return { admitted: [], skipped: health }
   }
 
   return db.transaction(async (tx) => {
+    // Per-model lock: hashing the model into the lock id lets distinct model
+    // queues admit concurrently while still serializing within a single queue.
+    const modelLockId = FREEBUFF_ADMISSION_LOCK_ID + hashStringToInt32(model)
     const lockResult = await tx.execute<{ acquired: unknown }>(
-      sql`SELECT pg_try_advisory_xact_lock(${FREEBUFF_ADMISSION_LOCK_ID}) AS acquired`,
+      sql`SELECT pg_try_advisory_xact_lock(${modelLockId}) AS acquired`,
     )
     if (
       !coerceBool(
@@ -192,7 +270,12 @@ export async function admitFromQueue(params: {
     const candidates = await tx
       .select({ user_id: schema.freeSession.user_id })
       .from(schema.freeSession)
-      .where(eq(schema.freeSession.status, 'queued'))
+      .where(
+        and(
+          eq(schema.freeSession.status, 'queued'),
+          eq(schema.freeSession.model, model),
+        ),
+      )
       .orderBy(asc(schema.freeSession.queued_at), asc(schema.freeSession.user_id))
       .limit(1)
       .for('update', { skipLocked: true })
@@ -220,3 +303,12 @@ export async function admitFromQueue(params: {
     return { admitted: admitted as InternalSessionRow[], skipped: null }
   })
 }
+
+/** Stable non-negative 30-bit hash (0 <= h < 2^30); added to a base lock id below 2^30, the sum still fits in int4. */
+function hashStringToInt32(s: string): number {
+  let h = 0
+  for (let i = 0; i < s.length; i++) {
+    h = (h * 31 + s.charCodeAt(i)) | 0 // `| 0` wraps the running value to a signed 32-bit int each step
+  }
+  return Math.abs(h) % 0x40000000 // 0x40000000 = 2^30, so the result is always in [0, 2^30)
+}
diff --git a/web/src/server/free-session/types.ts b/web/src/server/free-session/types.ts
index 2f56e2c4d..f46a3ad52 100644
--- a/web/src/server/free-session/types.ts
+++ b/web/src/server/free-session/types.ts
@@ -15,6 +15,8 @@ export interface InternalSessionRow {
   user_id: string
   status: FreeSessionStatus
   active_instance_id: string
+  /** Freebuff model id this row is queued for (or locked to, once active). */
+  model: string
   queued_at: Date
   admitted_at: Date | null
   expires_at: Date | null