diff --git a/agents/__tests__/editor.test.ts b/agents/__tests__/editor.test.ts
index 030857c8d..36d6b75c5 100644
--- a/agents/__tests__/editor.test.ts
+++ b/agents/__tests__/editor.test.ts
@@ -67,6 +67,11 @@ describe('editor agent', () => {
       expect(glmEditor.model).toBe('z-ai/glm-5.1')
     })
 
+    test('creates minimax editor', () => {
+      const minimaxEditor = createCodeEditor({ model: 'minimax' })
+      expect(minimaxEditor.model).toBe('minimax/minimax-m2.7')
+    })
+
     test('gpt-5 editor does not include think tags in instructions', () => {
       const gpt5Editor = createCodeEditor({ model: 'gpt-5' })
       expect(gpt5Editor.instructionsPrompt).not.toContain('<think>')
@@ -79,6 +84,12 @@ describe('editor agent', () => {
       expect(glmEditor.instructionsPrompt).not.toContain('</think>')
     })
 
+    test('minimax editor does not include think tags in instructions', () => {
+      const minimaxEditor = createCodeEditor({ model: 'minimax' })
+      expect(minimaxEditor.instructionsPrompt).not.toContain('<think>')
+      expect(minimaxEditor.instructionsPrompt).not.toContain('</think>')
+    })
+
     test('opus editor includes think tags in instructions', () => {
       const opusEditor = createCodeEditor({ model: 'opus' })
       expect(opusEditor.instructionsPrompt).toContain('<think>')
diff --git a/agents/editor/editor.ts b/agents/editor/editor.ts
index 3d208aa13..c98544d0f 100644
--- a/agents/editor/editor.ts
+++ b/agents/editor/editor.ts
@@ -4,7 +4,7 @@ import { publisher } from '../constants'
 import type { AgentDefinition } from '../types/agent-definition'
 
 export const createCodeEditor = (options: {
-  model: 'gpt-5' | 'opus' | 'glm'
+  model: 'gpt-5' | 'opus' | 'glm' | 'minimax'
 }): Omit<AgentDefinition, 'id'> => {
   const { model } = options
   return {
@@ -12,6 +12,8 @@ export const createCodeEditor = (options: {
     model:
       options.model === 'gpt-5'
         ? 'openai/gpt-5.1'
+        : options.model === 'minimax'
+          ? 'minimax/minimax-m2.7'
         : options.model === 'glm'
           ? 'z-ai/glm-5.1'
           : 'anthropic/claude-opus-4.7',
@@ -65,7 +67,7 @@ OR for new files or major rewrites:
 }
 </codebuff_tool_call>
 
-${model === 'gpt-5' || model === 'glm'
+${model === 'gpt-5' || model === 'glm' || model === 'minimax'
         ? ''
         : `Before you start writing your implementation, you should use <think> tags to think about the best way to implement the changes.
 
diff --git a/agents/types/agent-definition.ts b/agents/types/agent-definition.ts
index b28a77c31..3608f3631 100644
--- a/agents/types/agent-definition.ts
+++ b/agents/types/agent-definition.ts
@@ -423,8 +423,6 @@ export type ModelName =
   // Other open source models
   | 'moonshotai/kimi-k2'
   | 'moonshotai/kimi-k2:nitro'
-  | 'moonshotai/kimi-k2.5'
-  | 'moonshotai/kimi-k2.5:nitro'
   | 'z-ai/glm-5'
   | 'z-ai/glm-5.1'
   | 'z-ai/glm-4.6'
diff --git a/cli/src/app.tsx b/cli/src/app.tsx
index 0661d7d3c..cac6e20ec 100644
--- a/cli/src/app.tsx
+++ b/cli/src/app.tsx
@@ -380,6 +380,7 @@ const AuthedSurface = ({
   //   'queued' → waiting our turn
   //   'country_blocked' → terminal region-gate message
   //   'banned' → terminal account-banned message
+  //   'rate_limited' → hit per-model session quota; terminal for this run
   //
   // 'ended' deliberately falls through to <Chat>: the agent may still be
   // finishing work under the server-side grace period, and the chat surface
@@ -390,7 +391,8 @@ const AuthedSurface = ({
       session.status === 'queued' ||
       session.status === 'none' ||
       session.status === 'country_blocked' ||
-      session.status === 'banned')
+      session.status === 'banned' ||
+      session.status === 'rate_limited')
   ) {
     return <WaitingRoomScreen session={session} error={sessionError} />
   }
diff --git a/cli/src/chat.tsx b/cli/src/chat.tsx
index 6663c7e1e..09727ea6e 100644
--- a/cli/src/chat.tsx
+++ b/cli/src/chat.tsx
@@ -174,7 +174,11 @@ export const Chat = ({
   })
   const hasSubscription = subscriptionData?.hasSubscription ?? false
 
-  const { adData, recordImpression } = useGravityAd({ enabled: IS_FREEBUFF || !hasSubscription })
+  const { adData, recordImpression } = useGravityAd({
+    enabled: IS_FREEBUFF || !hasSubscription,
+    provider: 'gravity',
+    fallbackProvider: 'carbon',
+  })
 
   // Set initial mode from CLI flag on mount
   useEffect(() => {
diff --git a/cli/src/components/freebuff-model-selector.tsx b/cli/src/components/freebuff-model-selector.tsx
index a33d89540..0850a0bd7 100644
--- a/cli/src/components/freebuff-model-selector.tsx
+++ b/cli/src/components/freebuff-model-selector.tsx
@@ -3,9 +3,16 @@ import { useKeyboard } from '@opentui/react'
 import React, { useCallback, useEffect, useMemo, useState } from 'react'
 
 import { Button } from './button'
-import { FREEBUFF_MODELS } from '@codebuff/common/constants/freebuff-models'
+import {
+  FALLBACK_FREEBUFF_MODEL_ID,
+  FREEBUFF_DEPLOYMENT_HOURS_LABEL,
+  FREEBUFF_GLM_MODEL_ID,
+  FREEBUFF_MODELS,
+  isFreebuffModelAvailable,
+} from '@codebuff/common/constants/freebuff-models'
 
 import { joinFreebuffQueue } from '../hooks/use-freebuff-session'
+import { useNow } from '../hooks/use-now'
 import { useFreebuffModelStore } from '../state/freebuff-model-store'
 import { useFreebuffSessionStore } from '../state/freebuff-session-store'
 import { useTerminalDimensions } from '../hooks/use-terminal-dimensions'
@@ -13,6 +20,11 @@ import { useTheme } from '../hooks/use-theme'
 
 import type { KeyEvent } from '@opentui/core'
 
+const FREEBUFF_MODEL_SELECTOR_MODELS = [
+  ...FREEBUFF_MODELS.filter((model) => model.id === FREEBUFF_GLM_MODEL_ID),
+  ...FREEBUFF_MODELS.filter((model) => model.id !== FREEBUFF_GLM_MODEL_ID),
+]
+
 /**
  * Dual-purpose model picker:
  *   - Pre-chat landing (session 'none'): user hasn't joined any queue. Picking
@@ -33,7 +45,9 @@ export const FreebuffModelSelector: React.FC = () => {
   const theme = useTheme()
   const { terminalWidth } = useTerminalDimensions()
   const selectedModel = useFreebuffModelStore((s) => s.selectedModel)
+  const setSelectedModel = useFreebuffModelStore((s) => s.setSelectedModel)
   const session = useFreebuffSessionStore((s) => s.session)
+  const now = useNow(60_000)
   const [pending, setPending] = useState<string | null>(null)
   const [hoveredId, setHoveredId] = useState<string | null>(null)
   // Keyboard cursor — separate from the actually-selected model so that
@@ -45,6 +59,20 @@ export const FreebuffModelSelector: React.FC = () => {
     setFocusedId(selectedModel)
   }, [selectedModel])
 
+  useEffect(() => {
+    // Landing-screen safety net: if the in-memory selection becomes
+    // unavailable (e.g. deployment hours close while the picker is open),
+    // swap to the always-available fallback so Enter doesn't POST a model
+    // the server will immediately reject. In-memory only — the user's saved
+    // preference (e.g. GLM) is preserved for the next launch.
+    if (
+      (session?.status === 'none' || !session) &&
+      !isFreebuffModelAvailable(selectedModel, new Date(now))
+    ) {
+      setSelectedModel(FALLBACK_FREEBUFF_MODEL_ID)
+    }
+  }, [now, selectedModel, session, setSelectedModel])
+
   // Landing ('none'): depths come from the server snapshot, no "self" to
   // subtract. In-queue ('queued'): for the user's queue, "ahead" is
   // `position - 1` (themselves don't count); for every other queue, switching
@@ -85,18 +113,22 @@ export const FreebuffModelSelector: React.FC = () => {
   )
 
   // Decide row vs column layout based on whether both buttons actually fit
-  // side-by-side. Each button's inner text is "● {displayName} · {tagline}  {hint}",
+  // side-by-side. Each button's inner text is
+  // "● {displayName} · {tagline} · {hours}  {hint}",
   // plus 2 cols of border and 2 cols of padding. Buttons are separated by a
   // gap of 2. If the total exceeds the terminal width, stack vertically.
   const stackVertically = useMemo(() => {
     const BUTTON_CHROME = 4 // 2 border + 2 padding
     const GAP = 2
-    const total = FREEBUFF_MODELS.reduce((sum, model, idx) => {
+    const total = FREEBUFF_MODEL_SELECTOR_MODELS.reduce((sum, model, idx) => {
       const inner =
         2 /* indicator + space */ +
         model.displayName.length +
         3 /* " · " */ +
         model.tagline.length +
+        (model.availability === 'deployment_hours'
+          ? 3 + FREEBUFF_DEPLOYMENT_HOURS_LABEL.length
+          : 0) +
         2 /* "  " */ +
         hintWidth
       return sum + inner + BUTTON_CHROME + (idx > 0 ? GAP : 0)
@@ -115,10 +147,11 @@ export const FreebuffModelSelector: React.FC = () => {
     (modelId: string) => {
       if (pending) return
       if (modelId === committedModelId) return
+      if (!isFreebuffModelAvailable(modelId, new Date(now))) return
       setPending(modelId)
       joinFreebuffQueue(modelId).finally(() => setPending(null))
     },
-    [pending, committedModelId],
+    [pending, committedModelId, now],
   )
 
   // Tab / Shift+Tab and arrow keys move the focus highlight only; Enter or
@@ -136,25 +169,30 @@ export const FreebuffModelSelector: React.FC = () => {
         const isCommit = name === 'return' || name === 'enter' || name === 'space'
         if (!isForward && !isBackward && !isCommit) return
         if (isCommit) {
-          if (focusedId !== committedModelId) {
+          if (
+            focusedId !== committedModelId &&
+            isFreebuffModelAvailable(focusedId, new Date(now))
+          ) {
             key.preventDefault?.()
             pick(focusedId)
           }
           return
         }
-        const currentIdx = FREEBUFF_MODELS.findIndex((m) => m.id === focusedId)
+        const currentIdx = FREEBUFF_MODEL_SELECTOR_MODELS.findIndex(
+          (m) => m.id === focusedId,
+        )
         if (currentIdx === -1) return
-        const len = FREEBUFF_MODELS.length
+        const len = FREEBUFF_MODEL_SELECTOR_MODELS.length
         const nextIdx = isForward
           ? (currentIdx + 1) % len
           : (currentIdx - 1 + len) % len
-        const target = FREEBUFF_MODELS[nextIdx]
+        const target = FREEBUFF_MODEL_SELECTOR_MODELS[nextIdx]
         if (target) {
           key.preventDefault?.()
           setFocusedId(target.id)
         }
       },
-      [pending, pick, focusedId, committedModelId],
+      [pending, pick, focusedId, committedModelId, now],
     ),
   )
 
@@ -173,7 +211,7 @@ export const FreebuffModelSelector: React.FC = () => {
           alignItems: 'flex-start',
         }}
       >
-        {FREEBUFF_MODELS.map((model) => {
+        {FREEBUFF_MODEL_SELECTOR_MODELS.map((model) => {
           // 'Selected' means the dot is filled and the label is bold. On the
           // landing screen ('none') this tracks the pre-focused pick; on the
           // queued screen it tracks the model the server has us on. Either
@@ -181,15 +219,22 @@ export const FreebuffModelSelector: React.FC = () => {
           const isSelected = model.id === selectedModel
           const isHovered = hoveredId === model.id
           const isFocused = focusedId === model.id && !isSelected
+          const isAvailable = isFreebuffModelAvailable(model.id, new Date(now))
           const indicator = isSelected ? '●' : '○'
           const indicatorColor = isSelected ? theme.primary : theme.muted
-          const labelColor = isSelected ? theme.foreground : theme.muted
+          const labelColor = isSelected && isAvailable ? theme.foreground : theme.muted
           // Clickable whenever picking would actually do something — i.e.
           // anything except re-picking the queue we're already in.
-          const interactable = !pending && model.id !== committedModelId
+          const interactable = !pending && isAvailable && model.id !== committedModelId
           const ahead = aheadByModel?.[model.id]
           const hint =
-            ahead === undefined ? '' : ahead === 0 ? 'No wait' : `${ahead} ahead`
+            !isAvailable
+              ? 'Closed'
+              : ahead === undefined
+                ? ''
+                : ahead === 0
+                  ? 'No wait'
+                  : `${ahead} ahead`
 
           const borderColor = isSelected
             ? theme.primary
@@ -202,7 +247,7 @@ export const FreebuffModelSelector: React.FC = () => {
               key={model.id}
               onClick={() => {
                 setFocusedId(model.id)
-                pick(model.id)
+                if (isAvailable) pick(model.id)
               }}
               onMouseOver={() => interactable && setHoveredId(model.id)}
               onMouseOut={() => setHoveredId((curr) => (curr === model.id ? null : curr))}
@@ -223,6 +268,9 @@ export const FreebuffModelSelector: React.FC = () => {
                   {model.displayName}
                 </span>
                 <span fg={theme.muted}> · {model.tagline}</span>
+                {model.availability === 'deployment_hours' && (
+                  <span fg={theme.muted}> · {FREEBUFF_DEPLOYMENT_HOURS_LABEL}</span>
+                )}
                 <span fg={theme.muted}>  {hint.padEnd(hintWidth)}</span>
               </text>
             </Button>
diff --git a/cli/src/components/waiting-room-screen.tsx b/cli/src/components/waiting-room-screen.tsx
index e67823f7a..f2a09022e 100644
--- a/cli/src/components/waiting-room-screen.tsx
+++ b/cli/src/components/waiting-room-screen.tsx
@@ -43,6 +43,18 @@ const formatElapsed = (ms: number): string => {
   return `${minutes}m ${seconds.toString().padStart(2, '0')}s`
 }
 
+/** "in ~3h 20m" / "in ~45 min" / "in under a minute". Used on the
+ *  rate-limited screen so users know when they can try again. */
+const formatRetryAfter = (ms: number): string => {
+  if (!Number.isFinite(ms) || ms <= 0) return 'any moment now'
+  const minutes = Math.round(ms / 60_000)
+  if (minutes < 1) return 'under a minute'
+  if (minutes < 60) return `${minutes} min`
+  const hours = Math.floor(minutes / 60)
+  const rem = minutes % 60
+  return rem === 0 ? `${hours}h` : `${hours}h ${rem}m`
+}
+
 export const WaitingRoomScreen: React.FC<WaitingRoomScreenProps> = ({
   session,
   error,
@@ -72,11 +84,12 @@ export const WaitingRoomScreen: React.FC<WaitingRoomScreenProps> = ({
   // Always enable ads in the waiting room — this is where monetization lives.
   // forceStart bypasses the "wait for first user message" gate inside the hook,
   // which would otherwise block ads here since no conversation exists yet.
-  // Uses Carbon (BuySellAds); in-chat ads still use the Gravity default.
+  // Try Gravity first, then fall back to Carbon when Gravity doesn't fill.
   const { adData, recordImpression } = useGravityAd({
     enabled: true,
     forceStart: true,
-    provider: 'carbon',
+    provider: 'gravity',
+    fallbackProvider: 'carbon',
   })
 
   useFreebuffCtrlCExit()
@@ -216,6 +229,18 @@ export const WaitingRoomScreen: React.FC<WaitingRoomScreenProps> = ({
                   <span>Elapsed  </span>
                   {formatElapsed(elapsedMs)}
                 </text>
+                {/* Per-model session quota (e.g. GLM 5.1 caps at 5/20h). Only
+                    rendered for rate-limited models so the Minimax queue stays
+                    clutter-free. */}
+                {session.rateLimit && (
+                  <text style={{ fg: theme.muted, alignSelf: 'flex-start' }}>
+                    <span>Sessions </span>
+                    <span fg={theme.foreground}>
+                      {session.rateLimit.recentCount} / {session.rateLimit.limit}
+                    </span>
+                    <span> used in last {session.rateLimit.windowHours}h</span>
+                  </text>
+                )}
               </box>
             </>
           )}
@@ -253,11 +278,34 @@ export const WaitingRoomScreen: React.FC<WaitingRoomScreenProps> = ({
                 ⚠ Account unavailable
               </text>
               <text style={{ fg: theme.muted, wrapMode: 'word' }}>
-                This account can't use freebuff. If you think this is a
+                This account has been suspended and can't use freebuff. If you think this is a
                 mistake, contact support@codebuff.com. Press Ctrl+C to exit.
               </text>
             </>
           )}
+
+          {/* Per-model session quota exhausted (e.g. 5+ GLM sessions in the
+              last 20h). Terminal for this run — the user can exit and come
+              back once the oldest session in the window rolls off. */}
+          {session?.status === 'rate_limited' && (
+            <>
+              <text style={{ fg: theme.secondary, marginBottom: 1 }}>
+                ⚠ Session limit reached
+              </text>
+              <text style={{ fg: theme.muted, wrapMode: 'word' }}>
+                You've used{' '}
+                <span fg={theme.foreground}>
+                  {session.recentCount} of {session.limit}
+                </span>{' '}
+                hour-long sessions on {session.model} in the last{' '}
+                {session.windowHours}h. Try again in{' '}
+                <span fg={theme.foreground}>
+                  {formatRetryAfter(session.retryAfterMs)}
+                </span>
+                . Press Ctrl+C to exit.
+              </text>
+            </>
+          )}
         </box>
       </box>
 
diff --git a/cli/src/hooks/use-freebuff-session.ts b/cli/src/hooks/use-freebuff-session.ts
index 79deea1cf..b7a91eb1e 100644
--- a/cli/src/hooks/use-freebuff-session.ts
+++ b/cli/src/hooks/use-freebuff-session.ts
@@ -1,4 +1,8 @@
 import { env } from '@codebuff/common/env'
+import {
+  FALLBACK_FREEBUFF_MODEL_ID,
+  resolveFreebuffModel,
+} from '@codebuff/common/constants/freebuff-models'
 import { useEffect } from 'react'
 
 import {
@@ -9,6 +13,7 @@ import { useFreebuffSessionStore } from '../state/freebuff-session-store'
 import { getAuthTokenDetails } from '../utils/auth'
 import { IS_FREEBUFF } from '../utils/constants'
 import { logger } from '../utils/logger'
+import { saveFreebuffModelPreference } from '../utils/settings'
 
 import type { FreebuffSessionResponse } from '../types/freebuff-session'
 
@@ -75,14 +80,31 @@ async function callSession(
       return body
     }
   }
-  // 409 from POST means the user picked a different model than their active
-  // session is bound to. Surface as a non-throw `model_locked` so the UI can
-  // show a confirmation prompt (DELETE then re-POST to switch).
+  // 409 from POST means the selected model cannot be joined right now, either
+  // because an active session is locked to another model or because a
+  // Surface model-switch conflicts and temporary model availability closures
+  // as non-throw states.
   if (resp.status === 409 && method === 'POST') {
     const body = (await resp.json().catch(() => null)) as
       | FreebuffSessionResponse
       | null
-    if (body && body.status === 'model_locked') {
+    if (
+      body &&
+      (body.status === 'model_locked' || body.status === 'model_unavailable')
+    ) {
+      return body
+    }
+  }
+  // 429 from POST is the per-model session-quota reject (e.g. too many GLM
+  // sessions in the last 20h). Terminal for the current poll — the CLI shows
+  // a screen explaining the limit and when the user can try again. The 429
+  // status (rather than 200) keeps older CLIs in their error path so they
+  // back off instead of tight-polling an unrecognized 200 body.
+  if (resp.status === 429 && method === 'POST') {
+    const body = (await resp.json().catch(() => null)) as
+      | FreebuffSessionResponse
+      | null
+    if (body && body.status === 'rate_limited') {
       return body
     }
   }
@@ -119,6 +141,8 @@ function nextDelayMs(next: FreebuffSessionResponse): number | null {
     case 'country_blocked':
     case 'banned':
     case 'model_locked':
+    case 'rate_limited':
+    case 'model_unavailable':
       return null
   }
 }
@@ -260,7 +284,13 @@ export function returnToFreebuffLanding(
  */
 export function joinFreebuffQueue(model: string): Promise<void> {
   if (!IS_FREEBUFF) return Promise.resolve()
-  useFreebuffModelStore.getState().setSelectedModel(model)
+  // This is the only explicit user-pick path (called from the picker on
+  // click / Enter), so persistence belongs here — and ONLY here. Server-
+  // driven flips (`model_locked`, `model_unavailable`, takeover) go
+  // through `setSelectedModel` directly, which never writes to disk.
+  const resolved = resolveFreebuffModel(model)
+  useFreebuffModelStore.getState().setSelectedModel(resolved)
+  saveFreebuffModelPreference(resolved)
   return restartFreebuffSession('rejoin')
 }
 
@@ -398,6 +428,19 @@ export function useFreebuffSession(): UseFreebuffSessionResult {
           schedule(0)
           return
         }
+        if (next.status === 'model_unavailable') {
+          // Server says the requested model isn't available right now (e.g.
+          // GLM outside deployment hours). Flip to the always-available
+          // fallback for this run. In-memory only — `setSelectedModel`
+          // doesn't persist, so the user's saved preference (e.g. GLM)
+          // is preserved for their next launch during deployment hours.
+          useFreebuffModelStore
+            .getState()
+            .setSelectedModel(FALLBACK_FREEBUFF_MODEL_ID)
+          nextMethod = 'GET'
+          schedule(0)
+          return
+        }
 
         // Startup takeover: the initial probe GET saw we already hold a seat
         // (from a prior CLI instance). POST now to rotate our instance id so
diff --git a/cli/src/hooks/use-gravity-ad.ts b/cli/src/hooks/use-gravity-ad.ts
index e52b4bdd8..36a18faae 100644
--- a/cli/src/hooks/use-gravity-ad.ts
+++ b/cli/src/hooks/use-gravity-ad.ts
@@ -108,12 +108,15 @@ export const useGravityAd = (options?: {
   /** Skip the "wait for first user message" gate. Used by the freebuff
    *  waiting room, which has no conversation but still needs ads. */
   forceStart?: boolean
-  /** Which ad network to query. Defaults to Gravity. */
+  /** Primary ad network to query. Defaults to Gravity. */
   provider?: AdProvider
+  /** Backup ad network to try when the primary returns no fill or errors. */
+  fallbackProvider?: AdProvider
 }): GravityAdState => {
   const enabled = options?.enabled ?? true
   const forceStart = options?.forceStart ?? false
   const provider: AdProvider = options?.provider ?? 'gravity'
+  const fallbackProvider = options?.fallbackProvider
   const [ad, setAd] = useState<AdResponse | null>(null)
   const [adData, setAdData] = useState<AdData | null>(null)
   const [isLoading, setIsLoading] = useState(false)
@@ -278,49 +281,63 @@ export const useGravityAd = (options?: {
       }
     }
 
-    try {
-      const response = await fetch(`${WEBSITE_URL}/api/v1/ads`, {
-        method: 'POST',
-        headers: {
-          'Content-Type': 'application/json',
-          Authorization: `Bearer ${authToken}`,
-        },
-        body: JSON.stringify({
-          provider,
-          messages: adMessages,
-          sessionId: useChatStore.getState().chatSessionId,
-          device: getDeviceInfo(),
-          // Carbon requires a real browser-ish useragent for targeting/fraud
-          // detection. Gravity ignores it. We source one centrally so every
-          // provider that needs it sees the same value.
-          userAgent: getAdUserAgent(),
-        }),
-      })
+    const providersToTry =
+      fallbackProvider && fallbackProvider !== provider
+        ? [provider, fallbackProvider]
+        : [provider]
 
-      if (!response.ok) {
-        logger.warn(
-          { provider, status: response.status, response: await response.json() },
-          '[ads] Web API returned error',
-        )
-        return null
-      }
+    for (const providerToTry of providersToTry) {
+      try {
+        const response = await fetch(`${WEBSITE_URL}/api/v1/ads`, {
+          method: 'POST',
+          headers: {
+            'Content-Type': 'application/json',
+            Authorization: `Bearer ${authToken}`,
+          },
+          body: JSON.stringify({
+            provider: providerToTry,
+            messages: adMessages,
+            sessionId: useChatStore.getState().chatSessionId,
+            device: getDeviceInfo(),
+            // Carbon requires a real browser-ish useragent for targeting/fraud
+            // detection. Gravity ignores it. We source one centrally so every
+            // provider that needs it sees the same value.
+            userAgent: getAdUserAgent(),
+          }),
+        })
 
-      const data = await response.json()
-      const variant = data.variant ?? 'banner'
+        if (!response.ok) {
+          logger.warn(
+            {
+              provider: providerToTry,
+              status: response.status,
+              response: await response.json(),
+            },
+            '[ads] Web API returned error',
+          )
+          continue
+        }
 
-      if (variant === 'choice' && Array.isArray(data.ads) && data.ads.length > 0) {
-        return { variant: 'choice', ads: data.ads as AdResponse[] }
-      }
+        const data = await response.json()
+        const variant = data.variant ?? 'banner'
 
-      if (data.ad) {
-        return { variant: 'banner', ad: data.ad as AdResponse }
-      }
+        if (
+          variant === 'choice' &&
+          Array.isArray(data.ads) &&
+          data.ads.length > 0
+        ) {
+          return { variant: 'choice', ads: data.ads as AdResponse[] }
+        }
 
-      return null
-    } catch (err) {
-      logger.error({ err }, '[ads] Failed to fetch ad')
-      return null
+        if (data.ad) {
+          return { variant: 'banner', ad: data.ad as AdResponse }
+        }
+      } catch (err) {
+        logger.error({ err, provider: providerToTry }, '[ads] Failed to fetch ad')
+      }
     }
+
+    return null
   }
 
   // Update tick function (uses ref to avoid useCallback dependency issues)
@@ -413,7 +430,7 @@ export const useGravityAd = (options?: {
       clearInterval(id)
       ctrlRef.current.intervalId = null
     }
-  }, [shouldStart, shouldHideAds])
+  }, [shouldStart, shouldHideAds, provider, fallbackProvider])
 
   // Don't return ad when ads should be hidden
   const visible = shouldStart && !shouldHideAds
diff --git a/cli/src/state/freebuff-model-store.ts b/cli/src/state/freebuff-model-store.ts
index 182a38831..c602d8464 100644
--- a/cli/src/state/freebuff-model-store.ts
+++ b/cli/src/state/freebuff-model-store.ts
@@ -1,19 +1,23 @@
 import {
   DEFAULT_FREEBUFF_MODEL_ID,
+  resolveAvailableFreebuffModel,
   resolveFreebuffModel,
 } from '@codebuff/common/constants/freebuff-models'
 import { create } from 'zustand'
 
-import {
-  loadFreebuffModelPreference,
-  saveFreebuffModelPreference,
-} from '../utils/settings'
+import { loadFreebuffModelPreference } from '../utils/settings'
 
 /**
  * Holds the user's currently-selected freebuff model. Initialized from the
  * persisted settings file so freebuff defaults to whatever model the user
- * last picked. Writing through `setSelectedModel` also persists to disk so
- * the next launch picks it up without an explicit save call.
+ * last picked.
+ *
+ * `setSelectedModel` is in-memory only — it does NOT persist. Persistence
+ * happens exclusively in `joinFreebuffQueue` (the explicit-pick path), so
+ * server-driven auto-flips (`model_locked`, `model_unavailable`, takeover)
+ * can update the in-memory selection without overwriting the user's saved
+ * preference. The latter previously caused users to get permanently flipped
+ * to the fallback model after a single auto-fallback.
  *
  * Components in the waiting room read this to highlight the current row in
  * the model picker; the session hook reads it to decide which queue to join.
@@ -24,14 +28,11 @@ interface FreebuffModelStore {
 }
 
 export const useFreebuffModelStore = create<FreebuffModelStore>((set) => ({
-  selectedModel: resolveFreebuffModel(
+  selectedModel: resolveAvailableFreebuffModel(
     loadFreebuffModelPreference() ?? DEFAULT_FREEBUFF_MODEL_ID,
   ),
-  setSelectedModel: (model) => {
-    const resolved = resolveFreebuffModel(model)
-    saveFreebuffModelPreference(resolved)
-    set({ selectedModel: resolved })
-  },
+  setSelectedModel: (model) =>
+    set({ selectedModel: resolveFreebuffModel(model) }),
 }))
 
 /** Imperative read for non-React callers (the session hook's tick loop and
diff --git a/cli/src/utils/local-agent-registry.ts b/cli/src/utils/local-agent-registry.ts
index 59206eb84..6106b3928 100644
--- a/cli/src/utils/local-agent-registry.ts
+++ b/cli/src/utils/local-agent-registry.ts
@@ -370,7 +370,7 @@ export const loadAgentDefinitions = (): AgentDefinition[] => {
   }
 
   // Override the model of free-mode agents to match the user's pick from the
-  // freebuff waiting room. Bundled definitions hardcode glm-5.1; we swap in
+  // freebuff waiting room. Bundled definitions hardcode a free model; we swap in
   // whatever the user chose so the chat-completions request body carries the
   // matching model and the server-side session gate doesn't reject it as a
   // model mismatch.
diff --git a/common/src/constants/free-agents.ts b/common/src/constants/free-agents.ts
index e44c74cc6..308e12df6 100644
--- a/common/src/constants/free-agents.ts
+++ b/common/src/constants/free-agents.ts
@@ -26,7 +26,10 @@ export const FREEBUFF_ROOT_AGENT_IDS = ['base2-free'] as const
  */
 export const FREE_MODE_AGENT_MODELS: Record<string, Set<string>> = {
   // Root orchestrator
-  'base2-free': new Set(['minimax/minimax-m2.7', 'z-ai/glm-5.1']),
+  'base2-free': new Set([
+    'minimax/minimax-m2.7',
+    'z-ai/glm-5.1',
+  ]),
 
   // File exploration agents
   'file-picker': new Set(['google/gemini-2.5-flash-lite']),
@@ -41,10 +44,16 @@ export const FREE_MODE_AGENT_MODELS: Record<string, Set<string>> = {
   'basher': new Set(['google/gemini-3.1-flash-lite-preview']),
 
   // Editor for free mode
-  'editor-lite': new Set(['minimax/minimax-m2.7', 'z-ai/glm-5.1']),
+  'editor-lite': new Set([
+    'minimax/minimax-m2.7',
+    'z-ai/glm-5.1',
+  ]),
 
   // Code reviewer for free mode
-  'code-reviewer-lite': new Set(['minimax/minimax-m2.7', 'z-ai/glm-5.1']),
+  'code-reviewer-lite': new Set([
+    'minimax/minimax-m2.7',
+    'z-ai/glm-5.1',
+  ]),
 }
 
 /**
diff --git a/common/src/constants/freebuff-models.ts b/common/src/constants/freebuff-models.ts
index d71ebd619..2e1ef8d8e 100644
--- a/common/src/constants/freebuff-models.ts
+++ b/common/src/constants/freebuff-models.ts
@@ -13,24 +13,43 @@ export interface FreebuffModelOption {
   displayName: string
   /** One-line description shown next to the label. */
   tagline: string
+  /** Availability policy for the selector and server-side admission. */
+  availability: 'always' | 'deployment_hours'
 }
 
+export const FREEBUFF_DEPLOYMENT_HOURS_LABEL = '9am ET-5pm PT'
+export const FREEBUFF_GLM_MODEL_ID = 'z-ai/glm-5.1'
+export const FREEBUFF_MINIMAX_MODEL_ID = 'minimax/minimax-m2.7'
+
 export const FREEBUFF_MODELS = [
   {
-    id: 'z-ai/glm-5.1',
-    displayName: 'GLM 5.1',
-    tagline: 'Smartest',
-  },
-  {
-    id: 'minimax/minimax-m2.7',
+    id: FREEBUFF_MINIMAX_MODEL_ID,
     displayName: 'MiniMax M2.7',
     tagline: 'Fastest',
+    availability: 'always',
+  },
+  {
+    id: FREEBUFF_GLM_MODEL_ID,
+    displayName: 'GLM 5.1',
+    tagline: 'Smartest',
+    availability: 'deployment_hours',
   },
 ] as const satisfies readonly FreebuffModelOption[]
 
 export type FreebuffModelId = (typeof FREEBUFF_MODELS)[number]['id']
 
-export const DEFAULT_FREEBUFF_MODEL_ID: FreebuffModelId = FREEBUFF_MODELS[0].id
+/** What new freebuff users see selected in the picker. May not be currently
+ *  available (GLM is closed outside deployment hours); callers that need an
+ *  always-available id for resolution / auto-fallbacks should use
+ *  FALLBACK_FREEBUFF_MODEL_ID instead. */
+export const DEFAULT_FREEBUFF_MODEL_ID: FreebuffModelId = FREEBUFF_GLM_MODEL_ID
+
+/** Always-available fallback used when the requested model can't be served
+ *  right now (unknown id, deployment hours closed, etc.). Kept distinct from
+ *  DEFAULT_FREEBUFF_MODEL_ID so a new user's "preferred default" can be the
+ *  smartest model without auto-flipping anyone to a closed deployment. */
+export const FALLBACK_FREEBUFF_MODEL_ID: FreebuffModelId =
+  FREEBUFF_MINIMAX_MODEL_ID
 
 export function isFreebuffModelId(
   id: string | null | undefined,
@@ -42,12 +61,58 @@ export function isFreebuffModelId(
 export function resolveFreebuffModel(
   id: string | null | undefined,
 ): FreebuffModelId {
-  return isFreebuffModelId(id) ? id : DEFAULT_FREEBUFF_MODEL_ID
+  return isFreebuffModelId(id) ? id : FALLBACK_FREEBUFF_MODEL_ID
 }
 
 export function getFreebuffModel(id: string): FreebuffModelOption {
   return (
     FREEBUFF_MODELS.find((m) => m.id === id) ??
-    FREEBUFF_MODELS.find((m) => m.id === DEFAULT_FREEBUFF_MODEL_ID)!
+    FREEBUFF_MODELS.find((m) => m.id === FALLBACK_FREEBUFF_MODEL_ID)!
   )
 }
+
+function getZonedParts(
+  date: Date,
+  timeZone: string,
+): { weekday: string; minutes: number } {
+  const parts = new Intl.DateTimeFormat('en-US', {
+    timeZone,
+    weekday: 'short',
+    hour: '2-digit',
+    minute: '2-digit',
+    hourCycle: 'h23',
+  }).formatToParts(date)
+  const value = (type: string) => parts.find((part) => part.type === type)?.value
+  const hour = Number(value('hour') ?? 0)
+  const minute = Number(value('minute') ?? 0)
+  return {
+    weekday: value('weekday') ?? '',
+    minutes: hour * 60 + minute,
+  }
+}
+
+export function isFreebuffDeploymentHours(now: Date = new Date()): boolean {
+  const eastern = getZonedParts(now, 'America/New_York')
+  const pacific = getZonedParts(now, 'America/Los_Angeles')
+  if (eastern.weekday === 'Sat' || eastern.weekday === 'Sun') return false
+  return eastern.minutes >= 9 * 60 && pacific.minutes < 17 * 60
+}
+
+export function isFreebuffModelAvailable(
+  id: string,
+  now: Date = new Date(),
+): boolean {
+  const model = FREEBUFF_MODELS.find((m) => m.id === id)
+  if (!model) return false
+  return model.availability === 'always' || isFreebuffDeploymentHours(now)
+}
+
+export function resolveAvailableFreebuffModel(
+  id: string | null | undefined,
+  now: Date = new Date(),
+): FreebuffModelId {
+  const resolved = resolveFreebuffModel(id)
+  return isFreebuffModelAvailable(resolved, now)
+    ? resolved
+    : FALLBACK_FREEBUFF_MODEL_ID
+}
diff --git a/common/src/templates/initial-agents-dir/types/agent-definition.ts b/common/src/templates/initial-agents-dir/types/agent-definition.ts
index b28a77c31..3608f3631 100644
--- a/common/src/templates/initial-agents-dir/types/agent-definition.ts
+++ b/common/src/templates/initial-agents-dir/types/agent-definition.ts
@@ -423,8 +423,6 @@ export type ModelName =
   // Other open source models
   | 'moonshotai/kimi-k2'
   | 'moonshotai/kimi-k2:nitro'
-  | 'moonshotai/kimi-k2.5'
-  | 'moonshotai/kimi-k2.5:nitro'
   | 'z-ai/glm-5'
   | 'z-ai/glm-5.1'
   | 'z-ai/glm-4.6'
diff --git a/common/src/types/freebuff-session.ts b/common/src/types/freebuff-session.ts
index e42d9f0be..7789c91f2 100644
--- a/common/src/types/freebuff-session.ts
+++ b/common/src/types/freebuff-session.ts
@@ -5,6 +5,22 @@
  *
  * The CLI uses these shapes directly; there are no client-only states.
  */
+
+/**
+ * Per-model usage counter surfaced to the CLI so the waiting-room UI can
+ * render "N of M sessions used" alongside queue/active state. Present when
+ * the joined model has a rate limit applied (today: GLM 5.1 with 5 admits
+ * per 20-hour window). `recentCount` is the number of admissions inside
+ * `windowHours` at the time the response was produced — see also the
+ * standalone `rate_limited` status for the reject path.
+ */
+export interface FreebuffSessionRateLimit {
+  model: string
+  limit: number
+  windowHours: number
+  recentCount: number
+}
+
 export type FreebuffSessionServerResponse =
   | {
       /** Waiting room is globally off; free-mode requests flow through
@@ -38,6 +54,10 @@ export type FreebuffSessionServerResponse =
       queueDepthByModel: Record<string, number>
       estimatedWaitMs: number
       queuedAt: string
+      /** Rate-limit quota for rate-limited models (GLM 5.1 today). Absent
+       *  for unlimited models or when the status was produced outside the
+       *  rate-limit check path (e.g. pure read via GET). */
+      rateLimit?: FreebuffSessionRateLimit
     }
   | {
       status: 'active'
@@ -47,6 +67,10 @@ export type FreebuffSessionServerResponse =
       admittedAt: string
       expiresAt: string
       remainingMs: number
+      /** Rate-limit quota for rate-limited models (GLM 5.1 today). Absent
+       *  for unlimited models or when the status was produced outside the
+       *  rate-limit check path (e.g. pure read via GET). */
+      rateLimit?: FreebuffSessionRateLimit
     }
   | {
       /** Session is over. While `instanceId` is present we're inside the
@@ -92,6 +116,12 @@ export type FreebuffSessionServerResponse =
       currentModel: string
       requestedModel: string
     }
+  | {
+      /** Requested model is valid but not selectable right now. */
+      status: 'model_unavailable'
+      requestedModel: string
+      availableHours: string
+    }
   | {
       /** Account is banned. Returned from every endpoint so banned bots can't
        *  join the queue at all (otherwise they inflate `queueDepth` until the
@@ -99,3 +129,24 @@ export type FreebuffSessionServerResponse =
        *  stops polling and shows a banned message. */
       status: 'banned'
     }
+  | {
+      /** User has used up their per-model admission quota in the rolling
+       *  window (GLM 5.1: 5 one-hour sessions per 20h). Returned from POST
+       *  /session before the user is placed in the queue. `retryAfterMs` is
+       *  the time until the oldest admission inside the window falls off
+       *  and one quota slot opens up — clients should show the user when
+       *  they can try again. Terminal for the CLI's current poll session;
+       *  the user can exit and come back later. */
+      status: 'rate_limited'
+      /** The freebuff model the user tried to join. */
+      model: string
+      /** Max admissions permitted per window (e.g. 5). */
+      limit: number
+      /** Rolling window size in hours (e.g. 20). */
+      windowHours: number
+      /** Admission count inside the window at check time — will be ≥ limit. */
+      recentCount: number
+      /** Milliseconds from now until the oldest admission in the window
+       *  exits and the user regains one quota slot. */
+      retryAfterMs: number
+    }
diff --git a/docs/freebuff-waiting-room.md b/docs/freebuff-waiting-room.md
index b1384d7b6..353bfb046 100644
--- a/docs/freebuff-waiting-room.md
+++ b/docs/freebuff-waiting-room.md
@@ -5,7 +5,7 @@
 The waiting room is the admission control layer for **free-mode** requests against the freebuff Fireworks deployments. It has three jobs:
 
 1. **Drip-admit users per model** — each selectable freebuff model has its own FIFO queue. Admission runs one tick (default `ADMISSION_TICK_MS`, 15s) that tries to admit one user per model, so heavier models can sit cold without starving lighter ones.
-2. **Gate on per-deployment health** — a single fleet probe per tick (`getFleetHealth` in `web/src/server/free-session/fireworks-health.ts`) hits the Fireworks metrics endpoint and classifies each dedicated deployment as `healthy | degraded | unhealthy`. Only models whose deployment is `healthy` admit that tick; a degraded minimax-m2.7 no longer stalls glm-5.1 admissions.
+2. **Gate on per-deployment health and hours** — a single fleet probe per tick (`getFleetHealth` in `web/src/server/free-session/fireworks-health.ts`) hits the Fireworks metrics endpoint and classifies each dedicated deployment as `healthy | degraded | unhealthy`. Only models whose deployment is `healthy` and currently available admit that tick; GLM 5.1 is available during 9am ET-5pm PT on weekdays, while MiniMax M2.7 is serverless and always available.
 3. **One instance per account** — prevent a single user from running N concurrent freebuff CLIs to get N× throughput.
 
 Users who cannot be admitted immediately are placed in the queue for their chosen model and given an estimated wait time. Admitted users get a fixed-length session (default 1h) bound to the model they were admitted on; chat completions use that model for the life of the session.
@@ -149,8 +149,8 @@ The final tick result carries a `queueDepthByModel` map and a single `skipped` r
 | Constant | Location | Default | Purpose |
 |---|---|---|---|
 | `ADMISSION_TICK_MS` | `config.ts` | 15000 | How often the ticker fires. Up to one user is admitted per model per tick. |
-| `FREEBUFF_MODELS` | `common/src/constants/freebuff-models.ts` | `glm-5.1`, `minimax-m2.7` | Selectable models; each gets its own queue and admission slot. |
-| `FIREWORKS_DEPLOYMENT_MAP` | `web/src/llm-api/fireworks-config.ts` | glm-5.1 only | Models with dedicated Fireworks deployments. Models not listed are treated as `healthy` (serverless fallback) — drop this default when they migrate to their own deployments. |
+| `FREEBUFF_MODELS` | `common/src/constants/freebuff-models.ts` | `minimax-m2.7`, `glm-5.1` | Selectable models; each gets its own queue and admission slot. |
+| `FIREWORKS_DEPLOYMENT_MAP` | `web/src/llm-api/fireworks-config.ts` | `glm-5.1` | Models with dedicated Fireworks deployments. Models not listed are treated as `healthy` (serverless fallback) — drop this default when they migrate to their own deployments. |
 | `HEALTH_CACHE_TTL_MS` | `fireworks-health.ts` | 25000 | Fleet probe cache TTL. Sits just under the Fireworks 30s exporter cadence and 6 req/min rate limit. |
 | `FREEBUFF_SESSION_LENGTH_MS` | env | 3_600_000 | Session lifetime |
 | `FREEBUFF_SESSION_GRACE_MS` | env | 1_800_000 | Drain window after expiry — gate still admits requests so an in-flight agent can finish, but the CLI is expected to block new prompts. Hard cutoff at `expires_at + grace`. |
@@ -180,12 +180,12 @@ Response shapes:
 {
   "status": "queued",
   "instanceId": "e47…",
-  "model": "z-ai/glm-5.1",
+  "model": "minimax/minimax-m2.7",
   "position": 17,          // 1-indexed within this model's queue
   "queueDepth": 43,        // size of this model's queue
   "queueDepthByModel": {   // snapshot of every model's queue — powers the
-    "z-ai/glm-5.1": 43,    //  "N ahead" hint in the selector. Missing
-    "minimax/minimax-m2.7": 4  //  entries should be treated as 0.
+    "minimax/minimax-m2.7": 43, //  "N ahead" hint in the selector. Missing
+    "z-ai/glm-5.1": 4   //  entries should be treated as 0.
   },
   "estimatedWaitMs": 384000,
   "queuedAt": "2026-04-17T12:00:00Z"
@@ -195,7 +195,7 @@ Response shapes:
 {
   "status": "active",
   "instanceId": "e47…",
-  "model": "z-ai/glm-5.1",
+  "model": "minimax/minimax-m2.7",
   "admittedAt": "2026-04-17T12:00:00Z",
   "expiresAt":  "2026-04-17T13:00:00Z",
   "remainingMs": 3600000
@@ -219,7 +219,7 @@ Response shapes:
 // to actually switch.
 {
   "status": "model_locked",
-  "currentModel": "z-ai/glm-5.1",
+  "currentModel": "minimax/minimax-m2.7",
   "requestedModel": "minimax/minimax-m2.7"
 }
 ```
@@ -285,7 +285,7 @@ waitMs = (position - 1) * 24_000
 - Position 1 → 0 (next tick admits you)
 - Position 2 → 24s, and so on.
 
-`position` is scoped to this model's queue — a user at position 1 in the `minimax/minimax-m2.7` queue is not affected by the depth of the `z-ai/glm-5.1` queue. The estimate is intentionally decoupled from the admission tick — it's a human-friendly rule-of-thumb for the UI, not a precise projection. Actual wait depends on admission-tick cadence and health-gated pauses (during a per-deployment Fireworks incident only the affected model's queue stalls; healthy models keep draining), so the real wait can be longer or shorter.
+`position` is scoped to this model's queue — a user at position 1 in the `minimax/minimax-m2.7` queue is not affected by the depth of the `z-ai/glm-5.1` queue. The estimate is intentionally decoupled from the admission tick — it's a human-friendly rule-of-thumb for the UI, not a precise projection. Actual wait depends on admission-tick cadence, health-gated pauses, and deployment-hours availability (during a GLM Fireworks incident or outside 9am ET-5pm PT, only GLM's queue stalls; MiniMax keeps draining), so the real wait can be longer or shorter.
 
 ## CLI Integration (frontend-side contract)
 
@@ -324,7 +324,7 @@ The `disabled` response means the server has the waiting room turned off. CLI tr
 | Spamming POST/GET to starve admission tick | Admission uses per-model Postgres advisory locks; DDoS protection is upstream (Next's global rate limits). Consider adding a per-user limiter on `/session` if traffic warrants. |
 | Repeatedly POSTing different models to get across every queue | Single row per user (PK on `user_id`); switching models moves the row, never clones it. A user holds exactly one queue slot at any time. |
 | Fireworks metrics endpoint down / slow | `getFleetHealth()` fails closed (timeout, non-OK, or missing API key) → every dedicated-deployment model is flagged `unhealthy` and its queue pauses. |
-| One deployment degraded while others are fine | Health is classified per-deployment; only the affected model's queue pauses, so a degraded minimax-m2.7 doesn't block glm-5.1 admissions. |
+| One deployment degraded while others are fine | Health is classified per-deployment; only the affected model's queue pauses, so a degraded GLM deployment doesn't block MiniMax admissions. |
 | Zombie expired sessions holding capacity | Swept on every admission tick, even when upstream is unhealthy |
 
 ## Testing
diff --git a/freebuff/cli/release/package.json b/freebuff/cli/release/package.json
index 6426fac98..a597e0852 100644
--- a/freebuff/cli/release/package.json
+++ b/freebuff/cli/release/package.json
@@ -1,6 +1,6 @@
 {
   "name": "freebuff",
-  "version": "0.0.46",
+  "version": "0.0.48",
   "description": "The world's strongest free coding agent",
   "license": "MIT",
   "bin": {
diff --git a/packages/internal/src/db/migrations/0046_cloudy_firedrake.sql b/packages/internal/src/db/migrations/0046_cloudy_firedrake.sql
new file mode 100644
index 000000000..53a24ec98
--- /dev/null
+++ b/packages/internal/src/db/migrations/0046_cloudy_firedrake.sql
@@ -0,0 +1,9 @@
+CREATE TABLE "free_session_admit" (
+	"id" text PRIMARY KEY NOT NULL,
+	"user_id" text NOT NULL,
+	"model" text NOT NULL,
+	"admitted_at" timestamp with time zone DEFAULT now() NOT NULL
+);
+--> statement-breakpoint
+ALTER TABLE "free_session_admit" ADD CONSTRAINT "free_session_admit_user_id_user_id_fk" FOREIGN KEY ("user_id") REFERENCES "public"."user"("id") ON DELETE cascade ON UPDATE no action;--> statement-breakpoint
+CREATE INDEX "idx_free_session_admit_user_model_time" ON "free_session_admit" USING btree ("user_id","model","admitted_at");
\ No newline at end of file
diff --git a/packages/internal/src/db/migrations/meta/0046_snapshot.json b/packages/internal/src/db/migrations/meta/0046_snapshot.json
new file mode 100644
index 000000000..48747dd94
--- /dev/null
+++ b/packages/internal/src/db/migrations/meta/0046_snapshot.json
@@ -0,0 +1,3307 @@
+{
+  "id": "3bf6a16c-2fd6-4c9d-a395-f4ca2c080a3c",
+  "prevId": "76196ef1-2384-4edd-b832-c9ff8085d809",
+  "version": "7",
+  "dialect": "postgresql",
+  "tables": {
+    "public.account": {
+      "name": "account",
+      "schema": "",
+      "columns": {
+        "userId": {
+          "name": "userId",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "type": {
+          "name": "type",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "provider": {
+          "name": "provider",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "providerAccountId": {
+          "name": "providerAccountId",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "refresh_token": {
+          "name": "refresh_token",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "access_token": {
+          "name": "access_token",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "expires_at": {
+          "name": "expires_at",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "token_type": {
+          "name": "token_type",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "scope": {
+          "name": "scope",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "id_token": {
+          "name": "id_token",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "session_state": {
+          "name": "session_state",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        }
+      },
+      "indexes": {},
+      "foreignKeys": {
+        "account_userId_user_id_fk": {
+          "name": "account_userId_user_id_fk",
+          "tableFrom": "account",
+          "tableTo": "user",
+          "columnsFrom": [
+            "userId"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "cascade",
+          "onUpdate": "no action"
+        }
+      },
+      "compositePrimaryKeys": {
+        "account_provider_providerAccountId_pk": {
+          "name": "account_provider_providerAccountId_pk",
+          "columns": [
+            "provider",
+            "providerAccountId"
+          ]
+        }
+      },
+      "uniqueConstraints": {},
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.ad_impression": {
+      "name": "ad_impression",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "text",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "user_id": {
+          "name": "user_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "provider": {
+          "name": "provider",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "'gravity'"
+        },
+        "ad_text": {
+          "name": "ad_text",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "title": {
+          "name": "title",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "cta": {
+          "name": "cta",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "''"
+        },
+        "url": {
+          "name": "url",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "favicon": {
+          "name": "favicon",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "click_url": {
+          "name": "click_url",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "imp_url": {
+          "name": "imp_url",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "extra_pixels": {
+          "name": "extra_pixels",
+          "type": "text[]",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "payout": {
+          "name": "payout",
+          "type": "numeric(10, 6)",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "credits_granted": {
+          "name": "credits_granted",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "grant_operation_id": {
+          "name": "grant_operation_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "served_at": {
+          "name": "served_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        },
+        "impression_fired_at": {
+          "name": "impression_fired_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "clicked_at": {
+          "name": "clicked_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": false
+        }
+      },
+      "indexes": {
+        "idx_ad_impression_user": {
+          "name": "idx_ad_impression_user",
+          "columns": [
+            {
+              "expression": "user_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "served_at",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_ad_impression_imp_url": {
+          "name": "idx_ad_impression_imp_url",
+          "columns": [
+            {
+              "expression": "imp_url",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        }
+      },
+      "foreignKeys": {
+        "ad_impression_user_id_user_id_fk": {
+          "name": "ad_impression_user_id_user_id_fk",
+          "tableFrom": "ad_impression",
+          "tableTo": "user",
+          "columnsFrom": [
+            "user_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "cascade",
+          "onUpdate": "no action"
+        }
+      },
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {
+        "ad_impression_imp_url_unique": {
+          "name": "ad_impression_imp_url_unique",
+          "nullsNotDistinct": false,
+          "columns": [
+            "imp_url"
+          ]
+        }
+      },
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.agent_config": {
+      "name": "agent_config",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "version": {
+          "name": "version",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "publisher_id": {
+          "name": "publisher_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "major": {
+          "name": "major",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": false,
+          "generated": {
+            "as": "CAST(SPLIT_PART(\"agent_config\".\"version\", '.', 1) AS INTEGER)",
+            "type": "stored"
+          }
+        },
+        "minor": {
+          "name": "minor",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": false,
+          "generated": {
+            "as": "CAST(SPLIT_PART(\"agent_config\".\"version\", '.', 2) AS INTEGER)",
+            "type": "stored"
+          }
+        },
+        "patch": {
+          "name": "patch",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": false,
+          "generated": {
+            "as": "CAST(SPLIT_PART(\"agent_config\".\"version\", '.', 3) AS INTEGER)",
+            "type": "stored"
+          }
+        },
+        "data": {
+          "name": "data",
+          "type": "jsonb",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        },
+        "updated_at": {
+          "name": "updated_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        }
+      },
+      "indexes": {
+        "idx_agent_config_publisher": {
+          "name": "idx_agent_config_publisher",
+          "columns": [
+            {
+              "expression": "publisher_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        }
+      },
+      "foreignKeys": {
+        "agent_config_publisher_id_publisher_id_fk": {
+          "name": "agent_config_publisher_id_publisher_id_fk",
+          "tableFrom": "agent_config",
+          "tableTo": "publisher",
+          "columnsFrom": [
+            "publisher_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "no action",
+          "onUpdate": "no action"
+        }
+      },
+      "compositePrimaryKeys": {
+        "agent_config_publisher_id_id_version_pk": {
+          "name": "agent_config_publisher_id_id_version_pk",
+          "columns": [
+            "publisher_id",
+            "id",
+            "version"
+          ]
+        }
+      },
+      "uniqueConstraints": {},
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.agent_run": {
+      "name": "agent_run",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "text",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "user_id": {
+          "name": "user_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "agent_id": {
+          "name": "agent_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "publisher_id": {
+          "name": "publisher_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false,
+          "generated": {
+            "as": "CASE\n             WHEN agent_id ~ '^[^/@]+/[^/@]+@[^/@]+$'\n               THEN split_part(agent_id, '/', 1)\n             ELSE NULL\n           END",
+            "type": "stored"
+          }
+        },
+        "agent_name": {
+          "name": "agent_name",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false,
+          "generated": {
+            "as": "CASE\n             WHEN agent_id ~ '^[^/@]+/[^/@]+@[^/@]+$'\n               THEN split_part(split_part(agent_id, '/', 2), '@', 1)\n             ELSE agent_id\n           END",
+            "type": "stored"
+          }
+        },
+        "agent_version": {
+          "name": "agent_version",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false,
+          "generated": {
+            "as": "CASE\n             WHEN agent_id ~ '^[^/@]+/[^/@]+@[^/@]+$'\n               THEN split_part(agent_id, '@', 2)\n             ELSE NULL\n           END",
+            "type": "stored"
+          }
+        },
+        "ancestor_run_ids": {
+          "name": "ancestor_run_ids",
+          "type": "text[]",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "root_run_id": {
+          "name": "root_run_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false,
+          "generated": {
+            "as": "CASE WHEN array_length(ancestor_run_ids, 1) >= 1 THEN ancestor_run_ids[1] ELSE id END",
+            "type": "stored"
+          }
+        },
+        "parent_run_id": {
+          "name": "parent_run_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false,
+          "generated": {
+            "as": "CASE WHEN array_length(ancestor_run_ids, 1) >= 1 THEN ancestor_run_ids[array_length(ancestor_run_ids, 1)] ELSE NULL END",
+            "type": "stored"
+          }
+        },
+        "depth": {
+          "name": "depth",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": false,
+          "generated": {
+            "as": "COALESCE(array_length(ancestor_run_ids, 1), 1)",
+            "type": "stored"
+          }
+        },
+        "duration_ms": {
+          "name": "duration_ms",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": false,
+          "generated": {
+            "as": "CASE WHEN completed_at IS NOT NULL THEN EXTRACT(EPOCH FROM (completed_at - created_at)) * 1000 ELSE NULL END::integer",
+            "type": "stored"
+          }
+        },
+        "total_steps": {
+          "name": "total_steps",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": false,
+          "default": 0
+        },
+        "direct_credits": {
+          "name": "direct_credits",
+          "type": "numeric(10, 6)",
+          "primaryKey": false,
+          "notNull": false,
+          "default": "'0'"
+        },
+        "total_credits": {
+          "name": "total_credits",
+          "type": "numeric(10, 6)",
+          "primaryKey": false,
+          "notNull": false,
+          "default": "'0'"
+        },
+        "status": {
+          "name": "status",
+          "type": "agent_run_status",
+          "typeSchema": "public",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "'running'"
+        },
+        "error_message": {
+          "name": "error_message",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        },
+        "completed_at": {
+          "name": "completed_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": false
+        }
+      },
+      "indexes": {
+        "idx_agent_run_user_id": {
+          "name": "idx_agent_run_user_id",
+          "columns": [
+            {
+              "expression": "user_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "created_at",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_agent_run_parent": {
+          "name": "idx_agent_run_parent",
+          "columns": [
+            {
+              "expression": "parent_run_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_agent_run_root": {
+          "name": "idx_agent_run_root",
+          "columns": [
+            {
+              "expression": "root_run_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_agent_run_agent_id": {
+          "name": "idx_agent_run_agent_id",
+          "columns": [
+            {
+              "expression": "agent_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "created_at",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_agent_run_publisher": {
+          "name": "idx_agent_run_publisher",
+          "columns": [
+            {
+              "expression": "publisher_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "created_at",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_agent_run_status": {
+          "name": "idx_agent_run_status",
+          "columns": [
+            {
+              "expression": "status",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "where": "\"agent_run\".\"status\" = 'running'",
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_agent_run_ancestors_gin": {
+          "name": "idx_agent_run_ancestors_gin",
+          "columns": [
+            {
+              "expression": "ancestor_run_ids",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "gin",
+          "with": {}
+        },
+        "idx_agent_run_completed_publisher_agent": {
+          "name": "idx_agent_run_completed_publisher_agent",
+          "columns": [
+            {
+              "expression": "publisher_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "agent_name",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "where": "\"agent_run\".\"status\" = 'completed'",
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_agent_run_completed_recent": {
+          "name": "idx_agent_run_completed_recent",
+          "columns": [
+            {
+              "expression": "created_at",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "publisher_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "agent_name",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "where": "\"agent_run\".\"status\" = 'completed'",
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_agent_run_completed_version": {
+          "name": "idx_agent_run_completed_version",
+          "columns": [
+            {
+              "expression": "publisher_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "agent_name",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "agent_version",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "created_at",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "where": "\"agent_run\".\"status\" = 'completed'",
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_agent_run_completed_user": {
+          "name": "idx_agent_run_completed_user",
+          "columns": [
+            {
+              "expression": "user_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "where": "\"agent_run\".\"status\" = 'completed'",
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        }
+      },
+      "foreignKeys": {
+        "agent_run_user_id_user_id_fk": {
+          "name": "agent_run_user_id_user_id_fk",
+          "tableFrom": "agent_run",
+          "tableTo": "user",
+          "columnsFrom": [
+            "user_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "cascade",
+          "onUpdate": "no action"
+        }
+      },
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {},
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.agent_step": {
+      "name": "agent_step",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "text",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "agent_run_id": {
+          "name": "agent_run_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "step_number": {
+          "name": "step_number",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "duration_ms": {
+          "name": "duration_ms",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": false,
+          "generated": {
+            "as": "CASE WHEN completed_at IS NOT NULL THEN EXTRACT(EPOCH FROM (completed_at - created_at)) * 1000 ELSE NULL END::integer",
+            "type": "stored"
+          }
+        },
+        "credits": {
+          "name": "credits",
+          "type": "numeric(10, 6)",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "'0'"
+        },
+        "child_run_ids": {
+          "name": "child_run_ids",
+          "type": "text[]",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "spawned_count": {
+          "name": "spawned_count",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": false,
+          "generated": {
+            "as": "array_length(child_run_ids, 1)",
+            "type": "stored"
+          }
+        },
+        "message_id": {
+          "name": "message_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "status": {
+          "name": "status",
+          "type": "agent_step_status",
+          "typeSchema": "public",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "'completed'"
+        },
+        "error_message": {
+          "name": "error_message",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        },
+        "completed_at": {
+          "name": "completed_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        }
+      },
+      "indexes": {
+        "unique_step_number_per_run": {
+          "name": "unique_step_number_per_run",
+          "columns": [
+            {
+              "expression": "agent_run_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "step_number",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": true,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_agent_step_run_id": {
+          "name": "idx_agent_step_run_id",
+          "columns": [
+            {
+              "expression": "agent_run_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_agent_step_children_gin": {
+          "name": "idx_agent_step_children_gin",
+          "columns": [
+            {
+              "expression": "child_run_ids",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "gin",
+          "with": {}
+        }
+      },
+      "foreignKeys": {
+        "agent_step_agent_run_id_agent_run_id_fk": {
+          "name": "agent_step_agent_run_id_agent_run_id_fk",
+          "tableFrom": "agent_step",
+          "tableTo": "agent_run",
+          "columnsFrom": [
+            "agent_run_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "cascade",
+          "onUpdate": "no action"
+        }
+      },
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {},
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.credit_ledger": {
+      "name": "credit_ledger",
+      "schema": "",
+      "columns": {
+        "operation_id": {
+          "name": "operation_id",
+          "type": "text",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "user_id": {
+          "name": "user_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "principal": {
+          "name": "principal",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "balance": {
+          "name": "balance",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "type": {
+          "name": "type",
+          "type": "grant_type",
+          "typeSchema": "public",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "description": {
+          "name": "description",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "priority": {
+          "name": "priority",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "expires_at": {
+          "name": "expires_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        },
+        "org_id": {
+          "name": "org_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "stripe_subscription_id": {
+          "name": "stripe_subscription_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        }
+      },
+      "indexes": {
+        "idx_credit_ledger_active_balance": {
+          "name": "idx_credit_ledger_active_balance",
+          "columns": [
+            {
+              "expression": "user_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "balance",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "expires_at",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "priority",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "created_at",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "where": "\"credit_ledger\".\"balance\" != 0 AND \"credit_ledger\".\"expires_at\" IS NULL",
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_credit_ledger_org": {
+          "name": "idx_credit_ledger_org",
+          "columns": [
+            {
+              "expression": "org_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_credit_ledger_subscription": {
+          "name": "idx_credit_ledger_subscription",
+          "columns": [
+            {
+              "expression": "user_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "type",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "created_at",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        }
+      },
+      "foreignKeys": {
+        "credit_ledger_user_id_user_id_fk": {
+          "name": "credit_ledger_user_id_user_id_fk",
+          "tableFrom": "credit_ledger",
+          "tableTo": "user",
+          "columnsFrom": [
+            "user_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "cascade",
+          "onUpdate": "no action"
+        },
+        "credit_ledger_org_id_org_id_fk": {
+          "name": "credit_ledger_org_id_org_id_fk",
+          "tableFrom": "credit_ledger",
+          "tableTo": "org",
+          "columnsFrom": [
+            "org_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "cascade",
+          "onUpdate": "no action"
+        }
+      },
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {},
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.encrypted_api_keys": {
+      "name": "encrypted_api_keys",
+      "schema": "",
+      "columns": {
+        "user_id": {
+          "name": "user_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "type": {
+          "name": "type",
+          "type": "api_key_type",
+          "typeSchema": "public",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "api_key": {
+          "name": "api_key",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        }
+      },
+      "indexes": {},
+      "foreignKeys": {
+        "encrypted_api_keys_user_id_user_id_fk": {
+          "name": "encrypted_api_keys_user_id_user_id_fk",
+          "tableFrom": "encrypted_api_keys",
+          "tableTo": "user",
+          "columnsFrom": [
+            "user_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "cascade",
+          "onUpdate": "no action"
+        }
+      },
+      "compositePrimaryKeys": {
+        "encrypted_api_keys_user_id_type_pk": {
+          "name": "encrypted_api_keys_user_id_type_pk",
+          "columns": [
+            "user_id",
+            "type"
+          ]
+        }
+      },
+      "uniqueConstraints": {},
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.fingerprint": {
+      "name": "fingerprint",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "text",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "sig_hash": {
+          "name": "sig_hash",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        }
+      },
+      "indexes": {},
+      "foreignKeys": {},
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {},
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.free_session": {
+      "name": "free_session",
+      "schema": "",
+      "columns": {
+        "user_id": {
+          "name": "user_id",
+          "type": "text",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "status": {
+          "name": "status",
+          "type": "free_session_status",
+          "typeSchema": "public",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "active_instance_id": {
+          "name": "active_instance_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "model": {
+          "name": "model",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "queued_at": {
+          "name": "queued_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        },
+        "admitted_at": {
+          "name": "admitted_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "expires_at": {
+          "name": "expires_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        },
+        "updated_at": {
+          "name": "updated_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        }
+      },
+      "indexes": {
+        "idx_free_session_queue": {
+          "name": "idx_free_session_queue",
+          "columns": [
+            {
+              "expression": "status",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "model",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "queued_at",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_free_session_expiry": {
+          "name": "idx_free_session_expiry",
+          "columns": [
+            {
+              "expression": "expires_at",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        }
+      },
+      "foreignKeys": {
+        "free_session_user_id_user_id_fk": {
+          "name": "free_session_user_id_user_id_fk",
+          "tableFrom": "free_session",
+          "tableTo": "user",
+          "columnsFrom": [
+            "user_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "cascade",
+          "onUpdate": "no action"
+        }
+      },
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {},
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.free_session_admit": {
+      "name": "free_session_admit",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "text",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "user_id": {
+          "name": "user_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "model": {
+          "name": "model",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "admitted_at": {
+          "name": "admitted_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        }
+      },
+      "indexes": {
+        "idx_free_session_admit_user_model_time": {
+          "name": "idx_free_session_admit_user_model_time",
+          "columns": [
+            {
+              "expression": "user_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "model",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "admitted_at",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        }
+      },
+      "foreignKeys": {
+        "free_session_admit_user_id_user_id_fk": {
+          "name": "free_session_admit_user_id_user_id_fk",
+          "tableFrom": "free_session_admit",
+          "tableTo": "user",
+          "columnsFrom": [
+            "user_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "cascade",
+          "onUpdate": "no action"
+        }
+      },
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {},
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.git_eval_results": {
+      "name": "git_eval_results",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "text",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "cost_mode": {
+          "name": "cost_mode",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "reasoner_model": {
+          "name": "reasoner_model",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "agent_model": {
+          "name": "agent_model",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "metadata": {
+          "name": "metadata",
+          "type": "jsonb",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "cost": {
+          "name": "cost",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true,
+          "default": 0
+        },
+        "is_public": {
+          "name": "is_public",
+          "type": "boolean",
+          "primaryKey": false,
+          "notNull": true,
+          "default": false
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        }
+      },
+      "indexes": {},
+      "foreignKeys": {},
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {},
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.limit_override": {
+      "name": "limit_override",
+      "schema": "",
+      "columns": {
+        "user_id": {
+          "name": "user_id",
+          "type": "text",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "credits_per_block": {
+          "name": "credits_per_block",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "block_duration_hours": {
+          "name": "block_duration_hours",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "weekly_credit_limit": {
+          "name": "weekly_credit_limit",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        },
+        "updated_at": {
+          "name": "updated_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        }
+      },
+      "indexes": {},
+      "foreignKeys": {
+        "limit_override_user_id_user_id_fk": {
+          "name": "limit_override_user_id_user_id_fk",
+          "tableFrom": "limit_override",
+          "tableTo": "user",
+          "columnsFrom": [
+            "user_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "cascade",
+          "onUpdate": "no action"
+        }
+      },
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {},
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.message": {
+      "name": "message",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "text",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "finished_at": {
+          "name": "finished_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "client_id": {
+          "name": "client_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "client_request_id": {
+          "name": "client_request_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "model": {
+          "name": "model",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "agent_id": {
+          "name": "agent_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "request": {
+          "name": "request",
+          "type": "jsonb",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "last_message": {
+          "name": "last_message",
+          "type": "jsonb",
+          "primaryKey": false,
+          "notNull": false,
+          "generated": {
+            "as": "\"message\".\"request\" -> -1",
+            "type": "stored"
+          }
+        },
+        "reasoning_text": {
+          "name": "reasoning_text",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "response": {
+          "name": "response",
+          "type": "jsonb",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "input_tokens": {
+          "name": "input_tokens",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true,
+          "default": 0
+        },
+        "cache_creation_input_tokens": {
+          "name": "cache_creation_input_tokens",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "cache_read_input_tokens": {
+          "name": "cache_read_input_tokens",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true,
+          "default": 0
+        },
+        "reasoning_tokens": {
+          "name": "reasoning_tokens",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "output_tokens": {
+          "name": "output_tokens",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "cost": {
+          "name": "cost",
+          "type": "numeric(100, 20)",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "credits": {
+          "name": "credits",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "byok": {
+          "name": "byok",
+          "type": "boolean",
+          "primaryKey": false,
+          "notNull": true,
+          "default": false
+        },
+        "latency_ms": {
+          "name": "latency_ms",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "ttft_ms": {
+          "name": "ttft_ms",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "user_id": {
+          "name": "user_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "org_id": {
+          "name": "org_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "repo_url": {
+          "name": "repo_url",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        }
+      },
+      "indexes": {
+        "message_user_id_idx": {
+          "name": "message_user_id_idx",
+          "columns": [
+            {
+              "expression": "user_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "message_finished_at_user_id_idx": {
+          "name": "message_finished_at_user_id_idx",
+          "columns": [
+            {
+              "expression": "finished_at",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "user_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "message_org_id_idx": {
+          "name": "message_org_id_idx",
+          "columns": [
+            {
+              "expression": "org_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "message_org_id_finished_at_idx": {
+          "name": "message_org_id_finished_at_idx",
+          "columns": [
+            {
+              "expression": "org_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "finished_at",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        }
+      },
+      "foreignKeys": {
+        "message_user_id_user_id_fk": {
+          "name": "message_user_id_user_id_fk",
+          "tableFrom": "message",
+          "tableTo": "user",
+          "columnsFrom": [
+            "user_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "cascade",
+          "onUpdate": "no action"
+        },
+        "message_org_id_org_id_fk": {
+          "name": "message_org_id_org_id_fk",
+          "tableFrom": "message",
+          "tableTo": "org",
+          "columnsFrom": [
+            "org_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "cascade",
+          "onUpdate": "no action"
+        }
+      },
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {},
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.org": {
+      "name": "org",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "text",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "name": {
+          "name": "name",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "slug": {
+          "name": "slug",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "description": {
+          "name": "description",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "owner_id": {
+          "name": "owner_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "stripe_customer_id": {
+          "name": "stripe_customer_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "stripe_subscription_id": {
+          "name": "stripe_subscription_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "current_period_start": {
+          "name": "current_period_start",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "current_period_end": {
+          "name": "current_period_end",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "auto_topup_enabled": {
+          "name": "auto_topup_enabled",
+          "type": "boolean",
+          "primaryKey": false,
+          "notNull": true,
+          "default": false
+        },
+        "auto_topup_threshold": {
+          "name": "auto_topup_threshold",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "auto_topup_amount": {
+          "name": "auto_topup_amount",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "credit_limit": {
+          "name": "credit_limit",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "billing_alerts": {
+          "name": "billing_alerts",
+          "type": "boolean",
+          "primaryKey": false,
+          "notNull": true,
+          "default": true
+        },
+        "usage_alerts": {
+          "name": "usage_alerts",
+          "type": "boolean",
+          "primaryKey": false,
+          "notNull": true,
+          "default": true
+        },
+        "weekly_reports": {
+          "name": "weekly_reports",
+          "type": "boolean",
+          "primaryKey": false,
+          "notNull": true,
+          "default": false
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        },
+        "updated_at": {
+          "name": "updated_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        }
+      },
+      "indexes": {},
+      "foreignKeys": {
+        "org_owner_id_user_id_fk": {
+          "name": "org_owner_id_user_id_fk",
+          "tableFrom": "org",
+          "tableTo": "user",
+          "columnsFrom": [
+            "owner_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "cascade",
+          "onUpdate": "no action"
+        }
+      },
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {
+        "org_slug_unique": {
+          "name": "org_slug_unique",
+          "nullsNotDistinct": false,
+          "columns": [
+            "slug"
+          ]
+        },
+        "org_stripe_customer_id_unique": {
+          "name": "org_stripe_customer_id_unique",
+          "nullsNotDistinct": false,
+          "columns": [
+            "stripe_customer_id"
+          ]
+        }
+      },
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.org_feature": {
+      "name": "org_feature",
+      "schema": "",
+      "columns": {
+        "org_id": {
+          "name": "org_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "feature": {
+          "name": "feature",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "config": {
+          "name": "config",
+          "type": "jsonb",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "is_active": {
+          "name": "is_active",
+          "type": "boolean",
+          "primaryKey": false,
+          "notNull": true,
+          "default": true
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        },
+        "updated_at": {
+          "name": "updated_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        }
+      },
+      "indexes": {
+        "idx_org_feature_active": {
+          "name": "idx_org_feature_active",
+          "columns": [
+            {
+              "expression": "org_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "is_active",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        }
+      },
+      "foreignKeys": {
+        "org_feature_org_id_org_id_fk": {
+          "name": "org_feature_org_id_org_id_fk",
+          "tableFrom": "org_feature",
+          "tableTo": "org",
+          "columnsFrom": [
+            "org_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "cascade",
+          "onUpdate": "no action"
+        }
+      },
+      "compositePrimaryKeys": {
+        "org_feature_org_id_feature_pk": {
+          "name": "org_feature_org_id_feature_pk",
+          "columns": [
+            "org_id",
+            "feature"
+          ]
+        }
+      },
+      "uniqueConstraints": {},
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.org_invite": {
+      "name": "org_invite",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "text",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "org_id": {
+          "name": "org_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "email": {
+          "name": "email",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "role": {
+          "name": "role",
+          "type": "org_role",
+          "typeSchema": "public",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "token": {
+          "name": "token",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "invited_by": {
+          "name": "invited_by",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "expires_at": {
+          "name": "expires_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        },
+        "accepted_at": {
+          "name": "accepted_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "accepted_by": {
+          "name": "accepted_by",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        }
+      },
+      "indexes": {
+        "idx_org_invite_token": {
+          "name": "idx_org_invite_token",
+          "columns": [
+            {
+              "expression": "token",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_org_invite_email": {
+          "name": "idx_org_invite_email",
+          "columns": [
+            {
+              "expression": "org_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "email",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_org_invite_expires": {
+          "name": "idx_org_invite_expires",
+          "columns": [
+            {
+              "expression": "expires_at",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        }
+      },
+      "foreignKeys": {
+        "org_invite_org_id_org_id_fk": {
+          "name": "org_invite_org_id_org_id_fk",
+          "tableFrom": "org_invite",
+          "tableTo": "org",
+          "columnsFrom": [
+            "org_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "cascade",
+          "onUpdate": "no action"
+        },
+        "org_invite_invited_by_user_id_fk": {
+          "name": "org_invite_invited_by_user_id_fk",
+          "tableFrom": "org_invite",
+          "tableTo": "user",
+          "columnsFrom": [
+            "invited_by"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "no action",
+          "onUpdate": "no action"
+        },
+        "org_invite_accepted_by_user_id_fk": {
+          "name": "org_invite_accepted_by_user_id_fk",
+          "tableFrom": "org_invite",
+          "tableTo": "user",
+          "columnsFrom": [
+            "accepted_by"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "no action",
+          "onUpdate": "no action"
+        }
+      },
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {
+        "org_invite_token_unique": {
+          "name": "org_invite_token_unique",
+          "nullsNotDistinct": false,
+          "columns": [
+            "token"
+          ]
+        }
+      },
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.org_member": {
+      "name": "org_member",
+      "schema": "",
+      "columns": {
+        "org_id": {
+          "name": "org_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "user_id": {
+          "name": "user_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "role": {
+          "name": "role",
+          "type": "org_role",
+          "typeSchema": "public",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "joined_at": {
+          "name": "joined_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        }
+      },
+      "indexes": {},
+      "foreignKeys": {
+        "org_member_org_id_org_id_fk": {
+          "name": "org_member_org_id_org_id_fk",
+          "tableFrom": "org_member",
+          "tableTo": "org",
+          "columnsFrom": [
+            "org_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "cascade",
+          "onUpdate": "no action"
+        },
+        "org_member_user_id_user_id_fk": {
+          "name": "org_member_user_id_user_id_fk",
+          "tableFrom": "org_member",
+          "tableTo": "user",
+          "columnsFrom": [
+            "user_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "cascade",
+          "onUpdate": "no action"
+        }
+      },
+      "compositePrimaryKeys": {
+        "org_member_org_id_user_id_pk": {
+          "name": "org_member_org_id_user_id_pk",
+          "columns": [
+            "org_id",
+            "user_id"
+          ]
+        }
+      },
+      "uniqueConstraints": {},
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.org_repo": {
+      "name": "org_repo",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "text",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "org_id": {
+          "name": "org_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "repo_url": {
+          "name": "repo_url",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "repo_name": {
+          "name": "repo_name",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "repo_owner": {
+          "name": "repo_owner",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "approved_by": {
+          "name": "approved_by",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "approved_at": {
+          "name": "approved_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        },
+        "is_active": {
+          "name": "is_active",
+          "type": "boolean",
+          "primaryKey": false,
+          "notNull": true,
+          "default": true
+        }
+      },
+      "indexes": {
+        "idx_org_repo_active": {
+          "name": "idx_org_repo_active",
+          "columns": [
+            {
+              "expression": "org_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "is_active",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_org_repo_unique": {
+          "name": "idx_org_repo_unique",
+          "columns": [
+            {
+              "expression": "org_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "repo_url",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        }
+      },
+      "foreignKeys": {
+        "org_repo_org_id_org_id_fk": {
+          "name": "org_repo_org_id_org_id_fk",
+          "tableFrom": "org_repo",
+          "tableTo": "org",
+          "columnsFrom": [
+            "org_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "cascade",
+          "onUpdate": "no action"
+        },
+        "org_repo_approved_by_user_id_fk": {
+          "name": "org_repo_approved_by_user_id_fk",
+          "tableFrom": "org_repo",
+          "tableTo": "user",
+          "columnsFrom": [
+            "approved_by"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "no action",
+          "onUpdate": "no action"
+        }
+      },
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {},
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.publisher": {
+      "name": "publisher",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "text",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "name": {
+          "name": "name",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "email": {
+          "name": "email",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "verified": {
+          "name": "verified",
+          "type": "boolean",
+          "primaryKey": false,
+          "notNull": true,
+          "default": false
+        },
+        "bio": {
+          "name": "bio",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "avatar_url": {
+          "name": "avatar_url",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "user_id": {
+          "name": "user_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "org_id": {
+          "name": "org_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "created_by": {
+          "name": "created_by",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        },
+        "updated_at": {
+          "name": "updated_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        }
+      },
+      "indexes": {},
+      "foreignKeys": {
+        "publisher_user_id_user_id_fk": {
+          "name": "publisher_user_id_user_id_fk",
+          "tableFrom": "publisher",
+          "tableTo": "user",
+          "columnsFrom": [
+            "user_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "no action",
+          "onUpdate": "no action"
+        },
+        "publisher_org_id_org_id_fk": {
+          "name": "publisher_org_id_org_id_fk",
+          "tableFrom": "publisher",
+          "tableTo": "org",
+          "columnsFrom": [
+            "org_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "no action",
+          "onUpdate": "no action"
+        },
+        "publisher_created_by_user_id_fk": {
+          "name": "publisher_created_by_user_id_fk",
+          "tableFrom": "publisher",
+          "tableTo": "user",
+          "columnsFrom": [
+            "created_by"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "no action",
+          "onUpdate": "no action"
+        }
+      },
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {},
+      "policies": {},
+      "checkConstraints": {
+        "publisher_single_owner": {
+          "name": "publisher_single_owner",
+          "value": "(\"publisher\".\"user_id\" IS NOT NULL AND \"publisher\".\"org_id\" IS NULL) OR\n    (\"publisher\".\"user_id\" IS NULL AND \"publisher\".\"org_id\" IS NOT NULL)"
+        }
+      },
+      "isRLSEnabled": false
+    },
+    "public.referral": {
+      "name": "referral",
+      "schema": "",
+      "columns": {
+        "referrer_id": {
+          "name": "referrer_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "referred_id": {
+          "name": "referred_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "status": {
+          "name": "status",
+          "type": "referral_status",
+          "typeSchema": "public",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "'pending'"
+        },
+        "credits": {
+          "name": "credits",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "is_legacy": {
+          "name": "is_legacy",
+          "type": "boolean",
+          "primaryKey": false,
+          "notNull": true,
+          "default": false
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        },
+        "completed_at": {
+          "name": "completed_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": false
+        }
+      },
+      "indexes": {},
+      "foreignKeys": {
+        "referral_referrer_id_user_id_fk": {
+          "name": "referral_referrer_id_user_id_fk",
+          "tableFrom": "referral",
+          "tableTo": "user",
+          "columnsFrom": [
+            "referrer_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "no action",
+          "onUpdate": "no action"
+        },
+        "referral_referred_id_user_id_fk": {
+          "name": "referral_referred_id_user_id_fk",
+          "tableFrom": "referral",
+          "tableTo": "user",
+          "columnsFrom": [
+            "referred_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "no action",
+          "onUpdate": "no action"
+        }
+      },
+      "compositePrimaryKeys": {
+        "referral_referrer_id_referred_id_pk": {
+          "name": "referral_referrer_id_referred_id_pk",
+          "columns": [
+            "referrer_id",
+            "referred_id"
+          ]
+        }
+      },
+      "uniqueConstraints": {},
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.session": {
+      "name": "session",
+      "schema": "",
+      "columns": {
+        "sessionToken": {
+          "name": "sessionToken",
+          "type": "text",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "userId": {
+          "name": "userId",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "expires": {
+          "name": "expires",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "fingerprint_id": {
+          "name": "fingerprint_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "type": {
+          "name": "type",
+          "type": "session_type",
+          "typeSchema": "public",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "'web'"
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        }
+      },
+      "indexes": {},
+      "foreignKeys": {
+        "session_userId_user_id_fk": {
+          "name": "session_userId_user_id_fk",
+          "tableFrom": "session",
+          "tableTo": "user",
+          "columnsFrom": [
+            "userId"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "cascade",
+          "onUpdate": "no action"
+        },
+        "session_fingerprint_id_fingerprint_id_fk": {
+          "name": "session_fingerprint_id_fingerprint_id_fk",
+          "tableFrom": "session",
+          "tableTo": "fingerprint",
+          "columnsFrom": [
+            "fingerprint_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "no action",
+          "onUpdate": "no action"
+        }
+      },
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {},
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.subscription": {
+      "name": "subscription",
+      "schema": "",
+      "columns": {
+        "stripe_subscription_id": {
+          "name": "stripe_subscription_id",
+          "type": "text",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "stripe_customer_id": {
+          "name": "stripe_customer_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "user_id": {
+          "name": "user_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "stripe_price_id": {
+          "name": "stripe_price_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "tier": {
+          "name": "tier",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "scheduled_tier": {
+          "name": "scheduled_tier",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "status": {
+          "name": "status",
+          "type": "subscription_status",
+          "typeSchema": "public",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "'active'"
+        },
+        "billing_period_start": {
+          "name": "billing_period_start",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "billing_period_end": {
+          "name": "billing_period_end",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "cancel_at_period_end": {
+          "name": "cancel_at_period_end",
+          "type": "boolean",
+          "primaryKey": false,
+          "notNull": true,
+          "default": false
+        },
+        "canceled_at": {
+          "name": "canceled_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        },
+        "updated_at": {
+          "name": "updated_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        }
+      },
+      "indexes": {
+        "idx_subscription_customer": {
+          "name": "idx_subscription_customer",
+          "columns": [
+            {
+              "expression": "stripe_customer_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_subscription_user": {
+          "name": "idx_subscription_user",
+          "columns": [
+            {
+              "expression": "user_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_subscription_status": {
+          "name": "idx_subscription_status",
+          "columns": [
+            {
+              "expression": "status",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "where": "\"subscription\".\"status\" = 'active'",
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        }
+      },
+      "foreignKeys": {
+        "subscription_user_id_user_id_fk": {
+          "name": "subscription_user_id_user_id_fk",
+          "tableFrom": "subscription",
+          "tableTo": "user",
+          "columnsFrom": [
+            "user_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "cascade",
+          "onUpdate": "no action"
+        }
+      },
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {},
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.sync_failure": {
+      "name": "sync_failure",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "text",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "provider": {
+          "name": "provider",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        },
+        "last_attempt_at": {
+          "name": "last_attempt_at",
+          "type": "timestamp with time zone",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        },
+        "retry_count": {
+          "name": "retry_count",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true,
+          "default": 1
+        },
+        "last_error": {
+          "name": "last_error",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        }
+      },
+      "indexes": {
+        "idx_sync_failure_retry": {
+          "name": "idx_sync_failure_retry",
+          "columns": [
+            {
+              "expression": "retry_count",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "last_attempt_at",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "where": "\"sync_failure\".\"retry_count\" < 5",
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        }
+      },
+      "foreignKeys": {},
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {},
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.user": {
+      "name": "user",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "text",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "name": {
+          "name": "name",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "email": {
+          "name": "email",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "password": {
+          "name": "password",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "emailVerified": {
+          "name": "emailVerified",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "image": {
+          "name": "image",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "stripe_customer_id": {
+          "name": "stripe_customer_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "next_quota_reset": {
+          "name": "next_quota_reset",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": false,
+          "default": "now() + INTERVAL '1 month'"
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        },
+        "referral_code": {
+          "name": "referral_code",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false,
+          "default": "'ref-' || gen_random_uuid()"
+        },
+        "referral_limit": {
+          "name": "referral_limit",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true,
+          "default": 5
+        },
+        "discord_id": {
+          "name": "discord_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "handle": {
+          "name": "handle",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "auto_topup_enabled": {
+          "name": "auto_topup_enabled",
+          "type": "boolean",
+          "primaryKey": false,
+          "notNull": true,
+          "default": false
+        },
+        "auto_topup_threshold": {
+          "name": "auto_topup_threshold",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "auto_topup_amount": {
+          "name": "auto_topup_amount",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "banned": {
+          "name": "banned",
+          "type": "boolean",
+          "primaryKey": false,
+          "notNull": true,
+          "default": false
+        },
+        "fallback_to_a_la_carte": {
+          "name": "fallback_to_a_la_carte",
+          "type": "boolean",
+          "primaryKey": false,
+          "notNull": true,
+          "default": false
+        }
+      },
+      "indexes": {},
+      "foreignKeys": {},
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {
+        "user_email_unique": {
+          "name": "user_email_unique",
+          "nullsNotDistinct": false,
+          "columns": [
+            "email"
+          ]
+        },
+        "user_stripe_customer_id_unique": {
+          "name": "user_stripe_customer_id_unique",
+          "nullsNotDistinct": false,
+          "columns": [
+            "stripe_customer_id"
+          ]
+        },
+        "user_referral_code_unique": {
+          "name": "user_referral_code_unique",
+          "nullsNotDistinct": false,
+          "columns": [
+            "referral_code"
+          ]
+        },
+        "user_discord_id_unique": {
+          "name": "user_discord_id_unique",
+          "nullsNotDistinct": false,
+          "columns": [
+            "discord_id"
+          ]
+        },
+        "user_handle_unique": {
+          "name": "user_handle_unique",
+          "nullsNotDistinct": false,
+          "columns": [
+            "handle"
+          ]
+        }
+      },
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.verificationToken": {
+      "name": "verificationToken",
+      "schema": "",
+      "columns": {
+        "identifier": {
+          "name": "identifier",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "token": {
+          "name": "token",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "expires": {
+          "name": "expires",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true
+        }
+      },
+      "indexes": {},
+      "foreignKeys": {},
+      "compositePrimaryKeys": {
+        "verificationToken_identifier_token_pk": {
+          "name": "verificationToken_identifier_token_pk",
+          "columns": [
+            "identifier",
+            "token"
+          ]
+        }
+      },
+      "uniqueConstraints": {},
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    }
+  },
+  "enums": {
+    "public.referral_status": {
+      "name": "referral_status",
+      "schema": "public",
+      "values": [
+        "pending",
+        "completed"
+      ]
+    },
+    "public.agent_run_status": {
+      "name": "agent_run_status",
+      "schema": "public",
+      "values": [
+        "running",
+        "completed",
+        "failed",
+        "cancelled"
+      ]
+    },
+    "public.agent_step_status": {
+      "name": "agent_step_status",
+      "schema": "public",
+      "values": [
+        "running",
+        "completed",
+        "skipped"
+      ]
+    },
+    "public.api_key_type": {
+      "name": "api_key_type",
+      "schema": "public",
+      "values": [
+        "anthropic",
+        "gemini",
+        "openai"
+      ]
+    },
+    "public.free_session_status": {
+      "name": "free_session_status",
+      "schema": "public",
+      "values": [
+        "queued",
+        "active"
+      ]
+    },
+    "public.grant_type": {
+      "name": "grant_type",
+      "schema": "public",
+      "values": [
+        "free",
+        "referral",
+        "referral_legacy",
+        "subscription",
+        "purchase",
+        "admin",
+        "organization",
+        "ad"
+      ]
+    },
+    "public.org_role": {
+      "name": "org_role",
+      "schema": "public",
+      "values": [
+        "owner",
+        "admin",
+        "member"
+      ]
+    },
+    "public.session_type": {
+      "name": "session_type",
+      "schema": "public",
+      "values": [
+        "web",
+        "pat",
+        "cli"
+      ]
+    },
+    "public.subscription_status": {
+      "name": "subscription_status",
+      "schema": "public",
+      "values": [
+        "incomplete",
+        "incomplete_expired",
+        "trialing",
+        "active",
+        "past_due",
+        "canceled",
+        "unpaid",
+        "paused"
+      ]
+    }
+  },
+  "schemas": {},
+  "sequences": {},
+  "roles": {},
+  "policies": {},
+  "views": {},
+  "_meta": {
+    "columns": {},
+    "schemas": {},
+    "tables": {}
+  }
+}
\ No newline at end of file
diff --git a/packages/internal/src/db/migrations/meta/_journal.json b/packages/internal/src/db/migrations/meta/_journal.json
index f67ef37dc..78747c831 100644
--- a/packages/internal/src/db/migrations/meta/_journal.json
+++ b/packages/internal/src/db/migrations/meta/_journal.json
@@ -323,6 +323,13 @@
       "when": 1776813242936,
       "tag": "0045_mean_sleeper",
       "breakpoints": true
+    },
+    {
+      "idx": 46,
+      "version": "7",
+      "when": 1776898844362,
+      "tag": "0046_cloudy_firedrake",
+      "breakpoints": true
     }
   ]
 }
\ No newline at end of file
diff --git a/packages/internal/src/db/schema.ts b/packages/internal/src/db/schema.ts
index b6f170d29..2ead1fc6d 100644
--- a/packages/internal/src/db/schema.ts
+++ b/packages/internal/src/db/schema.ts
@@ -870,3 +870,37 @@ export const freeSession = pgTable(
     index('idx_free_session_expiry').on(table.expires_at),
   ],
 )
+
+/**
+ * Audit log of every admission — one row per queued→active transition. Used
+ * to rate-limit heavy users (e.g. no more than 5 GLM sessions per 20h).
+ *
+ * Separate from `free_session` because that table is one-row-per-user (state,
+ * not history); the UPSERT path there would otherwise destroy prior admissions.
+ */
+export const freeSessionAdmit = pgTable(
+  'free_session_admit',
+  {
+    id: text('id')
+      .primaryKey()
+      .$defaultFn(() => crypto.randomUUID()),
+    user_id: text('user_id')
+      .notNull()
+      .references(() => user.id, { onDelete: 'cascade' }),
+    model: text('model').notNull(),
+    admitted_at: timestamp('admitted_at', {
+      mode: 'date',
+      withTimezone: true,
+    })
+      .notNull()
+      .defaultNow(),
+  },
+  (table) => [
+    // Rate-limit lookup: WHERE user_id=$1 AND model=$2 AND admitted_at > $cutoff
+    index('idx_free_session_admit_user_model_time').on(
+      table.user_id,
+      table.model,
+      table.admitted_at,
+    ),
+  ],
+)
diff --git a/scripts/test-fireworks-cache-intervals.ts b/scripts/test-fireworks-cache-intervals.ts
index 0ed71193f..8d4e86740 100644
--- a/scripts/test-fireworks-cache-intervals.ts
+++ b/scripts/test-fireworks-cache-intervals.ts
@@ -13,7 +13,6 @@
  *
  * Models:
  *   glm-5.1   (default) — z-ai/glm-5.1
- *   kimi-k2.5           — moonshotai/kimi-k2.5
  *   minimax             — minimax/minimax-m2.5
  *
  * Flags:
@@ -39,7 +38,7 @@ const FIREWORKS_BASE_URL = 'https://api.fireworks.ai/inference/v1'
 type ModelConfig = {
   id: string
   standardModel: string
-  deploymentModel: string
+  deploymentModel?: string
   inputCostPerToken: number
   cachedInputCostPerToken: number
   outputCostPerToken: number
@@ -54,14 +53,6 @@ const MODEL_CONFIGS: Record<string, ModelConfig> = {
     cachedInputCostPerToken: 0.26 / 1_000_000,
     outputCostPerToken: 4.4 / 1_000_000,
   },
-  'kimi-k2.5': {
-    id: 'moonshotai/kimi-k2.5',
-    standardModel: 'accounts/fireworks/models/kimi-k2p5',
-    deploymentModel: 'accounts/james-65d217/deployments/mx8l5rq2',
-    inputCostPerToken: 0.6 / 1_000_000,
-    cachedInputCostPerToken: 0.1 / 1_000_000,
-    outputCostPerToken: 3.0 / 1_000_000,
-  },
   minimax: {
     id: 'minimax/minimax-m2.5',
     standardModel: 'accounts/fireworks/models/minimax-m2p5',
@@ -117,8 +108,12 @@ function parseArgs(): {
 const { modelKey, useDeployment: USE_DEPLOYMENT, intervals: INTERVALS_SEC } =
   parseArgs()
 const MODEL = MODEL_CONFIGS[modelKey]
+if (USE_DEPLOYMENT && !MODEL.deploymentModel) {
+  console.error(`❌ No custom deployment configured for ${MODEL.id}`)
+  process.exit(1)
+}
 const FIREWORKS_MODEL = USE_DEPLOYMENT
-  ? MODEL.deploymentModel
+  ? MODEL.deploymentModel!
   : MODEL.standardModel
 const INPUT_COST_PER_TOKEN = MODEL.inputCostPerToken
 const CACHED_INPUT_COST_PER_TOKEN = MODEL.cachedInputCostPerToken
diff --git a/scripts/test-fireworks-long.ts b/scripts/test-fireworks-long.ts
index 67028228d..a1e4950f8 100644
--- a/scripts/test-fireworks-long.ts
+++ b/scripts/test-fireworks-long.ts
@@ -12,12 +12,17 @@
  * Models:
  *   glm-5.1   (default) — z-ai/glm-5.1
  *   minimax             — minimax/minimax-m2.5
+ *   minimax-m2.7        — minimax/minimax-m2.7
  *
  * Flags:
  *   --deployment   Use custom deployment instead of serverless (standard API)
  *                  Serverless is the default
+ * Examples:
+ *   bun scripts/test-fireworks-long.ts glm-5.1 --deployment
  */
 
+import { FIREWORKS_DEPLOYMENT_MAP } from '../web/src/llm-api/fireworks-config'
+
 export { }
 
 const FIREWORKS_BASE_URL = 'https://api.fireworks.ai/inference/v1'
@@ -25,7 +30,7 @@ const FIREWORKS_BASE_URL = 'https://api.fireworks.ai/inference/v1'
 type ModelConfig = {
   id: string              // OpenRouter-style ID (for display)
   standardModel: string  // Fireworks standard API model ID
-  deploymentModel: string // Fireworks custom deployment model ID
+  deploymentModel?: string // Fireworks custom deployment model ID
   inputCostPerToken: number
   cachedInputCostPerToken: number
   outputCostPerToken: number
@@ -35,19 +40,11 @@ const MODEL_CONFIGS: Record<string, ModelConfig> = {
   'glm-5.1': {
     id: 'z-ai/glm-5.1',
     standardModel: 'accounts/fireworks/models/glm-5p1',
-    deploymentModel: 'accounts/james-65d217/deployments/mjb4i7ea',
+    deploymentModel: FIREWORKS_DEPLOYMENT_MAP['z-ai/glm-5.1'],
     inputCostPerToken: 1.40 / 1_000_000,
     cachedInputCostPerToken: 0.26 / 1_000_000,
     outputCostPerToken: 4.40 / 1_000_000,
   },
-  'kimi-k2.5': {
-    id: 'moonshotai/kimi-k2.5',
-    standardModel: 'accounts/fireworks/models/kimi-k2p5',
-    deploymentModel: 'accounts/james-65d217/deployments/mx8l5rq2',
-    inputCostPerToken: 0.60 / 1_000_000,
-    cachedInputCostPerToken: 0.10 / 1_000_000,
-    outputCostPerToken: 3.00 / 1_000_000,
-  },
   minimax: {
     id: 'minimax/minimax-m2.5',
     standardModel: 'accounts/fireworks/models/minimax-m2p5',
@@ -67,9 +64,16 @@ const MODEL_CONFIGS: Record<string, ModelConfig> = {
 }
 
 const DEFAULT_MODEL = 'glm-5.1'
+const MODEL_ALIASES: Record<string, keyof typeof MODEL_CONFIGS> = {
+  glm: 'glm-5.1',
+  'z-ai/glm-5.1': 'glm-5.1',
+  'minimax/minimax-m2.5': 'minimax',
+  'minimax/minimax-m2.7': 'minimax-m2.7',
+}
 
 function getModelConfig(modelArg?: string): ModelConfig {
-  const key = modelArg ?? DEFAULT_MODEL
+  const rawKey = modelArg ?? DEFAULT_MODEL
+  const key = MODEL_ALIASES[rawKey] ?? rawKey
   const config = MODEL_CONFIGS[key]
   if (!config) {
     console.error(`❌ Unknown model: "${key}". Available models: ${Object.keys(MODEL_CONFIGS).join(', ')}`)
@@ -83,7 +87,11 @@ const modelArg = process.argv.find((a, i) => i > 1 && !a.startsWith('-') && a !=
 const MODEL = getModelConfig(modelArg)
 
 // Default to serverless (standard API); use --deployment for custom deployment
-const FIREWORKS_MODEL = USE_DEPLOYMENT ? MODEL.deploymentModel : MODEL.standardModel
+if (USE_DEPLOYMENT && !MODEL.deploymentModel) {
+  console.error(`❌ No custom deployment configured for ${MODEL.id}`)
+  process.exit(1)
+}
+const FIREWORKS_MODEL = USE_DEPLOYMENT ? MODEL.deploymentModel! : MODEL.standardModel
 const INPUT_COST_PER_TOKEN = MODEL.inputCostPerToken
 const CACHED_INPUT_COST_PER_TOKEN = MODEL.cachedInputCostPerToken
 const OUTPUT_COST_PER_TOKEN = MODEL.outputCostPerToken
@@ -455,4 +463,4 @@ async function main() {
   console.log('Done!')
 }
 
-main()
\ No newline at end of file
+main()
diff --git a/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts b/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts
index 51a3eb46b..1aac8800c 100644
--- a/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts
+++ b/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts
@@ -1,6 +1,7 @@
 import { afterEach, beforeEach, describe, expect, mock, it } from 'bun:test'
 import { NextRequest } from 'next/server'
 
+import { isFreebuffDeploymentHours } from '@codebuff/common/constants/freebuff-models'
 import { formatQuotaResetCountdown, postChatCompletions } from '../_post'
 
 import type { TrackEventFn } from '@codebuff/common/types/contracts/analytics'
@@ -528,7 +529,7 @@ describe('/api/v1/chat/completions POST endpoint', () => {
           method: 'POST',
           headers: { Authorization: 'Bearer test-api-key-new-free' },
           body: JSON.stringify({
-            model: 'z-ai/glm-5.1',
+            model: 'minimax/minimax-m2.7',
             stream: false,
             codebuff_metadata: {
               run_id: 'run-free',
@@ -555,6 +556,76 @@ describe('/api/v1/chat/completions POST endpoint', () => {
       expect(response.status).toBe(200)
     })
 
+    it('lets freebuff use GLM 5.1 through Fireworks availability rules', async () => {
+      const fetchedBodies: Record<string, unknown>[] = []
+      const fetchViaFireworks = mock(
+        async (_url: string | URL | Request, init?: RequestInit) => {
+          fetchedBodies.push(JSON.parse(init?.body as string))
+          return new Response(
+            JSON.stringify({
+              id: 'test-id',
+              model: 'accounts/james-65d217/deployments/mjb4i7ea',
+              choices: [{ message: { content: 'test response' } }],
+              usage: {
+                prompt_tokens: 10,
+                completion_tokens: 20,
+                total_tokens: 30,
+              },
+            }),
+            {
+              status: 200,
+              headers: { 'Content-Type': 'application/json' },
+            },
+          )
+        },
+      ) as unknown as typeof globalThis.fetch
+
+      const req = new NextRequest(
+        'http://localhost:3000/api/v1/chat/completions',
+        {
+          method: 'POST',
+          headers: { Authorization: 'Bearer test-api-key-new-free' },
+          body: JSON.stringify({
+            model: 'z-ai/glm-5.1',
+            stream: false,
+            codebuff_metadata: {
+              run_id: 'run-free',
+              client_id: 'test-client-id-123',
+              cost_mode: 'free',
+            },
+          }),
+        },
+      )
+
+      const response = await postChatCompletions({
+        req,
+        getUserInfoFromApiKey: mockGetUserInfoFromApiKey,
+        logger: mockLogger,
+        trackEvent: mockTrackEvent,
+        getUserUsageData: mockGetUserUsageData,
+        getAgentRunFromId: mockGetAgentRunFromId,
+        fetch: fetchViaFireworks,
+        insertMessageBigquery: mockInsertMessageBigquery,
+        loggerWithContext: mockLoggerWithContext,
+        checkSessionAdmissible: mockCheckSessionAdmissibleAllow,
+      })
+
+      const body = await response.json()
+      if (isFreebuffDeploymentHours()) {
+        expect(response.status).toBe(200)
+        expect(fetchedBodies).toHaveLength(1)
+        expect(fetchedBodies[0].model).toBe(
+          'accounts/james-65d217/deployments/mjb4i7ea',
+        )
+        expect(body.model).toBe('z-ai/glm-5.1')
+        expect(body.provider).toBe('Fireworks')
+      } else {
+        expect(response.status).toBe(503)
+        expect(fetchedBodies).toHaveLength(0)
+        expect(body.error.code).toBe('DEPLOYMENT_OUTSIDE_HOURS')
+      }
+    })
+
     it('skips credit check when in FREE mode even with 0 credits', async () => {
       const req = new NextRequest(
         'http://localhost:3000/api/v1/chat/completions',
@@ -562,7 +633,7 @@ describe('/api/v1/chat/completions POST endpoint', () => {
           method: 'POST',
           headers: { Authorization: 'Bearer test-api-key-no-credits' },
           body: JSON.stringify({
-            model: 'z-ai/glm-5.1',
+            model: 'minimax/minimax-m2.7',
             stream: false,
             codebuff_metadata: {
               run_id: 'run-free',
@@ -671,7 +742,7 @@ describe('/api/v1/chat/completions POST endpoint', () => {
           method: 'POST',
           headers: { Authorization: 'Bearer test-api-key-new-free' },
           body: JSON.stringify({
-            model: 'z-ai/glm-5.1',
+            model: 'minimax/minimax-m2.7',
             stream: true,
             codebuff_metadata: {
               run_id: 'run-123',
@@ -853,7 +924,7 @@ describe('/api/v1/chat/completions POST endpoint', () => {
           method: 'POST',
           headers: { Authorization: 'Bearer test-api-key-123' },
           body: JSON.stringify({
-            model: 'z-ai/glm-5.1',
+            model: 'minimax/minimax-m2.7',
             stream: false,
             codebuff_metadata: {
               run_id: 'run-free',
diff --git a/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts b/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts
index 657c17f6d..e4675e488 100644
--- a/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts
+++ b/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts
@@ -3,6 +3,7 @@ import { describe, expect, test } from 'bun:test'
 import {
   deleteFreebuffSession,
   FREEBUFF_INSTANCE_HEADER,
+  FREEBUFF_MODEL_HEADER,
   getFreebuffSession,
   postFreebuffSession,
 } from '../_handlers'
@@ -12,16 +13,17 @@ import type { SessionDeps } from '@/server/free-session/public-api'
 import type { InternalSessionRow } from '@/server/free-session/types'
 import type { NextRequest } from 'next/server'
 
-const DEFAULT_MODEL = 'z-ai/glm-5.1'
+const DEFAULT_MODEL = 'minimax/minimax-m2.7'
 
 function makeReq(
   apiKey: string | null,
-  opts: { instanceId?: string; cfCountry?: string } = {},
+  opts: { instanceId?: string; cfCountry?: string; model?: string } = {},
 ): NextRequest {
   const headers = new Headers()
   if (apiKey) headers.set('Authorization', `Bearer ${apiKey}`)
   if (opts.instanceId) headers.set(FREEBUFF_INSTANCE_HEADER, opts.instanceId)
   if (opts.cfCountry) headers.set('cf-ipcountry', opts.cfCountry)
+  if (opts.model) headers.set(FREEBUFF_MODEL_HEADER, opts.model)
   return {
     headers,
   } as unknown as NextRequest
@@ -44,6 +46,9 @@ function makeSessionDeps(overrides: Partial<SessionDeps> = {}): SessionDeps & {
     getInstantAdmitCapacity: () => 0,
     activeCountForModel: async () => 0,
     promoteQueuedUser: async () => null,
+    // No admits in handler tests — the rate-limit check reads empty and
+    // every request falls through to the queue.
+    listRecentAdmits: async () => [],
     now: () => now,
     getSessionRow: async (userId) => rows.get(userId) ?? null,
     queueDepthsByModel: async () => {
@@ -153,6 +158,19 @@ describe('POST /api/v1/freebuff/session', () => {
     expect(body.status).toBe('queued')
   })
 
+  test('returns model_unavailable for GLM outside deployment hours', async () => {
+    const sessionDeps = makeSessionDeps()
+    const resp = await postFreebuffSession(
+      makeReq('ok', { model: 'z-ai/glm-5.1' }),
+      makeDeps(sessionDeps, 'u1'),
+    )
+    expect(resp.status).toBe(409)
+    const body = await resp.json()
+    expect(body.status).toBe('model_unavailable')
+    expect(body.availableHours).toBe('9am ET-5pm PT')
+    expect(sessionDeps.rows.size).toBe(0)
+  })
+
   // Banned bots with valid API keys were POSTing every few seconds and
   // inflating queueDepth between the 15s admission-tick sweeps. Rejecting at
   // the HTTP layer with 403 (terminal, like country_blocked) keeps them out
diff --git a/web/src/app/api/v1/freebuff/session/_handlers.ts b/web/src/app/api/v1/freebuff/session/_handlers.ts
index ec17568a3..9a2d61899 100644
--- a/web/src/app/api/v1/freebuff/session/_handlers.ts
+++ b/web/src/app/api/v1/freebuff/session/_handlers.ts
@@ -138,12 +138,21 @@ export async function postFreebuffSession(
       model: requestedModel,
       deps: deps.sessionDeps,
     })
-    // model_locked is a 409 so it's distinguishable from a normal queued/active
-    // response on the client. banned is a 403 (terminal, mirrors country_blocked)
-    // so older CLIs that don't know the status fall into their `!resp.ok` error
-    // path and back off instead of tight-polling on the unrecognized 200 body.
+    // model_locked / model_unavailable are 409 so they're distinguishable
+    // from normal queued/active responses on the client. banned is a 403
+    // (terminal, mirrors country_blocked) so older CLIs that don't know the
+    // status fall into their `!resp.ok` error path and back off instead of
+    // tight-polling on the unrecognized 200 body. rate_limited uses 429 for
+    // the same reason as banned — older CLIs back off, newer CLIs parse the
+    // structured body.
     const status =
-      state.status === 'model_locked' ? 409 : state.status === 'banned' ? 403 : 200
+      state.status === 'model_locked' || state.status === 'model_unavailable'
+        ? 409
+        : state.status === 'banned'
+          ? 403
+          : state.status === 'rate_limited'
+            ? 429
+            : 200
     return NextResponse.json(state, { status })
   } catch (error) {
     return serverError(deps, 'POST', auth.userId, error)
diff --git a/web/src/llm-api/__tests__/fireworks-deployment.test.ts b/web/src/llm-api/__tests__/fireworks-deployment.test.ts
index 9ed91fd0a..be17a6e2e 100644
--- a/web/src/llm-api/__tests__/fireworks-deployment.test.ts
+++ b/web/src/llm-api/__tests__/fireworks-deployment.test.ts
@@ -3,7 +3,7 @@ import { afterEach, beforeEach, describe, expect, it, mock } from 'bun:test'
 import {
   createFireworksRequestWithFallback,
   DEPLOYMENT_COOLDOWN_MS,
-  FireworksError,
+  isDeploymentHours,
   isDeploymentCoolingDown,
   markDeploymentScalingUp,
   resetDeploymentCooldown,
@@ -13,6 +13,11 @@ import type { Logger } from '@codebuff/common/types/contracts/logger'
 
 const STANDARD_MODEL_ID = 'accounts/fireworks/models/glm-5p1'
 const DEPLOYMENT_MODEL_ID = 'accounts/james-65d217/deployments/mjb4i7ea'
+const IN_DEPLOYMENT_HOURS = new Date('2026-04-17T16:00:00Z') // Friday, 12pm ET / 9am PT
+const BEFORE_DEPLOYMENT_HOURS = new Date('2026-04-17T12:59:00Z') // Friday, 8:59am ET
+const AFTER_DEPLOYMENT_HOURS = new Date('2026-04-18T00:00:00Z') // Friday, 5pm PT
+const WEEKDAY_AFTER_DEPLOYMENT_HOURS = new Date('2026-04-21T00:01:00Z') // Monday, 5:01pm PT
+const WEEKEND_DEPLOYMENT_HOURS = new Date('2026-04-18T16:00:00Z') // Saturday
 
 function createMockLogger(): Logger {
   return {
@@ -23,18 +28,20 @@ function createMockLogger(): Logger {
   }
 }
 
-// Helper: create a Date at a specific ET hour using a known EDT date (June 2025, UTC-4)
-function dateAtEtHour(hour: number): Date {
-  // June 15, 2025 is EDT (UTC-4), so ET hour H = UTC hour H+4
-  const utcHour = hour + 4
-  if (utcHour < 24) {
-    return new Date(`2025-06-15T${String(utcHour).padStart(2, '0')}:30:00Z`)
-  }
-  // Wraps to next day
-  return new Date(`2025-06-16T${String(utcHour - 24).padStart(2, '0')}:30:00Z`)
-}
-
 describe('Fireworks deployment routing', () => {
+  describe('deployment hours', () => {
+    it('is active from 9am ET until before 5pm PT on weekdays', () => {
+      expect(isDeploymentHours(BEFORE_DEPLOYMENT_HOURS)).toBe(false)
+      expect(isDeploymentHours(IN_DEPLOYMENT_HOURS)).toBe(true)
+      expect(isDeploymentHours(AFTER_DEPLOYMENT_HOURS)).toBe(false)
+      expect(isDeploymentHours(WEEKDAY_AFTER_DEPLOYMENT_HOURS)).toBe(false)
+    })
+
+    it('is inactive on weekends', () => {
+      expect(isDeploymentHours(WEEKEND_DEPLOYMENT_HOURS)).toBe(false)
+    })
+  })
+
   describe('deployment cooldown', () => {
     beforeEach(() => {
       resetDeploymentCooldown()
@@ -81,27 +88,9 @@ describe('Fireworks deployment routing', () => {
       model: 'z-ai/glm-5.1',
       messages: [{ role: 'user' as const, content: 'test' }],
     }
-
-    function spyDeploymentHours(inHours: boolean) {
-      // Control isDeploymentHours by mocking Date.prototype.toLocaleString
-      // When called with the ET timezone options, return an hour inside or outside the window
-      const original = Date.prototype.toLocaleString
-      const spy = {
-        restore: () => {
-          Date.prototype.toLocaleString = original
-        },
-      }
-      Date.prototype.toLocaleString = function (
-        this: Date,
-        ...args: Parameters<Date['toLocaleString']>
-      ) {
-        const options = args[1] as Intl.DateTimeFormatOptions | undefined
-        if (options?.timeZone === 'America/New_York' && options?.hour === 'numeric') {
-          return inHours ? '14' : '3'
-        }
-        return original.apply(this, args)
-      }
-      return spy
+    const liteBody = {
+      ...minimalBody,
+      codebuff_metadata: { cost_mode: 'lite' },
     }
 
     it('uses standard API when custom deployment is disabled', async () => {
@@ -128,7 +117,6 @@ describe('Fireworks deployment routing', () => {
     })
 
     it('tries custom deployment during deployment hours', async () => {
-      const spy = spyDeploymentHours(true)
       const fetchCalls: string[] = []
 
       const mockFetch = mock(async (_url: string | URL | Request, init?: RequestInit) => {
@@ -137,160 +125,115 @@ describe('Fireworks deployment routing', () => {
         return new Response(JSON.stringify({ ok: true }), { status: 200 })
       }) as unknown as typeof globalThis.fetch
 
-      try {
-        const response = await createFireworksRequestWithFallback({
-          body: minimalBody as never,
-          originalModel: 'z-ai/glm-5.1',
-          fetch: mockFetch,
-          logger,
-          useCustomDeployment: true,
-          sessionId: 'test-user-id',
-        })
-
-        expect(response.status).toBe(200)
-        expect(fetchCalls).toHaveLength(1)
-        expect(fetchCalls[0]).toBe(DEPLOYMENT_MODEL_ID)
-      } finally {
-        spy.restore()
-      }
+      const response = await createFireworksRequestWithFallback({
+        body: minimalBody as never,
+        originalModel: 'z-ai/glm-5.1',
+        fetch: mockFetch,
+        logger,
+        useCustomDeployment: true,
+        sessionId: 'test-user-id',
+        now: IN_DEPLOYMENT_HOURS,
+      })
+
+      expect(response.status).toBe(200)
+      expect(fetchCalls).toHaveLength(1)
+      expect(fetchCalls[0]).toBe(DEPLOYMENT_MODEL_ID)
     })
 
-    it('falls back to standard API on 503 DEPLOYMENT_SCALING_UP', async () => {
-      const spy = spyDeploymentHours(true)
+    it('returns deployment 503 on DEPLOYMENT_SCALING_UP without serverless fallback', async () => {
       const fetchCalls: string[] = []
-      let callCount = 0
 
       const mockFetch = mock(async (_url: string | URL | Request, init?: RequestInit) => {
         const body = JSON.parse(init?.body as string)
         fetchCalls.push(body.model)
-        callCount++
-
-        if (callCount === 1) {
-          return new Response(
-            JSON.stringify({
-              error: {
-                message: 'Deployment is currently scaled to zero and is scaling up. Please retry your request in a few minutes.',
-                code: 'DEPLOYMENT_SCALING_UP',
-                type: 'error',
-              },
-            }),
-            { status: 503, statusText: 'Service Unavailable' },
-          )
-        }
-
-        return new Response(JSON.stringify({ ok: true }), { status: 200 })
+        return new Response(
+          JSON.stringify({
+            error: {
+              message: 'Deployment is currently scaled to zero and is scaling up. Please retry your request in a few minutes.',
+              code: 'DEPLOYMENT_SCALING_UP',
+              type: 'error',
+            },
+          }),
+          { status: 503, statusText: 'Service Unavailable' },
+        )
       }) as unknown as typeof globalThis.fetch
 
-      try {
-        const response = await createFireworksRequestWithFallback({
-          body: minimalBody as never,
-          originalModel: 'z-ai/glm-5.1',
-          fetch: mockFetch,
-          logger,
-          useCustomDeployment: true,
-          sessionId: 'test-user-id',
-        })
-
-        expect(response.status).toBe(200)
-        expect(fetchCalls).toHaveLength(2)
-        expect(fetchCalls[0]).toBe(DEPLOYMENT_MODEL_ID)
-        expect(fetchCalls[1]).toBe(STANDARD_MODEL_ID)
-        // Verify cooldown was activated
-        expect(isDeploymentCoolingDown()).toBe(true)
-      } finally {
-        spy.restore()
-      }
+      const response = await createFireworksRequestWithFallback({
+        body: minimalBody as never,
+        originalModel: 'z-ai/glm-5.1',
+        fetch: mockFetch,
+        logger,
+        useCustomDeployment: true,
+        sessionId: 'test-user-id',
+        now: IN_DEPLOYMENT_HOURS,
+      })
+
+      expect(response.status).toBe(503)
+      expect(fetchCalls).toEqual([DEPLOYMENT_MODEL_ID])
+      expect(isDeploymentCoolingDown()).toBe(true)
     })
 
-    it('falls back to standard API on non-scaling 503 from deployment', async () => {
-      const spy = spyDeploymentHours(true)
+    it('returns non-scaling deployment 503 without serverless fallback', async () => {
       const fetchCalls: string[] = []
-      let callCount = 0
 
       const mockFetch = mock(async (_url: string | URL | Request, init?: RequestInit) => {
         const body = JSON.parse(init?.body as string)
         fetchCalls.push(body.model)
-        callCount++
-
-        if (callCount === 1) {
-          return new Response(
-            JSON.stringify({
-              error: {
-                message: 'Service temporarily unavailable',
-                code: 'SERVICE_UNAVAILABLE',
-                type: 'error',
-              },
-            }),
-            { status: 503, statusText: 'Service Unavailable' },
-          )
-        }
-
-        return new Response(JSON.stringify({ ok: true }), { status: 200 })
+        return new Response(
+          JSON.stringify({
+            error: {
+              message: 'Service temporarily unavailable',
+              code: 'SERVICE_UNAVAILABLE',
+              type: 'error',
+            },
+          }),
+          { status: 503, statusText: 'Service Unavailable' },
+        )
       }) as unknown as typeof globalThis.fetch
 
-      try {
-        const response = await createFireworksRequestWithFallback({
-          body: minimalBody as never,
-          originalModel: 'z-ai/glm-5.1',
-          fetch: mockFetch,
-          logger,
-          useCustomDeployment: true,
-          sessionId: 'test-user-id',
-        })
-
-        expect(response.status).toBe(200)
-        expect(fetchCalls).toHaveLength(2)
-        expect(fetchCalls[0]).toBe(DEPLOYMENT_MODEL_ID)
-        expect(fetchCalls[1]).toBe(STANDARD_MODEL_ID)
-        // Non-scaling 503 should NOT activate the cooldown
-        expect(isDeploymentCoolingDown()).toBe(false)
-      } finally {
-        spy.restore()
-      }
+      const response = await createFireworksRequestWithFallback({
+        body: minimalBody as never,
+        originalModel: 'z-ai/glm-5.1',
+        fetch: mockFetch,
+        logger,
+        useCustomDeployment: true,
+        sessionId: 'test-user-id',
+        now: IN_DEPLOYMENT_HOURS,
+      })
+
+      expect(response.status).toBe(503)
+      expect(fetchCalls).toEqual([DEPLOYMENT_MODEL_ID])
+      expect(isDeploymentCoolingDown()).toBe(false)
     })
 
-    it('falls back to standard API on 500 Internal Error from deployment', async () => {
-      const spy = spyDeploymentHours(true)
+    it('returns 500 Internal Error from deployment without serverless fallback', async () => {
       const fetchCalls: string[] = []
-      let callCount = 0
 
       const mockFetch = mock(async (_url: string | URL | Request, init?: RequestInit) => {
         const body = JSON.parse(init?.body as string)
         fetchCalls.push(body.model)
-        callCount++
-
-        if (callCount === 1) {
-          return new Response(
-            JSON.stringify({ error: 'Internal error' }),
-            { status: 500, statusText: 'Internal Server Error' },
-          )
-        }
-
-        return new Response(JSON.stringify({ ok: true }), { status: 200 })
+        return new Response(
+          JSON.stringify({ error: 'Internal error' }),
+          { status: 500, statusText: 'Internal Server Error' },
+        )
       }) as unknown as typeof globalThis.fetch
 
-      try {
-        const response = await createFireworksRequestWithFallback({
-          body: minimalBody as never,
-          originalModel: 'z-ai/glm-5.1',
-          fetch: mockFetch,
-          logger,
-          useCustomDeployment: true,
-          sessionId: 'test-user-id',
-        })
-
-        expect(response.status).toBe(200)
-        expect(fetchCalls).toHaveLength(2)
-        expect(fetchCalls[0]).toBe(DEPLOYMENT_MODEL_ID)
-        expect(fetchCalls[1]).toBe(STANDARD_MODEL_ID)
-        expect(isDeploymentCoolingDown()).toBe(false)
-      } finally {
-        spy.restore()
-      }
+      const response = await createFireworksRequestWithFallback({
+        body: minimalBody as never,
+        originalModel: 'z-ai/glm-5.1',
+        fetch: mockFetch,
+        logger,
+        useCustomDeployment: true,
+        sessionId: 'test-user-id',
+        now: IN_DEPLOYMENT_HOURS,
+      })
+
+      expect(response.status).toBe(500)
+      expect(fetchCalls).toEqual([DEPLOYMENT_MODEL_ID])
+      expect(isDeploymentCoolingDown()).toBe(false)
     })
 
-    it('skips deployment during cooldown and goes straight to standard API', async () => {
-      const spy = spyDeploymentHours(true)
+    it('returns cooldown error without serverless fallback', async () => {
       markDeploymentScalingUp()
 
       const fetchCalls: string[] = []
@@ -300,26 +243,21 @@ describe('Fireworks deployment routing', () => {
         return new Response(JSON.stringify({ ok: true }), { status: 200 })
       }) as unknown as typeof globalThis.fetch
 
-      try {
-        const response = await createFireworksRequestWithFallback({
-          body: minimalBody as never,
-          originalModel: 'z-ai/glm-5.1',
-          fetch: mockFetch,
-          logger,
-          useCustomDeployment: true,
-          sessionId: 'test-user-id',
-        })
-
-        expect(response.status).toBe(200)
-        expect(fetchCalls).toHaveLength(1)
-        expect(fetchCalls[0]).toBe(STANDARD_MODEL_ID)
-      } finally {
-        spy.restore()
-      }
+      const response = await createFireworksRequestWithFallback({
+        body: minimalBody as never,
+        originalModel: 'z-ai/glm-5.1',
+        fetch: mockFetch,
+        logger,
+        useCustomDeployment: true,
+        sessionId: 'test-user-id',
+        now: IN_DEPLOYMENT_HOURS,
+      })
+
+      expect(response.status).toBe(503)
+      expect(fetchCalls).toHaveLength(0)
     })
 
     it('uses standard API for models without a custom deployment', async () => {
-      const spy = spyDeploymentHours(true)
       const fetchCalls: string[] = []
 
       const mockFetch = mock(async (_url: string | URL | Request, init?: RequestInit) => {
@@ -328,27 +266,66 @@ describe('Fireworks deployment routing', () => {
         return new Response(JSON.stringify({ ok: true }), { status: 200 })
       }) as unknown as typeof globalThis.fetch
 
-      try {
-        const response = await createFireworksRequestWithFallback({
-          body: { ...minimalBody, model: 'some-other/model' } as never,
-          originalModel: 'some-other/model',
-          fetch: mockFetch,
-          logger,
-          useCustomDeployment: true,
-          sessionId: 'test-user-id',
-        })
-
-        expect(response.status).toBe(200)
-        expect(fetchCalls).toHaveLength(1)
-        // Model without mapping falls through to the original model
-        expect(fetchCalls[0]).toBe('some-other/model')
-      } finally {
-        spy.restore()
-      }
+      const response = await createFireworksRequestWithFallback({
+        body: { ...minimalBody, model: 'some-other/model' } as never,
+        originalModel: 'some-other/model',
+        fetch: mockFetch,
+        logger,
+        useCustomDeployment: true,
+        sessionId: 'test-user-id',
+        now: BEFORE_DEPLOYMENT_HOURS,
+      })
+
+      expect(response.status).toBe(200)
+      expect(fetchCalls).toHaveLength(1)
+      // Model without mapping falls through to the original model
+      expect(fetchCalls[0]).toBe('some-other/model')
+    })
+
+    it('returns an availability error for deployment models outside hours', async () => {
+      const mockFetch = mock(async () => {
+        throw new Error('should not fetch outside deployment hours')
+      }) as unknown as typeof globalThis.fetch
+
+      const response = await createFireworksRequestWithFallback({
+        body: minimalBody as never,
+        originalModel: 'z-ai/glm-5.1',
+        fetch: mockFetch,
+        logger,
+        useCustomDeployment: true,
+        sessionId: 'test-user-id',
+        now: BEFORE_DEPLOYMENT_HOURS,
+      })
+
+      expect(response.status).toBe(503)
+      const body = await response.json()
+      expect(body.error.code).toBe('DEPLOYMENT_OUTSIDE_HOURS')
+    })
+
+    it('falls back to the standard Fireworks API in lite mode outside deployment hours', async () => {
+      const fetchCalls: string[] = []
+
+      const mockFetch = mock(async (_url: string | URL | Request, init?: RequestInit) => {
+        const body = JSON.parse(init?.body as string)
+        fetchCalls.push(body.model)
+        return new Response(JSON.stringify({ ok: true }), { status: 200 })
+      }) as unknown as typeof globalThis.fetch
+
+      const response = await createFireworksRequestWithFallback({
+        body: liteBody as never,
+        originalModel: 'z-ai/glm-5.1',
+        fetch: mockFetch,
+        logger,
+        useCustomDeployment: true,
+        sessionId: 'test-user-id',
+        now: BEFORE_DEPLOYMENT_HOURS,
+      })
+
+      expect(response.status).toBe(200)
+      expect(fetchCalls).toEqual([STANDARD_MODEL_ID])
     })
 
     it('returns non-5xx responses from deployment without fallback (e.g. 429)', async () => {
-      const spy = spyDeploymentHours(true)
       const fetchCalls: string[] = []
 
       const mockFetch = mock(async (_url: string | URL | Request, init?: RequestInit) => {
@@ -360,23 +337,20 @@ describe('Fireworks deployment routing', () => {
         )
       }) as unknown as typeof globalThis.fetch
 
-      try {
-        const response = await createFireworksRequestWithFallback({
-          body: minimalBody as never,
-          originalModel: 'z-ai/glm-5.1',
-          fetch: mockFetch,
-          logger,
-          useCustomDeployment: true,
-          sessionId: 'test-user-id',
-        })
-
-        // Non-5xx errors from deployment are returned as-is (caller handles them)
-        expect(response.status).toBe(429)
-        expect(fetchCalls).toHaveLength(1)
-        expect(fetchCalls[0]).toBe(DEPLOYMENT_MODEL_ID)
-      } finally {
-        spy.restore()
-      }
+      const response = await createFireworksRequestWithFallback({
+        body: minimalBody as never,
+        originalModel: 'z-ai/glm-5.1',
+        fetch: mockFetch,
+        logger,
+        useCustomDeployment: true,
+        sessionId: 'test-user-id',
+        now: IN_DEPLOYMENT_HOURS,
+      })
+
+      // Non-5xx errors from deployment are returned as-is (caller handles them)
+      expect(response.status).toBe(429)
+      expect(fetchCalls).toHaveLength(1)
+      expect(fetchCalls[0]).toBe(DEPLOYMENT_MODEL_ID)
     })
 
     it('transforms reasoning to reasoning_effort (defaults to medium)', async () => {
@@ -535,17 +509,44 @@ describe('Fireworks deployment routing', () => {
       expect(fetchedBodies[0].reasoning_effort).toBe('low')
     })
 
-    it('logs when trying deployment and when falling back on 5xx', async () => {
-      const spy = spyDeploymentHours(true)
-      let callCount = 0
-
+    it('logs when trying deployment and when deployment returns 5xx', async () => {
       const mockFetch = mock(async () => {
-        callCount++
-        if (callCount === 1) {
+        return new Response(
+          JSON.stringify({
+            error: {
+              message: 'Scaling up',
+              code: 'DEPLOYMENT_SCALING_UP',
+              type: 'error',
+            },
+          }),
+          { status: 503, statusText: 'Service Unavailable' },
+        )
+      }) as unknown as typeof globalThis.fetch
+
+      await createFireworksRequestWithFallback({
+        body: minimalBody as never,
+        originalModel: 'z-ai/glm-5.1',
+        fetch: mockFetch,
+        logger,
+        useCustomDeployment: true,
+        sessionId: 'test-user-id',
+        now: IN_DEPLOYMENT_HOURS,
+      })
+
+      expect(logger.info).toHaveBeenCalledTimes(2)
+    })
+
+    it('falls back to the standard Fireworks API in lite mode after deployment scaling 503', async () => {
+      const fetchCalls: string[] = []
+
+      const mockFetch = mock(async (_url: string | URL | Request, init?: RequestInit) => {
+        const body = JSON.parse(init?.body as string)
+        fetchCalls.push(body.model)
+        if (fetchCalls.length === 1) {
           return new Response(
             JSON.stringify({
               error: {
-                message: 'Scaling up',
+                message: 'Deployment is currently scaled to zero and is scaling up. Please retry your request in a few minutes.',
                 code: 'DEPLOYMENT_SCALING_UP',
                 type: 'error',
               },
@@ -556,20 +557,70 @@ describe('Fireworks deployment routing', () => {
         return new Response(JSON.stringify({ ok: true }), { status: 200 })
       }) as unknown as typeof globalThis.fetch
 
-      try {
-        await createFireworksRequestWithFallback({
-          body: minimalBody as never,
-          originalModel: 'z-ai/glm-5.1',
-          fetch: mockFetch,
-          logger,
-          useCustomDeployment: true,
-          sessionId: 'test-user-id',
-        })
-
-        expect(logger.info).toHaveBeenCalledTimes(2)
-      } finally {
-        spy.restore()
-      }
+      const response = await createFireworksRequestWithFallback({
+        body: liteBody as never,
+        originalModel: 'z-ai/glm-5.1',
+        fetch: mockFetch,
+        logger,
+        useCustomDeployment: true,
+        sessionId: 'test-user-id',
+        now: IN_DEPLOYMENT_HOURS,
+      })
+
+      expect(response.status).toBe(200)
+      expect(fetchCalls).toEqual([DEPLOYMENT_MODEL_ID, STANDARD_MODEL_ID])
+      expect(isDeploymentCoolingDown()).toBe(true)
+    })
+
+    it('falls back to the standard Fireworks API in lite mode during deployment cooldown', async () => {
+      markDeploymentScalingUp()
+
+      const fetchCalls: string[] = []
+      const mockFetch = mock(async (_url: string | URL | Request, init?: RequestInit) => {
+        const body = JSON.parse(init?.body as string)
+        fetchCalls.push(body.model)
+        return new Response(JSON.stringify({ ok: true }), { status: 200 })
+      }) as unknown as typeof globalThis.fetch
+
+      const response = await createFireworksRequestWithFallback({
+        body: liteBody as never,
+        originalModel: 'z-ai/glm-5.1',
+        fetch: mockFetch,
+        logger,
+        useCustomDeployment: true,
+        sessionId: 'test-user-id',
+        now: IN_DEPLOYMENT_HOURS,
+      })
+
+      expect(response.status).toBe(200)
+      expect(fetchCalls).toEqual([STANDARD_MODEL_ID])
+    })
+
+    it('falls back to the standard Fireworks API in lite mode when the deployment request throws', async () => {
+      const fetchCalls: string[] = []
+
+      const mockFetch = mock(async (_url: string | URL | Request, init?: RequestInit) => {
+        const body = JSON.parse(init?.body as string)
+        fetchCalls.push(body.model)
+        if (fetchCalls.length === 1) {
+          throw new Error('socket hang up')
+        }
+        return new Response(JSON.stringify({ ok: true }), { status: 200 })
+      }) as unknown as typeof globalThis.fetch
+
+      const response = await createFireworksRequestWithFallback({
+        body: liteBody as never,
+        originalModel: 'z-ai/glm-5.1',
+        fetch: mockFetch,
+        logger,
+        useCustomDeployment: true,
+        sessionId: 'test-user-id',
+        now: IN_DEPLOYMENT_HOURS,
+      })
+
+      expect(response.status).toBe(200)
+      expect(fetchCalls).toEqual([DEPLOYMENT_MODEL_ID, STANDARD_MODEL_ID])
+      expect(logger.warn).toHaveBeenCalledTimes(1)
     })
   })
 })
diff --git a/web/src/llm-api/fireworks-config.ts b/web/src/llm-api/fireworks-config.ts
index fb6d59580..566728250 100644
--- a/web/src/llm-api/fireworks-config.ts
+++ b/web/src/llm-api/fireworks-config.ts
@@ -10,7 +10,6 @@ export const FIREWORKS_ACCOUNT_ID = 'james-65d217'
 
 export const FIREWORKS_DEPLOYMENT_MAP: Record<string, string> = {
   // 'minimax/minimax-m2.5': 'accounts/james-65d217/deployments/lnfid5h9',
-  // 'moonshotai/kimi-k2.5': 'accounts/james-65d217/deployments/mx8l5rq2',
-  // 'minimax/minimax-m2.7': 'accounts/james-65d217/deployments/nrdudqxd',
   'z-ai/glm-5.1': 'accounts/james-65d217/deployments/mjb4i7ea',
+  // 'minimax/minimax-m2.7': 'accounts/james-65d217/deployments/nrdudqxd',
 }
diff --git a/web/src/llm-api/fireworks.ts b/web/src/llm-api/fireworks.ts
index 6e304638d..a2f4f80a8 100644
--- a/web/src/llm-api/fireworks.ts
+++ b/web/src/llm-api/fireworks.ts
@@ -1,5 +1,9 @@
 import { Agent } from 'undici'
 
+import {
+  FREEBUFF_DEPLOYMENT_HOURS_LABEL,
+  isFreebuffDeploymentHours,
+} from '@codebuff/common/constants/freebuff-models'
 import { PROFIT_MARGIN } from '@codebuff/common/constants/limits'
 import { getErrorObject } from '@codebuff/common/util/error'
 import { env } from '@codebuff/internal/env'
@@ -32,15 +36,14 @@ const FIREWORKS_MODEL_MAP: Record<string, string> = {
   'minimax/minimax-m2.5': 'accounts/fireworks/models/minimax-m2p5',
   'minimax/minimax-m2.7': 'accounts/fireworks/models/minimax-m2p7',
   'z-ai/glm-5.1': 'accounts/fireworks/models/glm-5p1',
-  'moonshotai/kimi-k2.5': 'accounts/fireworks/models/kimi-k2p5',
 }
 
 /** Flag to enable custom Fireworks deployments (set to false to use global API only) */
 const FIREWORKS_USE_CUSTOM_DEPLOYMENT = true
 
-/** Check if current time is within deployment hours (always enabled) */
-export function isDeploymentHours(_now: Date = new Date()): boolean {
-  return true
+/** Check if current time is within deployment hours: Mon-Fri, 9am ET to 5pm PT. */
+export function isDeploymentHours(now: Date = new Date()): boolean {
+  return isFreebuffDeploymentHours(now)
 }
 
 /**
@@ -93,7 +96,7 @@ function createFireworksRequest(params: {
 
   // Transform OpenRouter-style `reasoning` object into Fireworks' `reasoning_effort`.
   // Unlike OpenAI, Fireworks supports reasoning_effort together with function tools
-  // (e.g. GLM-4.5/5.1 and Kimi K2 are designed for interleaved reasoning + tool use).
+  // (e.g. GLM-4.5/5.1 are designed for interleaved reasoning + tool use).
   if (fireworksBody.reasoning && typeof fireworksBody.reasoning === 'object') {
     const reasoning = fireworksBody.reasoning as {
       enabled?: boolean
@@ -165,15 +168,10 @@ const FIREWORKS_PRICING_MAP: Record<string, FireworksPricing> = {
     cachedInputCostPerToken: 0.26 / 1_000_000,
     outputCostPerToken: 4.40 / 1_000_000,
   },
-  'moonshotai/kimi-k2.5': {
-    inputCostPerToken: 0.60 / 1_000_000,
-    cachedInputCostPerToken: 0.10 / 1_000_000,
-    outputCostPerToken: 3.00 / 1_000_000,
-  },
 }
 
 function getFireworksPricing(model: string): FireworksPricing {
-  return FIREWORKS_PRICING_MAP[model] ?? FIREWORKS_MODEL_MAP['z-ai/glm-5.1']
+  return FIREWORKS_PRICING_MAP[model] ?? FIREWORKS_PRICING_MAP['z-ai/glm-5.1']
 }
 
 function extractUsageAndCost(usage: Record<string, unknown> | undefined | null, model: string): UsageData {
@@ -708,9 +706,10 @@ async function parseFireworksError(response: Response): Promise<FireworksError>
 }
 
 /**
- * Tries the custom Fireworks deployment during business hours (10am–8pm ET),
- * falling back to the standard API if the deployment returns 503 DEPLOYMENT_SCALING_UP.
- * Outside deployment hours or during cooldown, goes straight to the standard API.
+ * Uses custom Fireworks deployments only during deployment hours. Deployment
+ * mapped models never fall back to the serverless API outside hours, during
+ * cooldown, or after deployment 5xxs; those states surface as provider errors
+ * so freebuff can offer MiniMax as the always-on option.
  */
 export async function createFireworksRequestWithFallback(params: {
   body: ChatCompletionRequestBody
@@ -719,45 +718,109 @@ export async function createFireworksRequestWithFallback(params: {
   logger: Logger
   useCustomDeployment?: boolean
   sessionId: string
+  now?: Date
 }): Promise<Response> {
   const { body, originalModel, fetch, logger, sessionId } = params
+  const now = params.now ?? new Date()
   const useCustomDeployment = params.useCustomDeployment ?? FIREWORKS_USE_CUSTOM_DEPLOYMENT
   const deploymentModelId = FIREWORKS_DEPLOYMENT_MAP[originalModel]
-  const shouldTryDeployment =
-    useCustomDeployment &&
-    deploymentModelId &&
-    isDeploymentHours() &&
-    !isDeploymentCoolingDown()
+  const hasDeployment = useCustomDeployment && Boolean(deploymentModelId)
+  const shouldFallbackToStandardApi = body.codebuff_metadata?.cost_mode === 'lite'
+
+  const createStandardApiRequest = () =>
+    createFireworksRequest({ body, originalModel, fetch, sessionId })
+
+  if (hasDeployment && !isDeploymentHours(now)) {
+    if (shouldFallbackToStandardApi) {
+      logger.info(
+        { model: originalModel },
+        'Falling back to Fireworks standard API outside deployment hours',
+      )
+      return createStandardApiRequest()
+    }
+    return new Response(
+      JSON.stringify({
+        error: {
+          message: `${originalModel} is only available during ${FREEBUFF_DEPLOYMENT_HOURS_LABEL}. Use minimax/minimax-m2.7 outside those hours.`,
+          code: 'DEPLOYMENT_OUTSIDE_HOURS',
+          type: 'availability_error',
+        },
+      }),
+      { status: 503, statusText: 'Service Unavailable' },
+    )
+  }
+
+  if (hasDeployment && isDeploymentCoolingDown()) {
+    if (shouldFallbackToStandardApi) {
+      logger.info(
+        { model: originalModel },
+        'Falling back to Fireworks standard API during deployment cooldown',
+      )
+      return createStandardApiRequest()
+    }
+    return new Response(
+      JSON.stringify({
+        error: {
+          message: `${originalModel} deployment is temporarily unavailable. Use minimax/minimax-m2.7 while it recovers.`,
+          code: 'DEPLOYMENT_COOLDOWN',
+          type: 'availability_error',
+        },
+      }),
+      { status: 503, statusText: 'Service Unavailable' },
+    )
+  }
 
-  if (shouldTryDeployment) {
+  if (hasDeployment && deploymentModelId) {
     logger.info(
       { model: originalModel, deploymentModel: deploymentModelId },
       'Trying Fireworks custom deployment',
     )
-    const response = await createFireworksRequest({
-      body,
-      originalModel,
-      fetch,
-      modelIdOverride: deploymentModelId,
-      sessionId,
-    })
+    let response: Response
+    try {
+      response = await createFireworksRequest({
+        body,
+        originalModel,
+        fetch,
+        modelIdOverride: deploymentModelId,
+        sessionId,
+      })
+    } catch (error) {
+      if (shouldFallbackToStandardApi) {
+        logger.warn(
+          { model: originalModel, error: getErrorObject(error) },
+          'Fireworks custom deployment request failed, falling back to standard API',
+        )
+        return createStandardApiRequest()
+      }
+      throw error
+    }
 
     if (response.status >= 500) {
       const errorText = await response.text()
       logger.info(
         { model: originalModel, status: response.status, errorText: errorText.slice(0, 200) },
-        'Fireworks custom deployment returned 5xx, falling back to standard API',
+        'Fireworks custom deployment returned 5xx',
       )
       if (errorText.includes('DEPLOYMENT_SCALING_UP')) {
         markDeploymentScalingUp()
       }
-      // Fall through to standard API request below
-    } else {
-      return response
+      if (shouldFallbackToStandardApi) {
+        logger.info(
+          { model: originalModel, status: response.status },
+          'Falling back to Fireworks standard API after deployment 5xx',
+        )
+        return createStandardApiRequest()
+      }
+      return new Response(errorText, {
+        status: response.status,
+        statusText: response.statusText,
+        headers: response.headers,
+      })
     }
+    return response
   }
 
-  return createFireworksRequest({ body, originalModel, fetch, sessionId })
+  return createStandardApiRequest()
 }
 
 function creditsToFakeCost(credits: number): number {
diff --git a/web/src/server/free-session/__tests__/config.test.ts b/web/src/server/free-session/__tests__/config.test.ts
new file mode 100644
index 000000000..93f5fdcf0
--- /dev/null
+++ b/web/src/server/free-session/__tests__/config.test.ts
@@ -0,0 +1,13 @@
+import { describe, expect, test } from 'bun:test'
+
+import { FREEBUFF_MODELS } from '@codebuff/common/constants/freebuff-models'
+
+import { getInstantAdmitCapacity } from '../config'
+
+describe('free session config', () => {
+  test('every selectable freebuff model has instant-admit capacity', () => {
+    for (const model of FREEBUFF_MODELS) {
+      expect(getInstantAdmitCapacity(model.id)).toBeGreaterThan(0)
+    }
+  })
+})
diff --git a/web/src/server/free-session/__tests__/public-api.test.ts b/web/src/server/free-session/__tests__/public-api.test.ts
index a824f6d22..8b08d63df 100644
--- a/web/src/server/free-session/__tests__/public-api.test.ts
+++ b/web/src/server/free-session/__tests__/public-api.test.ts
@@ -13,14 +13,22 @@ import type { InternalSessionRow } from '../types'
 
 const SESSION_LEN = 60 * 60 * 1000
 const GRACE_MS = 30 * 60 * 1000
-const DEFAULT_MODEL = 'z-ai/glm-5.1'
+const DEFAULT_MODEL = 'minimax/minimax-m2.7'
+
+interface AdmitRecord {
+  user_id: string
+  model: string
+  admitted_at: Date
+}
 
 function makeDeps(overrides: Partial<SessionDeps> = {}): SessionDeps & {
   rows: Map<string, InternalSessionRow>
+  admits: AdmitRecord[]
   _tick: (n: Date) => void
   _now: () => Date
 } {
   const rows = new Map<string, InternalSessionRow>()
+  const admits: AdmitRecord[] = []
   let currentNow = new Date('2026-04-17T12:00:00Z')
   let instanceCounter = 0
 
@@ -28,10 +36,12 @@ function makeDeps(overrides: Partial<SessionDeps> = {}): SessionDeps & {
 
   const deps: SessionDeps & {
     rows: Map<string, InternalSessionRow>
+    admits: AdmitRecord[]
     _tick: (n: Date) => void
     _now: () => Date
   } = {
     rows,
+    admits,
     _tick: (n: Date) => {
       currentNow = n
     },
@@ -50,6 +60,18 @@ function makeDeps(overrides: Partial<SessionDeps> = {}): SessionDeps & {
       }
       return n
     },
+    listRecentAdmits: async ({ userId, model, since, limit }) => {
+      return admits
+        .filter(
+          (a) =>
+            a.user_id === userId &&
+            a.model === model &&
+            a.admitted_at.getTime() >= since.getTime(),
+        )
+        .sort((a, b) => a.admitted_at.getTime() - b.admitted_at.getTime())
+        .slice(0, limit)
+        .map((a) => a.admitted_at)
+    },
     promoteQueuedUser: async ({ userId, model, sessionLengthMs, now }) => {
       const row = rows.get(userId)
       if (!row || row.status !== 'queued' || row.model !== model) return null
@@ -57,6 +79,7 @@ function makeDeps(overrides: Partial<SessionDeps> = {}): SessionDeps & {
       row.admitted_at = now
       row.expires_at = new Date(now.getTime() + sessionLengthMs)
       row.updated_at = now
+      admits.push({ user_id: userId, model, admitted_at: now })
       return row
     },
     now: () => currentNow,
@@ -177,19 +200,34 @@ describe('requestSession', () => {
     expect(state.instanceId).toBe('inst-1')
   })
 
+  test('deployment-hours-only model is unavailable outside deployment hours', async () => {
+    const state = await requestSession({
+      userId: 'u1',
+      model: 'z-ai/glm-5.1',
+      deps,
+    })
+    expect(state).toEqual({
+      status: 'model_unavailable',
+      requestedModel: 'z-ai/glm-5.1',
+      availableHours: '9am ET-5pm PT',
+    })
+    expect(deps.rows.size).toBe(0)
+  })
+
   test('queued response includes a per-model depth snapshot for the selector', async () => {
-    // Seed 2 users in glm + 1 in minimax so the returned map captures both.
+    deps._tick(new Date('2026-04-17T16:00:00Z'))
+    // Seed 2 users in MiniMax + 1 in GLM so the returned map captures both.
     await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps })
     deps._tick(new Date(deps._now().getTime() + 1000))
     await requestSession({ userId: 'u2', model: DEFAULT_MODEL, deps })
     deps._tick(new Date(deps._now().getTime() + 1000))
-    await requestSession({ userId: 'u3', model: 'minimax/minimax-m2.7', deps })
+    await requestSession({ userId: 'u3', model: 'z-ai/glm-5.1', deps })
 
     const state = await getSessionState({ userId: 'u1', deps })
     if (state.status !== 'queued') throw new Error('unreachable')
     expect(state.queueDepthByModel).toEqual({
       [DEFAULT_MODEL]: 2,
-      'minimax/minimax-m2.7': 1,
+      'z-ai/glm-5.1': 1,
     })
   })
 
@@ -264,11 +302,12 @@ describe('requestSession', () => {
   })
 
   test('instant-admit: per-model capacities are independent', async () => {
-    // GLM saturated at 1 active, MiniMax still has room.
+    // MiniMax saturated at 1 active, GLM still has room.
     const admitDeps = makeDeps({
       getInstantAdmitCapacity: (model) =>
         model === DEFAULT_MODEL ? 1 : 10,
     })
+    admitDeps._tick(new Date('2026-04-17T16:00:00Z'))
     await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps: admitDeps })
     const s2 = await requestSession({
       userId: 'u2',
@@ -277,12 +316,264 @@ describe('requestSession', () => {
     })
     const s3 = await requestSession({
       userId: 'u3',
-      model: 'minimax/minimax-m2.7',
+      model: 'z-ai/glm-5.1',
       deps: admitDeps,
     })
     expect(s2.status).toBe('queued')
     expect(s3.status).toBe('active')
   })
+
+  // Per-user rate limit (5 GLM admissions per 20h) — the wire limit is
+  // hard-coded in public-api.ts, so tests seed the fake admit log directly
+  // rather than configuring it. GLM also has deployment-hours gating, so
+  // these tests bump `now` into the open window (12pm ET on a weekday)
+  // before issuing the request.
+  const GLM_MODEL = 'z-ai/glm-5.1'
+  const GLM_LIMIT = 5
+  const GLM_WINDOW_HOURS = 20
+  const GLM_OPEN_TIME = new Date('2026-04-17T16:00:00Z')
+
+  test('rate_limited: 5th GLM admit in window blocks the 6th attempt', async () => {
+    deps._tick(GLM_OPEN_TIME)
+    // Seed 5 admits inside the 20h window, spaced so we can verify retryAfter
+    // points at the oldest one sliding off.
+    const now = deps._now()
+    // Oldest: 19h ago (still in window). Next 4: 1h, 2h, 3h, 4h ago.
+    const ages = [19, 4, 3, 2, 1]
+    for (const hoursAgo of ages) {
+      deps.admits.push({
+        user_id: 'u1',
+        model: GLM_MODEL,
+        admitted_at: new Date(now.getTime() - hoursAgo * 60 * 60 * 1000),
+      })
+    }
+
+    const state = await requestSession({
+      userId: 'u1',
+      model: GLM_MODEL,
+      deps,
+    })
+    expect(state.status).toBe('rate_limited')
+    if (state.status !== 'rate_limited') throw new Error('unreachable')
+    expect(state.model).toBe(GLM_MODEL)
+    expect(state.limit).toBe(GLM_LIMIT)
+    expect(state.windowHours).toBe(GLM_WINDOW_HOURS)
+    expect(state.recentCount).toBe(GLM_LIMIT)
+    // Oldest admit is 19h ago; slot opens when it hits 20h, i.e. in 1h.
+    expect(state.retryAfterMs).toBe(60 * 60 * 1000)
+    // Blocked before any row is written — the user doesn't take a queue slot.
+    expect(deps.rows.has('u1')).toBe(false)
+  })
+
+  test('rate_limited: admits outside the 20h window do not count', async () => {
+    deps._tick(GLM_OPEN_TIME)
+    // 5 admits, each just over 20h old → all fall off the window.
+    const now = deps._now()
+    for (let i = 0; i < 5; i++) {
+      deps.admits.push({
+        user_id: 'u1',
+        model: GLM_MODEL,
+        admitted_at: new Date(
+          now.getTime() - (GLM_WINDOW_HOURS * 60 * 60 * 1000 + 60_000 + i),
+        ),
+      })
+    }
+    const state = await requestSession({
+      userId: 'u1',
+      model: GLM_MODEL,
+      deps,
+    })
+    expect(state.status).toBe('queued')
+    if (state.status !== 'queued') throw new Error('unreachable')
+    expect(state.rateLimit?.recentCount).toBe(0)
+  })
+
+  test('rate_limited: Minimax is unlimited even with many recent admits', async () => {
+    const now = deps._now()
+    for (let i = 0; i < 20; i++) {
+      deps.admits.push({
+        user_id: 'u1',
+        model: DEFAULT_MODEL,
+        admitted_at: new Date(now.getTime() - i * 60_000),
+      })
+    }
+    const state = await requestSession({
+      userId: 'u1',
+      model: DEFAULT_MODEL,
+      deps,
+    })
+    expect(state.status).toBe('queued')
+    if (state.status !== 'queued') throw new Error('unreachable')
+    // No rate-limit info for unrated models — the CLI skips the quota line.
+    expect(state.rateLimit).toBeUndefined()
+  })
+
+  test('queued GLM response carries the current admit count', async () => {
+    deps._tick(GLM_OPEN_TIME)
+    const now = deps._now()
+    // 2 admits in the window — under the limit so the user still queues.
+    deps.admits.push({
+      user_id: 'u1',
+      model: GLM_MODEL,
+      admitted_at: new Date(now.getTime() - 60 * 60 * 1000),
+    })
+    deps.admits.push({
+      user_id: 'u1',
+      model: GLM_MODEL,
+      admitted_at: new Date(now.getTime() - 30 * 60 * 1000),
+    })
+    const state = await requestSession({
+      userId: 'u1',
+      model: GLM_MODEL,
+      deps,
+    })
+    if (state.status !== 'queued') throw new Error('unreachable')
+    expect(state.rateLimit).toEqual({
+      model: GLM_MODEL,
+      limit: GLM_LIMIT,
+      windowHours: GLM_WINDOW_HOURS,
+      recentCount: 2,
+    })
+  })
+
+  test('rate_limited: takeover of an active GLM row is allowed even when at cap', async () => {
+    // Reclaim path: user has an active+unexpired GLM session and restarts
+    // the CLI. POST must rotate their instance id (takeover) and NOT reject
+    // with rate_limited — otherwise they'd be stranded with a live session
+    // they can't reconnect to. The 5th admission is already in the log, so
+    // this also exercises "at the cap" rather than "over the cap".
+    deps._tick(GLM_OPEN_TIME)
+    const now = deps._now()
+    // Seed 5 prior admits (the cap), with the latest one matching the
+    // active row we're about to install.
+    const ages = [19, 4, 3, 2, 0]
+    for (const hoursAgo of ages) {
+      deps.admits.push({
+        user_id: 'u1',
+        model: GLM_MODEL,
+        admitted_at: new Date(now.getTime() - hoursAgo * 60 * 60 * 1000),
+      })
+    }
+    // Install the active row directly (skipping the normal request path so
+    // we don't have to unwind the rate-limit gate to set up the fixture).
+    const admittedAt = new Date(now.getTime() - 30 * 60 * 1000)
+    deps.rows.set('u1', {
+      user_id: 'u1',
+      status: 'active',
+      active_instance_id: 'inst-pre',
+      model: GLM_MODEL,
+      queued_at: admittedAt,
+      admitted_at: admittedAt,
+      expires_at: new Date(admittedAt.getTime() + SESSION_LEN),
+      created_at: admittedAt,
+      updated_at: admittedAt,
+    })
+
+    const state = await requestSession({
+      userId: 'u1',
+      model: GLM_MODEL,
+      deps,
+    })
+    expect(state.status).toBe('active')
+    if (state.status !== 'active') throw new Error('unreachable')
+    // Instance id rotated; quota snapshot still reflects the full window.
+    expect(state.instanceId).not.toBe('inst-pre')
+    expect(state.rateLimit?.recentCount).toBe(GLM_LIMIT)
+  })
+
+  test('rate_limited: reclaim of a queued GLM row is allowed even when at cap', async () => {
+    // Same reclaim exception for queued rows: if a user has already queued
+    // (say they slipped in just before their 5th admit landed), a subsequent
+    // POST from the same CLI must preserve their queue position instead of
+    // flipping to rate_limited.
+    deps._tick(GLM_OPEN_TIME)
+    const now = deps._now()
+    for (let i = 0; i < GLM_LIMIT; i++) {
+      deps.admits.push({
+        user_id: 'u1',
+        model: GLM_MODEL,
+        admitted_at: new Date(now.getTime() - (i + 1) * 60 * 60 * 1000),
+      })
+    }
+    const queuedAt = new Date(now.getTime() - 5 * 60 * 1000)
+    deps.rows.set('u1', {
+      user_id: 'u1',
+      status: 'queued',
+      active_instance_id: 'inst-pre',
+      model: GLM_MODEL,
+      queued_at: queuedAt,
+      admitted_at: null,
+      expires_at: null,
+      created_at: queuedAt,
+      updated_at: queuedAt,
+    })
+
+    const state = await requestSession({
+      userId: 'u1',
+      model: GLM_MODEL,
+      deps,
+    })
+    expect(state.status).toBe('queued')
+    if (state.status !== 'queued') throw new Error('unreachable')
+    // Same position (1) since we preserved queued_at and nobody else is
+    // ahead; the instance id rotated so any prior CLI is superseded.
+    expect(state.instanceId).not.toBe('inst-pre')
+    expect(state.rateLimit?.recentCount).toBe(GLM_LIMIT)
+  })
+
+  test('rate_limited: expired GLM row is not a reclaim — quota still applies', async () => {
+    // The stored row's expires_at is in the past, so it doesn't represent
+    // an in-flight session. This POST is effectively a fresh request and
+    // must be blocked by the quota.
+    deps._tick(GLM_OPEN_TIME)
+    const now = deps._now()
+    const ages = [19, 4, 3, 2, 1]
+    for (const hoursAgo of ages) {
+      deps.admits.push({
+        user_id: 'u1',
+        model: GLM_MODEL,
+        admitted_at: new Date(now.getTime() - hoursAgo * 60 * 60 * 1000),
+      })
+    }
+    const admittedAt = new Date(now.getTime() - 2 * SESSION_LEN)
+    deps.rows.set('u1', {
+      user_id: 'u1',
+      status: 'active',
+      active_instance_id: 'inst-pre',
+      model: GLM_MODEL,
+      queued_at: admittedAt,
+      admitted_at: admittedAt,
+      expires_at: new Date(admittedAt.getTime() + SESSION_LEN),
+      created_at: admittedAt,
+      updated_at: admittedAt,
+    })
+    const state = await requestSession({
+      userId: 'u1',
+      model: GLM_MODEL,
+      deps,
+    })
+    expect(state.status).toBe('rate_limited')
+  })
+
+  test('instant-admit bumps the quota count for the freshly-written admit row', async () => {
+    const admitDeps = makeDeps({ getInstantAdmitCapacity: () => 3 })
+    admitDeps._tick(GLM_OPEN_TIME)
+    // 1 existing admit in the window; this new call should instant-admit and
+    // write a second row, so the response's recentCount reflects 2.
+    const now = admitDeps._now()
+    admitDeps.admits.push({
+      user_id: 'u1',
+      model: GLM_MODEL,
+      admitted_at: new Date(now.getTime() - 30 * 60 * 1000),
+    })
+    const state = await requestSession({
+      userId: 'u1',
+      model: GLM_MODEL,
+      deps: admitDeps,
+    })
+    if (state.status !== 'active') throw new Error('unreachable')
+    expect(state.rateLimit?.recentCount).toBe(2)
+  })
 })
 
 describe('getSessionState', () => {
@@ -341,6 +632,39 @@ describe('getSessionState', () => {
     expect(state).toEqual({ status: 'superseded' })
   })
 
+  test('getSessionState surfaces rateLimit on queued/active polls', async () => {
+    // Regression: the POST response attached rateLimit, but GET polls did
+    // not — so the "Sessions N/M used" line flashed once then disappeared on
+    // the next 5s poll. GET must attach the same quota snapshot. Rate
+    // limits only apply to GLM, so this test uses GLM explicitly (inside
+    // deployment hours) rather than the Minimax DEFAULT_MODEL.
+    deps._tick(new Date('2026-04-17T16:00:00Z'))
+    const now = deps._now()
+    deps.admits.push({
+      user_id: 'u1',
+      model: 'z-ai/glm-5.1',
+      admitted_at: new Date(now.getTime() - 60 * 60 * 1000),
+    })
+    await requestSession({ userId: 'u1', model: 'z-ai/glm-5.1', deps })
+    const row = deps.rows.get('u1')!
+    row.status = 'active'
+    row.admitted_at = now
+    row.expires_at = new Date(now.getTime() + SESSION_LEN)
+
+    const state = await getSessionState({
+      userId: 'u1',
+      claimedInstanceId: row.active_instance_id,
+      deps,
+    })
+    if (state.status !== 'active') throw new Error('unreachable')
+    expect(state.rateLimit).toEqual({
+      model: 'z-ai/glm-5.1',
+      limit: 5,
+      windowHours: 20,
+      recentCount: 1,
+    })
+  })
+
   test('omitted claimedInstanceId on active session returns active (read-only)', async () => {
     // Polling without an id (e.g. very first GET before POST has resolved)
     // must not be classified as superseded — only an explicit mismatch is.
diff --git a/web/src/server/free-session/admission.ts b/web/src/server/free-session/admission.ts
index 3f3c051d2..9f0b74c9f 100644
--- a/web/src/server/free-session/admission.ts
+++ b/web/src/server/free-session/admission.ts
@@ -1,4 +1,7 @@
-import { FREEBUFF_MODELS } from '@codebuff/common/constants/freebuff-models'
+import {
+  FREEBUFF_MODELS,
+  isFreebuffModelAvailable,
+} from '@codebuff/common/constants/freebuff-models'
 
 import {
   ADMISSION_TICK_MS,
@@ -111,7 +114,10 @@ export async function runAdmissionTick(
   // advisory locks and a single update each.
   const perModel = await Promise.all(
     models.map(async (model) => {
-      const health = fleet[model] ?? 'healthy'
+      const isRegisteredModel = FREEBUFF_MODELS.some((m) => m.id === model)
+      const health = !isRegisteredModel || isFreebuffModelAvailable(model, now)
+        ? fleet[model] ?? 'healthy'
+        : 'unhealthy'
       const { admitted, skipped } = await deps.admitFromQueue({
         model,
         sessionLengthMs: deps.sessionLengthMs,
diff --git a/web/src/server/free-session/public-api.ts b/web/src/server/free-session/public-api.ts
index 450540443..02c5c05c9 100644
--- a/web/src/server/free-session/public-api.ts
+++ b/web/src/server/free-session/public-api.ts
@@ -1,4 +1,6 @@
 import {
+  FREEBUFF_DEPLOYMENT_HOURS_LABEL,
+  isFreebuffModelAvailable,
   isFreebuffModelId as isSelectableFreebuffModel,
   resolveFreebuffModel,
 } from '@codebuff/common/constants/freebuff-models'
@@ -16,15 +18,65 @@ import {
   FreeSessionModelLockedError,
   getSessionRow,
   joinOrTakeOver,
+  listRecentAdmits,
   promoteQueuedUser,
   queueDepthsByModel,
   queuePositionFor,
 } from './store'
 import { toSessionStateResponse } from './session-view'
 
-import type { FreebuffSessionServerResponse } from '@codebuff/common/types/freebuff-session'
+import type {
+  FreebuffSessionRateLimit,
+  FreebuffSessionServerResponse,
+} from '@codebuff/common/types/freebuff-session'
 import type { InternalSessionRow, SessionStateResponse } from './types'
 
+/**
+ * Per-model admission rate limits. Keyed by freebuff model id; a model not
+ * in the map has no rate limit applied. Today only GLM 5.1 is limited
+ * (Minimax is cheap enough to leave unlimited).
+ *
+ * Hard-coded rather than env-driven: the values need to be observable in the
+ * code review, and the CLI already renders the numbers via `rateLimit` on
+ * queued/active responses — changing them is a deliberate, typed edit.
+ */
+const RATE_LIMITS: Record<string, { limit: number; windowHours: number }> = {
+  'z-ai/glm-5.1': { limit: 5, windowHours: 20 },
+}
+
+/** Fetch the caller's current quota snapshot for `model`, or undefined if the
+ *  model isn't rate-limited. Used by both POST (after admit) and GET polls so
+ *  the CLI's "N of M sessions used" line stays live instead of disappearing
+ *  after the first poll. Also returns the oldest admit in-window so callers
+ *  that need `retryAfterMs` don't have to re-query. */
+async function fetchRateLimitSnapshot(
+  userId: string,
+  model: string,
+  deps: SessionDeps,
+): Promise<
+  { info: FreebuffSessionRateLimit; oldest: Date | null } | undefined
+> {
+  const cfg = RATE_LIMITS[model]
+  if (!cfg) return undefined
+  const now = nowOf(deps)
+  const since = new Date(now.getTime() - cfg.windowHours * 60 * 60 * 1000)
+  const admits = await deps.listRecentAdmits({
+    userId,
+    model,
+    since,
+    limit: cfg.limit,
+  })
+  return {
+    info: {
+      model,
+      limit: cfg.limit,
+      windowHours: cfg.windowHours,
+      recentCount: admits.length,
+    },
+    oldest: admits[0] ?? null,
+  }
+}
+
 export interface SessionDeps {
   getSessionRow: (userId: string) => Promise<InternalSessionRow | null>
   joinOrTakeOver: (params: {
@@ -43,6 +95,15 @@ export interface SessionDeps {
    *  bound to a given model. Compared against the model's configured
    *  `instantAdmitCapacity` to decide whether a new joiner skips the queue. */
   activeCountForModel: (model: string) => Promise<number>
+  /** Rate-limit helper: oldest-first admission timestamps for (userId, model)
+   *  inside the window. The caller uses `rows.length` as the count (capped
+   *  at `limit`) and `rows[0]` as the oldest for `retryAfterMs`. */
+  listRecentAdmits: (params: {
+    userId: string
+    model: string
+    since: Date
+    limit: number
+  }) => Promise<Date[]>
   /** Instant-admit promotion: flips a specific queued row to active. Returns
    *  the updated row or null if the row wasn't in a queued state. */
   promoteQueuedUser: (params: {
@@ -71,6 +132,7 @@ const defaultDeps: SessionDeps = {
   queueDepthsByModel,
   queuePositionFor,
   activeCountForModel,
+  listRecentAdmits,
   promoteQueuedUser,
   getInstantAdmitCapacity,
   isWaitingRoomEnabled,
@@ -122,6 +184,21 @@ export type RequestSessionResult =
       currentModel: string
       requestedModel: string
     }
+  | {
+      /** User has hit the per-model admission quota in the rolling window.
+       *  See `FreebuffSessionServerResponse`'s `rate_limited` variant. */
+      status: 'rate_limited'
+      model: string
+      limit: number
+      windowHours: number
+      recentCount: number
+      retryAfterMs: number
+    }
+  | {
+      status: 'model_unavailable'
+      requestedModel: string
+      availableHours: string
+    }
 
 /**
  * Client calls this on CLI startup with the model they want to use.
@@ -152,6 +229,7 @@ export async function requestSession(params: {
 }): Promise<RequestSessionResult> {
   const deps = params.deps ?? defaultDeps
   const model = resolveFreebuffModel(params.model)
+  const now = nowOf(deps)
   if (params.userBanned) {
     return { status: 'banned' }
   }
@@ -161,13 +239,60 @@ export async function requestSession(params: {
   ) {
     return { status: 'disabled' }
   }
+  if (!isFreebuffModelAvailable(model, now)) {
+    return {
+      status: 'model_unavailable',
+      requestedModel: model,
+      availableHours: FREEBUFF_DEPLOYMENT_HOURS_LABEL,
+    }
+  }
+
+  // Rate-limit check runs before joinOrTakeOver so heavy users never even
+  // create a queued row. Only models listed in RATE_LIMITS are gated; others
+  // (Minimax today) fall through unchanged.
+  //
+  // Takeover/reclaim exception: a user who already holds a queued or
+  // active+unexpired row on this same model is re-anchoring (CLI restart,
+  // same-account tab switch) rather than starting a new session. Admit
+  // counts are written at promotion time, so the quota only needs to gate
+  // fresh admissions — blocking a reclaim here would strand a user with an
+  // active 5th session unable to reconnect after a CLI restart.
+  const existing = await deps.getSessionRow(params.userId)
+  const isReclaim =
+    !!existing &&
+    existing.model === model &&
+    (existing.status === 'queued' ||
+      (existing.status === 'active' &&
+        !!existing.expires_at &&
+        existing.expires_at.getTime() > now.getTime()))
+
+  if (!isReclaim) {
+    const snapshot = await fetchRateLimitSnapshot(params.userId, model, deps)
+    if (snapshot && snapshot.info.recentCount >= snapshot.info.limit) {
+      // Oldest admit's window-anniversary is when one slot opens back up.
+      // Clamped at 0 so a clock skew can't surface a negative retry-after.
+      const windowMs = snapshot.info.windowHours * 60 * 60 * 1000
+      const retryAfterMs = Math.max(
+        0,
+        (snapshot.oldest?.getTime() ?? 0) + windowMs - now.getTime(),
+      )
+      return {
+        status: 'rate_limited',
+        model,
+        limit: snapshot.info.limit,
+        windowHours: snapshot.info.windowHours,
+        recentCount: snapshot.info.recentCount,
+        retryAfterMs,
+      }
+    }
+  }
 
   let row: InternalSessionRow
   try {
     row = await deps.joinOrTakeOver({
       userId: params.userId,
       model,
-      now: nowOf(deps),
+      now,
     })
   } catch (err) {
     if (err instanceof FreeSessionModelLockedError) {
@@ -199,7 +324,7 @@ export async function requestSession(params: {
           userId: params.userId,
           model,
           sessionLengthMs: deps.sessionLengthMs,
-          now: nowOf(deps),
+          now,
         })
         if (promoted) row = promoted
       }
@@ -212,7 +337,21 @@ export async function requestSession(params: {
       `joinOrTakeOver returned a row that maps to no view (user=${params.userId})`,
     )
   }
-  return view
+  return attachRateLimit(params.userId, view, deps)
+}
+
+/** Thread the current quota snapshot onto queued/active views so the CLI can
+ *  render "N of M sessions used". Other statuses pass through unchanged.
+ *  Called on both POST and GET so the line stays live across polls. */
+async function attachRateLimit(
+  userId: string,
+  view: SessionStateResponse,
+  deps: SessionDeps,
+): Promise<SessionStateResponse> {
+  if (view.status !== 'queued' && view.status !== 'active') return view
+  const snapshot = await fetchRateLimitSnapshot(userId, view.model, deps)
+  if (!snapshot) return view
+  return { ...view, rateLimit: snapshot.info }
 }
 
 /**
@@ -267,7 +406,7 @@ export async function getSessionState(params: {
 
   const view = await viewForRow(params.userId, deps, row)
   if (!view) return noneResponse()
-  return view
+  return attachRateLimit(params.userId, view, deps)
 }
 
 export async function endUserSession(params: {
diff --git a/web/src/server/free-session/store.ts b/web/src/server/free-session/store.ts
index b3bd2bc48..e84331b69 100644
--- a/web/src/server/free-session/store.ts
+++ b/web/src/server/free-session/store.ts
@@ -1,7 +1,7 @@
 import { db } from '@codebuff/internal/db'
 import { coerceBool } from '@codebuff/internal/db/advisory-lock'
 import * as schema from '@codebuff/internal/db/schema'
-import { and, asc, count, eq, lt, sql } from 'drizzle-orm'
+import { and, asc, count, eq, gte, lt, sql } from 'drizzle-orm'
 
 import { FREEBUFF_ADMISSION_LOCK_ID } from './config'
 
@@ -369,6 +369,16 @@ export async function admitFromQueue(params: {
       )
       .returning()
 
+    if (admitted.length > 0) {
+      await tx.insert(schema.freeSessionAdmit).values(
+        admitted.map((r) => ({
+          user_id: r.user_id,
+          model: r.model,
+          admitted_at: now,
+        })),
+      )
+    }
+
     return { admitted: admitted as InternalSessionRow[], skipped: null }
   })
 }
@@ -391,23 +401,63 @@ export async function promoteQueuedUser(params: {
 }): Promise<InternalSessionRow | null> {
   const { userId, model, sessionLengthMs, now } = params
   const expiresAt = new Date(now.getTime() + sessionLengthMs)
-  const [row] = await db
-    .update(schema.freeSession)
-    .set({
-      status: 'active',
+  return db.transaction(async (tx) => {
+    const [row] = await tx
+      .update(schema.freeSession)
+      .set({
+        status: 'active',
+        admitted_at: now,
+        expires_at: expiresAt,
+        updated_at: now,
+      })
+      .where(
+        and(
+          eq(schema.freeSession.user_id, userId),
+          eq(schema.freeSession.status, 'queued'),
+          eq(schema.freeSession.model, model),
+        ),
+      )
+      .returning()
+    if (!row) return null
+    await tx.insert(schema.freeSessionAdmit).values({
+      user_id: userId,
+      model,
       admitted_at: now,
-      expires_at: expiresAt,
-      updated_at: now,
     })
+    return row as InternalSessionRow
+  })
+}
+
+/**
+ * List admissions for `userId` on `model` whose `admitted_at` is within the
+ * window `[since, ∞)`, ordered oldest-first. Caller gets both the count
+ * (array length, capped at `limit`) and the oldest timestamp (`rows[0]`) —
+ * the oldest is needed to compute `retryAfterMs` when the window is full,
+ * so one query covers both the check and the reject path.
+ *
+ * Drives the per-user, per-model rate limit (e.g. at most 5 GLM sessions in
+ * the last 20h) enforced before `joinOrTakeOver`.
+ */
+export async function listRecentAdmits(params: {
+  userId: string
+  model: string
+  since: Date
+  limit: number
+}): Promise<Date[]> {
+  const { userId, model, since, limit } = params
+  const rows = await db
+    .select({ admitted_at: schema.freeSessionAdmit.admitted_at })
+    .from(schema.freeSessionAdmit)
     .where(
       and(
-        eq(schema.freeSession.user_id, userId),
-        eq(schema.freeSession.status, 'queued'),
-        eq(schema.freeSession.model, model),
+        eq(schema.freeSessionAdmit.user_id, userId),
+        eq(schema.freeSessionAdmit.model, model),
+        gte(schema.freeSessionAdmit.admitted_at, since),
       ),
     )
-    .returning()
-  return (row as InternalSessionRow | undefined) ?? null
+    .orderBy(asc(schema.freeSessionAdmit.admitted_at))
+    .limit(limit)
+  return rows.map((r) => r.admitted_at)
 }
 
 /** Stable 31-bit hash so model-keyed advisory lock ids don't overflow int4. */