diff --git a/agents/__tests__/editor.test.ts b/agents/__tests__/editor.test.ts
index 030857c8d..36d6b75c5 100644
--- a/agents/__tests__/editor.test.ts
+++ b/agents/__tests__/editor.test.ts
@@ -67,6 +67,11 @@ describe('editor agent', () => {
expect(glmEditor.model).toBe('z-ai/glm-5.1')
})
+ test('creates minimax editor', () => {
+ const minimaxEditor = createCodeEditor({ model: 'minimax' })
+ expect(minimaxEditor.model).toBe('minimax/minimax-m2.7')
+ })
+
test('gpt-5 editor does not include think tags in instructions', () => {
const gpt5Editor = createCodeEditor({ model: 'gpt-5' })
expect(gpt5Editor.instructionsPrompt).not.toContain('')
@@ -79,6 +84,12 @@ describe('editor agent', () => {
expect(glmEditor.instructionsPrompt).not.toContain('')
})
+ test('minimax editor does not include think tags in instructions', () => {
+ const minimaxEditor = createCodeEditor({ model: 'minimax' })
+ expect(minimaxEditor.instructionsPrompt).not.toContain('')
+ expect(minimaxEditor.instructionsPrompt).not.toContain('')
+ })
+
test('opus editor includes think tags in instructions', () => {
const opusEditor = createCodeEditor({ model: 'opus' })
expect(opusEditor.instructionsPrompt).toContain('')
diff --git a/agents/editor/editor.ts b/agents/editor/editor.ts
index 3d208aa13..c98544d0f 100644
--- a/agents/editor/editor.ts
+++ b/agents/editor/editor.ts
@@ -4,7 +4,7 @@ import { publisher } from '../constants'
import type { AgentDefinition } from '../types/agent-definition'
export const createCodeEditor = (options: {
- model: 'gpt-5' | 'opus' | 'glm'
+ model: 'gpt-5' | 'opus' | 'glm' | 'minimax'
}): Omit => {
const { model } = options
return {
@@ -12,6 +12,8 @@ export const createCodeEditor = (options: {
model:
options.model === 'gpt-5'
? 'openai/gpt-5.1'
+ : options.model === 'minimax'
+ ? 'minimax/minimax-m2.7'
: options.model === 'glm'
? 'z-ai/glm-5.1'
: 'anthropic/claude-opus-4.7',
@@ -65,7 +67,7 @@ OR for new files or major rewrites:
}
-${model === 'gpt-5' || model === 'glm'
+${model === 'gpt-5' || model === 'glm' || model === 'minimax'
? ''
: `Before you start writing your implementation, you should use tags to think about the best way to implement the changes.
diff --git a/agents/types/agent-definition.ts b/agents/types/agent-definition.ts
index b28a77c31..3608f3631 100644
--- a/agents/types/agent-definition.ts
+++ b/agents/types/agent-definition.ts
@@ -423,8 +423,6 @@ export type ModelName =
// Other open source models
| 'moonshotai/kimi-k2'
| 'moonshotai/kimi-k2:nitro'
- | 'moonshotai/kimi-k2.5'
- | 'moonshotai/kimi-k2.5:nitro'
| 'z-ai/glm-5'
| 'z-ai/glm-5.1'
| 'z-ai/glm-4.6'
diff --git a/cli/src/app.tsx b/cli/src/app.tsx
index 0661d7d3c..cac6e20ec 100644
--- a/cli/src/app.tsx
+++ b/cli/src/app.tsx
@@ -380,6 +380,7 @@ const AuthedSurface = ({
// 'queued' → waiting our turn
// 'country_blocked' → terminal region-gate message
// 'banned' → terminal account-banned message
+ // 'rate_limited' → hit per-model session quota; terminal for this run
//
// 'ended' deliberately falls through to : the agent may still be
// finishing work under the server-side grace period, and the chat surface
@@ -390,7 +391,8 @@ const AuthedSurface = ({
session.status === 'queued' ||
session.status === 'none' ||
session.status === 'country_blocked' ||
- session.status === 'banned')
+ session.status === 'banned' ||
+ session.status === 'rate_limited')
) {
return
}
diff --git a/cli/src/chat.tsx b/cli/src/chat.tsx
index 6663c7e1e..09727ea6e 100644
--- a/cli/src/chat.tsx
+++ b/cli/src/chat.tsx
@@ -174,7 +174,11 @@ export const Chat = ({
})
const hasSubscription = subscriptionData?.hasSubscription ?? false
- const { adData, recordImpression } = useGravityAd({ enabled: IS_FREEBUFF || !hasSubscription })
+ const { adData, recordImpression } = useGravityAd({
+ enabled: IS_FREEBUFF || !hasSubscription,
+ provider: 'gravity',
+ fallbackProvider: 'carbon',
+ })
// Set initial mode from CLI flag on mount
useEffect(() => {
diff --git a/cli/src/components/freebuff-model-selector.tsx b/cli/src/components/freebuff-model-selector.tsx
index a33d89540..0850a0bd7 100644
--- a/cli/src/components/freebuff-model-selector.tsx
+++ b/cli/src/components/freebuff-model-selector.tsx
@@ -3,9 +3,16 @@ import { useKeyboard } from '@opentui/react'
import React, { useCallback, useEffect, useMemo, useState } from 'react'
import { Button } from './button'
-import { FREEBUFF_MODELS } from '@codebuff/common/constants/freebuff-models'
+import {
+ FALLBACK_FREEBUFF_MODEL_ID,
+ FREEBUFF_DEPLOYMENT_HOURS_LABEL,
+ FREEBUFF_GLM_MODEL_ID,
+ FREEBUFF_MODELS,
+ isFreebuffModelAvailable,
+} from '@codebuff/common/constants/freebuff-models'
import { joinFreebuffQueue } from '../hooks/use-freebuff-session'
+import { useNow } from '../hooks/use-now'
import { useFreebuffModelStore } from '../state/freebuff-model-store'
import { useFreebuffSessionStore } from '../state/freebuff-session-store'
import { useTerminalDimensions } from '../hooks/use-terminal-dimensions'
@@ -13,6 +20,11 @@ import { useTheme } from '../hooks/use-theme'
import type { KeyEvent } from '@opentui/core'
+const FREEBUFF_MODEL_SELECTOR_MODELS = [
+ ...FREEBUFF_MODELS.filter((model) => model.id === FREEBUFF_GLM_MODEL_ID),
+ ...FREEBUFF_MODELS.filter((model) => model.id !== FREEBUFF_GLM_MODEL_ID),
+]
+
/**
* Dual-purpose model picker:
* - Pre-chat landing (session 'none'): user hasn't joined any queue. Picking
@@ -33,7 +45,9 @@ export const FreebuffModelSelector: React.FC = () => {
const theme = useTheme()
const { terminalWidth } = useTerminalDimensions()
const selectedModel = useFreebuffModelStore((s) => s.selectedModel)
+ const setSelectedModel = useFreebuffModelStore((s) => s.setSelectedModel)
const session = useFreebuffSessionStore((s) => s.session)
+ const now = useNow(60_000)
const [pending, setPending] = useState(null)
const [hoveredId, setHoveredId] = useState(null)
// Keyboard cursor — separate from the actually-selected model so that
@@ -45,6 +59,20 @@ export const FreebuffModelSelector: React.FC = () => {
setFocusedId(selectedModel)
}, [selectedModel])
+ useEffect(() => {
+ // Landing-screen safety net: if the in-memory selection becomes
+ // unavailable (e.g. deployment hours close while the picker is open),
+ // swap to the always-available fallback so Enter doesn't POST a model
+ // the server will immediately reject. In-memory only — the user's saved
+ // preference (e.g. GLM) is preserved for the next launch.
+ if (
+ (session?.status === 'none' || !session) &&
+ !isFreebuffModelAvailable(selectedModel, new Date(now))
+ ) {
+ setSelectedModel(FALLBACK_FREEBUFF_MODEL_ID)
+ }
+ }, [now, selectedModel, session, setSelectedModel])
+
// Landing ('none'): depths come from the server snapshot, no "self" to
// subtract. In-queue ('queued'): for the user's queue, "ahead" is
// `position - 1` (themselves don't count); for every other queue, switching
@@ -85,18 +113,22 @@ export const FreebuffModelSelector: React.FC = () => {
)
// Decide row vs column layout based on whether both buttons actually fit
- // side-by-side. Each button's inner text is "● {displayName} · {tagline} {hint}",
+ // side-by-side. Each button's inner text is
+ // "● {displayName} · {tagline} · {hours} {hint}",
// plus 2 cols of border and 2 cols of padding. Buttons are separated by a
// gap of 2. If the total exceeds the terminal width, stack vertically.
const stackVertically = useMemo(() => {
const BUTTON_CHROME = 4 // 2 border + 2 padding
const GAP = 2
- const total = FREEBUFF_MODELS.reduce((sum, model, idx) => {
+ const total = FREEBUFF_MODEL_SELECTOR_MODELS.reduce((sum, model, idx) => {
const inner =
2 /* indicator + space */ +
model.displayName.length +
3 /* " · " */ +
model.tagline.length +
+ (model.availability === 'deployment_hours'
+ ? 3 + FREEBUFF_DEPLOYMENT_HOURS_LABEL.length
+ : 0) +
2 /* " " */ +
hintWidth
return sum + inner + BUTTON_CHROME + (idx > 0 ? GAP : 0)
@@ -115,10 +147,11 @@ export const FreebuffModelSelector: React.FC = () => {
(modelId: string) => {
if (pending) return
if (modelId === committedModelId) return
+ if (!isFreebuffModelAvailable(modelId, new Date(now))) return
setPending(modelId)
joinFreebuffQueue(modelId).finally(() => setPending(null))
},
- [pending, committedModelId],
+ [pending, committedModelId, now],
)
// Tab / Shift+Tab and arrow keys move the focus highlight only; Enter or
@@ -136,25 +169,30 @@ export const FreebuffModelSelector: React.FC = () => {
const isCommit = name === 'return' || name === 'enter' || name === 'space'
if (!isForward && !isBackward && !isCommit) return
if (isCommit) {
- if (focusedId !== committedModelId) {
+ if (
+ focusedId !== committedModelId &&
+ isFreebuffModelAvailable(focusedId, new Date(now))
+ ) {
key.preventDefault?.()
pick(focusedId)
}
return
}
- const currentIdx = FREEBUFF_MODELS.findIndex((m) => m.id === focusedId)
+ const currentIdx = FREEBUFF_MODEL_SELECTOR_MODELS.findIndex(
+ (m) => m.id === focusedId,
+ )
if (currentIdx === -1) return
- const len = FREEBUFF_MODELS.length
+ const len = FREEBUFF_MODEL_SELECTOR_MODELS.length
const nextIdx = isForward
? (currentIdx + 1) % len
: (currentIdx - 1 + len) % len
- const target = FREEBUFF_MODELS[nextIdx]
+ const target = FREEBUFF_MODEL_SELECTOR_MODELS[nextIdx]
if (target) {
key.preventDefault?.()
setFocusedId(target.id)
}
},
- [pending, pick, focusedId, committedModelId],
+ [pending, pick, focusedId, committedModelId, now],
),
)
@@ -173,7 +211,7 @@ export const FreebuffModelSelector: React.FC = () => {
alignItems: 'flex-start',
}}
>
- {FREEBUFF_MODELS.map((model) => {
+ {FREEBUFF_MODEL_SELECTOR_MODELS.map((model) => {
// 'Selected' means the dot is filled and the label is bold. On the
// landing screen ('none') this tracks the pre-focused pick; on the
// queued screen it tracks the model the server has us on. Either
@@ -181,15 +219,22 @@ export const FreebuffModelSelector: React.FC = () => {
const isSelected = model.id === selectedModel
const isHovered = hoveredId === model.id
const isFocused = focusedId === model.id && !isSelected
+ const isAvailable = isFreebuffModelAvailable(model.id, new Date(now))
const indicator = isSelected ? '●' : '○'
const indicatorColor = isSelected ? theme.primary : theme.muted
- const labelColor = isSelected ? theme.foreground : theme.muted
+ const labelColor = isSelected && isAvailable ? theme.foreground : theme.muted
// Clickable whenever picking would actually do something — i.e.
// anything except re-picking the queue we're already in.
- const interactable = !pending && model.id !== committedModelId
+ const interactable = !pending && isAvailable && model.id !== committedModelId
const ahead = aheadByModel?.[model.id]
const hint =
- ahead === undefined ? '' : ahead === 0 ? 'No wait' : `${ahead} ahead`
+ !isAvailable
+ ? 'Closed'
+ : ahead === undefined
+ ? ''
+ : ahead === 0
+ ? 'No wait'
+ : `${ahead} ahead`
const borderColor = isSelected
? theme.primary
@@ -202,7 +247,7 @@ export const FreebuffModelSelector: React.FC = () => {
key={model.id}
onClick={() => {
setFocusedId(model.id)
- pick(model.id)
+ if (isAvailable) pick(model.id)
}}
onMouseOver={() => interactable && setHoveredId(model.id)}
onMouseOut={() => setHoveredId((curr) => (curr === model.id ? null : curr))}
@@ -223,6 +268,9 @@ export const FreebuffModelSelector: React.FC = () => {
{model.displayName}
· {model.tagline}
+ {model.availability === 'deployment_hours' && (
+ · {FREEBUFF_DEPLOYMENT_HOURS_LABEL}
+ )}
{hint.padEnd(hintWidth)}
diff --git a/cli/src/components/waiting-room-screen.tsx b/cli/src/components/waiting-room-screen.tsx
index e67823f7a..f2a09022e 100644
--- a/cli/src/components/waiting-room-screen.tsx
+++ b/cli/src/components/waiting-room-screen.tsx
@@ -43,6 +43,18 @@ const formatElapsed = (ms: number): string => {
return `${minutes}m ${seconds.toString().padStart(2, '0')}s`
}
+/** "3h 20m" / "45 min" / "under a minute" — rendered after "Try again in"
+ * on the rate-limited screen so users know when they can retry. */
+const formatRetryAfter = (ms: number): string => {
+ if (!Number.isFinite(ms) || ms <= 0) return 'any moment now'
+ const minutes = Math.round(ms / 60_000)
+ if (minutes < 1) return 'under a minute'
+ if (minutes < 60) return `${minutes} min`
+ const hours = Math.floor(minutes / 60)
+ const rem = minutes % 60
+ return rem === 0 ? `${hours}h` : `${hours}h ${rem}m`
+}
+
export const WaitingRoomScreen: React.FC = ({
session,
error,
@@ -72,11 +84,12 @@ export const WaitingRoomScreen: React.FC = ({
// Always enable ads in the waiting room — this is where monetization lives.
// forceStart bypasses the "wait for first user message" gate inside the hook,
// which would otherwise block ads here since no conversation exists yet.
- // Uses Carbon (BuySellAds); in-chat ads still use the Gravity default.
+ // Try Gravity first, then fall back to Carbon when Gravity doesn't fill.
const { adData, recordImpression } = useGravityAd({
enabled: true,
forceStart: true,
- provider: 'carbon',
+ provider: 'gravity',
+ fallbackProvider: 'carbon',
})
useFreebuffCtrlCExit()
@@ -216,6 +229,18 @@ export const WaitingRoomScreen: React.FC = ({
Elapsed
{formatElapsed(elapsedMs)}
+ {/* Per-model session quota (e.g. GLM 5.1 caps at 5/20h). Only
+ rendered for rate-limited models so the Minimax queue stays
+ clutter-free. */}
+ {session.rateLimit && (
+
+ Sessions
+
+ {session.rateLimit.recentCount} / {session.rateLimit.limit}
+
+ used in last {session.rateLimit.windowHours}h
+
+ )}
>
)}
@@ -253,11 +278,34 @@ export const WaitingRoomScreen: React.FC = ({
⚠ Account unavailable
- This account can't use freebuff. If you think this is a
+ This account has been suspended and can't use freebuff. If you think this is a
mistake, contact support@codebuff.com. Press Ctrl+C to exit.
>
)}
+
+ {/* Per-model session quota exhausted (e.g. 5+ GLM sessions in the
+ last 20h). Terminal for this run — the user can exit and come
+ back once the oldest session in the window rolls off. */}
+ {session?.status === 'rate_limited' && (
+ <>
+
+ ⚠ Session limit reached
+
+
+ You've used{' '}
+
+ {session.recentCount} of {session.limit}
+ {' '}
+ hour-long sessions on {session.model} in the last{' '}
+ {session.windowHours}h. Try again in{' '}
+
+ {formatRetryAfter(session.retryAfterMs)}
+
+ . Press Ctrl+C to exit.
+
+ >
+ )}
diff --git a/cli/src/hooks/use-freebuff-session.ts b/cli/src/hooks/use-freebuff-session.ts
index 79deea1cf..b7a91eb1e 100644
--- a/cli/src/hooks/use-freebuff-session.ts
+++ b/cli/src/hooks/use-freebuff-session.ts
@@ -1,4 +1,8 @@
import { env } from '@codebuff/common/env'
+import {
+ FALLBACK_FREEBUFF_MODEL_ID,
+ resolveFreebuffModel,
+} from '@codebuff/common/constants/freebuff-models'
import { useEffect } from 'react'
import {
@@ -9,6 +13,7 @@ import { useFreebuffSessionStore } from '../state/freebuff-session-store'
import { getAuthTokenDetails } from '../utils/auth'
import { IS_FREEBUFF } from '../utils/constants'
import { logger } from '../utils/logger'
+import { saveFreebuffModelPreference } from '../utils/settings'
import type { FreebuffSessionResponse } from '../types/freebuff-session'
@@ -75,14 +80,31 @@ async function callSession(
return body
}
}
- // 409 from POST means the user picked a different model than their active
- // session is bound to. Surface as a non-throw `model_locked` so the UI can
- // show a confirmation prompt (DELETE then re-POST to switch).
+ // 409 from POST means the selected model cannot be joined right now, either
+ // because an active session is locked to another model or because the model
+ // is temporarily unavailable. Surface both model-switch conflicts and
+ // temporary model availability closures as non-throw states.
if (resp.status === 409 && method === 'POST') {
const body = (await resp.json().catch(() => null)) as
| FreebuffSessionResponse
| null
- if (body && body.status === 'model_locked') {
+ if (
+ body &&
+ (body.status === 'model_locked' || body.status === 'model_unavailable')
+ ) {
+ return body
+ }
+ }
+ // 429 from POST is the per-model session-quota reject (e.g. too many GLM
+ // sessions in the last 20h). Terminal for the current poll — the CLI shows
+ // a screen explaining the limit and when the user can try again. The 429
+ // status (rather than 200) keeps older CLIs in their error path so they
+ // back off instead of tight-polling an unrecognized 200 body.
+ if (resp.status === 429 && method === 'POST') {
+ const body = (await resp.json().catch(() => null)) as
+ | FreebuffSessionResponse
+ | null
+ if (body && body.status === 'rate_limited') {
return body
}
}
@@ -119,6 +141,8 @@ function nextDelayMs(next: FreebuffSessionResponse): number | null {
case 'country_blocked':
case 'banned':
case 'model_locked':
+ case 'rate_limited':
+ case 'model_unavailable':
return null
}
}
@@ -260,7 +284,13 @@ export function returnToFreebuffLanding(
*/
export function joinFreebuffQueue(model: string): Promise {
if (!IS_FREEBUFF) return Promise.resolve()
- useFreebuffModelStore.getState().setSelectedModel(model)
+ // This is the only explicit user-pick path (called from the picker on
+ // click / Enter), so persistence belongs here — and ONLY here. Server-
+ // driven flips (`model_locked`, `model_unavailable`, takeover) go
+ // through `setSelectedModel` directly, which never writes to disk.
+ const resolved = resolveFreebuffModel(model)
+ useFreebuffModelStore.getState().setSelectedModel(resolved)
+ saveFreebuffModelPreference(resolved)
return restartFreebuffSession('rejoin')
}
@@ -398,6 +428,19 @@ export function useFreebuffSession(): UseFreebuffSessionResult {
schedule(0)
return
}
+ if (next.status === 'model_unavailable') {
+ // Server says the requested model isn't available right now (e.g.
+ // GLM outside deployment hours). Flip to the always-available
+ // fallback for this run. In-memory only — `setSelectedModel`
+ // doesn't persist, so the user's saved preference (e.g. GLM)
+ // is preserved for their next launch during deployment hours.
+ useFreebuffModelStore
+ .getState()
+ .setSelectedModel(FALLBACK_FREEBUFF_MODEL_ID)
+ nextMethod = 'GET'
+ schedule(0)
+ return
+ }
// Startup takeover: the initial probe GET saw we already hold a seat
// (from a prior CLI instance). POST now to rotate our instance id so
diff --git a/cli/src/hooks/use-gravity-ad.ts b/cli/src/hooks/use-gravity-ad.ts
index e52b4bdd8..36a18faae 100644
--- a/cli/src/hooks/use-gravity-ad.ts
+++ b/cli/src/hooks/use-gravity-ad.ts
@@ -108,12 +108,15 @@ export const useGravityAd = (options?: {
/** Skip the "wait for first user message" gate. Used by the freebuff
* waiting room, which has no conversation but still needs ads. */
forceStart?: boolean
- /** Which ad network to query. Defaults to Gravity. */
+ /** Primary ad network to query. Defaults to Gravity. */
provider?: AdProvider
+ /** Backup ad network to try when the primary returns no fill or errors. */
+ fallbackProvider?: AdProvider
}): GravityAdState => {
const enabled = options?.enabled ?? true
const forceStart = options?.forceStart ?? false
const provider: AdProvider = options?.provider ?? 'gravity'
+ const fallbackProvider = options?.fallbackProvider
const [ad, setAd] = useState(null)
const [adData, setAdData] = useState(null)
const [isLoading, setIsLoading] = useState(false)
@@ -278,49 +281,63 @@ export const useGravityAd = (options?: {
}
}
- try {
- const response = await fetch(`${WEBSITE_URL}/api/v1/ads`, {
- method: 'POST',
- headers: {
- 'Content-Type': 'application/json',
- Authorization: `Bearer ${authToken}`,
- },
- body: JSON.stringify({
- provider,
- messages: adMessages,
- sessionId: useChatStore.getState().chatSessionId,
- device: getDeviceInfo(),
- // Carbon requires a real browser-ish useragent for targeting/fraud
- // detection. Gravity ignores it. We source one centrally so every
- // provider that needs it sees the same value.
- userAgent: getAdUserAgent(),
- }),
- })
+ const providersToTry =
+ fallbackProvider && fallbackProvider !== provider
+ ? [provider, fallbackProvider]
+ : [provider]
- if (!response.ok) {
- logger.warn(
- { provider, status: response.status, response: await response.json() },
- '[ads] Web API returned error',
- )
- return null
- }
+ for (const providerToTry of providersToTry) {
+ try {
+ const response = await fetch(`${WEBSITE_URL}/api/v1/ads`, {
+ method: 'POST',
+ headers: {
+ 'Content-Type': 'application/json',
+ Authorization: `Bearer ${authToken}`,
+ },
+ body: JSON.stringify({
+ provider: providerToTry,
+ messages: adMessages,
+ sessionId: useChatStore.getState().chatSessionId,
+ device: getDeviceInfo(),
+ // Carbon requires a real browser-ish useragent for targeting/fraud
+ // detection. Gravity ignores it. We source one centrally so every
+ // provider that needs it sees the same value.
+ userAgent: getAdUserAgent(),
+ }),
+ })
- const data = await response.json()
- const variant = data.variant ?? 'banner'
+ if (!response.ok) {
+ logger.warn(
+ {
+ provider: providerToTry,
+ status: response.status,
+ response: await response.json(),
+ },
+ '[ads] Web API returned error',
+ )
+ continue
+ }
- if (variant === 'choice' && Array.isArray(data.ads) && data.ads.length > 0) {
- return { variant: 'choice', ads: data.ads as AdResponse[] }
- }
+ const data = await response.json()
+ const variant = data.variant ?? 'banner'
- if (data.ad) {
- return { variant: 'banner', ad: data.ad as AdResponse }
- }
+ if (
+ variant === 'choice' &&
+ Array.isArray(data.ads) &&
+ data.ads.length > 0
+ ) {
+ return { variant: 'choice', ads: data.ads as AdResponse[] }
+ }
- return null
- } catch (err) {
- logger.error({ err }, '[ads] Failed to fetch ad')
- return null
+ if (data.ad) {
+ return { variant: 'banner', ad: data.ad as AdResponse }
+ }
+ } catch (err) {
+ logger.error({ err, provider: providerToTry }, '[ads] Failed to fetch ad')
+ }
}
+
+ return null
}
// Update tick function (uses ref to avoid useCallback dependency issues)
@@ -413,7 +430,7 @@ export const useGravityAd = (options?: {
clearInterval(id)
ctrlRef.current.intervalId = null
}
- }, [shouldStart, shouldHideAds])
+ }, [shouldStart, shouldHideAds, provider, fallbackProvider])
// Don't return ad when ads should be hidden
const visible = shouldStart && !shouldHideAds
diff --git a/cli/src/state/freebuff-model-store.ts b/cli/src/state/freebuff-model-store.ts
index 182a38831..c602d8464 100644
--- a/cli/src/state/freebuff-model-store.ts
+++ b/cli/src/state/freebuff-model-store.ts
@@ -1,19 +1,23 @@
import {
DEFAULT_FREEBUFF_MODEL_ID,
+ resolveAvailableFreebuffModel,
resolveFreebuffModel,
} from '@codebuff/common/constants/freebuff-models'
import { create } from 'zustand'
-import {
- loadFreebuffModelPreference,
- saveFreebuffModelPreference,
-} from '../utils/settings'
+import { loadFreebuffModelPreference } from '../utils/settings'
/**
* Holds the user's currently-selected freebuff model. Initialized from the
* persisted settings file so freebuff defaults to whatever model the user
- * last picked. Writing through `setSelectedModel` also persists to disk so
- * the next launch picks it up without an explicit save call.
+ * last picked.
+ *
+ * `setSelectedModel` is in-memory only — it does NOT persist. Persistence
+ * happens exclusively in `joinFreebuffQueue` (the explicit-pick path), so
+ * server-driven auto-flips (`model_locked`, `model_unavailable`, takeover)
+ * can update the in-memory selection without overwriting the user's saved
+ * preference. The latter previously caused users to get permanently flipped
+ * to the fallback model after a single auto-fallback.
*
* Components in the waiting room read this to highlight the current row in
* the model picker; the session hook reads it to decide which queue to join.
@@ -24,14 +28,11 @@ interface FreebuffModelStore {
}
export const useFreebuffModelStore = create((set) => ({
- selectedModel: resolveFreebuffModel(
+ selectedModel: resolveAvailableFreebuffModel(
loadFreebuffModelPreference() ?? DEFAULT_FREEBUFF_MODEL_ID,
),
- setSelectedModel: (model) => {
- const resolved = resolveFreebuffModel(model)
- saveFreebuffModelPreference(resolved)
- set({ selectedModel: resolved })
- },
+ setSelectedModel: (model) =>
+ set({ selectedModel: resolveFreebuffModel(model) }),
}))
/** Imperative read for non-React callers (the session hook's tick loop and
diff --git a/cli/src/utils/local-agent-registry.ts b/cli/src/utils/local-agent-registry.ts
index 59206eb84..6106b3928 100644
--- a/cli/src/utils/local-agent-registry.ts
+++ b/cli/src/utils/local-agent-registry.ts
@@ -370,7 +370,7 @@ export const loadAgentDefinitions = (): AgentDefinition[] => {
}
// Override the model of free-mode agents to match the user's pick from the
- // freebuff waiting room. Bundled definitions hardcode glm-5.1; we swap in
+ // freebuff waiting room. Bundled definitions hardcode a free model; we swap in
// whatever the user chose so the chat-completions request body carries the
// matching model and the server-side session gate doesn't reject it as a
// model mismatch.
diff --git a/common/src/constants/free-agents.ts b/common/src/constants/free-agents.ts
index e44c74cc6..308e12df6 100644
--- a/common/src/constants/free-agents.ts
+++ b/common/src/constants/free-agents.ts
@@ -26,7 +26,10 @@ export const FREEBUFF_ROOT_AGENT_IDS = ['base2-free'] as const
*/
export const FREE_MODE_AGENT_MODELS: Record> = {
// Root orchestrator
- 'base2-free': new Set(['minimax/minimax-m2.7', 'z-ai/glm-5.1']),
+ 'base2-free': new Set([
+ 'minimax/minimax-m2.7',
+ 'z-ai/glm-5.1',
+ ]),
// File exploration agents
'file-picker': new Set(['google/gemini-2.5-flash-lite']),
@@ -41,10 +44,16 @@ export const FREE_MODE_AGENT_MODELS: Record> = {
'basher': new Set(['google/gemini-3.1-flash-lite-preview']),
// Editor for free mode
- 'editor-lite': new Set(['minimax/minimax-m2.7', 'z-ai/glm-5.1']),
+ 'editor-lite': new Set([
+ 'minimax/minimax-m2.7',
+ 'z-ai/glm-5.1',
+ ]),
// Code reviewer for free mode
- 'code-reviewer-lite': new Set(['minimax/minimax-m2.7', 'z-ai/glm-5.1']),
+ 'code-reviewer-lite': new Set([
+ 'minimax/minimax-m2.7',
+ 'z-ai/glm-5.1',
+ ]),
}
/**
diff --git a/common/src/constants/freebuff-models.ts b/common/src/constants/freebuff-models.ts
index d71ebd619..2e1ef8d8e 100644
--- a/common/src/constants/freebuff-models.ts
+++ b/common/src/constants/freebuff-models.ts
@@ -13,24 +13,43 @@ export interface FreebuffModelOption {
displayName: string
/** One-line description shown next to the label. */
tagline: string
+ /** Availability policy for the selector and server-side admission. */
+ availability: 'always' | 'deployment_hours'
}
+export const FREEBUFF_DEPLOYMENT_HOURS_LABEL = '9am ET-5pm PT'
+export const FREEBUFF_GLM_MODEL_ID = 'z-ai/glm-5.1'
+export const FREEBUFF_MINIMAX_MODEL_ID = 'minimax/minimax-m2.7'
+
export const FREEBUFF_MODELS = [
{
- id: 'z-ai/glm-5.1',
- displayName: 'GLM 5.1',
- tagline: 'Smartest',
- },
- {
- id: 'minimax/minimax-m2.7',
+ id: FREEBUFF_MINIMAX_MODEL_ID,
displayName: 'MiniMax M2.7',
tagline: 'Fastest',
+ availability: 'always',
+ },
+ {
+ id: FREEBUFF_GLM_MODEL_ID,
+ displayName: 'GLM 5.1',
+ tagline: 'Smartest',
+ availability: 'deployment_hours',
},
] as const satisfies readonly FreebuffModelOption[]
export type FreebuffModelId = (typeof FREEBUFF_MODELS)[number]['id']
-export const DEFAULT_FREEBUFF_MODEL_ID: FreebuffModelId = FREEBUFF_MODELS[0].id
+/** What new freebuff users see selected in the picker. May not be currently
+ * available (GLM is closed outside deployment hours); callers that need an
+ * always-available id for resolution / auto-fallbacks should use
+ * FALLBACK_FREEBUFF_MODEL_ID instead. */
+export const DEFAULT_FREEBUFF_MODEL_ID: FreebuffModelId = FREEBUFF_GLM_MODEL_ID
+
+/** Always-available fallback used when the requested model can't be served
+ * right now (unknown id, deployment hours closed, etc.). Kept distinct from
+ * DEFAULT_FREEBUFF_MODEL_ID so a new user's "preferred default" can be the
+ * smartest model without auto-flipping anyone to a closed deployment. */
+export const FALLBACK_FREEBUFF_MODEL_ID: FreebuffModelId =
+ FREEBUFF_MINIMAX_MODEL_ID
export function isFreebuffModelId(
id: string | null | undefined,
@@ -42,12 +61,58 @@ export function isFreebuffModelId(
export function resolveFreebuffModel(
id: string | null | undefined,
): FreebuffModelId {
- return isFreebuffModelId(id) ? id : DEFAULT_FREEBUFF_MODEL_ID
+ return isFreebuffModelId(id) ? id : FALLBACK_FREEBUFF_MODEL_ID
}
export function getFreebuffModel(id: string): FreebuffModelOption {
return (
FREEBUFF_MODELS.find((m) => m.id === id) ??
- FREEBUFF_MODELS.find((m) => m.id === DEFAULT_FREEBUFF_MODEL_ID)!
+ FREEBUFF_MODELS.find((m) => m.id === FALLBACK_FREEBUFF_MODEL_ID)!
)
}
+
+function getZonedParts(
+ date: Date,
+ timeZone: string,
+): { weekday: string; minutes: number } {
+ const parts = new Intl.DateTimeFormat('en-US', {
+ timeZone,
+ weekday: 'short',
+ hour: '2-digit',
+ minute: '2-digit',
+ hourCycle: 'h23',
+ }).formatToParts(date)
+ const value = (type: string) => parts.find((part) => part.type === type)?.value
+ const hour = Number(value('hour') ?? 0)
+ const minute = Number(value('minute') ?? 0)
+ return {
+ weekday: value('weekday') ?? '',
+ minutes: hour * 60 + minute,
+ }
+}
+
+export function isFreebuffDeploymentHours(now: Date = new Date()): boolean {
+ const eastern = getZonedParts(now, 'America/New_York')
+ const pacific = getZonedParts(now, 'America/Los_Angeles')
+ if (eastern.weekday === 'Sat' || eastern.weekday === 'Sun') return false
+ return eastern.minutes >= 9 * 60 && pacific.minutes < 17 * 60
+}
+
+export function isFreebuffModelAvailable(
+ id: string,
+ now: Date = new Date(),
+): boolean {
+ const model = FREEBUFF_MODELS.find((m) => m.id === id)
+ if (!model) return false
+ return model.availability === 'always' || isFreebuffDeploymentHours(now)
+}
+
+export function resolveAvailableFreebuffModel(
+ id: string | null | undefined,
+ now: Date = new Date(),
+): FreebuffModelId {
+ const resolved = resolveFreebuffModel(id)
+ return isFreebuffModelAvailable(resolved, now)
+ ? resolved
+ : FALLBACK_FREEBUFF_MODEL_ID
+}
diff --git a/common/src/templates/initial-agents-dir/types/agent-definition.ts b/common/src/templates/initial-agents-dir/types/agent-definition.ts
index b28a77c31..3608f3631 100644
--- a/common/src/templates/initial-agents-dir/types/agent-definition.ts
+++ b/common/src/templates/initial-agents-dir/types/agent-definition.ts
@@ -423,8 +423,6 @@ export type ModelName =
// Other open source models
| 'moonshotai/kimi-k2'
| 'moonshotai/kimi-k2:nitro'
- | 'moonshotai/kimi-k2.5'
- | 'moonshotai/kimi-k2.5:nitro'
| 'z-ai/glm-5'
| 'z-ai/glm-5.1'
| 'z-ai/glm-4.6'
diff --git a/common/src/types/freebuff-session.ts b/common/src/types/freebuff-session.ts
index e42d9f0be..7789c91f2 100644
--- a/common/src/types/freebuff-session.ts
+++ b/common/src/types/freebuff-session.ts
@@ -5,6 +5,22 @@
*
* The CLI uses these shapes directly; there are no client-only states.
*/
+
+/**
+ * Per-model usage counter surfaced to the CLI so the waiting-room UI can
+ * render "N of M sessions used" alongside queue/active state. Present when
+ * the joined model has a rate limit applied (today: GLM 5.1 with 5 admits
+ * per 20-hour window). `recentCount` is the number of admissions inside
+ * `windowHours` at the time the response was produced — see also the
+ * standalone `rate_limited` status for the reject path.
+ */
+export interface FreebuffSessionRateLimit {
+ model: string
+ limit: number
+ windowHours: number
+ recentCount: number
+}
+
export type FreebuffSessionServerResponse =
| {
/** Waiting room is globally off; free-mode requests flow through
@@ -38,6 +54,10 @@ export type FreebuffSessionServerResponse =
queueDepthByModel: Record
estimatedWaitMs: number
queuedAt: string
+ /** Rate-limit quota for rate-limited models (GLM 5.1 today). Absent
+ * for unlimited models or when the status was produced outside the
+ * rate-limit check path (e.g. pure read via GET). */
+ rateLimit?: FreebuffSessionRateLimit
}
| {
status: 'active'
@@ -47,6 +67,10 @@ export type FreebuffSessionServerResponse =
admittedAt: string
expiresAt: string
remainingMs: number
+ /** Rate-limit quota for rate-limited models (GLM 5.1 today). Absent
+ * for unlimited models or when the status was produced outside the
+ * rate-limit check path (e.g. pure read via GET). */
+ rateLimit?: FreebuffSessionRateLimit
}
| {
/** Session is over. While `instanceId` is present we're inside the
@@ -92,6 +116,12 @@ export type FreebuffSessionServerResponse =
currentModel: string
requestedModel: string
}
+ | {
+ /** Requested model is valid but not selectable right now. */
+ status: 'model_unavailable'
+ requestedModel: string
+ availableHours: string
+ }
| {
/** Account is banned. Returned from every endpoint so banned bots can't
* join the queue at all (otherwise they inflate `queueDepth` until the
@@ -99,3 +129,24 @@ export type FreebuffSessionServerResponse =
* stops polling and shows a banned message. */
status: 'banned'
}
+ | {
+ /** User has used up their per-model admission quota in the rolling
+ * window (GLM 5.1: 5 one-hour sessions per 20h). Returned from POST
+ * /session before the user is placed in the queue. `retryAfterMs` is
+ * the time until the oldest admission inside the window falls off
+ * and one quota slot opens up — clients should show the user when
+ * they can try again. Terminal for the CLI's current poll session;
+ * the user can exit and come back later. */
+ status: 'rate_limited'
+ /** The freebuff model the user tried to join. */
+ model: string
+ /** Max admissions permitted per window (e.g. 5). */
+ limit: number
+ /** Rolling window size in hours (e.g. 20). */
+ windowHours: number
+ /** Admission count inside the window at check time — will be ≥ limit. */
+ recentCount: number
+ /** Milliseconds from now until the oldest admission in the window
+ * exits and the user regains one quota slot. */
+ retryAfterMs: number
+ }
diff --git a/docs/freebuff-waiting-room.md b/docs/freebuff-waiting-room.md
index b1384d7b6..353bfb046 100644
--- a/docs/freebuff-waiting-room.md
+++ b/docs/freebuff-waiting-room.md
@@ -5,7 +5,7 @@
The waiting room is the admission control layer for **free-mode** requests against the freebuff Fireworks deployments. It has three jobs:
1. **Drip-admit users per model** — each selectable freebuff model has its own FIFO queue. Admission runs one tick (default `ADMISSION_TICK_MS`, 15s) that tries to admit one user per model, so heavier models can sit cold without starving lighter ones.
-2. **Gate on per-deployment health** — a single fleet probe per tick (`getFleetHealth` in `web/src/server/free-session/fireworks-health.ts`) hits the Fireworks metrics endpoint and classifies each dedicated deployment as `healthy | degraded | unhealthy`. Only models whose deployment is `healthy` admit that tick; a degraded minimax-m2.7 no longer stalls glm-5.1 admissions.
+2. **Gate on per-deployment health and hours** — a single fleet probe per tick (`getFleetHealth` in `web/src/server/free-session/fireworks-health.ts`) hits the Fireworks metrics endpoint and classifies each dedicated deployment as `healthy | degraded | unhealthy`. Only models whose deployment is `healthy` and currently available admit that tick; GLM 5.1 is available during 9am ET-5pm PT on weekdays, while MiniMax M2.7 is serverless and always available.
3. **One instance per account** — prevent a single user from running N concurrent freebuff CLIs to get N× throughput.
Users who cannot be admitted immediately are placed in the queue for their chosen model and given an estimated wait time. Admitted users get a fixed-length session (default 1h) bound to the model they were admitted on; chat completions use that model for the life of the session.
@@ -149,8 +149,8 @@ The final tick result carries a `queueDepthByModel` map and a single `skipped` r
| Constant | Location | Default | Purpose |
|---|---|---|---|
| `ADMISSION_TICK_MS` | `config.ts` | 15000 | How often the ticker fires. Up to one user is admitted per model per tick. |
-| `FREEBUFF_MODELS` | `common/src/constants/freebuff-models.ts` | `glm-5.1`, `minimax-m2.7` | Selectable models; each gets its own queue and admission slot. |
-| `FIREWORKS_DEPLOYMENT_MAP` | `web/src/llm-api/fireworks-config.ts` | glm-5.1 only | Models with dedicated Fireworks deployments. Models not listed are treated as `healthy` (serverless fallback) — drop this default when they migrate to their own deployments. |
+| `FREEBUFF_MODELS` | `common/src/constants/freebuff-models.ts` | `minimax-m2.7`, `glm-5.1` | Selectable models; each gets its own queue and admission slot. |
+| `FIREWORKS_DEPLOYMENT_MAP` | `web/src/llm-api/fireworks-config.ts` | `glm-5.1` | Models with dedicated Fireworks deployments. Models not listed are treated as `healthy` (serverless fallback) — drop this default when they migrate to their own deployments. |
| `HEALTH_CACHE_TTL_MS` | `fireworks-health.ts` | 25000 | Fleet probe cache TTL. Sits just under the Fireworks 30s exporter cadence and 6 req/min rate limit. |
| `FREEBUFF_SESSION_LENGTH_MS` | env | 3_600_000 | Session lifetime |
| `FREEBUFF_SESSION_GRACE_MS` | env | 1_800_000 | Drain window after expiry — gate still admits requests so an in-flight agent can finish, but the CLI is expected to block new prompts. Hard cutoff at `expires_at + grace`. |
@@ -180,12 +180,12 @@ Response shapes:
{
"status": "queued",
"instanceId": "e47…",
- "model": "z-ai/glm-5.1",
+ "model": "minimax/minimax-m2.7",
"position": 17, // 1-indexed within this model's queue
"queueDepth": 43, // size of this model's queue
"queueDepthByModel": { // snapshot of every model's queue — powers the
- "z-ai/glm-5.1": 43, // "N ahead" hint in the selector. Missing
- "minimax/minimax-m2.7": 4 // entries should be treated as 0.
+ "minimax/minimax-m2.7": 43, // "N ahead" hint in the selector. Missing
+ "z-ai/glm-5.1": 4 // entries should be treated as 0.
},
"estimatedWaitMs": 384000,
"queuedAt": "2026-04-17T12:00:00Z"
@@ -195,7 +195,7 @@ Response shapes:
{
"status": "active",
"instanceId": "e47…",
- "model": "z-ai/glm-5.1",
+ "model": "minimax/minimax-m2.7",
"admittedAt": "2026-04-17T12:00:00Z",
"expiresAt": "2026-04-17T13:00:00Z",
"remainingMs": 3600000
@@ -219,7 +219,7 @@ Response shapes:
// to actually switch.
{
"status": "model_locked",
-      "currentModel": "z-ai/glm-5.1",
-      "requestedModel": "minimax/minimax-m2.7"
+      "currentModel": "minimax/minimax-m2.7",
+      "requestedModel": "z-ai/glm-5.1"
}
```
@@ -285,7 +285,7 @@ waitMs = (position - 1) * 24_000
- Position 1 → 0 (next tick admits you)
- Position 2 → 24s, and so on.
-`position` is scoped to this model's queue — a user at position 1 in the `minimax/minimax-m2.7` queue is not affected by the depth of the `z-ai/glm-5.1` queue. The estimate is intentionally decoupled from the admission tick — it's a human-friendly rule-of-thumb for the UI, not a precise projection. Actual wait depends on admission-tick cadence and health-gated pauses (during a per-deployment Fireworks incident only the affected model's queue stalls; healthy models keep draining), so the real wait can be longer or shorter.
+`position` is scoped to this model's queue — a user at position 1 in the `minimax/minimax-m2.7` queue is not affected by the depth of the `z-ai/glm-5.1` queue. The estimate is intentionally decoupled from the admission tick — it's a human-friendly rule-of-thumb for the UI, not a precise projection. Actual wait depends on admission-tick cadence, health-gated pauses, and deployment-hours availability (during a GLM Fireworks incident or outside 9am ET-5pm PT, only GLM's queue stalls; MiniMax keeps draining), so the real wait can be longer or shorter.
## CLI Integration (frontend-side contract)
@@ -324,7 +324,7 @@ The `disabled` response means the server has the waiting room turned off. CLI tr
| Spamming POST/GET to starve admission tick | Admission uses per-model Postgres advisory locks; DDoS protection is upstream (Next's global rate limits). Consider adding a per-user limiter on `/session` if traffic warrants. |
| Repeatedly POSTing different models to get across every queue | Single row per user (PK on `user_id`); switching models moves the row, never clones it. A user holds exactly one queue slot at any time. |
| Fireworks metrics endpoint down / slow | `getFleetHealth()` fails closed (timeout, non-OK, or missing API key) → every dedicated-deployment model is flagged `unhealthy` and its queue pauses. |
-| One deployment degraded while others are fine | Health is classified per-deployment; only the affected model's queue pauses, so a degraded minimax-m2.7 doesn't block glm-5.1 admissions. |
+| One deployment degraded while others are fine | Health is classified per-deployment; only the affected model's queue pauses, so a degraded GLM deployment doesn't block MiniMax admissions. |
| Zombie expired sessions holding capacity | Swept on every admission tick, even when upstream is unhealthy |
## Testing
diff --git a/freebuff/cli/release/package.json b/freebuff/cli/release/package.json
index 6426fac98..a597e0852 100644
--- a/freebuff/cli/release/package.json
+++ b/freebuff/cli/release/package.json
@@ -1,6 +1,6 @@
{
"name": "freebuff",
- "version": "0.0.46",
+ "version": "0.0.48",
"description": "The world's strongest free coding agent",
"license": "MIT",
"bin": {
diff --git a/packages/internal/src/db/migrations/0046_cloudy_firedrake.sql b/packages/internal/src/db/migrations/0046_cloudy_firedrake.sql
new file mode 100644
index 000000000..53a24ec98
--- /dev/null
+++ b/packages/internal/src/db/migrations/0046_cloudy_firedrake.sql
@@ -0,0 +1,9 @@
+CREATE TABLE "free_session_admit" (
+ "id" text PRIMARY KEY NOT NULL,
+ "user_id" text NOT NULL,
+ "model" text NOT NULL,
+ "admitted_at" timestamp with time zone DEFAULT now() NOT NULL
+);
+--> statement-breakpoint
+ALTER TABLE "free_session_admit" ADD CONSTRAINT "free_session_admit_user_id_user_id_fk" FOREIGN KEY ("user_id") REFERENCES "public"."user"("id") ON DELETE cascade ON UPDATE no action;--> statement-breakpoint
+CREATE INDEX "idx_free_session_admit_user_model_time" ON "free_session_admit" USING btree ("user_id","model","admitted_at");
\ No newline at end of file
diff --git a/packages/internal/src/db/migrations/meta/0046_snapshot.json b/packages/internal/src/db/migrations/meta/0046_snapshot.json
new file mode 100644
index 000000000..48747dd94
--- /dev/null
+++ b/packages/internal/src/db/migrations/meta/0046_snapshot.json
@@ -0,0 +1,3307 @@
+{
+ "id": "3bf6a16c-2fd6-4c9d-a395-f4ca2c080a3c",
+ "prevId": "76196ef1-2384-4edd-b832-c9ff8085d809",
+ "version": "7",
+ "dialect": "postgresql",
+ "tables": {
+ "public.account": {
+ "name": "account",
+ "schema": "",
+ "columns": {
+ "userId": {
+ "name": "userId",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "type": {
+ "name": "type",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "provider": {
+ "name": "provider",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "providerAccountId": {
+ "name": "providerAccountId",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "refresh_token": {
+ "name": "refresh_token",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "access_token": {
+ "name": "access_token",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "expires_at": {
+ "name": "expires_at",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "token_type": {
+ "name": "token_type",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "scope": {
+ "name": "scope",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "id_token": {
+ "name": "id_token",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "session_state": {
+ "name": "session_state",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ }
+ },
+ "indexes": {},
+ "foreignKeys": {
+ "account_userId_user_id_fk": {
+ "name": "account_userId_user_id_fk",
+ "tableFrom": "account",
+ "tableTo": "user",
+ "columnsFrom": [
+ "userId"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ }
+ },
+ "compositePrimaryKeys": {
+ "account_provider_providerAccountId_pk": {
+ "name": "account_provider_providerAccountId_pk",
+ "columns": [
+ "provider",
+ "providerAccountId"
+ ]
+ }
+ },
+ "uniqueConstraints": {},
+ "policies": {},
+ "checkConstraints": {},
+ "isRLSEnabled": false
+ },
+ "public.ad_impression": {
+ "name": "ad_impression",
+ "schema": "",
+ "columns": {
+ "id": {
+ "name": "id",
+ "type": "text",
+ "primaryKey": true,
+ "notNull": true
+ },
+ "user_id": {
+ "name": "user_id",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "provider": {
+ "name": "provider",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "'gravity'"
+ },
+ "ad_text": {
+ "name": "ad_text",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "title": {
+ "name": "title",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "cta": {
+ "name": "cta",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "''"
+ },
+ "url": {
+ "name": "url",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "favicon": {
+ "name": "favicon",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "click_url": {
+ "name": "click_url",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "imp_url": {
+ "name": "imp_url",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "extra_pixels": {
+ "name": "extra_pixels",
+ "type": "text[]",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "payout": {
+ "name": "payout",
+ "type": "numeric(10, 6)",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "credits_granted": {
+ "name": "credits_granted",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "grant_operation_id": {
+ "name": "grant_operation_id",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "served_at": {
+ "name": "served_at",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "now()"
+ },
+ "impression_fired_at": {
+ "name": "impression_fired_at",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "clicked_at": {
+ "name": "clicked_at",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": false
+ }
+ },
+ "indexes": {
+ "idx_ad_impression_user": {
+ "name": "idx_ad_impression_user",
+ "columns": [
+ {
+ "expression": "user_id",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ },
+ {
+ "expression": "served_at",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": false,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ },
+ "idx_ad_impression_imp_url": {
+ "name": "idx_ad_impression_imp_url",
+ "columns": [
+ {
+ "expression": "imp_url",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": false,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ }
+ },
+ "foreignKeys": {
+ "ad_impression_user_id_user_id_fk": {
+ "name": "ad_impression_user_id_user_id_fk",
+ "tableFrom": "ad_impression",
+ "tableTo": "user",
+ "columnsFrom": [
+ "user_id"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ }
+ },
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {
+ "ad_impression_imp_url_unique": {
+ "name": "ad_impression_imp_url_unique",
+ "nullsNotDistinct": false,
+ "columns": [
+ "imp_url"
+ ]
+ }
+ },
+ "policies": {},
+ "checkConstraints": {},
+ "isRLSEnabled": false
+ },
+ "public.agent_config": {
+ "name": "agent_config",
+ "schema": "",
+ "columns": {
+ "id": {
+ "name": "id",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "version": {
+ "name": "version",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "publisher_id": {
+ "name": "publisher_id",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "major": {
+ "name": "major",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": false,
+ "generated": {
+ "as": "CAST(SPLIT_PART(\"agent_config\".\"version\", '.', 1) AS INTEGER)",
+ "type": "stored"
+ }
+ },
+ "minor": {
+ "name": "minor",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": false,
+ "generated": {
+ "as": "CAST(SPLIT_PART(\"agent_config\".\"version\", '.', 2) AS INTEGER)",
+ "type": "stored"
+ }
+ },
+ "patch": {
+ "name": "patch",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": false,
+ "generated": {
+ "as": "CAST(SPLIT_PART(\"agent_config\".\"version\", '.', 3) AS INTEGER)",
+ "type": "stored"
+ }
+ },
+ "data": {
+ "name": "data",
+ "type": "jsonb",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "created_at": {
+ "name": "created_at",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "now()"
+ },
+ "updated_at": {
+ "name": "updated_at",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "now()"
+ }
+ },
+ "indexes": {
+ "idx_agent_config_publisher": {
+ "name": "idx_agent_config_publisher",
+ "columns": [
+ {
+ "expression": "publisher_id",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": false,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ }
+ },
+ "foreignKeys": {
+ "agent_config_publisher_id_publisher_id_fk": {
+ "name": "agent_config_publisher_id_publisher_id_fk",
+ "tableFrom": "agent_config",
+ "tableTo": "publisher",
+ "columnsFrom": [
+ "publisher_id"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "no action",
+ "onUpdate": "no action"
+ }
+ },
+ "compositePrimaryKeys": {
+ "agent_config_publisher_id_id_version_pk": {
+ "name": "agent_config_publisher_id_id_version_pk",
+ "columns": [
+ "publisher_id",
+ "id",
+ "version"
+ ]
+ }
+ },
+ "uniqueConstraints": {},
+ "policies": {},
+ "checkConstraints": {},
+ "isRLSEnabled": false
+ },
+ "public.agent_run": {
+ "name": "agent_run",
+ "schema": "",
+ "columns": {
+ "id": {
+ "name": "id",
+ "type": "text",
+ "primaryKey": true,
+ "notNull": true
+ },
+ "user_id": {
+ "name": "user_id",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "agent_id": {
+ "name": "agent_id",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "publisher_id": {
+ "name": "publisher_id",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "generated": {
+ "as": "CASE\n WHEN agent_id ~ '^[^/@]+/[^/@]+@[^/@]+$'\n THEN split_part(agent_id, '/', 1)\n ELSE NULL\n END",
+ "type": "stored"
+ }
+ },
+ "agent_name": {
+ "name": "agent_name",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "generated": {
+ "as": "CASE\n WHEN agent_id ~ '^[^/@]+/[^/@]+@[^/@]+$'\n THEN split_part(split_part(agent_id, '/', 2), '@', 1)\n ELSE agent_id\n END",
+ "type": "stored"
+ }
+ },
+ "agent_version": {
+ "name": "agent_version",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "generated": {
+ "as": "CASE\n WHEN agent_id ~ '^[^/@]+/[^/@]+@[^/@]+$'\n THEN split_part(agent_id, '@', 2)\n ELSE NULL\n END",
+ "type": "stored"
+ }
+ },
+ "ancestor_run_ids": {
+ "name": "ancestor_run_ids",
+ "type": "text[]",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "root_run_id": {
+ "name": "root_run_id",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "generated": {
+ "as": "CASE WHEN array_length(ancestor_run_ids, 1) >= 1 THEN ancestor_run_ids[1] ELSE id END",
+ "type": "stored"
+ }
+ },
+ "parent_run_id": {
+ "name": "parent_run_id",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "generated": {
+ "as": "CASE WHEN array_length(ancestor_run_ids, 1) >= 1 THEN ancestor_run_ids[array_length(ancestor_run_ids, 1)] ELSE NULL END",
+ "type": "stored"
+ }
+ },
+ "depth": {
+ "name": "depth",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": false,
+ "generated": {
+ "as": "COALESCE(array_length(ancestor_run_ids, 1), 1)",
+ "type": "stored"
+ }
+ },
+ "duration_ms": {
+ "name": "duration_ms",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": false,
+ "generated": {
+ "as": "CASE WHEN completed_at IS NOT NULL THEN EXTRACT(EPOCH FROM (completed_at - created_at)) * 1000 ELSE NULL END::integer",
+ "type": "stored"
+ }
+ },
+ "total_steps": {
+ "name": "total_steps",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": false,
+ "default": 0
+ },
+ "direct_credits": {
+ "name": "direct_credits",
+ "type": "numeric(10, 6)",
+ "primaryKey": false,
+ "notNull": false,
+ "default": "'0'"
+ },
+ "total_credits": {
+ "name": "total_credits",
+ "type": "numeric(10, 6)",
+ "primaryKey": false,
+ "notNull": false,
+ "default": "'0'"
+ },
+ "status": {
+ "name": "status",
+ "type": "agent_run_status",
+ "typeSchema": "public",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "'running'"
+ },
+ "error_message": {
+ "name": "error_message",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "created_at": {
+ "name": "created_at",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "now()"
+ },
+ "completed_at": {
+ "name": "completed_at",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": false
+ }
+ },
+ "indexes": {
+ "idx_agent_run_user_id": {
+ "name": "idx_agent_run_user_id",
+ "columns": [
+ {
+ "expression": "user_id",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ },
+ {
+ "expression": "created_at",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": false,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ },
+ "idx_agent_run_parent": {
+ "name": "idx_agent_run_parent",
+ "columns": [
+ {
+ "expression": "parent_run_id",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": false,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ },
+ "idx_agent_run_root": {
+ "name": "idx_agent_run_root",
+ "columns": [
+ {
+ "expression": "root_run_id",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": false,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ },
+ "idx_agent_run_agent_id": {
+ "name": "idx_agent_run_agent_id",
+ "columns": [
+ {
+ "expression": "agent_id",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ },
+ {
+ "expression": "created_at",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": false,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ },
+ "idx_agent_run_publisher": {
+ "name": "idx_agent_run_publisher",
+ "columns": [
+ {
+ "expression": "publisher_id",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ },
+ {
+ "expression": "created_at",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": false,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ },
+ "idx_agent_run_status": {
+ "name": "idx_agent_run_status",
+ "columns": [
+ {
+ "expression": "status",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": false,
+ "where": "\"agent_run\".\"status\" = 'running'",
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ },
+ "idx_agent_run_ancestors_gin": {
+ "name": "idx_agent_run_ancestors_gin",
+ "columns": [
+ {
+ "expression": "ancestor_run_ids",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": false,
+ "concurrently": false,
+ "method": "gin",
+ "with": {}
+ },
+ "idx_agent_run_completed_publisher_agent": {
+ "name": "idx_agent_run_completed_publisher_agent",
+ "columns": [
+ {
+ "expression": "publisher_id",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ },
+ {
+ "expression": "agent_name",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": false,
+ "where": "\"agent_run\".\"status\" = 'completed'",
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ },
+ "idx_agent_run_completed_recent": {
+ "name": "idx_agent_run_completed_recent",
+ "columns": [
+ {
+ "expression": "created_at",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ },
+ {
+ "expression": "publisher_id",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ },
+ {
+ "expression": "agent_name",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": false,
+ "where": "\"agent_run\".\"status\" = 'completed'",
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ },
+ "idx_agent_run_completed_version": {
+ "name": "idx_agent_run_completed_version",
+ "columns": [
+ {
+ "expression": "publisher_id",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ },
+ {
+ "expression": "agent_name",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ },
+ {
+ "expression": "agent_version",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ },
+ {
+ "expression": "created_at",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": false,
+ "where": "\"agent_run\".\"status\" = 'completed'",
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ },
+ "idx_agent_run_completed_user": {
+ "name": "idx_agent_run_completed_user",
+ "columns": [
+ {
+ "expression": "user_id",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": false,
+ "where": "\"agent_run\".\"status\" = 'completed'",
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ }
+ },
+ "foreignKeys": {
+ "agent_run_user_id_user_id_fk": {
+ "name": "agent_run_user_id_user_id_fk",
+ "tableFrom": "agent_run",
+ "tableTo": "user",
+ "columnsFrom": [
+ "user_id"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ }
+ },
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {},
+ "policies": {},
+ "checkConstraints": {},
+ "isRLSEnabled": false
+ },
+ "public.agent_step": {
+ "name": "agent_step",
+ "schema": "",
+ "columns": {
+ "id": {
+ "name": "id",
+ "type": "text",
+ "primaryKey": true,
+ "notNull": true
+ },
+ "agent_run_id": {
+ "name": "agent_run_id",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "step_number": {
+ "name": "step_number",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "duration_ms": {
+ "name": "duration_ms",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": false,
+ "generated": {
+ "as": "CASE WHEN completed_at IS NOT NULL THEN EXTRACT(EPOCH FROM (completed_at - created_at)) * 1000 ELSE NULL END::integer",
+ "type": "stored"
+ }
+ },
+ "credits": {
+ "name": "credits",
+ "type": "numeric(10, 6)",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "'0'"
+ },
+ "child_run_ids": {
+ "name": "child_run_ids",
+ "type": "text[]",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "spawned_count": {
+ "name": "spawned_count",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": false,
+ "generated": {
+ "as": "array_length(child_run_ids, 1)",
+ "type": "stored"
+ }
+ },
+ "message_id": {
+ "name": "message_id",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "status": {
+ "name": "status",
+ "type": "agent_step_status",
+ "typeSchema": "public",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "'completed'"
+ },
+ "error_message": {
+ "name": "error_message",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "created_at": {
+ "name": "created_at",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "now()"
+ },
+ "completed_at": {
+ "name": "completed_at",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "now()"
+ }
+ },
+ "indexes": {
+ "unique_step_number_per_run": {
+ "name": "unique_step_number_per_run",
+ "columns": [
+ {
+ "expression": "agent_run_id",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ },
+ {
+ "expression": "step_number",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": true,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ },
+ "idx_agent_step_run_id": {
+ "name": "idx_agent_step_run_id",
+ "columns": [
+ {
+ "expression": "agent_run_id",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": false,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ },
+ "idx_agent_step_children_gin": {
+ "name": "idx_agent_step_children_gin",
+ "columns": [
+ {
+ "expression": "child_run_ids",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": false,
+ "concurrently": false,
+ "method": "gin",
+ "with": {}
+ }
+ },
+ "foreignKeys": {
+ "agent_step_agent_run_id_agent_run_id_fk": {
+ "name": "agent_step_agent_run_id_agent_run_id_fk",
+ "tableFrom": "agent_step",
+ "tableTo": "agent_run",
+ "columnsFrom": [
+ "agent_run_id"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ }
+ },
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {},
+ "policies": {},
+ "checkConstraints": {},
+ "isRLSEnabled": false
+ },
+ "public.credit_ledger": {
+ "name": "credit_ledger",
+ "schema": "",
+ "columns": {
+ "operation_id": {
+ "name": "operation_id",
+ "type": "text",
+ "primaryKey": true,
+ "notNull": true
+ },
+ "user_id": {
+ "name": "user_id",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "principal": {
+ "name": "principal",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "balance": {
+ "name": "balance",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "type": {
+ "name": "type",
+ "type": "grant_type",
+ "typeSchema": "public",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "description": {
+ "name": "description",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "priority": {
+ "name": "priority",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "expires_at": {
+ "name": "expires_at",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "created_at": {
+ "name": "created_at",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "now()"
+ },
+ "org_id": {
+ "name": "org_id",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "stripe_subscription_id": {
+ "name": "stripe_subscription_id",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ }
+ },
+ "indexes": {
+ "idx_credit_ledger_active_balance": {
+ "name": "idx_credit_ledger_active_balance",
+ "columns": [
+ {
+ "expression": "user_id",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ },
+ {
+ "expression": "balance",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ },
+ {
+ "expression": "expires_at",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ },
+ {
+ "expression": "priority",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ },
+ {
+ "expression": "created_at",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": false,
+ "where": "\"credit_ledger\".\"balance\" != 0 AND \"credit_ledger\".\"expires_at\" IS NULL",
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ },
+ "idx_credit_ledger_org": {
+ "name": "idx_credit_ledger_org",
+ "columns": [
+ {
+ "expression": "org_id",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": false,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ },
+ "idx_credit_ledger_subscription": {
+ "name": "idx_credit_ledger_subscription",
+ "columns": [
+ {
+ "expression": "user_id",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ },
+ {
+ "expression": "type",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ },
+ {
+ "expression": "created_at",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": false,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ }
+ },
+ "foreignKeys": {
+ "credit_ledger_user_id_user_id_fk": {
+ "name": "credit_ledger_user_id_user_id_fk",
+ "tableFrom": "credit_ledger",
+ "tableTo": "user",
+ "columnsFrom": [
+ "user_id"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ },
+ "credit_ledger_org_id_org_id_fk": {
+ "name": "credit_ledger_org_id_org_id_fk",
+ "tableFrom": "credit_ledger",
+ "tableTo": "org",
+ "columnsFrom": [
+ "org_id"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ }
+ },
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {},
+ "policies": {},
+ "checkConstraints": {},
+ "isRLSEnabled": false
+ },
+ "public.encrypted_api_keys": {
+ "name": "encrypted_api_keys",
+ "schema": "",
+ "columns": {
+ "user_id": {
+ "name": "user_id",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "type": {
+ "name": "type",
+ "type": "api_key_type",
+ "typeSchema": "public",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "api_key": {
+ "name": "api_key",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ }
+ },
+ "indexes": {},
+ "foreignKeys": {
+ "encrypted_api_keys_user_id_user_id_fk": {
+ "name": "encrypted_api_keys_user_id_user_id_fk",
+ "tableFrom": "encrypted_api_keys",
+ "tableTo": "user",
+ "columnsFrom": [
+ "user_id"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ }
+ },
+ "compositePrimaryKeys": {
+ "encrypted_api_keys_user_id_type_pk": {
+ "name": "encrypted_api_keys_user_id_type_pk",
+ "columns": [
+ "user_id",
+ "type"
+ ]
+ }
+ },
+ "uniqueConstraints": {},
+ "policies": {},
+ "checkConstraints": {},
+ "isRLSEnabled": false
+ },
+ "public.fingerprint": {
+ "name": "fingerprint",
+ "schema": "",
+ "columns": {
+ "id": {
+ "name": "id",
+ "type": "text",
+ "primaryKey": true,
+ "notNull": true
+ },
+ "sig_hash": {
+ "name": "sig_hash",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "created_at": {
+ "name": "created_at",
+ "type": "timestamp",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "now()"
+ }
+ },
+ "indexes": {},
+ "foreignKeys": {},
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {},
+ "policies": {},
+ "checkConstraints": {},
+ "isRLSEnabled": false
+ },
+ "public.free_session": {
+ "name": "free_session",
+ "schema": "",
+ "columns": {
+ "user_id": {
+ "name": "user_id",
+ "type": "text",
+ "primaryKey": true,
+ "notNull": true
+ },
+ "status": {
+ "name": "status",
+ "type": "free_session_status",
+ "typeSchema": "public",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "active_instance_id": {
+ "name": "active_instance_id",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "model": {
+ "name": "model",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "queued_at": {
+ "name": "queued_at",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "now()"
+ },
+ "admitted_at": {
+ "name": "admitted_at",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "expires_at": {
+ "name": "expires_at",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "created_at": {
+ "name": "created_at",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "now()"
+ },
+ "updated_at": {
+ "name": "updated_at",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "now()"
+ }
+ },
+ "indexes": {
+ "idx_free_session_queue": {
+ "name": "idx_free_session_queue",
+ "columns": [
+ {
+ "expression": "status",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ },
+ {
+ "expression": "model",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ },
+ {
+ "expression": "queued_at",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": false,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ },
+ "idx_free_session_expiry": {
+ "name": "idx_free_session_expiry",
+ "columns": [
+ {
+ "expression": "expires_at",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": false,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ }
+ },
+ "foreignKeys": {
+ "free_session_user_id_user_id_fk": {
+ "name": "free_session_user_id_user_id_fk",
+ "tableFrom": "free_session",
+ "tableTo": "user",
+ "columnsFrom": [
+ "user_id"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ }
+ },
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {},
+ "policies": {},
+ "checkConstraints": {},
+ "isRLSEnabled": false
+ },
+ "public.free_session_admit": {
+ "name": "free_session_admit",
+ "schema": "",
+ "columns": {
+ "id": {
+ "name": "id",
+ "type": "text",
+ "primaryKey": true,
+ "notNull": true
+ },
+ "user_id": {
+ "name": "user_id",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "model": {
+ "name": "model",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "admitted_at": {
+ "name": "admitted_at",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "now()"
+ }
+ },
+ "indexes": {
+ "idx_free_session_admit_user_model_time": {
+ "name": "idx_free_session_admit_user_model_time",
+ "columns": [
+ {
+ "expression": "user_id",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ },
+ {
+ "expression": "model",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ },
+ {
+ "expression": "admitted_at",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": false,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ }
+ },
+ "foreignKeys": {
+ "free_session_admit_user_id_user_id_fk": {
+ "name": "free_session_admit_user_id_user_id_fk",
+ "tableFrom": "free_session_admit",
+ "tableTo": "user",
+ "columnsFrom": [
+ "user_id"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ }
+ },
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {},
+ "policies": {},
+ "checkConstraints": {},
+ "isRLSEnabled": false
+ },
+ "public.git_eval_results": {
+ "name": "git_eval_results",
+ "schema": "",
+ "columns": {
+ "id": {
+ "name": "id",
+ "type": "text",
+ "primaryKey": true,
+ "notNull": true
+ },
+ "cost_mode": {
+ "name": "cost_mode",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "reasoner_model": {
+ "name": "reasoner_model",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "agent_model": {
+ "name": "agent_model",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "metadata": {
+ "name": "metadata",
+ "type": "jsonb",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "cost": {
+ "name": "cost",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true,
+ "default": 0
+ },
+ "is_public": {
+ "name": "is_public",
+ "type": "boolean",
+ "primaryKey": false,
+ "notNull": true,
+ "default": false
+ },
+ "created_at": {
+ "name": "created_at",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "now()"
+ }
+ },
+ "indexes": {},
+ "foreignKeys": {},
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {},
+ "policies": {},
+ "checkConstraints": {},
+ "isRLSEnabled": false
+ },
+ "public.limit_override": {
+ "name": "limit_override",
+ "schema": "",
+ "columns": {
+ "user_id": {
+ "name": "user_id",
+ "type": "text",
+ "primaryKey": true,
+ "notNull": true
+ },
+ "credits_per_block": {
+ "name": "credits_per_block",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "block_duration_hours": {
+ "name": "block_duration_hours",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "weekly_credit_limit": {
+ "name": "weekly_credit_limit",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "created_at": {
+ "name": "created_at",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "now()"
+ },
+ "updated_at": {
+ "name": "updated_at",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "now()"
+ }
+ },
+ "indexes": {},
+ "foreignKeys": {
+ "limit_override_user_id_user_id_fk": {
+ "name": "limit_override_user_id_user_id_fk",
+ "tableFrom": "limit_override",
+ "tableTo": "user",
+ "columnsFrom": [
+ "user_id"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ }
+ },
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {},
+ "policies": {},
+ "checkConstraints": {},
+ "isRLSEnabled": false
+ },
+ "public.message": {
+ "name": "message",
+ "schema": "",
+ "columns": {
+ "id": {
+ "name": "id",
+ "type": "text",
+ "primaryKey": true,
+ "notNull": true
+ },
+ "finished_at": {
+ "name": "finished_at",
+ "type": "timestamp",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "client_id": {
+ "name": "client_id",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "client_request_id": {
+ "name": "client_request_id",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "model": {
+ "name": "model",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "agent_id": {
+ "name": "agent_id",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "request": {
+ "name": "request",
+ "type": "jsonb",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "last_message": {
+ "name": "last_message",
+ "type": "jsonb",
+ "primaryKey": false,
+ "notNull": false,
+ "generated": {
+ "as": "\"message\".\"request\" -> -1",
+ "type": "stored"
+ }
+ },
+ "reasoning_text": {
+ "name": "reasoning_text",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "response": {
+ "name": "response",
+ "type": "jsonb",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "input_tokens": {
+ "name": "input_tokens",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true,
+ "default": 0
+ },
+ "cache_creation_input_tokens": {
+ "name": "cache_creation_input_tokens",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "cache_read_input_tokens": {
+ "name": "cache_read_input_tokens",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true,
+ "default": 0
+ },
+ "reasoning_tokens": {
+ "name": "reasoning_tokens",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "output_tokens": {
+ "name": "output_tokens",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "cost": {
+ "name": "cost",
+ "type": "numeric(100, 20)",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "credits": {
+ "name": "credits",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "byok": {
+ "name": "byok",
+ "type": "boolean",
+ "primaryKey": false,
+ "notNull": true,
+ "default": false
+ },
+ "latency_ms": {
+ "name": "latency_ms",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "ttft_ms": {
+ "name": "ttft_ms",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "user_id": {
+ "name": "user_id",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "org_id": {
+ "name": "org_id",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "repo_url": {
+ "name": "repo_url",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ }
+ },
+ "indexes": {
+ "message_user_id_idx": {
+ "name": "message_user_id_idx",
+ "columns": [
+ {
+ "expression": "user_id",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": false,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ },
+ "message_finished_at_user_id_idx": {
+ "name": "message_finished_at_user_id_idx",
+ "columns": [
+ {
+ "expression": "finished_at",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ },
+ {
+ "expression": "user_id",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": false,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ },
+ "message_org_id_idx": {
+ "name": "message_org_id_idx",
+ "columns": [
+ {
+ "expression": "org_id",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": false,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ },
+ "message_org_id_finished_at_idx": {
+ "name": "message_org_id_finished_at_idx",
+ "columns": [
+ {
+ "expression": "org_id",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ },
+ {
+ "expression": "finished_at",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": false,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ }
+ },
+ "foreignKeys": {
+ "message_user_id_user_id_fk": {
+ "name": "message_user_id_user_id_fk",
+ "tableFrom": "message",
+ "tableTo": "user",
+ "columnsFrom": [
+ "user_id"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ },
+ "message_org_id_org_id_fk": {
+ "name": "message_org_id_org_id_fk",
+ "tableFrom": "message",
+ "tableTo": "org",
+ "columnsFrom": [
+ "org_id"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ }
+ },
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {},
+ "policies": {},
+ "checkConstraints": {},
+ "isRLSEnabled": false
+ },
+ "public.org": {
+ "name": "org",
+ "schema": "",
+ "columns": {
+ "id": {
+ "name": "id",
+ "type": "text",
+ "primaryKey": true,
+ "notNull": true
+ },
+ "name": {
+ "name": "name",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "slug": {
+ "name": "slug",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "description": {
+ "name": "description",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "owner_id": {
+ "name": "owner_id",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "stripe_customer_id": {
+ "name": "stripe_customer_id",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "stripe_subscription_id": {
+ "name": "stripe_subscription_id",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "current_period_start": {
+ "name": "current_period_start",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "current_period_end": {
+ "name": "current_period_end",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "auto_topup_enabled": {
+ "name": "auto_topup_enabled",
+ "type": "boolean",
+ "primaryKey": false,
+ "notNull": true,
+ "default": false
+ },
+ "auto_topup_threshold": {
+ "name": "auto_topup_threshold",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "auto_topup_amount": {
+ "name": "auto_topup_amount",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "credit_limit": {
+ "name": "credit_limit",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "billing_alerts": {
+ "name": "billing_alerts",
+ "type": "boolean",
+ "primaryKey": false,
+ "notNull": true,
+ "default": true
+ },
+ "usage_alerts": {
+ "name": "usage_alerts",
+ "type": "boolean",
+ "primaryKey": false,
+ "notNull": true,
+ "default": true
+ },
+ "weekly_reports": {
+ "name": "weekly_reports",
+ "type": "boolean",
+ "primaryKey": false,
+ "notNull": true,
+ "default": false
+ },
+ "created_at": {
+ "name": "created_at",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "now()"
+ },
+ "updated_at": {
+ "name": "updated_at",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "now()"
+ }
+ },
+ "indexes": {},
+ "foreignKeys": {
+ "org_owner_id_user_id_fk": {
+ "name": "org_owner_id_user_id_fk",
+ "tableFrom": "org",
+ "tableTo": "user",
+ "columnsFrom": [
+ "owner_id"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ }
+ },
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {
+ "org_slug_unique": {
+ "name": "org_slug_unique",
+ "nullsNotDistinct": false,
+ "columns": [
+ "slug"
+ ]
+ },
+ "org_stripe_customer_id_unique": {
+ "name": "org_stripe_customer_id_unique",
+ "nullsNotDistinct": false,
+ "columns": [
+ "stripe_customer_id"
+ ]
+ }
+ },
+ "policies": {},
+ "checkConstraints": {},
+ "isRLSEnabled": false
+ },
+ "public.org_feature": {
+ "name": "org_feature",
+ "schema": "",
+ "columns": {
+ "org_id": {
+ "name": "org_id",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "feature": {
+ "name": "feature",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "config": {
+ "name": "config",
+ "type": "jsonb",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "is_active": {
+ "name": "is_active",
+ "type": "boolean",
+ "primaryKey": false,
+ "notNull": true,
+ "default": true
+ },
+ "created_at": {
+ "name": "created_at",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "now()"
+ },
+ "updated_at": {
+ "name": "updated_at",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "now()"
+ }
+ },
+ "indexes": {
+ "idx_org_feature_active": {
+ "name": "idx_org_feature_active",
+ "columns": [
+ {
+ "expression": "org_id",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ },
+ {
+ "expression": "is_active",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": false,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ }
+ },
+ "foreignKeys": {
+ "org_feature_org_id_org_id_fk": {
+ "name": "org_feature_org_id_org_id_fk",
+ "tableFrom": "org_feature",
+ "tableTo": "org",
+ "columnsFrom": [
+ "org_id"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ }
+ },
+ "compositePrimaryKeys": {
+ "org_feature_org_id_feature_pk": {
+ "name": "org_feature_org_id_feature_pk",
+ "columns": [
+ "org_id",
+ "feature"
+ ]
+ }
+ },
+ "uniqueConstraints": {},
+ "policies": {},
+ "checkConstraints": {},
+ "isRLSEnabled": false
+ },
+ "public.org_invite": {
+ "name": "org_invite",
+ "schema": "",
+ "columns": {
+ "id": {
+ "name": "id",
+ "type": "text",
+ "primaryKey": true,
+ "notNull": true
+ },
+ "org_id": {
+ "name": "org_id",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "email": {
+ "name": "email",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "role": {
+ "name": "role",
+ "type": "org_role",
+ "typeSchema": "public",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "token": {
+ "name": "token",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "invited_by": {
+ "name": "invited_by",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "expires_at": {
+ "name": "expires_at",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "created_at": {
+ "name": "created_at",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "now()"
+ },
+ "accepted_at": {
+ "name": "accepted_at",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "accepted_by": {
+ "name": "accepted_by",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ }
+ },
+ "indexes": {
+ "idx_org_invite_token": {
+ "name": "idx_org_invite_token",
+ "columns": [
+ {
+ "expression": "token",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": false,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ },
+ "idx_org_invite_email": {
+ "name": "idx_org_invite_email",
+ "columns": [
+ {
+ "expression": "org_id",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ },
+ {
+ "expression": "email",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": false,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ },
+ "idx_org_invite_expires": {
+ "name": "idx_org_invite_expires",
+ "columns": [
+ {
+ "expression": "expires_at",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": false,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ }
+ },
+ "foreignKeys": {
+ "org_invite_org_id_org_id_fk": {
+ "name": "org_invite_org_id_org_id_fk",
+ "tableFrom": "org_invite",
+ "tableTo": "org",
+ "columnsFrom": [
+ "org_id"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ },
+ "org_invite_invited_by_user_id_fk": {
+ "name": "org_invite_invited_by_user_id_fk",
+ "tableFrom": "org_invite",
+ "tableTo": "user",
+ "columnsFrom": [
+ "invited_by"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "no action",
+ "onUpdate": "no action"
+ },
+ "org_invite_accepted_by_user_id_fk": {
+ "name": "org_invite_accepted_by_user_id_fk",
+ "tableFrom": "org_invite",
+ "tableTo": "user",
+ "columnsFrom": [
+ "accepted_by"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "no action",
+ "onUpdate": "no action"
+ }
+ },
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {
+ "org_invite_token_unique": {
+ "name": "org_invite_token_unique",
+ "nullsNotDistinct": false,
+ "columns": [
+ "token"
+ ]
+ }
+ },
+ "policies": {},
+ "checkConstraints": {},
+ "isRLSEnabled": false
+ },
+ "public.org_member": {
+ "name": "org_member",
+ "schema": "",
+ "columns": {
+ "org_id": {
+ "name": "org_id",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "user_id": {
+ "name": "user_id",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "role": {
+ "name": "role",
+ "type": "org_role",
+ "typeSchema": "public",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "joined_at": {
+ "name": "joined_at",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "now()"
+ }
+ },
+ "indexes": {},
+ "foreignKeys": {
+ "org_member_org_id_org_id_fk": {
+ "name": "org_member_org_id_org_id_fk",
+ "tableFrom": "org_member",
+ "tableTo": "org",
+ "columnsFrom": [
+ "org_id"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ },
+ "org_member_user_id_user_id_fk": {
+ "name": "org_member_user_id_user_id_fk",
+ "tableFrom": "org_member",
+ "tableTo": "user",
+ "columnsFrom": [
+ "user_id"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ }
+ },
+ "compositePrimaryKeys": {
+ "org_member_org_id_user_id_pk": {
+ "name": "org_member_org_id_user_id_pk",
+ "columns": [
+ "org_id",
+ "user_id"
+ ]
+ }
+ },
+ "uniqueConstraints": {},
+ "policies": {},
+ "checkConstraints": {},
+ "isRLSEnabled": false
+ },
+ "public.org_repo": {
+ "name": "org_repo",
+ "schema": "",
+ "columns": {
+ "id": {
+ "name": "id",
+ "type": "text",
+ "primaryKey": true,
+ "notNull": true
+ },
+ "org_id": {
+ "name": "org_id",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "repo_url": {
+ "name": "repo_url",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "repo_name": {
+ "name": "repo_name",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "repo_owner": {
+ "name": "repo_owner",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "approved_by": {
+ "name": "approved_by",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "approved_at": {
+ "name": "approved_at",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "now()"
+ },
+ "is_active": {
+ "name": "is_active",
+ "type": "boolean",
+ "primaryKey": false,
+ "notNull": true,
+ "default": true
+ }
+ },
+ "indexes": {
+ "idx_org_repo_active": {
+ "name": "idx_org_repo_active",
+ "columns": [
+ {
+ "expression": "org_id",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ },
+ {
+ "expression": "is_active",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": false,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ },
+ "idx_org_repo_unique": {
+ "name": "idx_org_repo_unique",
+ "columns": [
+ {
+ "expression": "org_id",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ },
+ {
+ "expression": "repo_url",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": false,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ }
+ },
+ "foreignKeys": {
+ "org_repo_org_id_org_id_fk": {
+ "name": "org_repo_org_id_org_id_fk",
+ "tableFrom": "org_repo",
+ "tableTo": "org",
+ "columnsFrom": [
+ "org_id"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ },
+ "org_repo_approved_by_user_id_fk": {
+ "name": "org_repo_approved_by_user_id_fk",
+ "tableFrom": "org_repo",
+ "tableTo": "user",
+ "columnsFrom": [
+ "approved_by"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "no action",
+ "onUpdate": "no action"
+ }
+ },
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {},
+ "policies": {},
+ "checkConstraints": {},
+ "isRLSEnabled": false
+ },
+ "public.publisher": {
+ "name": "publisher",
+ "schema": "",
+ "columns": {
+ "id": {
+ "name": "id",
+ "type": "text",
+ "primaryKey": true,
+ "notNull": true
+ },
+ "name": {
+ "name": "name",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "email": {
+ "name": "email",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "verified": {
+ "name": "verified",
+ "type": "boolean",
+ "primaryKey": false,
+ "notNull": true,
+ "default": false
+ },
+ "bio": {
+ "name": "bio",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "avatar_url": {
+ "name": "avatar_url",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "user_id": {
+ "name": "user_id",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "org_id": {
+ "name": "org_id",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "created_by": {
+ "name": "created_by",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "created_at": {
+ "name": "created_at",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "now()"
+ },
+ "updated_at": {
+ "name": "updated_at",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "now()"
+ }
+ },
+ "indexes": {},
+ "foreignKeys": {
+ "publisher_user_id_user_id_fk": {
+ "name": "publisher_user_id_user_id_fk",
+ "tableFrom": "publisher",
+ "tableTo": "user",
+ "columnsFrom": [
+ "user_id"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "no action",
+ "onUpdate": "no action"
+ },
+ "publisher_org_id_org_id_fk": {
+ "name": "publisher_org_id_org_id_fk",
+ "tableFrom": "publisher",
+ "tableTo": "org",
+ "columnsFrom": [
+ "org_id"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "no action",
+ "onUpdate": "no action"
+ },
+ "publisher_created_by_user_id_fk": {
+ "name": "publisher_created_by_user_id_fk",
+ "tableFrom": "publisher",
+ "tableTo": "user",
+ "columnsFrom": [
+ "created_by"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "no action",
+ "onUpdate": "no action"
+ }
+ },
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {},
+ "policies": {},
+ "checkConstraints": {
+ "publisher_single_owner": {
+ "name": "publisher_single_owner",
+ "value": "(\"publisher\".\"user_id\" IS NOT NULL AND \"publisher\".\"org_id\" IS NULL) OR\n (\"publisher\".\"user_id\" IS NULL AND \"publisher\".\"org_id\" IS NOT NULL)"
+ }
+ },
+ "isRLSEnabled": false
+ },
+ "public.referral": {
+ "name": "referral",
+ "schema": "",
+ "columns": {
+ "referrer_id": {
+ "name": "referrer_id",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "referred_id": {
+ "name": "referred_id",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "status": {
+ "name": "status",
+ "type": "referral_status",
+ "typeSchema": "public",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "'pending'"
+ },
+ "credits": {
+ "name": "credits",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "is_legacy": {
+ "name": "is_legacy",
+ "type": "boolean",
+ "primaryKey": false,
+ "notNull": true,
+ "default": false
+ },
+ "created_at": {
+ "name": "created_at",
+ "type": "timestamp",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "now()"
+ },
+ "completed_at": {
+ "name": "completed_at",
+ "type": "timestamp",
+ "primaryKey": false,
+ "notNull": false
+ }
+ },
+ "indexes": {},
+ "foreignKeys": {
+ "referral_referrer_id_user_id_fk": {
+ "name": "referral_referrer_id_user_id_fk",
+ "tableFrom": "referral",
+ "tableTo": "user",
+ "columnsFrom": [
+ "referrer_id"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "no action",
+ "onUpdate": "no action"
+ },
+ "referral_referred_id_user_id_fk": {
+ "name": "referral_referred_id_user_id_fk",
+ "tableFrom": "referral",
+ "tableTo": "user",
+ "columnsFrom": [
+ "referred_id"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "no action",
+ "onUpdate": "no action"
+ }
+ },
+ "compositePrimaryKeys": {
+ "referral_referrer_id_referred_id_pk": {
+ "name": "referral_referrer_id_referred_id_pk",
+ "columns": [
+ "referrer_id",
+ "referred_id"
+ ]
+ }
+ },
+ "uniqueConstraints": {},
+ "policies": {},
+ "checkConstraints": {},
+ "isRLSEnabled": false
+ },
+ "public.session": {
+ "name": "session",
+ "schema": "",
+ "columns": {
+ "sessionToken": {
+ "name": "sessionToken",
+ "type": "text",
+ "primaryKey": true,
+ "notNull": true
+ },
+ "userId": {
+ "name": "userId",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "expires": {
+ "name": "expires",
+ "type": "timestamp",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "fingerprint_id": {
+ "name": "fingerprint_id",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "type": {
+ "name": "type",
+ "type": "session_type",
+ "typeSchema": "public",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "'web'"
+ },
+ "created_at": {
+ "name": "created_at",
+ "type": "timestamp",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "now()"
+ }
+ },
+ "indexes": {},
+ "foreignKeys": {
+ "session_userId_user_id_fk": {
+ "name": "session_userId_user_id_fk",
+ "tableFrom": "session",
+ "tableTo": "user",
+ "columnsFrom": [
+ "userId"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ },
+ "session_fingerprint_id_fingerprint_id_fk": {
+ "name": "session_fingerprint_id_fingerprint_id_fk",
+ "tableFrom": "session",
+ "tableTo": "fingerprint",
+ "columnsFrom": [
+ "fingerprint_id"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "no action",
+ "onUpdate": "no action"
+ }
+ },
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {},
+ "policies": {},
+ "checkConstraints": {},
+ "isRLSEnabled": false
+ },
+ "public.subscription": {
+ "name": "subscription",
+ "schema": "",
+ "columns": {
+ "stripe_subscription_id": {
+ "name": "stripe_subscription_id",
+ "type": "text",
+ "primaryKey": true,
+ "notNull": true
+ },
+ "stripe_customer_id": {
+ "name": "stripe_customer_id",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "user_id": {
+ "name": "user_id",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "stripe_price_id": {
+ "name": "stripe_price_id",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "tier": {
+ "name": "tier",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "scheduled_tier": {
+ "name": "scheduled_tier",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "status": {
+ "name": "status",
+ "type": "subscription_status",
+ "typeSchema": "public",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "'active'"
+ },
+ "billing_period_start": {
+ "name": "billing_period_start",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "billing_period_end": {
+ "name": "billing_period_end",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "cancel_at_period_end": {
+ "name": "cancel_at_period_end",
+ "type": "boolean",
+ "primaryKey": false,
+ "notNull": true,
+ "default": false
+ },
+ "canceled_at": {
+ "name": "canceled_at",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "created_at": {
+ "name": "created_at",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "now()"
+ },
+ "updated_at": {
+ "name": "updated_at",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "now()"
+ }
+ },
+ "indexes": {
+ "idx_subscription_customer": {
+ "name": "idx_subscription_customer",
+ "columns": [
+ {
+ "expression": "stripe_customer_id",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": false,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ },
+ "idx_subscription_user": {
+ "name": "idx_subscription_user",
+ "columns": [
+ {
+ "expression": "user_id",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": false,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ },
+ "idx_subscription_status": {
+ "name": "idx_subscription_status",
+ "columns": [
+ {
+ "expression": "status",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": false,
+ "where": "\"subscription\".\"status\" = 'active'",
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ }
+ },
+ "foreignKeys": {
+ "subscription_user_id_user_id_fk": {
+ "name": "subscription_user_id_user_id_fk",
+ "tableFrom": "subscription",
+ "tableTo": "user",
+ "columnsFrom": [
+ "user_id"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ }
+ },
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {},
+ "policies": {},
+ "checkConstraints": {},
+ "isRLSEnabled": false
+ },
+ "public.sync_failure": {
+ "name": "sync_failure",
+ "schema": "",
+ "columns": {
+ "id": {
+ "name": "id",
+ "type": "text",
+ "primaryKey": true,
+ "notNull": true
+ },
+ "provider": {
+ "name": "provider",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "created_at": {
+ "name": "created_at",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "now()"
+ },
+ "last_attempt_at": {
+ "name": "last_attempt_at",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "now()"
+ },
+ "retry_count": {
+ "name": "retry_count",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true,
+ "default": 1
+ },
+ "last_error": {
+ "name": "last_error",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ }
+ },
+ "indexes": {
+ "idx_sync_failure_retry": {
+ "name": "idx_sync_failure_retry",
+ "columns": [
+ {
+ "expression": "retry_count",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ },
+ {
+ "expression": "last_attempt_at",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": false,
+ "where": "\"sync_failure\".\"retry_count\" < 5",
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ }
+ },
+ "foreignKeys": {},
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {},
+ "policies": {},
+ "checkConstraints": {},
+ "isRLSEnabled": false
+ },
+ "public.user": {
+ "name": "user",
+ "schema": "",
+ "columns": {
+ "id": {
+ "name": "id",
+ "type": "text",
+ "primaryKey": true,
+ "notNull": true
+ },
+ "name": {
+ "name": "name",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "email": {
+ "name": "email",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "password": {
+ "name": "password",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "emailVerified": {
+ "name": "emailVerified",
+ "type": "timestamp",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "image": {
+ "name": "image",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "stripe_customer_id": {
+ "name": "stripe_customer_id",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "next_quota_reset": {
+ "name": "next_quota_reset",
+ "type": "timestamp",
+ "primaryKey": false,
+ "notNull": false,
+ "default": "now() + INTERVAL '1 month'"
+ },
+ "created_at": {
+ "name": "created_at",
+ "type": "timestamp",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "now()"
+ },
+ "referral_code": {
+ "name": "referral_code",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "default": "'ref-' || gen_random_uuid()"
+ },
+ "referral_limit": {
+ "name": "referral_limit",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true,
+ "default": 5
+ },
+ "discord_id": {
+ "name": "discord_id",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "handle": {
+ "name": "handle",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "auto_topup_enabled": {
+ "name": "auto_topup_enabled",
+ "type": "boolean",
+ "primaryKey": false,
+ "notNull": true,
+ "default": false
+ },
+ "auto_topup_threshold": {
+ "name": "auto_topup_threshold",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "auto_topup_amount": {
+ "name": "auto_topup_amount",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "banned": {
+ "name": "banned",
+ "type": "boolean",
+ "primaryKey": false,
+ "notNull": true,
+ "default": false
+ },
+ "fallback_to_a_la_carte": {
+ "name": "fallback_to_a_la_carte",
+ "type": "boolean",
+ "primaryKey": false,
+ "notNull": true,
+ "default": false
+ }
+ },
+ "indexes": {},
+ "foreignKeys": {},
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {
+ "user_email_unique": {
+ "name": "user_email_unique",
+ "nullsNotDistinct": false,
+ "columns": [
+ "email"
+ ]
+ },
+ "user_stripe_customer_id_unique": {
+ "name": "user_stripe_customer_id_unique",
+ "nullsNotDistinct": false,
+ "columns": [
+ "stripe_customer_id"
+ ]
+ },
+ "user_referral_code_unique": {
+ "name": "user_referral_code_unique",
+ "nullsNotDistinct": false,
+ "columns": [
+ "referral_code"
+ ]
+ },
+ "user_discord_id_unique": {
+ "name": "user_discord_id_unique",
+ "nullsNotDistinct": false,
+ "columns": [
+ "discord_id"
+ ]
+ },
+ "user_handle_unique": {
+ "name": "user_handle_unique",
+ "nullsNotDistinct": false,
+ "columns": [
+ "handle"
+ ]
+ }
+ },
+ "policies": {},
+ "checkConstraints": {},
+ "isRLSEnabled": false
+ },
+ "public.verificationToken": {
+ "name": "verificationToken",
+ "schema": "",
+ "columns": {
+ "identifier": {
+ "name": "identifier",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "token": {
+ "name": "token",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "expires": {
+ "name": "expires",
+ "type": "timestamp",
+ "primaryKey": false,
+ "notNull": true
+ }
+ },
+ "indexes": {},
+ "foreignKeys": {},
+ "compositePrimaryKeys": {
+ "verificationToken_identifier_token_pk": {
+ "name": "verificationToken_identifier_token_pk",
+ "columns": [
+ "identifier",
+ "token"
+ ]
+ }
+ },
+ "uniqueConstraints": {},
+ "policies": {},
+ "checkConstraints": {},
+ "isRLSEnabled": false
+ }
+ },
+ "enums": {
+ "public.referral_status": {
+ "name": "referral_status",
+ "schema": "public",
+ "values": [
+ "pending",
+ "completed"
+ ]
+ },
+ "public.agent_run_status": {
+ "name": "agent_run_status",
+ "schema": "public",
+ "values": [
+ "running",
+ "completed",
+ "failed",
+ "cancelled"
+ ]
+ },
+ "public.agent_step_status": {
+ "name": "agent_step_status",
+ "schema": "public",
+ "values": [
+ "running",
+ "completed",
+ "skipped"
+ ]
+ },
+ "public.api_key_type": {
+ "name": "api_key_type",
+ "schema": "public",
+ "values": [
+ "anthropic",
+ "gemini",
+ "openai"
+ ]
+ },
+ "public.free_session_status": {
+ "name": "free_session_status",
+ "schema": "public",
+ "values": [
+ "queued",
+ "active"
+ ]
+ },
+ "public.grant_type": {
+ "name": "grant_type",
+ "schema": "public",
+ "values": [
+ "free",
+ "referral",
+ "referral_legacy",
+ "subscription",
+ "purchase",
+ "admin",
+ "organization",
+ "ad"
+ ]
+ },
+ "public.org_role": {
+ "name": "org_role",
+ "schema": "public",
+ "values": [
+ "owner",
+ "admin",
+ "member"
+ ]
+ },
+ "public.session_type": {
+ "name": "session_type",
+ "schema": "public",
+ "values": [
+ "web",
+ "pat",
+ "cli"
+ ]
+ },
+ "public.subscription_status": {
+ "name": "subscription_status",
+ "schema": "public",
+ "values": [
+ "incomplete",
+ "incomplete_expired",
+ "trialing",
+ "active",
+ "past_due",
+ "canceled",
+ "unpaid",
+ "paused"
+ ]
+ }
+ },
+ "schemas": {},
+ "sequences": {},
+ "roles": {},
+ "policies": {},
+ "views": {},
+ "_meta": {
+ "columns": {},
+ "schemas": {},
+ "tables": {}
+ }
+}
\ No newline at end of file
diff --git a/packages/internal/src/db/migrations/meta/_journal.json b/packages/internal/src/db/migrations/meta/_journal.json
index f67ef37dc..78747c831 100644
--- a/packages/internal/src/db/migrations/meta/_journal.json
+++ b/packages/internal/src/db/migrations/meta/_journal.json
@@ -323,6 +323,13 @@
"when": 1776813242936,
"tag": "0045_mean_sleeper",
"breakpoints": true
+ },
+ {
+ "idx": 46,
+ "version": "7",
+ "when": 1776898844362,
+ "tag": "0046_cloudy_firedrake",
+ "breakpoints": true
}
]
}
\ No newline at end of file
diff --git a/packages/internal/src/db/schema.ts b/packages/internal/src/db/schema.ts
index b6f170d29..2ead1fc6d 100644
--- a/packages/internal/src/db/schema.ts
+++ b/packages/internal/src/db/schema.ts
@@ -870,3 +870,37 @@ export const freeSession = pgTable(
index('idx_free_session_expiry').on(table.expires_at),
],
)
+
+/**
+ * Audit log of every admission — one row per queued→active transition. Used
+ * to rate-limit heavy users (e.g. no more than 5 GLM sessions per 20h).
+ *
+ * Separate from `free_session` because that table is one-row-per-user (state,
+ * not history); the UPSERT path there would otherwise destroy prior admissions.
+ */
+export const freeSessionAdmit = pgTable(
+ 'free_session_admit',
+ {
+ id: text('id')
+ .primaryKey()
+ .$defaultFn(() => crypto.randomUUID()),
+ user_id: text('user_id')
+ .notNull()
+ .references(() => user.id, { onDelete: 'cascade' }),
+ model: text('model').notNull(),
+ admitted_at: timestamp('admitted_at', {
+ mode: 'date',
+ withTimezone: true,
+ })
+ .notNull()
+ .defaultNow(),
+ },
+ (table) => [
+ // Rate-limit lookup: WHERE user_id=$1 AND model=$2 AND admitted_at > $cutoff
+ index('idx_free_session_admit_user_model_time').on(
+ table.user_id,
+ table.model,
+ table.admitted_at,
+ ),
+ ],
+)
diff --git a/scripts/test-fireworks-cache-intervals.ts b/scripts/test-fireworks-cache-intervals.ts
index 0ed71193f..8d4e86740 100644
--- a/scripts/test-fireworks-cache-intervals.ts
+++ b/scripts/test-fireworks-cache-intervals.ts
@@ -13,7 +13,6 @@
*
* Models:
* glm-5.1 (default) — z-ai/glm-5.1
- * kimi-k2.5 — moonshotai/kimi-k2.5
* minimax — minimax/minimax-m2.5
*
* Flags:
@@ -39,7 +38,7 @@ const FIREWORKS_BASE_URL = 'https://api.fireworks.ai/inference/v1'
type ModelConfig = {
id: string
standardModel: string
- deploymentModel: string
+ deploymentModel?: string
inputCostPerToken: number
cachedInputCostPerToken: number
outputCostPerToken: number
@@ -54,14 +53,6 @@ const MODEL_CONFIGS: Record = {
cachedInputCostPerToken: 0.26 / 1_000_000,
outputCostPerToken: 4.4 / 1_000_000,
},
- 'kimi-k2.5': {
- id: 'moonshotai/kimi-k2.5',
- standardModel: 'accounts/fireworks/models/kimi-k2p5',
- deploymentModel: 'accounts/james-65d217/deployments/mx8l5rq2',
- inputCostPerToken: 0.6 / 1_000_000,
- cachedInputCostPerToken: 0.1 / 1_000_000,
- outputCostPerToken: 3.0 / 1_000_000,
- },
minimax: {
id: 'minimax/minimax-m2.5',
standardModel: 'accounts/fireworks/models/minimax-m2p5',
@@ -117,8 +108,12 @@ function parseArgs(): {
const { modelKey, useDeployment: USE_DEPLOYMENT, intervals: INTERVALS_SEC } =
parseArgs()
const MODEL = MODEL_CONFIGS[modelKey]
+if (USE_DEPLOYMENT && !MODEL.deploymentModel) {
+ console.error(`❌ No custom deployment configured for ${MODEL.id}`)
+ process.exit(1)
+}
const FIREWORKS_MODEL = USE_DEPLOYMENT
- ? MODEL.deploymentModel
+ ? MODEL.deploymentModel!
: MODEL.standardModel
const INPUT_COST_PER_TOKEN = MODEL.inputCostPerToken
const CACHED_INPUT_COST_PER_TOKEN = MODEL.cachedInputCostPerToken
diff --git a/scripts/test-fireworks-long.ts b/scripts/test-fireworks-long.ts
index 67028228d..a1e4950f8 100644
--- a/scripts/test-fireworks-long.ts
+++ b/scripts/test-fireworks-long.ts
@@ -12,12 +12,17 @@
* Models:
* glm-5.1 (default) — z-ai/glm-5.1
* minimax — minimax/minimax-m2.5
+ * minimax-m2.7 — minimax/minimax-m2.7
*
* Flags:
* --deployment Use custom deployment instead of serverless (standard API)
* Serverless is the default
+ * Examples:
+ * bun scripts/test-fireworks-long.ts glm-5.1 --deployment
*/
+import { FIREWORKS_DEPLOYMENT_MAP } from '../web/src/llm-api/fireworks-config'
+
export { }
const FIREWORKS_BASE_URL = 'https://api.fireworks.ai/inference/v1'
@@ -25,7 +30,7 @@ const FIREWORKS_BASE_URL = 'https://api.fireworks.ai/inference/v1'
type ModelConfig = {
id: string // OpenRouter-style ID (for display)
standardModel: string // Fireworks standard API model ID
- deploymentModel: string // Fireworks custom deployment model ID
+ deploymentModel?: string // Fireworks custom deployment model ID
inputCostPerToken: number
cachedInputCostPerToken: number
outputCostPerToken: number
@@ -35,19 +40,11 @@ const MODEL_CONFIGS: Record = {
'glm-5.1': {
id: 'z-ai/glm-5.1',
standardModel: 'accounts/fireworks/models/glm-5p1',
- deploymentModel: 'accounts/james-65d217/deployments/mjb4i7ea',
+ deploymentModel: FIREWORKS_DEPLOYMENT_MAP['z-ai/glm-5.1'],
inputCostPerToken: 1.40 / 1_000_000,
cachedInputCostPerToken: 0.26 / 1_000_000,
outputCostPerToken: 4.40 / 1_000_000,
},
- 'kimi-k2.5': {
- id: 'moonshotai/kimi-k2.5',
- standardModel: 'accounts/fireworks/models/kimi-k2p5',
- deploymentModel: 'accounts/james-65d217/deployments/mx8l5rq2',
- inputCostPerToken: 0.60 / 1_000_000,
- cachedInputCostPerToken: 0.10 / 1_000_000,
- outputCostPerToken: 3.00 / 1_000_000,
- },
minimax: {
id: 'minimax/minimax-m2.5',
standardModel: 'accounts/fireworks/models/minimax-m2p5',
@@ -67,9 +64,16 @@ const MODEL_CONFIGS: Record = {
}
const DEFAULT_MODEL = 'glm-5.1'
+const MODEL_ALIASES: Record = {
+ glm: 'glm-5.1',
+ 'z-ai/glm-5.1': 'glm-5.1',
+ 'minimax/minimax-m2.5': 'minimax',
+ 'minimax/minimax-m2.7': 'minimax-m2.7',
+}
function getModelConfig(modelArg?: string): ModelConfig {
- const key = modelArg ?? DEFAULT_MODEL
+ const rawKey = modelArg ?? DEFAULT_MODEL
+ const key = MODEL_ALIASES[rawKey] ?? rawKey
const config = MODEL_CONFIGS[key]
if (!config) {
console.error(`❌ Unknown model: "${key}". Available models: ${Object.keys(MODEL_CONFIGS).join(', ')}`)
@@ -83,7 +87,11 @@ const modelArg = process.argv.find((a, i) => i > 1 && !a.startsWith('-') && a !=
const MODEL = getModelConfig(modelArg)
// Default to serverless (standard API); use --deployment for custom deployment
-const FIREWORKS_MODEL = USE_DEPLOYMENT ? MODEL.deploymentModel : MODEL.standardModel
+if (USE_DEPLOYMENT && !MODEL.deploymentModel) {
+ console.error(`❌ No custom deployment configured for ${MODEL.id}`)
+ process.exit(1)
+}
+const FIREWORKS_MODEL = USE_DEPLOYMENT ? MODEL.deploymentModel! : MODEL.standardModel
const INPUT_COST_PER_TOKEN = MODEL.inputCostPerToken
const CACHED_INPUT_COST_PER_TOKEN = MODEL.cachedInputCostPerToken
const OUTPUT_COST_PER_TOKEN = MODEL.outputCostPerToken
@@ -455,4 +463,4 @@ async function main() {
console.log('Done!')
}
-main()
\ No newline at end of file
+main()
diff --git a/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts b/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts
index 51a3eb46b..1aac8800c 100644
--- a/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts
+++ b/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts
@@ -1,6 +1,7 @@
import { afterEach, beforeEach, describe, expect, mock, it } from 'bun:test'
import { NextRequest } from 'next/server'
+import { isFreebuffDeploymentHours } from '@codebuff/common/constants/freebuff-models'
import { formatQuotaResetCountdown, postChatCompletions } from '../_post'
import type { TrackEventFn } from '@codebuff/common/types/contracts/analytics'
@@ -528,7 +529,7 @@ describe('/api/v1/chat/completions POST endpoint', () => {
method: 'POST',
headers: { Authorization: 'Bearer test-api-key-new-free' },
body: JSON.stringify({
- model: 'z-ai/glm-5.1',
+ model: 'minimax/minimax-m2.7',
stream: false,
codebuff_metadata: {
run_id: 'run-free',
@@ -555,6 +556,76 @@ describe('/api/v1/chat/completions POST endpoint', () => {
expect(response.status).toBe(200)
})
+ it('lets freebuff use GLM 5.1 through Fireworks availability rules', async () => {
+ const fetchedBodies: Record[] = []
+ const fetchViaFireworks = mock(
+ async (_url: string | URL | Request, init?: RequestInit) => {
+ fetchedBodies.push(JSON.parse(init?.body as string))
+ return new Response(
+ JSON.stringify({
+ id: 'test-id',
+ model: 'accounts/james-65d217/deployments/mjb4i7ea',
+ choices: [{ message: { content: 'test response' } }],
+ usage: {
+ prompt_tokens: 10,
+ completion_tokens: 20,
+ total_tokens: 30,
+ },
+ }),
+ {
+ status: 200,
+ headers: { 'Content-Type': 'application/json' },
+ },
+ )
+ },
+ ) as unknown as typeof globalThis.fetch
+
+ const req = new NextRequest(
+ 'http://localhost:3000/api/v1/chat/completions',
+ {
+ method: 'POST',
+ headers: { Authorization: 'Bearer test-api-key-new-free' },
+ body: JSON.stringify({
+ model: 'z-ai/glm-5.1',
+ stream: false,
+ codebuff_metadata: {
+ run_id: 'run-free',
+ client_id: 'test-client-id-123',
+ cost_mode: 'free',
+ },
+ }),
+ },
+ )
+
+ const response = await postChatCompletions({
+ req,
+ getUserInfoFromApiKey: mockGetUserInfoFromApiKey,
+ logger: mockLogger,
+ trackEvent: mockTrackEvent,
+ getUserUsageData: mockGetUserUsageData,
+ getAgentRunFromId: mockGetAgentRunFromId,
+ fetch: fetchViaFireworks,
+ insertMessageBigquery: mockInsertMessageBigquery,
+ loggerWithContext: mockLoggerWithContext,
+ checkSessionAdmissible: mockCheckSessionAdmissibleAllow,
+ })
+
+ const body = await response.json()
+ if (isFreebuffDeploymentHours()) {
+ expect(response.status).toBe(200)
+ expect(fetchedBodies).toHaveLength(1)
+ expect(fetchedBodies[0].model).toBe(
+ 'accounts/james-65d217/deployments/mjb4i7ea',
+ )
+ expect(body.model).toBe('z-ai/glm-5.1')
+ expect(body.provider).toBe('Fireworks')
+ } else {
+ expect(response.status).toBe(503)
+ expect(fetchedBodies).toHaveLength(0)
+ expect(body.error.code).toBe('DEPLOYMENT_OUTSIDE_HOURS')
+ }
+ })
+
it('skips credit check when in FREE mode even with 0 credits', async () => {
const req = new NextRequest(
'http://localhost:3000/api/v1/chat/completions',
@@ -562,7 +633,7 @@ describe('/api/v1/chat/completions POST endpoint', () => {
method: 'POST',
headers: { Authorization: 'Bearer test-api-key-no-credits' },
body: JSON.stringify({
- model: 'z-ai/glm-5.1',
+ model: 'minimax/minimax-m2.7',
stream: false,
codebuff_metadata: {
run_id: 'run-free',
@@ -671,7 +742,7 @@ describe('/api/v1/chat/completions POST endpoint', () => {
method: 'POST',
headers: { Authorization: 'Bearer test-api-key-new-free' },
body: JSON.stringify({
- model: 'z-ai/glm-5.1',
+ model: 'minimax/minimax-m2.7',
stream: true,
codebuff_metadata: {
run_id: 'run-123',
@@ -853,7 +924,7 @@ describe('/api/v1/chat/completions POST endpoint', () => {
method: 'POST',
headers: { Authorization: 'Bearer test-api-key-123' },
body: JSON.stringify({
- model: 'z-ai/glm-5.1',
+ model: 'minimax/minimax-m2.7',
stream: false,
codebuff_metadata: {
run_id: 'run-free',
diff --git a/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts b/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts
index 657c17f6d..e4675e488 100644
--- a/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts
+++ b/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts
@@ -3,6 +3,7 @@ import { describe, expect, test } from 'bun:test'
import {
deleteFreebuffSession,
FREEBUFF_INSTANCE_HEADER,
+ FREEBUFF_MODEL_HEADER,
getFreebuffSession,
postFreebuffSession,
} from '../_handlers'
@@ -12,16 +13,17 @@ import type { SessionDeps } from '@/server/free-session/public-api'
import type { InternalSessionRow } from '@/server/free-session/types'
import type { NextRequest } from 'next/server'
-const DEFAULT_MODEL = 'z-ai/glm-5.1'
+const DEFAULT_MODEL = 'minimax/minimax-m2.7'
function makeReq(
apiKey: string | null,
- opts: { instanceId?: string; cfCountry?: string } = {},
+ opts: { instanceId?: string; cfCountry?: string; model?: string } = {},
): NextRequest {
const headers = new Headers()
if (apiKey) headers.set('Authorization', `Bearer ${apiKey}`)
if (opts.instanceId) headers.set(FREEBUFF_INSTANCE_HEADER, opts.instanceId)
if (opts.cfCountry) headers.set('cf-ipcountry', opts.cfCountry)
+ if (opts.model) headers.set(FREEBUFF_MODEL_HEADER, opts.model)
return {
headers,
} as unknown as NextRequest
@@ -44,6 +46,9 @@ function makeSessionDeps(overrides: Partial = {}): SessionDeps & {
getInstantAdmitCapacity: () => 0,
activeCountForModel: async () => 0,
promoteQueuedUser: async () => null,
+ // No admits in handler tests — the rate-limit check reads empty and
+ // every request falls through to the queue.
+ listRecentAdmits: async () => [],
now: () => now,
getSessionRow: async (userId) => rows.get(userId) ?? null,
queueDepthsByModel: async () => {
@@ -153,6 +158,19 @@ describe('POST /api/v1/freebuff/session', () => {
expect(body.status).toBe('queued')
})
+ test('returns model_unavailable for GLM outside deployment hours', async () => {
+ const sessionDeps = makeSessionDeps()
+ const resp = await postFreebuffSession(
+ makeReq('ok', { model: 'z-ai/glm-5.1' }),
+ makeDeps(sessionDeps, 'u1'),
+ )
+ expect(resp.status).toBe(409)
+ const body = await resp.json()
+ expect(body.status).toBe('model_unavailable')
+ expect(body.availableHours).toBe('9am ET-5pm PT')
+ expect(sessionDeps.rows.size).toBe(0)
+ })
+
// Banned bots with valid API keys were POSTing every few seconds and
// inflating queueDepth between the 15s admission-tick sweeps. Rejecting at
// the HTTP layer with 403 (terminal, like country_blocked) keeps them out
diff --git a/web/src/app/api/v1/freebuff/session/_handlers.ts b/web/src/app/api/v1/freebuff/session/_handlers.ts
index ec17568a3..9a2d61899 100644
--- a/web/src/app/api/v1/freebuff/session/_handlers.ts
+++ b/web/src/app/api/v1/freebuff/session/_handlers.ts
@@ -138,12 +138,21 @@ export async function postFreebuffSession(
model: requestedModel,
deps: deps.sessionDeps,
})
- // model_locked is a 409 so it's distinguishable from a normal queued/active
- // response on the client. banned is a 403 (terminal, mirrors country_blocked)
- // so older CLIs that don't know the status fall into their `!resp.ok` error
- // path and back off instead of tight-polling on the unrecognized 200 body.
+ // model_locked / model_unavailable are 409 so they're distinguishable
+ // from normal queued/active responses on the client. banned is a 403
+ // (terminal, mirrors country_blocked) so older CLIs that don't know the
+ // status fall into their `!resp.ok` error path and back off instead of
+ // tight-polling on the unrecognized 200 body. rate_limited uses 429 for
+ // the same reason as banned — older CLIs back off, newer CLIs parse the
+ // structured body.
const status =
- state.status === 'model_locked' ? 409 : state.status === 'banned' ? 403 : 200
+ state.status === 'model_locked' || state.status === 'model_unavailable'
+ ? 409
+ : state.status === 'banned'
+ ? 403
+ : state.status === 'rate_limited'
+ ? 429
+ : 200
return NextResponse.json(state, { status })
} catch (error) {
return serverError(deps, 'POST', auth.userId, error)
diff --git a/web/src/llm-api/__tests__/fireworks-deployment.test.ts b/web/src/llm-api/__tests__/fireworks-deployment.test.ts
index 9ed91fd0a..be17a6e2e 100644
--- a/web/src/llm-api/__tests__/fireworks-deployment.test.ts
+++ b/web/src/llm-api/__tests__/fireworks-deployment.test.ts
@@ -3,7 +3,7 @@ import { afterEach, beforeEach, describe, expect, it, mock } from 'bun:test'
import {
createFireworksRequestWithFallback,
DEPLOYMENT_COOLDOWN_MS,
- FireworksError,
+ isDeploymentHours,
isDeploymentCoolingDown,
markDeploymentScalingUp,
resetDeploymentCooldown,
@@ -13,6 +13,11 @@ import type { Logger } from '@codebuff/common/types/contracts/logger'
const STANDARD_MODEL_ID = 'accounts/fireworks/models/glm-5p1'
const DEPLOYMENT_MODEL_ID = 'accounts/james-65d217/deployments/mjb4i7ea'
+const IN_DEPLOYMENT_HOURS = new Date('2026-04-17T16:00:00Z') // Friday, 12pm ET / 9am PT
+const BEFORE_DEPLOYMENT_HOURS = new Date('2026-04-17T12:59:00Z') // Friday, 8:59am ET
+const AFTER_DEPLOYMENT_HOURS = new Date('2026-04-18T00:00:00Z') // Friday, 5pm PT
+const WEEKDAY_AFTER_DEPLOYMENT_HOURS = new Date('2026-04-21T00:01:00Z') // Monday, 5:01pm PT
+const WEEKEND_DEPLOYMENT_HOURS = new Date('2026-04-18T16:00:00Z') // Saturday
function createMockLogger(): Logger {
return {
@@ -23,18 +28,20 @@ function createMockLogger(): Logger {
}
}
-// Helper: create a Date at a specific ET hour using a known EDT date (June 2025, UTC-4)
-function dateAtEtHour(hour: number): Date {
- // June 15, 2025 is EDT (UTC-4), so ET hour H = UTC hour H+4
- const utcHour = hour + 4
- if (utcHour < 24) {
- return new Date(`2025-06-15T${String(utcHour).padStart(2, '0')}:30:00Z`)
- }
- // Wraps to next day
- return new Date(`2025-06-16T${String(utcHour - 24).padStart(2, '0')}:30:00Z`)
-}
-
describe('Fireworks deployment routing', () => {
+ describe('deployment hours', () => {
+ it('is active from 9am ET until before 5pm PT on weekdays', () => {
+ expect(isDeploymentHours(BEFORE_DEPLOYMENT_HOURS)).toBe(false)
+ expect(isDeploymentHours(IN_DEPLOYMENT_HOURS)).toBe(true)
+ expect(isDeploymentHours(AFTER_DEPLOYMENT_HOURS)).toBe(false)
+ expect(isDeploymentHours(WEEKDAY_AFTER_DEPLOYMENT_HOURS)).toBe(false)
+ })
+
+ it('is inactive on weekends', () => {
+ expect(isDeploymentHours(WEEKEND_DEPLOYMENT_HOURS)).toBe(false)
+ })
+ })
+
describe('deployment cooldown', () => {
beforeEach(() => {
resetDeploymentCooldown()
@@ -81,27 +88,9 @@ describe('Fireworks deployment routing', () => {
model: 'z-ai/glm-5.1',
messages: [{ role: 'user' as const, content: 'test' }],
}
-
- function spyDeploymentHours(inHours: boolean) {
- // Control isDeploymentHours by mocking Date.prototype.toLocaleString
- // When called with the ET timezone options, return an hour inside or outside the window
- const original = Date.prototype.toLocaleString
- const spy = {
- restore: () => {
- Date.prototype.toLocaleString = original
- },
- }
- Date.prototype.toLocaleString = function (
- this: Date,
- ...args: Parameters
- ) {
- const options = args[1] as Intl.DateTimeFormatOptions | undefined
- if (options?.timeZone === 'America/New_York' && options?.hour === 'numeric') {
- return inHours ? '14' : '3'
- }
- return original.apply(this, args)
- }
- return spy
+ const liteBody = {
+ ...minimalBody,
+ codebuff_metadata: { cost_mode: 'lite' },
}
it('uses standard API when custom deployment is disabled', async () => {
@@ -128,7 +117,6 @@ describe('Fireworks deployment routing', () => {
})
it('tries custom deployment during deployment hours', async () => {
- const spy = spyDeploymentHours(true)
const fetchCalls: string[] = []
const mockFetch = mock(async (_url: string | URL | Request, init?: RequestInit) => {
@@ -137,160 +125,115 @@ describe('Fireworks deployment routing', () => {
return new Response(JSON.stringify({ ok: true }), { status: 200 })
}) as unknown as typeof globalThis.fetch
- try {
- const response = await createFireworksRequestWithFallback({
- body: minimalBody as never,
- originalModel: 'z-ai/glm-5.1',
- fetch: mockFetch,
- logger,
- useCustomDeployment: true,
- sessionId: 'test-user-id',
- })
-
- expect(response.status).toBe(200)
- expect(fetchCalls).toHaveLength(1)
- expect(fetchCalls[0]).toBe(DEPLOYMENT_MODEL_ID)
- } finally {
- spy.restore()
- }
+ const response = await createFireworksRequestWithFallback({
+ body: minimalBody as never,
+ originalModel: 'z-ai/glm-5.1',
+ fetch: mockFetch,
+ logger,
+ useCustomDeployment: true,
+ sessionId: 'test-user-id',
+ now: IN_DEPLOYMENT_HOURS,
+ })
+
+ expect(response.status).toBe(200)
+ expect(fetchCalls).toHaveLength(1)
+ expect(fetchCalls[0]).toBe(DEPLOYMENT_MODEL_ID)
})
- it('falls back to standard API on 503 DEPLOYMENT_SCALING_UP', async () => {
- const spy = spyDeploymentHours(true)
+ it('returns deployment 503 on DEPLOYMENT_SCALING_UP without serverless fallback', async () => {
const fetchCalls: string[] = []
- let callCount = 0
const mockFetch = mock(async (_url: string | URL | Request, init?: RequestInit) => {
const body = JSON.parse(init?.body as string)
fetchCalls.push(body.model)
- callCount++
-
- if (callCount === 1) {
- return new Response(
- JSON.stringify({
- error: {
- message: 'Deployment is currently scaled to zero and is scaling up. Please retry your request in a few minutes.',
- code: 'DEPLOYMENT_SCALING_UP',
- type: 'error',
- },
- }),
- { status: 503, statusText: 'Service Unavailable' },
- )
- }
-
- return new Response(JSON.stringify({ ok: true }), { status: 200 })
+ return new Response(
+ JSON.stringify({
+ error: {
+ message: 'Deployment is currently scaled to zero and is scaling up. Please retry your request in a few minutes.',
+ code: 'DEPLOYMENT_SCALING_UP',
+ type: 'error',
+ },
+ }),
+ { status: 503, statusText: 'Service Unavailable' },
+ )
}) as unknown as typeof globalThis.fetch
- try {
- const response = await createFireworksRequestWithFallback({
- body: minimalBody as never,
- originalModel: 'z-ai/glm-5.1',
- fetch: mockFetch,
- logger,
- useCustomDeployment: true,
- sessionId: 'test-user-id',
- })
-
- expect(response.status).toBe(200)
- expect(fetchCalls).toHaveLength(2)
- expect(fetchCalls[0]).toBe(DEPLOYMENT_MODEL_ID)
- expect(fetchCalls[1]).toBe(STANDARD_MODEL_ID)
- // Verify cooldown was activated
- expect(isDeploymentCoolingDown()).toBe(true)
- } finally {
- spy.restore()
- }
+ const response = await createFireworksRequestWithFallback({
+ body: minimalBody as never,
+ originalModel: 'z-ai/glm-5.1',
+ fetch: mockFetch,
+ logger,
+ useCustomDeployment: true,
+ sessionId: 'test-user-id',
+ now: IN_DEPLOYMENT_HOURS,
+ })
+
+ expect(response.status).toBe(503)
+ expect(fetchCalls).toEqual([DEPLOYMENT_MODEL_ID])
+ expect(isDeploymentCoolingDown()).toBe(true)
})
- it('falls back to standard API on non-scaling 503 from deployment', async () => {
- const spy = spyDeploymentHours(true)
+ it('returns non-scaling deployment 503 without serverless fallback', async () => {
const fetchCalls: string[] = []
- let callCount = 0
const mockFetch = mock(async (_url: string | URL | Request, init?: RequestInit) => {
const body = JSON.parse(init?.body as string)
fetchCalls.push(body.model)
- callCount++
-
- if (callCount === 1) {
- return new Response(
- JSON.stringify({
- error: {
- message: 'Service temporarily unavailable',
- code: 'SERVICE_UNAVAILABLE',
- type: 'error',
- },
- }),
- { status: 503, statusText: 'Service Unavailable' },
- )
- }
-
- return new Response(JSON.stringify({ ok: true }), { status: 200 })
+ return new Response(
+ JSON.stringify({
+ error: {
+ message: 'Service temporarily unavailable',
+ code: 'SERVICE_UNAVAILABLE',
+ type: 'error',
+ },
+ }),
+ { status: 503, statusText: 'Service Unavailable' },
+ )
}) as unknown as typeof globalThis.fetch
- try {
- const response = await createFireworksRequestWithFallback({
- body: minimalBody as never,
- originalModel: 'z-ai/glm-5.1',
- fetch: mockFetch,
- logger,
- useCustomDeployment: true,
- sessionId: 'test-user-id',
- })
-
- expect(response.status).toBe(200)
- expect(fetchCalls).toHaveLength(2)
- expect(fetchCalls[0]).toBe(DEPLOYMENT_MODEL_ID)
- expect(fetchCalls[1]).toBe(STANDARD_MODEL_ID)
- // Non-scaling 503 should NOT activate the cooldown
- expect(isDeploymentCoolingDown()).toBe(false)
- } finally {
- spy.restore()
- }
+ const response = await createFireworksRequestWithFallback({
+ body: minimalBody as never,
+ originalModel: 'z-ai/glm-5.1',
+ fetch: mockFetch,
+ logger,
+ useCustomDeployment: true,
+ sessionId: 'test-user-id',
+ now: IN_DEPLOYMENT_HOURS,
+ })
+
+ expect(response.status).toBe(503)
+ expect(fetchCalls).toEqual([DEPLOYMENT_MODEL_ID])
+ expect(isDeploymentCoolingDown()).toBe(false)
})
- it('falls back to standard API on 500 Internal Error from deployment', async () => {
- const spy = spyDeploymentHours(true)
+ it('returns 500 Internal Error from deployment without serverless fallback', async () => {
const fetchCalls: string[] = []
- let callCount = 0
const mockFetch = mock(async (_url: string | URL | Request, init?: RequestInit) => {
const body = JSON.parse(init?.body as string)
fetchCalls.push(body.model)
- callCount++
-
- if (callCount === 1) {
- return new Response(
- JSON.stringify({ error: 'Internal error' }),
- { status: 500, statusText: 'Internal Server Error' },
- )
- }
-
- return new Response(JSON.stringify({ ok: true }), { status: 200 })
+ return new Response(
+ JSON.stringify({ error: 'Internal error' }),
+ { status: 500, statusText: 'Internal Server Error' },
+ )
}) as unknown as typeof globalThis.fetch
- try {
- const response = await createFireworksRequestWithFallback({
- body: minimalBody as never,
- originalModel: 'z-ai/glm-5.1',
- fetch: mockFetch,
- logger,
- useCustomDeployment: true,
- sessionId: 'test-user-id',
- })
-
- expect(response.status).toBe(200)
- expect(fetchCalls).toHaveLength(2)
- expect(fetchCalls[0]).toBe(DEPLOYMENT_MODEL_ID)
- expect(fetchCalls[1]).toBe(STANDARD_MODEL_ID)
- expect(isDeploymentCoolingDown()).toBe(false)
- } finally {
- spy.restore()
- }
+ const response = await createFireworksRequestWithFallback({
+ body: minimalBody as never,
+ originalModel: 'z-ai/glm-5.1',
+ fetch: mockFetch,
+ logger,
+ useCustomDeployment: true,
+ sessionId: 'test-user-id',
+ now: IN_DEPLOYMENT_HOURS,
+ })
+
+ expect(response.status).toBe(500)
+ expect(fetchCalls).toEqual([DEPLOYMENT_MODEL_ID])
+ expect(isDeploymentCoolingDown()).toBe(false)
})
- it('skips deployment during cooldown and goes straight to standard API', async () => {
- const spy = spyDeploymentHours(true)
+ it('returns cooldown error without serverless fallback', async () => {
markDeploymentScalingUp()
const fetchCalls: string[] = []
@@ -300,26 +243,21 @@ describe('Fireworks deployment routing', () => {
return new Response(JSON.stringify({ ok: true }), { status: 200 })
}) as unknown as typeof globalThis.fetch
- try {
- const response = await createFireworksRequestWithFallback({
- body: minimalBody as never,
- originalModel: 'z-ai/glm-5.1',
- fetch: mockFetch,
- logger,
- useCustomDeployment: true,
- sessionId: 'test-user-id',
- })
-
- expect(response.status).toBe(200)
- expect(fetchCalls).toHaveLength(1)
- expect(fetchCalls[0]).toBe(STANDARD_MODEL_ID)
- } finally {
- spy.restore()
- }
+ const response = await createFireworksRequestWithFallback({
+ body: minimalBody as never,
+ originalModel: 'z-ai/glm-5.1',
+ fetch: mockFetch,
+ logger,
+ useCustomDeployment: true,
+ sessionId: 'test-user-id',
+ now: IN_DEPLOYMENT_HOURS,
+ })
+
+ expect(response.status).toBe(503)
+ expect(fetchCalls).toHaveLength(0)
})
it('uses standard API for models without a custom deployment', async () => {
- const spy = spyDeploymentHours(true)
const fetchCalls: string[] = []
const mockFetch = mock(async (_url: string | URL | Request, init?: RequestInit) => {
@@ -328,27 +266,66 @@ describe('Fireworks deployment routing', () => {
return new Response(JSON.stringify({ ok: true }), { status: 200 })
}) as unknown as typeof globalThis.fetch
- try {
- const response = await createFireworksRequestWithFallback({
- body: { ...minimalBody, model: 'some-other/model' } as never,
- originalModel: 'some-other/model',
- fetch: mockFetch,
- logger,
- useCustomDeployment: true,
- sessionId: 'test-user-id',
- })
-
- expect(response.status).toBe(200)
- expect(fetchCalls).toHaveLength(1)
- // Model without mapping falls through to the original model
- expect(fetchCalls[0]).toBe('some-other/model')
- } finally {
- spy.restore()
- }
+ const response = await createFireworksRequestWithFallback({
+ body: { ...minimalBody, model: 'some-other/model' } as never,
+ originalModel: 'some-other/model',
+ fetch: mockFetch,
+ logger,
+ useCustomDeployment: true,
+ sessionId: 'test-user-id',
+ now: BEFORE_DEPLOYMENT_HOURS,
+ })
+
+ expect(response.status).toBe(200)
+ expect(fetchCalls).toHaveLength(1)
+ // Model without mapping falls through to the original model
+ expect(fetchCalls[0]).toBe('some-other/model')
+ })
+
+ it('returns an availability error for deployment models outside hours', async () => {
+ const mockFetch = mock(async () => {
+ throw new Error('should not fetch outside deployment hours')
+ }) as unknown as typeof globalThis.fetch
+
+ const response = await createFireworksRequestWithFallback({
+ body: minimalBody as never,
+ originalModel: 'z-ai/glm-5.1',
+ fetch: mockFetch,
+ logger,
+ useCustomDeployment: true,
+ sessionId: 'test-user-id',
+ now: BEFORE_DEPLOYMENT_HOURS,
+ })
+
+ expect(response.status).toBe(503)
+ const body = await response.json()
+ expect(body.error.code).toBe('DEPLOYMENT_OUTSIDE_HOURS')
+ })
+
+ it('falls back to the standard Fireworks API in lite mode outside deployment hours', async () => {
+ const fetchCalls: string[] = []
+
+ const mockFetch = mock(async (_url: string | URL | Request, init?: RequestInit) => {
+ const body = JSON.parse(init?.body as string)
+ fetchCalls.push(body.model)
+ return new Response(JSON.stringify({ ok: true }), { status: 200 })
+ }) as unknown as typeof globalThis.fetch
+
+ const response = await createFireworksRequestWithFallback({
+ body: liteBody as never,
+ originalModel: 'z-ai/glm-5.1',
+ fetch: mockFetch,
+ logger,
+ useCustomDeployment: true,
+ sessionId: 'test-user-id',
+ now: BEFORE_DEPLOYMENT_HOURS,
+ })
+
+ expect(response.status).toBe(200)
+ expect(fetchCalls).toEqual([STANDARD_MODEL_ID])
})
it('returns non-5xx responses from deployment without fallback (e.g. 429)', async () => {
- const spy = spyDeploymentHours(true)
const fetchCalls: string[] = []
const mockFetch = mock(async (_url: string | URL | Request, init?: RequestInit) => {
@@ -360,23 +337,20 @@ describe('Fireworks deployment routing', () => {
)
}) as unknown as typeof globalThis.fetch
- try {
- const response = await createFireworksRequestWithFallback({
- body: minimalBody as never,
- originalModel: 'z-ai/glm-5.1',
- fetch: mockFetch,
- logger,
- useCustomDeployment: true,
- sessionId: 'test-user-id',
- })
-
- // Non-5xx errors from deployment are returned as-is (caller handles them)
- expect(response.status).toBe(429)
- expect(fetchCalls).toHaveLength(1)
- expect(fetchCalls[0]).toBe(DEPLOYMENT_MODEL_ID)
- } finally {
- spy.restore()
- }
+ const response = await createFireworksRequestWithFallback({
+ body: minimalBody as never,
+ originalModel: 'z-ai/glm-5.1',
+ fetch: mockFetch,
+ logger,
+ useCustomDeployment: true,
+ sessionId: 'test-user-id',
+ now: IN_DEPLOYMENT_HOURS,
+ })
+
+ // Non-5xx errors from deployment are returned as-is (caller handles them)
+ expect(response.status).toBe(429)
+ expect(fetchCalls).toHaveLength(1)
+ expect(fetchCalls[0]).toBe(DEPLOYMENT_MODEL_ID)
})
it('transforms reasoning to reasoning_effort (defaults to medium)', async () => {
@@ -535,17 +509,44 @@ describe('Fireworks deployment routing', () => {
expect(fetchedBodies[0].reasoning_effort).toBe('low')
})
- it('logs when trying deployment and when falling back on 5xx', async () => {
- const spy = spyDeploymentHours(true)
- let callCount = 0
-
+ it('logs when trying deployment and when deployment returns 5xx', async () => {
const mockFetch = mock(async () => {
- callCount++
- if (callCount === 1) {
+ return new Response(
+ JSON.stringify({
+ error: {
+ message: 'Scaling up',
+ code: 'DEPLOYMENT_SCALING_UP',
+ type: 'error',
+ },
+ }),
+ { status: 503, statusText: 'Service Unavailable' },
+ )
+ }) as unknown as typeof globalThis.fetch
+
+ await createFireworksRequestWithFallback({
+ body: minimalBody as never,
+ originalModel: 'z-ai/glm-5.1',
+ fetch: mockFetch,
+ logger,
+ useCustomDeployment: true,
+ sessionId: 'test-user-id',
+ now: IN_DEPLOYMENT_HOURS,
+ })
+
+ expect(logger.info).toHaveBeenCalledTimes(2)
+ })
+
+ it('falls back to the standard Fireworks API in lite mode after deployment scaling 503', async () => {
+ const fetchCalls: string[] = []
+
+ const mockFetch = mock(async (_url: string | URL | Request, init?: RequestInit) => {
+ const body = JSON.parse(init?.body as string)
+ fetchCalls.push(body.model)
+ if (fetchCalls.length === 1) {
return new Response(
JSON.stringify({
error: {
- message: 'Scaling up',
+ message: 'Deployment is currently scaled to zero and is scaling up. Please retry your request in a few minutes.',
code: 'DEPLOYMENT_SCALING_UP',
type: 'error',
},
@@ -556,20 +557,70 @@ describe('Fireworks deployment routing', () => {
return new Response(JSON.stringify({ ok: true }), { status: 200 })
}) as unknown as typeof globalThis.fetch
- try {
- await createFireworksRequestWithFallback({
- body: minimalBody as never,
- originalModel: 'z-ai/glm-5.1',
- fetch: mockFetch,
- logger,
- useCustomDeployment: true,
- sessionId: 'test-user-id',
- })
-
- expect(logger.info).toHaveBeenCalledTimes(2)
- } finally {
- spy.restore()
- }
+ const response = await createFireworksRequestWithFallback({
+ body: liteBody as never,
+ originalModel: 'z-ai/glm-5.1',
+ fetch: mockFetch,
+ logger,
+ useCustomDeployment: true,
+ sessionId: 'test-user-id',
+ now: IN_DEPLOYMENT_HOURS,
+ })
+
+ expect(response.status).toBe(200)
+ expect(fetchCalls).toEqual([DEPLOYMENT_MODEL_ID, STANDARD_MODEL_ID])
+ expect(isDeploymentCoolingDown()).toBe(true)
+ })
+
+ it('falls back to the standard Fireworks API in lite mode during deployment cooldown', async () => {
+ markDeploymentScalingUp()
+
+ const fetchCalls: string[] = []
+ const mockFetch = mock(async (_url: string | URL | Request, init?: RequestInit) => {
+ const body = JSON.parse(init?.body as string)
+ fetchCalls.push(body.model)
+ return new Response(JSON.stringify({ ok: true }), { status: 200 })
+ }) as unknown as typeof globalThis.fetch
+
+ const response = await createFireworksRequestWithFallback({
+ body: liteBody as never,
+ originalModel: 'z-ai/glm-5.1',
+ fetch: mockFetch,
+ logger,
+ useCustomDeployment: true,
+ sessionId: 'test-user-id',
+ now: IN_DEPLOYMENT_HOURS,
+ })
+
+ expect(response.status).toBe(200)
+ expect(fetchCalls).toEqual([STANDARD_MODEL_ID])
+ })
+
+ it('falls back to the standard Fireworks API in lite mode when the deployment request throws', async () => {
+ const fetchCalls: string[] = []
+
+ const mockFetch = mock(async (_url: string | URL | Request, init?: RequestInit) => {
+ const body = JSON.parse(init?.body as string)
+ fetchCalls.push(body.model)
+ if (fetchCalls.length === 1) {
+ throw new Error('socket hang up')
+ }
+ return new Response(JSON.stringify({ ok: true }), { status: 200 })
+ }) as unknown as typeof globalThis.fetch
+
+ const response = await createFireworksRequestWithFallback({
+ body: liteBody as never,
+ originalModel: 'z-ai/glm-5.1',
+ fetch: mockFetch,
+ logger,
+ useCustomDeployment: true,
+ sessionId: 'test-user-id',
+ now: IN_DEPLOYMENT_HOURS,
+ })
+
+ expect(response.status).toBe(200)
+ expect(fetchCalls).toEqual([DEPLOYMENT_MODEL_ID, STANDARD_MODEL_ID])
+ expect(logger.warn).toHaveBeenCalledTimes(1)
})
})
})
diff --git a/web/src/llm-api/fireworks-config.ts b/web/src/llm-api/fireworks-config.ts
index fb6d59580..566728250 100644
--- a/web/src/llm-api/fireworks-config.ts
+++ b/web/src/llm-api/fireworks-config.ts
@@ -10,7 +10,6 @@ export const FIREWORKS_ACCOUNT_ID = 'james-65d217'
export const FIREWORKS_DEPLOYMENT_MAP: Record = {
// 'minimax/minimax-m2.5': 'accounts/james-65d217/deployments/lnfid5h9',
- // 'moonshotai/kimi-k2.5': 'accounts/james-65d217/deployments/mx8l5rq2',
- // 'minimax/minimax-m2.7': 'accounts/james-65d217/deployments/nrdudqxd',
'z-ai/glm-5.1': 'accounts/james-65d217/deployments/mjb4i7ea',
+ // 'minimax/minimax-m2.7': 'accounts/james-65d217/deployments/nrdudqxd',
}
diff --git a/web/src/llm-api/fireworks.ts b/web/src/llm-api/fireworks.ts
index 6e304638d..a2f4f80a8 100644
--- a/web/src/llm-api/fireworks.ts
+++ b/web/src/llm-api/fireworks.ts
@@ -1,5 +1,9 @@
import { Agent } from 'undici'
+import {
+ FREEBUFF_DEPLOYMENT_HOURS_LABEL,
+ isFreebuffDeploymentHours,
+} from '@codebuff/common/constants/freebuff-models'
import { PROFIT_MARGIN } from '@codebuff/common/constants/limits'
import { getErrorObject } from '@codebuff/common/util/error'
import { env } from '@codebuff/internal/env'
@@ -32,15 +36,14 @@ const FIREWORKS_MODEL_MAP: Record = {
'minimax/minimax-m2.5': 'accounts/fireworks/models/minimax-m2p5',
'minimax/minimax-m2.7': 'accounts/fireworks/models/minimax-m2p7',
'z-ai/glm-5.1': 'accounts/fireworks/models/glm-5p1',
- 'moonshotai/kimi-k2.5': 'accounts/fireworks/models/kimi-k2p5',
}
/** Flag to enable custom Fireworks deployments (set to false to use global API only) */
const FIREWORKS_USE_CUSTOM_DEPLOYMENT = true
-/** Check if current time is within deployment hours (always enabled) */
-export function isDeploymentHours(_now: Date = new Date()): boolean {
- return true
+/** Check if current time is within deployment hours: Mon-Fri, 9am ET to 5pm PT. */
+export function isDeploymentHours(now: Date = new Date()): boolean {
+ return isFreebuffDeploymentHours(now)
}
/**
@@ -93,7 +96,7 @@ function createFireworksRequest(params: {
// Transform OpenRouter-style `reasoning` object into Fireworks' `reasoning_effort`.
// Unlike OpenAI, Fireworks supports reasoning_effort together with function tools
- // (e.g. GLM-4.5/5.1 and Kimi K2 are designed for interleaved reasoning + tool use).
+ // (e.g. GLM-4.5/5.1 are designed for interleaved reasoning + tool use).
if (fireworksBody.reasoning && typeof fireworksBody.reasoning === 'object') {
const reasoning = fireworksBody.reasoning as {
enabled?: boolean
@@ -165,15 +168,10 @@ const FIREWORKS_PRICING_MAP: Record = {
cachedInputCostPerToken: 0.26 / 1_000_000,
outputCostPerToken: 4.40 / 1_000_000,
},
- 'moonshotai/kimi-k2.5': {
- inputCostPerToken: 0.60 / 1_000_000,
- cachedInputCostPerToken: 0.10 / 1_000_000,
- outputCostPerToken: 3.00 / 1_000_000,
- },
}
function getFireworksPricing(model: string): FireworksPricing {
- return FIREWORKS_PRICING_MAP[model] ?? FIREWORKS_MODEL_MAP['z-ai/glm-5.1']
+ return FIREWORKS_PRICING_MAP[model] ?? FIREWORKS_PRICING_MAP['z-ai/glm-5.1']
}
function extractUsageAndCost(usage: Record | undefined | null, model: string): UsageData {
@@ -708,9 +706,10 @@ async function parseFireworksError(response: Response): Promise
}
/**
- * Tries the custom Fireworks deployment during business hours (10am–8pm ET),
- * falling back to the standard API if the deployment returns 503 DEPLOYMENT_SCALING_UP.
- * Outside deployment hours or during cooldown, goes straight to the standard API.
+ * Uses custom Fireworks deployments only during deployment hours. Deployment
+ * mapped models never fall back to the serverless API outside hours, during
+ * cooldown, or after deployment 5xxs; those states surface as provider errors
+ * so freebuff can offer MiniMax as the always-on option.
*/
export async function createFireworksRequestWithFallback(params: {
body: ChatCompletionRequestBody
@@ -719,45 +718,109 @@ export async function createFireworksRequestWithFallback(params: {
logger: Logger
useCustomDeployment?: boolean
sessionId: string
+ now?: Date
}): Promise {
const { body, originalModel, fetch, logger, sessionId } = params
+ const now = params.now ?? new Date()
const useCustomDeployment = params.useCustomDeployment ?? FIREWORKS_USE_CUSTOM_DEPLOYMENT
const deploymentModelId = FIREWORKS_DEPLOYMENT_MAP[originalModel]
- const shouldTryDeployment =
- useCustomDeployment &&
- deploymentModelId &&
- isDeploymentHours() &&
- !isDeploymentCoolingDown()
+ const hasDeployment = useCustomDeployment && Boolean(deploymentModelId)
+ const shouldFallbackToStandardApi = body.codebuff_metadata?.cost_mode === 'lite'
+
+ const createStandardApiRequest = () =>
+ createFireworksRequest({ body, originalModel, fetch, sessionId })
+
+ if (hasDeployment && !isDeploymentHours(now)) {
+ if (shouldFallbackToStandardApi) {
+ logger.info(
+ { model: originalModel },
+ 'Falling back to Fireworks standard API outside deployment hours',
+ )
+ return createStandardApiRequest()
+ }
+ return new Response(
+ JSON.stringify({
+ error: {
+ message: `${originalModel} is only available during ${FREEBUFF_DEPLOYMENT_HOURS_LABEL}. Use minimax/minimax-m2.7 outside those hours.`,
+ code: 'DEPLOYMENT_OUTSIDE_HOURS',
+ type: 'availability_error',
+ },
+ }),
+ { status: 503, statusText: 'Service Unavailable' },
+ )
+ }
+
+ if (hasDeployment && isDeploymentCoolingDown()) {
+ if (shouldFallbackToStandardApi) {
+ logger.info(
+ { model: originalModel },
+ 'Falling back to Fireworks standard API during deployment cooldown',
+ )
+ return createStandardApiRequest()
+ }
+ return new Response(
+ JSON.stringify({
+ error: {
+ message: `${originalModel} deployment is temporarily unavailable. Use minimax/minimax-m2.7 while it recovers.`,
+ code: 'DEPLOYMENT_COOLDOWN',
+ type: 'availability_error',
+ },
+ }),
+ { status: 503, statusText: 'Service Unavailable' },
+ )
+ }
- if (shouldTryDeployment) {
+ if (hasDeployment && deploymentModelId) {
logger.info(
{ model: originalModel, deploymentModel: deploymentModelId },
'Trying Fireworks custom deployment',
)
- const response = await createFireworksRequest({
- body,
- originalModel,
- fetch,
- modelIdOverride: deploymentModelId,
- sessionId,
- })
+ let response: Response
+ try {
+ response = await createFireworksRequest({
+ body,
+ originalModel,
+ fetch,
+ modelIdOverride: deploymentModelId,
+ sessionId,
+ })
+ } catch (error) {
+ if (shouldFallbackToStandardApi) {
+ logger.warn(
+ { model: originalModel, error: getErrorObject(error) },
+ 'Fireworks custom deployment request failed, falling back to standard API',
+ )
+ return createStandardApiRequest()
+ }
+ throw error
+ }
if (response.status >= 500) {
const errorText = await response.text()
logger.info(
{ model: originalModel, status: response.status, errorText: errorText.slice(0, 200) },
- 'Fireworks custom deployment returned 5xx, falling back to standard API',
+ 'Fireworks custom deployment returned 5xx',
)
if (errorText.includes('DEPLOYMENT_SCALING_UP')) {
markDeploymentScalingUp()
}
- // Fall through to standard API request below
- } else {
- return response
+ if (shouldFallbackToStandardApi) {
+ logger.info(
+ { model: originalModel, status: response.status },
+ 'Falling back to Fireworks standard API after deployment 5xx',
+ )
+ return createStandardApiRequest()
+ }
+ return new Response(errorText, {
+ status: response.status,
+ statusText: response.statusText,
+ headers: response.headers,
+ })
}
+ return response
}
- return createFireworksRequest({ body, originalModel, fetch, sessionId })
+ return createStandardApiRequest()
}
function creditsToFakeCost(credits: number): number {
diff --git a/web/src/server/free-session/__tests__/config.test.ts b/web/src/server/free-session/__tests__/config.test.ts
new file mode 100644
index 000000000..93f5fdcf0
--- /dev/null
+++ b/web/src/server/free-session/__tests__/config.test.ts
@@ -0,0 +1,13 @@
+import { describe, expect, test } from 'bun:test'
+
+import { FREEBUFF_MODELS } from '@codebuff/common/constants/freebuff-models'
+
+import { getInstantAdmitCapacity } from '../config'
+
+describe('free session config', () => {
+ test('every selectable freebuff model has instant-admit capacity', () => {
+ for (const model of FREEBUFF_MODELS) {
+ expect(getInstantAdmitCapacity(model.id)).toBeGreaterThan(0)
+ }
+ })
+})
diff --git a/web/src/server/free-session/__tests__/public-api.test.ts b/web/src/server/free-session/__tests__/public-api.test.ts
index a824f6d22..8b08d63df 100644
--- a/web/src/server/free-session/__tests__/public-api.test.ts
+++ b/web/src/server/free-session/__tests__/public-api.test.ts
@@ -13,14 +13,22 @@ import type { InternalSessionRow } from '../types'
const SESSION_LEN = 60 * 60 * 1000
const GRACE_MS = 30 * 60 * 1000
-const DEFAULT_MODEL = 'z-ai/glm-5.1'
+const DEFAULT_MODEL = 'minimax/minimax-m2.7'
+
+interface AdmitRecord {
+ user_id: string
+ model: string
+ admitted_at: Date
+}
function makeDeps(overrides: Partial = {}): SessionDeps & {
rows: Map
+ admits: AdmitRecord[]
_tick: (n: Date) => void
_now: () => Date
} {
const rows = new Map()
+ const admits: AdmitRecord[] = []
let currentNow = new Date('2026-04-17T12:00:00Z')
let instanceCounter = 0
@@ -28,10 +36,12 @@ function makeDeps(overrides: Partial = {}): SessionDeps & {
const deps: SessionDeps & {
rows: Map
+ admits: AdmitRecord[]
_tick: (n: Date) => void
_now: () => Date
} = {
rows,
+ admits,
_tick: (n: Date) => {
currentNow = n
},
@@ -50,6 +60,18 @@ function makeDeps(overrides: Partial = {}): SessionDeps & {
}
return n
},
+ listRecentAdmits: async ({ userId, model, since, limit }) => {
+ return admits
+ .filter(
+ (a) =>
+ a.user_id === userId &&
+ a.model === model &&
+ a.admitted_at.getTime() >= since.getTime(),
+ )
+ .sort((a, b) => a.admitted_at.getTime() - b.admitted_at.getTime())
+ .slice(0, limit)
+ .map((a) => a.admitted_at)
+ },
promoteQueuedUser: async ({ userId, model, sessionLengthMs, now }) => {
const row = rows.get(userId)
if (!row || row.status !== 'queued' || row.model !== model) return null
@@ -57,6 +79,7 @@ function makeDeps(overrides: Partial = {}): SessionDeps & {
row.admitted_at = now
row.expires_at = new Date(now.getTime() + sessionLengthMs)
row.updated_at = now
+ admits.push({ user_id: userId, model, admitted_at: now })
return row
},
now: () => currentNow,
@@ -177,19 +200,34 @@ describe('requestSession', () => {
expect(state.instanceId).toBe('inst-1')
})
+ test('deployment-hours-only model is unavailable outside deployment hours', async () => {
+ const state = await requestSession({
+ userId: 'u1',
+ model: 'z-ai/glm-5.1',
+ deps,
+ })
+ expect(state).toEqual({
+ status: 'model_unavailable',
+ requestedModel: 'z-ai/glm-5.1',
+ availableHours: '9am ET-5pm PT',
+ })
+ expect(deps.rows.size).toBe(0)
+ })
+
test('queued response includes a per-model depth snapshot for the selector', async () => {
- // Seed 2 users in glm + 1 in minimax so the returned map captures both.
+ deps._tick(new Date('2026-04-17T16:00:00Z'))
+ // Seed 2 users in MiniMax + 1 in GLM so the returned map captures both.
await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps })
deps._tick(new Date(deps._now().getTime() + 1000))
await requestSession({ userId: 'u2', model: DEFAULT_MODEL, deps })
deps._tick(new Date(deps._now().getTime() + 1000))
- await requestSession({ userId: 'u3', model: 'minimax/minimax-m2.7', deps })
+ await requestSession({ userId: 'u3', model: 'z-ai/glm-5.1', deps })
const state = await getSessionState({ userId: 'u1', deps })
if (state.status !== 'queued') throw new Error('unreachable')
expect(state.queueDepthByModel).toEqual({
[DEFAULT_MODEL]: 2,
- 'minimax/minimax-m2.7': 1,
+ 'z-ai/glm-5.1': 1,
})
})
@@ -264,11 +302,12 @@ describe('requestSession', () => {
})
test('instant-admit: per-model capacities are independent', async () => {
- // GLM saturated at 1 active, MiniMax still has room.
+ // MiniMax saturated at 1 active, GLM still has room.
const admitDeps = makeDeps({
getInstantAdmitCapacity: (model) =>
model === DEFAULT_MODEL ? 1 : 10,
})
+ admitDeps._tick(new Date('2026-04-17T16:00:00Z'))
await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps: admitDeps })
const s2 = await requestSession({
userId: 'u2',
@@ -277,12 +316,264 @@ describe('requestSession', () => {
})
const s3 = await requestSession({
userId: 'u3',
- model: 'minimax/minimax-m2.7',
+ model: 'z-ai/glm-5.1',
deps: admitDeps,
})
expect(s2.status).toBe('queued')
expect(s3.status).toBe('active')
})
+
+ // Per-user rate limit (5 GLM admissions per 20h) — the wire limit is
+ // hard-coded in public-api.ts, so tests seed the fake admit log directly
+ // rather than configuring it. GLM also has deployment-hours gating, so
+ // these tests bump `now` into the open window (12pm ET on a weekday)
+ // before issuing the request.
+ const GLM_MODEL = 'z-ai/glm-5.1'
+ const GLM_LIMIT = 5
+ const GLM_WINDOW_HOURS = 20
+ const GLM_OPEN_TIME = new Date('2026-04-17T16:00:00Z')
+
+ test('rate_limited: 5th GLM admit in window blocks the 6th attempt', async () => {
+ deps._tick(GLM_OPEN_TIME)
+ // Seed 5 admits inside the 20h window, spaced so we can verify retryAfter
+ // points at the oldest one sliding off.
+ const now = deps._now()
+ // Oldest: 19h ago (still in window). Next 4: 1h, 2h, 3h, 4h ago.
+ const ages = [19, 4, 3, 2, 1]
+ for (const hoursAgo of ages) {
+ deps.admits.push({
+ user_id: 'u1',
+ model: GLM_MODEL,
+ admitted_at: new Date(now.getTime() - hoursAgo * 60 * 60 * 1000),
+ })
+ }
+
+ const state = await requestSession({
+ userId: 'u1',
+ model: GLM_MODEL,
+ deps,
+ })
+ expect(state.status).toBe('rate_limited')
+ if (state.status !== 'rate_limited') throw new Error('unreachable')
+ expect(state.model).toBe(GLM_MODEL)
+ expect(state.limit).toBe(GLM_LIMIT)
+ expect(state.windowHours).toBe(GLM_WINDOW_HOURS)
+ expect(state.recentCount).toBe(GLM_LIMIT)
+ // Oldest admit is 19h ago; slot opens when it hits 20h, i.e. in 1h.
+ expect(state.retryAfterMs).toBe(60 * 60 * 1000)
+ // Blocked before any row is written — the user doesn't take a queue slot.
+ expect(deps.rows.has('u1')).toBe(false)
+ })
+
+ test('rate_limited: admits outside the 20h window do not count', async () => {
+ deps._tick(GLM_OPEN_TIME)
+ // 5 admits, each just over 20h old → all fall off the window.
+ const now = deps._now()
+ for (let i = 0; i < 5; i++) {
+ deps.admits.push({
+ user_id: 'u1',
+ model: GLM_MODEL,
+ admitted_at: new Date(
+ now.getTime() - (GLM_WINDOW_HOURS * 60 * 60 * 1000 + 60_000 + i),
+ ),
+ })
+ }
+ const state = await requestSession({
+ userId: 'u1',
+ model: GLM_MODEL,
+ deps,
+ })
+ expect(state.status).toBe('queued')
+ if (state.status !== 'queued') throw new Error('unreachable')
+ expect(state.rateLimit?.recentCount).toBe(0)
+ })
+
+ test('rate_limited: Minimax is unlimited even with many recent admits', async () => {
+ const now = deps._now()
+ for (let i = 0; i < 20; i++) {
+ deps.admits.push({
+ user_id: 'u1',
+ model: DEFAULT_MODEL,
+ admitted_at: new Date(now.getTime() - i * 60_000),
+ })
+ }
+ const state = await requestSession({
+ userId: 'u1',
+ model: DEFAULT_MODEL,
+ deps,
+ })
+ expect(state.status).toBe('queued')
+ if (state.status !== 'queued') throw new Error('unreachable')
+ // No rate-limit info for models without a rate limit — the CLI skips the quota line.
+ expect(state.rateLimit).toBeUndefined()
+ })
+
+ test('queued GLM response carries the current admit count', async () => {
+ deps._tick(GLM_OPEN_TIME)
+ const now = deps._now()
+ // 2 admits in the window — under the limit so the user still queues.
+ deps.admits.push({
+ user_id: 'u1',
+ model: GLM_MODEL,
+ admitted_at: new Date(now.getTime() - 60 * 60 * 1000),
+ })
+ deps.admits.push({
+ user_id: 'u1',
+ model: GLM_MODEL,
+ admitted_at: new Date(now.getTime() - 30 * 60 * 1000),
+ })
+ const state = await requestSession({
+ userId: 'u1',
+ model: GLM_MODEL,
+ deps,
+ })
+ if (state.status !== 'queued') throw new Error('unreachable')
+ expect(state.rateLimit).toEqual({
+ model: GLM_MODEL,
+ limit: GLM_LIMIT,
+ windowHours: GLM_WINDOW_HOURS,
+ recentCount: 2,
+ })
+ })
+
+ test('rate_limited: takeover of an active GLM row is allowed even when at cap', async () => {
+ // Reclaim path: user has an active+unexpired GLM session and restarts
+ // the CLI. POST must rotate their instance id (takeover) and NOT reject
+ // with rate_limited — otherwise they'd be stranded with a live session
+ // they can't reconnect to. The 5th admission is already in the log, so
+ // this also exercises "at the cap" rather than "over the cap".
+ deps._tick(GLM_OPEN_TIME)
+ const now = deps._now()
+ // Seed 5 prior admits (the cap), with the latest one matching the
+ // active row we're about to install.
+ const ages = [19, 4, 3, 2, 0]
+ for (const hoursAgo of ages) {
+ deps.admits.push({
+ user_id: 'u1',
+ model: GLM_MODEL,
+ admitted_at: new Date(now.getTime() - hoursAgo * 60 * 60 * 1000),
+ })
+ }
+ // Install the active row directly (skipping the normal request path so
+ // we don't have to unwind the rate-limit gate to set up the fixture).
+ const admittedAt = new Date(now.getTime() - 30 * 60 * 1000)
+ deps.rows.set('u1', {
+ user_id: 'u1',
+ status: 'active',
+ active_instance_id: 'inst-pre',
+ model: GLM_MODEL,
+ queued_at: admittedAt,
+ admitted_at: admittedAt,
+ expires_at: new Date(admittedAt.getTime() + SESSION_LEN),
+ created_at: admittedAt,
+ updated_at: admittedAt,
+ })
+
+ const state = await requestSession({
+ userId: 'u1',
+ model: GLM_MODEL,
+ deps,
+ })
+ expect(state.status).toBe('active')
+ if (state.status !== 'active') throw new Error('unreachable')
+ // Instance id rotated; quota snapshot still reflects the full window.
+ expect(state.instanceId).not.toBe('inst-pre')
+ expect(state.rateLimit?.recentCount).toBe(GLM_LIMIT)
+ })
+
+ test('rate_limited: reclaim of a queued GLM row is allowed even when at cap', async () => {
+ // Same reclaim exception for queued rows: if a user has already queued
+ // (say they slipped in just before their 5th admit landed), a subsequent
+ // POST from the same CLI must preserve their queue position instead of
+ // flipping to rate_limited.
+ deps._tick(GLM_OPEN_TIME)
+ const now = deps._now()
+ for (let i = 0; i < GLM_LIMIT; i++) {
+ deps.admits.push({
+ user_id: 'u1',
+ model: GLM_MODEL,
+ admitted_at: new Date(now.getTime() - (i + 1) * 60 * 60 * 1000),
+ })
+ }
+ const queuedAt = new Date(now.getTime() - 5 * 60 * 1000)
+ deps.rows.set('u1', {
+ user_id: 'u1',
+ status: 'queued',
+ active_instance_id: 'inst-pre',
+ model: GLM_MODEL,
+ queued_at: queuedAt,
+ admitted_at: null,
+ expires_at: null,
+ created_at: queuedAt,
+ updated_at: queuedAt,
+ })
+
+ const state = await requestSession({
+ userId: 'u1',
+ model: GLM_MODEL,
+ deps,
+ })
+ expect(state.status).toBe('queued')
+ if (state.status !== 'queued') throw new Error('unreachable')
+ // Same position (1) since we preserved queued_at and nobody else is
+ // ahead; the instance id rotated so any prior CLI is superseded.
+ expect(state.instanceId).not.toBe('inst-pre')
+ expect(state.rateLimit?.recentCount).toBe(GLM_LIMIT)
+ })
+
+ test('rate_limited: expired GLM row is not a reclaim — quota still applies', async () => {
+ // The stored row's expires_at is in the past, so it doesn't represent
+ // an in-flight session. This POST is effectively a fresh request and
+ // must be blocked by the quota.
+ deps._tick(GLM_OPEN_TIME)
+ const now = deps._now()
+ const ages = [19, 4, 3, 2, 1]
+ for (const hoursAgo of ages) {
+ deps.admits.push({
+ user_id: 'u1',
+ model: GLM_MODEL,
+ admitted_at: new Date(now.getTime() - hoursAgo * 60 * 60 * 1000),
+ })
+ }
+ const admittedAt = new Date(now.getTime() - 2 * SESSION_LEN)
+ deps.rows.set('u1', {
+ user_id: 'u1',
+ status: 'active',
+ active_instance_id: 'inst-pre',
+ model: GLM_MODEL,
+ queued_at: admittedAt,
+ admitted_at: admittedAt,
+ expires_at: new Date(admittedAt.getTime() + SESSION_LEN),
+ created_at: admittedAt,
+ updated_at: admittedAt,
+ })
+ const state = await requestSession({
+ userId: 'u1',
+ model: GLM_MODEL,
+ deps,
+ })
+ expect(state.status).toBe('rate_limited')
+ })
+
+ test('instant-admit bumps the quota count for the freshly-written admit row', async () => {
+ const admitDeps = makeDeps({ getInstantAdmitCapacity: () => 3 })
+ admitDeps._tick(GLM_OPEN_TIME)
+ // 1 existing admit in the window; this new call should instant-admit and
+ // write a second row, so the response's recentCount reflects 2.
+ const now = admitDeps._now()
+ admitDeps.admits.push({
+ user_id: 'u1',
+ model: GLM_MODEL,
+ admitted_at: new Date(now.getTime() - 30 * 60 * 1000),
+ })
+ const state = await requestSession({
+ userId: 'u1',
+ model: GLM_MODEL,
+ deps: admitDeps,
+ })
+ if (state.status !== 'active') throw new Error('unreachable')
+ expect(state.rateLimit?.recentCount).toBe(2)
+ })
})
describe('getSessionState', () => {
@@ -341,6 +632,39 @@ describe('getSessionState', () => {
expect(state).toEqual({ status: 'superseded' })
})
+ test('getSessionState surfaces rateLimit on queued/active polls', async () => {
+ // Regression: the POST response attached rateLimit, but GET polls did
+ // not — so the "Sessions N/M used" line flashed once then disappeared on
+ // the next 5s poll. GET must attach the same quota snapshot. Rate
+ // limits only apply to GLM, so this test uses GLM explicitly (inside
+ // deployment hours) rather than the Minimax DEFAULT_MODEL.
+ deps._tick(new Date('2026-04-17T16:00:00Z'))
+ const now = deps._now()
+ deps.admits.push({
+ user_id: 'u1',
+ model: 'z-ai/glm-5.1',
+ admitted_at: new Date(now.getTime() - 60 * 60 * 1000),
+ })
+ await requestSession({ userId: 'u1', model: 'z-ai/glm-5.1', deps })
+ const row = deps.rows.get('u1')!
+ row.status = 'active'
+ row.admitted_at = now
+ row.expires_at = new Date(now.getTime() + SESSION_LEN)
+
+ const state = await getSessionState({
+ userId: 'u1',
+ claimedInstanceId: row.active_instance_id,
+ deps,
+ })
+ if (state.status !== 'active') throw new Error('unreachable')
+ expect(state.rateLimit).toEqual({
+ model: 'z-ai/glm-5.1',
+ limit: 5,
+ windowHours: 20,
+ recentCount: 1,
+ })
+ })
+
test('omitted claimedInstanceId on active session returns active (read-only)', async () => {
// Polling without an id (e.g. very first GET before POST has resolved)
// must not be classified as superseded — only an explicit mismatch is.
diff --git a/web/src/server/free-session/admission.ts b/web/src/server/free-session/admission.ts
index 3f3c051d2..9f0b74c9f 100644
--- a/web/src/server/free-session/admission.ts
+++ b/web/src/server/free-session/admission.ts
@@ -1,4 +1,7 @@
-import { FREEBUFF_MODELS } from '@codebuff/common/constants/freebuff-models'
+import {
+ FREEBUFF_MODELS,
+ isFreebuffModelAvailable,
+} from '@codebuff/common/constants/freebuff-models'
import {
ADMISSION_TICK_MS,
@@ -111,7 +114,10 @@ export async function runAdmissionTick(
// advisory locks and a single update each.
const perModel = await Promise.all(
models.map(async (model) => {
- const health = fleet[model] ?? 'healthy'
+ const isRegisteredModel = FREEBUFF_MODELS.some((m) => m.id === model)
+ const health = !isRegisteredModel || isFreebuffModelAvailable(model, now)
+ ? fleet[model] ?? 'healthy'
+ : 'unhealthy'
const { admitted, skipped } = await deps.admitFromQueue({
model,
sessionLengthMs: deps.sessionLengthMs,
diff --git a/web/src/server/free-session/public-api.ts b/web/src/server/free-session/public-api.ts
index 450540443..02c5c05c9 100644
--- a/web/src/server/free-session/public-api.ts
+++ b/web/src/server/free-session/public-api.ts
@@ -1,4 +1,6 @@
import {
+ FREEBUFF_DEPLOYMENT_HOURS_LABEL,
+ isFreebuffModelAvailable,
isFreebuffModelId as isSelectableFreebuffModel,
resolveFreebuffModel,
} from '@codebuff/common/constants/freebuff-models'
@@ -16,15 +18,65 @@ import {
FreeSessionModelLockedError,
getSessionRow,
joinOrTakeOver,
+ listRecentAdmits,
promoteQueuedUser,
queueDepthsByModel,
queuePositionFor,
} from './store'
import { toSessionStateResponse } from './session-view'
-import type { FreebuffSessionServerResponse } from '@codebuff/common/types/freebuff-session'
+import type {
+ FreebuffSessionRateLimit,
+ FreebuffSessionServerResponse,
+} from '@codebuff/common/types/freebuff-session'
import type { InternalSessionRow, SessionStateResponse } from './types'
+/**
+ * Per-model admission rate limits. Keyed by freebuff model id; a model not
+ * in the map has no rate limit applied. Today only GLM 5.1 is limited
+ * (Minimax is cheap enough to leave unlimited).
+ *
+ * Hard-coded rather than env-driven: the values need to be observable in the
+ * code review, and the CLI already renders the numbers via `rateLimit` on
+ * queued/active responses — changing them is a deliberate, typed edit.
+ */
+const RATE_LIMITS: Record<string, { limit: number; windowHours: number }> = {
+ 'z-ai/glm-5.1': { limit: 5, windowHours: 20 },
+}
+
+/** Fetch the caller's current quota snapshot for `model`, or undefined if the
+ * model isn't rate-limited. Used by both POST (after admit) and GET polls so
+ * the CLI's "N of M sessions used" line stays live instead of disappearing
+ * after the first poll. Also returns the oldest admit in-window so callers
+ * that need `retryAfterMs` don't have to re-query. */
+async function fetchRateLimitSnapshot(
+ userId: string,
+ model: string,
+ deps: SessionDeps,
+): Promise<
+ { info: FreebuffSessionRateLimit; oldest: Date | null } | undefined
+> {
+ const cfg = RATE_LIMITS[model]
+ if (!cfg) return undefined
+ const now = nowOf(deps)
+ const since = new Date(now.getTime() - cfg.windowHours * 60 * 60 * 1000)
+ const admits = await deps.listRecentAdmits({
+ userId,
+ model,
+ since,
+ limit: cfg.limit,
+ })
+ return {
+ info: {
+ model,
+ limit: cfg.limit,
+ windowHours: cfg.windowHours,
+ recentCount: admits.length,
+ },
+ oldest: admits[0] ?? null,
+ }
+}
+
export interface SessionDeps {
getSessionRow: (userId: string) => Promise<InternalSessionRow | null>
joinOrTakeOver: (params: {
@@ -43,6 +95,15 @@ export interface SessionDeps {
* bound to a given model. Compared against the model's configured
* `instantAdmitCapacity` to decide whether a new joiner skips the queue. */
activeCountForModel: (model: string) => Promise<number>
+ /** Rate-limit helper: oldest-first admission timestamps for (userId, model)
+ * inside the window. The caller uses `rows.length` as the count (capped
+ * at `limit`) and `rows[0]` as the oldest for `retryAfterMs`. */
+ listRecentAdmits: (params: {
+ userId: string
+ model: string
+ since: Date
+ limit: number
+ }) => Promise<Date[]>
/** Instant-admit promotion: flips a specific queued row to active. Returns
* the updated row or null if the row wasn't in a queued state. */
promoteQueuedUser: (params: {
@@ -71,6 +132,7 @@ const defaultDeps: SessionDeps = {
queueDepthsByModel,
queuePositionFor,
activeCountForModel,
+ listRecentAdmits,
promoteQueuedUser,
getInstantAdmitCapacity,
isWaitingRoomEnabled,
@@ -122,6 +184,21 @@ export type RequestSessionResult =
currentModel: string
requestedModel: string
}
+ | {
+ /** User has hit the per-model admission quota in the rolling window.
+ * See `FreebuffSessionServerResponse`'s `rate_limited` variant. */
+ status: 'rate_limited'
+ model: string
+ limit: number
+ windowHours: number
+ recentCount: number
+ retryAfterMs: number
+ }
+ | {
+ status: 'model_unavailable'
+ requestedModel: string
+ availableHours: string
+ }
/**
* Client calls this on CLI startup with the model they want to use.
@@ -152,6 +229,7 @@ export async function requestSession(params: {
}): Promise<RequestSessionResult> {
const deps = params.deps ?? defaultDeps
const model = resolveFreebuffModel(params.model)
+ const now = nowOf(deps)
if (params.userBanned) {
return { status: 'banned' }
}
@@ -161,13 +239,60 @@ export async function requestSession(params: {
) {
return { status: 'disabled' }
}
+ if (!isFreebuffModelAvailable(model, now)) {
+ return {
+ status: 'model_unavailable',
+ requestedModel: model,
+ availableHours: FREEBUFF_DEPLOYMENT_HOURS_LABEL,
+ }
+ }
+
+ // Rate-limit check runs before joinOrTakeOver so heavy users never even
+ // create a queued row. Only models listed in RATE_LIMITS are gated; others
+ // (Minimax today) fall through unchanged.
+ //
+ // Takeover/reclaim exception: a user who already holds a queued or
+ // active+unexpired row on this same model is re-anchoring (CLI restart,
+ // same-account tab switch) rather than starting a new session. Admit
+ // counts are written at promotion time, so the quota only needs to gate
+ // fresh admissions — blocking a reclaim here would strand a user with an
+ // active 5th session unable to reconnect after a CLI restart.
+ const existing = await deps.getSessionRow(params.userId)
+ const isReclaim =
+ !!existing &&
+ existing.model === model &&
+ (existing.status === 'queued' ||
+ (existing.status === 'active' &&
+ !!existing.expires_at &&
+ existing.expires_at.getTime() > now.getTime()))
+
+ if (!isReclaim) {
+ const snapshot = await fetchRateLimitSnapshot(params.userId, model, deps)
+ if (snapshot && snapshot.info.recentCount >= snapshot.info.limit) {
+ // Oldest admit's window-anniversary is when one slot opens back up.
+ // Clamped at 0 so a clock skew can't surface a negative retry-after.
+ const windowMs = snapshot.info.windowHours * 60 * 60 * 1000
+ const retryAfterMs = Math.max(
+ 0,
+ (snapshot.oldest?.getTime() ?? 0) + windowMs - now.getTime(),
+ )
+ return {
+ status: 'rate_limited',
+ model,
+ limit: snapshot.info.limit,
+ windowHours: snapshot.info.windowHours,
+ recentCount: snapshot.info.recentCount,
+ retryAfterMs,
+ }
+ }
+ }
let row: InternalSessionRow
try {
row = await deps.joinOrTakeOver({
userId: params.userId,
model,
- now: nowOf(deps),
+ now,
})
} catch (err) {
if (err instanceof FreeSessionModelLockedError) {
@@ -199,7 +324,7 @@ export async function requestSession(params: {
userId: params.userId,
model,
sessionLengthMs: deps.sessionLengthMs,
- now: nowOf(deps),
+ now,
})
if (promoted) row = promoted
}
@@ -212,7 +337,21 @@ export async function requestSession(params: {
`joinOrTakeOver returned a row that maps to no view (user=${params.userId})`,
)
}
- return view
+ return attachRateLimit(params.userId, view, deps)
+}
+
+/** Thread the current quota snapshot onto queued/active views so the CLI can
+ * render "N of M sessions used". Other statuses pass through unchanged.
+ * Called on both POST and GET so the line stays live across polls. */
+async function attachRateLimit(
+ userId: string,
+ view: SessionStateResponse,
+ deps: SessionDeps,
+): Promise<SessionStateResponse> {
+ if (view.status !== 'queued' && view.status !== 'active') return view
+ const snapshot = await fetchRateLimitSnapshot(userId, view.model, deps)
+ if (!snapshot) return view
+ return { ...view, rateLimit: snapshot.info }
}
/**
@@ -267,7 +406,7 @@ export async function getSessionState(params: {
const view = await viewForRow(params.userId, deps, row)
if (!view) return noneResponse()
- return view
+ return attachRateLimit(params.userId, view, deps)
}
export async function endUserSession(params: {
diff --git a/web/src/server/free-session/store.ts b/web/src/server/free-session/store.ts
index b3bd2bc48..e84331b69 100644
--- a/web/src/server/free-session/store.ts
+++ b/web/src/server/free-session/store.ts
@@ -1,7 +1,7 @@
import { db } from '@codebuff/internal/db'
import { coerceBool } from '@codebuff/internal/db/advisory-lock'
import * as schema from '@codebuff/internal/db/schema'
-import { and, asc, count, eq, lt, sql } from 'drizzle-orm'
+import { and, asc, count, eq, gte, lt, sql } from 'drizzle-orm'
import { FREEBUFF_ADMISSION_LOCK_ID } from './config'
@@ -369,6 +369,16 @@ export async function admitFromQueue(params: {
)
.returning()
+ if (admitted.length > 0) {
+ await tx.insert(schema.freeSessionAdmit).values(
+ admitted.map((r) => ({
+ user_id: r.user_id,
+ model: r.model,
+ admitted_at: now,
+ })),
+ )
+ }
+
return { admitted: admitted as InternalSessionRow[], skipped: null }
})
}
@@ -391,23 +401,63 @@ export async function promoteQueuedUser(params: {
}): Promise<InternalSessionRow | null> {
const { userId, model, sessionLengthMs, now } = params
const expiresAt = new Date(now.getTime() + sessionLengthMs)
- const [row] = await db
- .update(schema.freeSession)
- .set({
- status: 'active',
+ return db.transaction(async (tx) => {
+ const [row] = await tx
+ .update(schema.freeSession)
+ .set({
+ status: 'active',
+ admitted_at: now,
+ expires_at: expiresAt,
+ updated_at: now,
+ })
+ .where(
+ and(
+ eq(schema.freeSession.user_id, userId),
+ eq(schema.freeSession.status, 'queued'),
+ eq(schema.freeSession.model, model),
+ ),
+ )
+ .returning()
+ if (!row) return null
+ await tx.insert(schema.freeSessionAdmit).values({
+ user_id: userId,
+ model,
admitted_at: now,
- expires_at: expiresAt,
- updated_at: now,
})
+ return row as InternalSessionRow
+ })
+}
+
+/**
+ * List admissions for `userId` on `model` whose `admitted_at` is within the
+ * window `[since, ∞)`, ordered oldest-first. Caller gets both the count
+ * (array length, capped at `limit`) and the oldest timestamp (`rows[0]`) —
+ * the oldest is needed to compute `retryAfterMs` when the window is full,
+ * so one query covers both the check and the reject path.
+ *
+ * Drives the per-user, per-model rate limit (e.g. at most 5 GLM sessions in
+ * the last 20h) enforced before `joinOrTakeOver`.
+ */
+export async function listRecentAdmits(params: {
+ userId: string
+ model: string
+ since: Date
+ limit: number
+}): Promise<Date[]> {
+ const { userId, model, since, limit } = params
+ const rows = await db
+ .select({ admitted_at: schema.freeSessionAdmit.admitted_at })
+ .from(schema.freeSessionAdmit)
.where(
and(
- eq(schema.freeSession.user_id, userId),
- eq(schema.freeSession.status, 'queued'),
- eq(schema.freeSession.model, model),
+ eq(schema.freeSessionAdmit.user_id, userId),
+ eq(schema.freeSessionAdmit.model, model),
+ gte(schema.freeSessionAdmit.admitted_at, since),
),
)
- .returning()
- return (row as InternalSessionRow | undefined) ?? null
+ .orderBy(asc(schema.freeSessionAdmit.admitted_at))
+ .limit(limit)
+ return rows.map((r) => r.admitted_at)
}
/** Stable 31-bit hash so model-keyed advisory lock ids don't overflow int4. */