diff --git a/agents/__tests__/editor.test.ts b/agents/__tests__/editor.test.ts index 030857c8d..36d6b75c5 100644 --- a/agents/__tests__/editor.test.ts +++ b/agents/__tests__/editor.test.ts @@ -67,6 +67,11 @@ describe('editor agent', () => { expect(glmEditor.model).toBe('z-ai/glm-5.1') }) + test('creates minimax editor', () => { + const minimaxEditor = createCodeEditor({ model: 'minimax' }) + expect(minimaxEditor.model).toBe('minimax/minimax-m2.7') + }) + test('gpt-5 editor does not include think tags in instructions', () => { const gpt5Editor = createCodeEditor({ model: 'gpt-5' }) expect(gpt5Editor.instructionsPrompt).not.toContain('') @@ -79,6 +84,12 @@ describe('editor agent', () => { expect(glmEditor.instructionsPrompt).not.toContain('') }) + test('minimax editor does not include think tags in instructions', () => { + const minimaxEditor = createCodeEditor({ model: 'minimax' }) + expect(minimaxEditor.instructionsPrompt).not.toContain('') + expect(minimaxEditor.instructionsPrompt).not.toContain('') + }) + test('opus editor includes think tags in instructions', () => { const opusEditor = createCodeEditor({ model: 'opus' }) expect(opusEditor.instructionsPrompt).toContain('') diff --git a/agents/editor/editor.ts b/agents/editor/editor.ts index 3d208aa13..c98544d0f 100644 --- a/agents/editor/editor.ts +++ b/agents/editor/editor.ts @@ -4,7 +4,7 @@ import { publisher } from '../constants' import type { AgentDefinition } from '../types/agent-definition' export const createCodeEditor = (options: { - model: 'gpt-5' | 'opus' | 'glm' + model: 'gpt-5' | 'opus' | 'glm' | 'minimax' }): Omit => { const { model } = options return { @@ -12,6 +12,8 @@ export const createCodeEditor = (options: { model: options.model === 'gpt-5' ? 'openai/gpt-5.1' + : options.model === 'minimax' + ? 'minimax/minimax-m2.7' : options.model === 'glm' ? 
'z-ai/glm-5.1' : 'anthropic/claude-opus-4.7', @@ -65,7 +67,7 @@ OR for new files or major rewrites: } -${model === 'gpt-5' || model === 'glm' +${model === 'gpt-5' || model === 'glm' || model === 'minimax' ? '' : `Before you start writing your implementation, you should use tags to think about the best way to implement the changes. diff --git a/agents/types/agent-definition.ts b/agents/types/agent-definition.ts index b28a77c31..3608f3631 100644 --- a/agents/types/agent-definition.ts +++ b/agents/types/agent-definition.ts @@ -423,8 +423,6 @@ export type ModelName = // Other open source models | 'moonshotai/kimi-k2' | 'moonshotai/kimi-k2:nitro' - | 'moonshotai/kimi-k2.5' - | 'moonshotai/kimi-k2.5:nitro' | 'z-ai/glm-5' | 'z-ai/glm-5.1' | 'z-ai/glm-4.6' diff --git a/cli/src/app.tsx b/cli/src/app.tsx index 0661d7d3c..cac6e20ec 100644 --- a/cli/src/app.tsx +++ b/cli/src/app.tsx @@ -380,6 +380,7 @@ const AuthedSurface = ({ // 'queued' → waiting our turn // 'country_blocked' → terminal region-gate message // 'banned' → terminal account-banned message + // 'rate_limited' → hit per-model session quota; terminal for this run // // 'ended' deliberately falls through to : the agent may still be // finishing work under the server-side grace period, and the chat surface @@ -390,7 +391,8 @@ const AuthedSurface = ({ session.status === 'queued' || session.status === 'none' || session.status === 'country_blocked' || - session.status === 'banned') + session.status === 'banned' || + session.status === 'rate_limited') ) { return } diff --git a/cli/src/chat.tsx b/cli/src/chat.tsx index 6663c7e1e..09727ea6e 100644 --- a/cli/src/chat.tsx +++ b/cli/src/chat.tsx @@ -174,7 +174,11 @@ export const Chat = ({ }) const hasSubscription = subscriptionData?.hasSubscription ?? 
false - const { adData, recordImpression } = useGravityAd({ enabled: IS_FREEBUFF || !hasSubscription }) + const { adData, recordImpression } = useGravityAd({ + enabled: IS_FREEBUFF || !hasSubscription, + provider: 'gravity', + fallbackProvider: 'carbon', + }) // Set initial mode from CLI flag on mount useEffect(() => { diff --git a/cli/src/components/freebuff-model-selector.tsx b/cli/src/components/freebuff-model-selector.tsx index a33d89540..0850a0bd7 100644 --- a/cli/src/components/freebuff-model-selector.tsx +++ b/cli/src/components/freebuff-model-selector.tsx @@ -3,9 +3,16 @@ import { useKeyboard } from '@opentui/react' import React, { useCallback, useEffect, useMemo, useState } from 'react' import { Button } from './button' -import { FREEBUFF_MODELS } from '@codebuff/common/constants/freebuff-models' +import { + FALLBACK_FREEBUFF_MODEL_ID, + FREEBUFF_DEPLOYMENT_HOURS_LABEL, + FREEBUFF_GLM_MODEL_ID, + FREEBUFF_MODELS, + isFreebuffModelAvailable, +} from '@codebuff/common/constants/freebuff-models' import { joinFreebuffQueue } from '../hooks/use-freebuff-session' +import { useNow } from '../hooks/use-now' import { useFreebuffModelStore } from '../state/freebuff-model-store' import { useFreebuffSessionStore } from '../state/freebuff-session-store' import { useTerminalDimensions } from '../hooks/use-terminal-dimensions' @@ -13,6 +20,11 @@ import { useTheme } from '../hooks/use-theme' import type { KeyEvent } from '@opentui/core' +const FREEBUFF_MODEL_SELECTOR_MODELS = [ + ...FREEBUFF_MODELS.filter((model) => model.id === FREEBUFF_GLM_MODEL_ID), + ...FREEBUFF_MODELS.filter((model) => model.id !== FREEBUFF_GLM_MODEL_ID), +] + /** * Dual-purpose model picker: * - Pre-chat landing (session 'none'): user hasn't joined any queue. 
Picking @@ -33,7 +45,9 @@ export const FreebuffModelSelector: React.FC = () => { const theme = useTheme() const { terminalWidth } = useTerminalDimensions() const selectedModel = useFreebuffModelStore((s) => s.selectedModel) + const setSelectedModel = useFreebuffModelStore((s) => s.setSelectedModel) const session = useFreebuffSessionStore((s) => s.session) + const now = useNow(60_000) const [pending, setPending] = useState(null) const [hoveredId, setHoveredId] = useState(null) // Keyboard cursor — separate from the actually-selected model so that @@ -45,6 +59,20 @@ export const FreebuffModelSelector: React.FC = () => { setFocusedId(selectedModel) }, [selectedModel]) + useEffect(() => { + // Landing-screen safety net: if the in-memory selection becomes + // unavailable (e.g. deployment hours close while the picker is open), + // swap to the always-available fallback so Enter doesn't POST a model + // the server will immediately reject. In-memory only — the user's saved + // preference (e.g. GLM) is preserved for the next launch. + if ( + (session?.status === 'none' || !session) && + !isFreebuffModelAvailable(selectedModel, new Date(now)) + ) { + setSelectedModel(FALLBACK_FREEBUFF_MODEL_ID) + } + }, [now, selectedModel, session, setSelectedModel]) + // Landing ('none'): depths come from the server snapshot, no "self" to // subtract. In-queue ('queued'): for the user's queue, "ahead" is // `position - 1` (themselves don't count); for every other queue, switching @@ -85,18 +113,22 @@ export const FreebuffModelSelector: React.FC = () => { ) // Decide row vs column layout based on whether both buttons actually fit - // side-by-side. Each button's inner text is "● {displayName} · {tagline} {hint}", + // side-by-side. Each button's inner text is + // "● {displayName} · {tagline} · {hours} {hint}", // plus 2 cols of border and 2 cols of padding. Buttons are separated by a // gap of 2. If the total exceeds the terminal width, stack vertically. 
const stackVertically = useMemo(() => { const BUTTON_CHROME = 4 // 2 border + 2 padding const GAP = 2 - const total = FREEBUFF_MODELS.reduce((sum, model, idx) => { + const total = FREEBUFF_MODEL_SELECTOR_MODELS.reduce((sum, model, idx) => { const inner = 2 /* indicator + space */ + model.displayName.length + 3 /* " · " */ + model.tagline.length + + (model.availability === 'deployment_hours' + ? 3 + FREEBUFF_DEPLOYMENT_HOURS_LABEL.length + : 0) + 2 /* " " */ + hintWidth return sum + inner + BUTTON_CHROME + (idx > 0 ? GAP : 0) @@ -115,10 +147,11 @@ export const FreebuffModelSelector: React.FC = () => { (modelId: string) => { if (pending) return if (modelId === committedModelId) return + if (!isFreebuffModelAvailable(modelId, new Date(now))) return setPending(modelId) joinFreebuffQueue(modelId).finally(() => setPending(null)) }, - [pending, committedModelId], + [pending, committedModelId, now], ) // Tab / Shift+Tab and arrow keys move the focus highlight only; Enter or @@ -136,25 +169,30 @@ export const FreebuffModelSelector: React.FC = () => { const isCommit = name === 'return' || name === 'enter' || name === 'space' if (!isForward && !isBackward && !isCommit) return if (isCommit) { - if (focusedId !== committedModelId) { + if ( + focusedId !== committedModelId && + isFreebuffModelAvailable(focusedId, new Date(now)) + ) { key.preventDefault?.() pick(focusedId) } return } - const currentIdx = FREEBUFF_MODELS.findIndex((m) => m.id === focusedId) + const currentIdx = FREEBUFF_MODEL_SELECTOR_MODELS.findIndex( + (m) => m.id === focusedId, + ) if (currentIdx === -1) return - const len = FREEBUFF_MODELS.length + const len = FREEBUFF_MODEL_SELECTOR_MODELS.length const nextIdx = isForward ? 
(currentIdx + 1) % len : (currentIdx - 1 + len) % len - const target = FREEBUFF_MODELS[nextIdx] + const target = FREEBUFF_MODEL_SELECTOR_MODELS[nextIdx] if (target) { key.preventDefault?.() setFocusedId(target.id) } }, - [pending, pick, focusedId, committedModelId], + [pending, pick, focusedId, committedModelId, now], ), ) @@ -173,7 +211,7 @@ export const FreebuffModelSelector: React.FC = () => { alignItems: 'flex-start', }} > - {FREEBUFF_MODELS.map((model) => { + {FREEBUFF_MODEL_SELECTOR_MODELS.map((model) => { // 'Selected' means the dot is filled and the label is bold. On the // landing screen ('none') this tracks the pre-focused pick; on the // queued screen it tracks the model the server has us on. Either @@ -181,15 +219,22 @@ export const FreebuffModelSelector: React.FC = () => { const isSelected = model.id === selectedModel const isHovered = hoveredId === model.id const isFocused = focusedId === model.id && !isSelected + const isAvailable = isFreebuffModelAvailable(model.id, new Date(now)) const indicator = isSelected ? '●' : '○' const indicatorColor = isSelected ? theme.primary : theme.muted - const labelColor = isSelected ? theme.foreground : theme.muted + const labelColor = isSelected && isAvailable ? theme.foreground : theme.muted // Clickable whenever picking would actually do something — i.e. // anything except re-picking the queue we're already in. - const interactable = !pending && model.id !== committedModelId + const interactable = !pending && isAvailable && model.id !== committedModelId const ahead = aheadByModel?.[model.id] const hint = - ahead === undefined ? '' : ahead === 0 ? 'No wait' : `${ahead} ahead` + !isAvailable + ? 'Closed' + : ahead === undefined + ? '' + : ahead === 0 + ? 'No wait' + : `${ahead} ahead` const borderColor = isSelected ? 
theme.primary @@ -202,7 +247,7 @@ export const FreebuffModelSelector: React.FC = () => { key={model.id} onClick={() => { setFocusedId(model.id) - pick(model.id) + if (isAvailable) pick(model.id) }} onMouseOver={() => interactable && setHoveredId(model.id)} onMouseOut={() => setHoveredId((curr) => (curr === model.id ? null : curr))} @@ -223,6 +268,9 @@ export const FreebuffModelSelector: React.FC = () => { {model.displayName} · {model.tagline} + {model.availability === 'deployment_hours' && ( + · {FREEBUFF_DEPLOYMENT_HOURS_LABEL} + )} {hint.padEnd(hintWidth)} diff --git a/cli/src/components/waiting-room-screen.tsx b/cli/src/components/waiting-room-screen.tsx index e67823f7a..f2a09022e 100644 --- a/cli/src/components/waiting-room-screen.tsx +++ b/cli/src/components/waiting-room-screen.tsx @@ -43,6 +43,18 @@ const formatElapsed = (ms: number): string => { return `${minutes}m ${seconds.toString().padStart(2, '0')}s` } +/** "in ~3h 20m" / "in ~45 min" / "in under a minute". Used on the + * rate-limited screen so users know when they can try again. */ +const formatRetryAfter = (ms: number): string => { + if (!Number.isFinite(ms) || ms <= 0) return 'any moment now' + const minutes = Math.round(ms / 60_000) + if (minutes < 1) return 'under a minute' + if (minutes < 60) return `${minutes} min` + const hours = Math.floor(minutes / 60) + const rem = minutes % 60 + return rem === 0 ? `${hours}h` : `${hours}h ${rem}m` +} + export const WaitingRoomScreen: React.FC = ({ session, error, @@ -72,11 +84,12 @@ export const WaitingRoomScreen: React.FC = ({ // Always enable ads in the waiting room — this is where monetization lives. // forceStart bypasses the "wait for first user message" gate inside the hook, // which would otherwise block ads here since no conversation exists yet. - // Uses Carbon (BuySellAds); in-chat ads still use the Gravity default. + // Try Gravity first, then fall back to Carbon when Gravity doesn't fill. 
const { adData, recordImpression } = useGravityAd({ enabled: true, forceStart: true, - provider: 'carbon', + provider: 'gravity', + fallbackProvider: 'carbon', }) useFreebuffCtrlCExit() @@ -216,6 +229,18 @@ export const WaitingRoomScreen: React.FC = ({ Elapsed {formatElapsed(elapsedMs)} + {/* Per-model session quota (e.g. GLM 5.1 caps at 5/20h). Only + rendered for rate-limited models so the Minimax queue stays + clutter-free. */} + {session.rateLimit && ( + + Sessions + + {session.rateLimit.recentCount} / {session.rateLimit.limit} + + used in last {session.rateLimit.windowHours}h + + )} )} @@ -253,11 +278,34 @@ export const WaitingRoomScreen: React.FC = ({ ⚠ Account unavailable - This account can't use freebuff. If you think this is a + This account has been suspended and can't use freebuff. If you think this is a mistake, contact support@codebuff.com. Press Ctrl+C to exit. )} + + {/* Per-model session quota exhausted (e.g. 5+ GLM sessions in the + last 20h). Terminal for this run — the user can exit and come + back once the oldest session in the window rolls off. */} + {session?.status === 'rate_limited' && ( + <> + + ⚠ Session limit reached + + + You've used{' '} + + {session.recentCount} of {session.limit} + {' '} + hour-long sessions on {session.model} in the last{' '} + {session.windowHours}h. Try again in{' '} + + {formatRetryAfter(session.retryAfterMs)} + + . Press Ctrl+C to exit. 
+ + )} diff --git a/cli/src/hooks/use-freebuff-session.ts b/cli/src/hooks/use-freebuff-session.ts index 79deea1cf..b7a91eb1e 100644 --- a/cli/src/hooks/use-freebuff-session.ts +++ b/cli/src/hooks/use-freebuff-session.ts @@ -1,4 +1,8 @@ import { env } from '@codebuff/common/env' +import { + FALLBACK_FREEBUFF_MODEL_ID, + resolveFreebuffModel, +} from '@codebuff/common/constants/freebuff-models' import { useEffect } from 'react' import { @@ -9,6 +13,7 @@ import { useFreebuffSessionStore } from '../state/freebuff-session-store' import { getAuthTokenDetails } from '../utils/auth' import { IS_FREEBUFF } from '../utils/constants' import { logger } from '../utils/logger' +import { saveFreebuffModelPreference } from '../utils/settings' import type { FreebuffSessionResponse } from '../types/freebuff-session' @@ -75,14 +80,31 @@ async function callSession( return body } } - // 409 from POST means the user picked a different model than their active - // session is bound to. Surface as a non-throw `model_locked` so the UI can - // show a confirmation prompt (DELETE then re-POST to switch). + // 409 from POST means the selected model cannot be joined right now, either + // because an active session is locked to another model or because the + // model is temporarily closed. Surface both model-switch conflicts and + // availability closures as non-throw states. if (resp.status === 409 && method === 'POST') { const body = (await resp.json().catch(() => null)) as | FreebuffSessionResponse | null - if (body && body.status === 'model_locked') { + if ( + body && + (body.status === 'model_locked' || body.status === 'model_unavailable') + ) { + return body + } + } + // 429 from POST is the per-model session-quota reject (e.g. too many GLM + // sessions in the last 20h). Terminal for the current poll — the CLI shows + // a screen explaining the limit and when the user can try again.
The 429 + // status (rather than 200) keeps older CLIs in their error path so they + // back off instead of tight-polling an unrecognized 200 body. + if (resp.status === 429 && method === 'POST') { + const body = (await resp.json().catch(() => null)) as + | FreebuffSessionResponse + | null + if (body && body.status === 'rate_limited') { return body } } @@ -119,6 +141,8 @@ function nextDelayMs(next: FreebuffSessionResponse): number | null { case 'country_blocked': case 'banned': case 'model_locked': + case 'rate_limited': + case 'model_unavailable': return null } } @@ -260,7 +284,13 @@ export function returnToFreebuffLanding( */ export function joinFreebuffQueue(model: string): Promise { if (!IS_FREEBUFF) return Promise.resolve() - useFreebuffModelStore.getState().setSelectedModel(model) + // This is the only explicit user-pick path (called from the picker on + // click / Enter), so persistence belongs here — and ONLY here. Server- + // driven flips (`model_locked`, `model_unavailable`, takeover) go + // through `setSelectedModel` directly, which never writes to disk. + const resolved = resolveFreebuffModel(model) + useFreebuffModelStore.getState().setSelectedModel(resolved) + saveFreebuffModelPreference(resolved) return restartFreebuffSession('rejoin') } @@ -398,6 +428,19 @@ export function useFreebuffSession(): UseFreebuffSessionResult { schedule(0) return } + if (next.status === 'model_unavailable') { + // Server says the requested model isn't available right now (e.g. + // GLM outside deployment hours). Flip to the always-available + // fallback for this run. In-memory only — `setSelectedModel` + // doesn't persist, so the user's saved preference (e.g. GLM) + // is preserved for their next launch during deployment hours. + useFreebuffModelStore + .getState() + .setSelectedModel(FALLBACK_FREEBUFF_MODEL_ID) + nextMethod = 'GET' + schedule(0) + return + } // Startup takeover: the initial probe GET saw we already hold a seat // (from a prior CLI instance). 
POST now to rotate our instance id so diff --git a/cli/src/hooks/use-gravity-ad.ts b/cli/src/hooks/use-gravity-ad.ts index e52b4bdd8..36a18faae 100644 --- a/cli/src/hooks/use-gravity-ad.ts +++ b/cli/src/hooks/use-gravity-ad.ts @@ -108,12 +108,15 @@ export const useGravityAd = (options?: { /** Skip the "wait for first user message" gate. Used by the freebuff * waiting room, which has no conversation but still needs ads. */ forceStart?: boolean - /** Which ad network to query. Defaults to Gravity. */ + /** Primary ad network to query. Defaults to Gravity. */ provider?: AdProvider + /** Backup ad network to try when the primary returns no fill or errors. */ + fallbackProvider?: AdProvider }): GravityAdState => { const enabled = options?.enabled ?? true const forceStart = options?.forceStart ?? false const provider: AdProvider = options?.provider ?? 'gravity' + const fallbackProvider = options?.fallbackProvider const [ad, setAd] = useState(null) const [adData, setAdData] = useState(null) const [isLoading, setIsLoading] = useState(false) @@ -278,49 +281,63 @@ export const useGravityAd = (options?: { } } - try { - const response = await fetch(`${WEBSITE_URL}/api/v1/ads`, { - method: 'POST', - headers: { - 'Content-Type': 'application/json', - Authorization: `Bearer ${authToken}`, - }, - body: JSON.stringify({ - provider, - messages: adMessages, - sessionId: useChatStore.getState().chatSessionId, - device: getDeviceInfo(), - // Carbon requires a real browser-ish useragent for targeting/fraud - // detection. Gravity ignores it. We source one centrally so every - // provider that needs it sees the same value. - userAgent: getAdUserAgent(), - }), - }) + const providersToTry = + fallbackProvider && fallbackProvider !== provider + ? 
[provider, fallbackProvider] + : [provider] - if (!response.ok) { - logger.warn( - { provider, status: response.status, response: await response.json() }, - '[ads] Web API returned error', - ) - return null - } + for (const providerToTry of providersToTry) { + try { + const response = await fetch(`${WEBSITE_URL}/api/v1/ads`, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + Authorization: `Bearer ${authToken}`, + }, + body: JSON.stringify({ + provider: providerToTry, + messages: adMessages, + sessionId: useChatStore.getState().chatSessionId, + device: getDeviceInfo(), + // Carbon requires a real browser-ish useragent for targeting/fraud + // detection. Gravity ignores it. We source one centrally so every + // provider that needs it sees the same value. + userAgent: getAdUserAgent(), + }), + }) - const data = await response.json() - const variant = data.variant ?? 'banner' + if (!response.ok) { + logger.warn( + { + provider: providerToTry, + status: response.status, + response: await response.json(), + }, + '[ads] Web API returned error', + ) + continue + } - if (variant === 'choice' && Array.isArray(data.ads) && data.ads.length > 0) { - return { variant: 'choice', ads: data.ads as AdResponse[] } - } + const data = await response.json() + const variant = data.variant ?? 
'banner' - if (data.ad) { - return { variant: 'banner', ad: data.ad as AdResponse } - } + if ( + variant === 'choice' && + Array.isArray(data.ads) && + data.ads.length > 0 + ) { + return { variant: 'choice', ads: data.ads as AdResponse[] } + } - return null - } catch (err) { - logger.error({ err }, '[ads] Failed to fetch ad') - return null + if (data.ad) { + return { variant: 'banner', ad: data.ad as AdResponse } + } + } catch (err) { + logger.error({ err, provider: providerToTry }, '[ads] Failed to fetch ad') + } } + + return null } // Update tick function (uses ref to avoid useCallback dependency issues) @@ -413,7 +430,7 @@ export const useGravityAd = (options?: { clearInterval(id) ctrlRef.current.intervalId = null } - }, [shouldStart, shouldHideAds]) + }, [shouldStart, shouldHideAds, provider, fallbackProvider]) // Don't return ad when ads should be hidden const visible = shouldStart && !shouldHideAds diff --git a/cli/src/state/freebuff-model-store.ts b/cli/src/state/freebuff-model-store.ts index 182a38831..c602d8464 100644 --- a/cli/src/state/freebuff-model-store.ts +++ b/cli/src/state/freebuff-model-store.ts @@ -1,19 +1,23 @@ import { DEFAULT_FREEBUFF_MODEL_ID, + resolveAvailableFreebuffModel, resolveFreebuffModel, } from '@codebuff/common/constants/freebuff-models' import { create } from 'zustand' -import { - loadFreebuffModelPreference, - saveFreebuffModelPreference, -} from '../utils/settings' +import { loadFreebuffModelPreference } from '../utils/settings' /** * Holds the user's currently-selected freebuff model. Initialized from the * persisted settings file so freebuff defaults to whatever model the user - * last picked. Writing through `setSelectedModel` also persists to disk so - * the next launch picks it up without an explicit save call. + * last picked. + * + * `setSelectedModel` is in-memory only — it does NOT persist. 
Persistence + * happens exclusively in `joinFreebuffQueue` (the explicit-pick path), so + * server-driven auto-flips (`model_locked`, `model_unavailable`, takeover) + * can update the in-memory selection without overwriting the user's saved + * preference. The latter previously caused users to get permanently flipped + * to the fallback model after a single auto-fallback. * * Components in the waiting room read this to highlight the current row in * the model picker; the session hook reads it to decide which queue to join. @@ -24,14 +28,11 @@ interface FreebuffModelStore { } export const useFreebuffModelStore = create((set) => ({ - selectedModel: resolveFreebuffModel( + selectedModel: resolveAvailableFreebuffModel( loadFreebuffModelPreference() ?? DEFAULT_FREEBUFF_MODEL_ID, ), - setSelectedModel: (model) => { - const resolved = resolveFreebuffModel(model) - saveFreebuffModelPreference(resolved) - set({ selectedModel: resolved }) - }, + setSelectedModel: (model) => + set({ selectedModel: resolveFreebuffModel(model) }), })) /** Imperative read for non-React callers (the session hook's tick loop and diff --git a/cli/src/utils/local-agent-registry.ts b/cli/src/utils/local-agent-registry.ts index 59206eb84..6106b3928 100644 --- a/cli/src/utils/local-agent-registry.ts +++ b/cli/src/utils/local-agent-registry.ts @@ -370,7 +370,7 @@ export const loadAgentDefinitions = (): AgentDefinition[] => { } // Override the model of free-mode agents to match the user's pick from the - // freebuff waiting room. Bundled definitions hardcode glm-5.1; we swap in + // freebuff waiting room. Bundled definitions hardcode a free model; we swap in // whatever the user chose so the chat-completions request body carries the // matching model and the server-side session gate doesn't reject it as a // model mismatch. 
diff --git a/common/src/constants/free-agents.ts b/common/src/constants/free-agents.ts index e44c74cc6..308e12df6 100644 --- a/common/src/constants/free-agents.ts +++ b/common/src/constants/free-agents.ts @@ -26,7 +26,10 @@ export const FREEBUFF_ROOT_AGENT_IDS = ['base2-free'] as const */ export const FREE_MODE_AGENT_MODELS: Record> = { // Root orchestrator - 'base2-free': new Set(['minimax/minimax-m2.7', 'z-ai/glm-5.1']), + 'base2-free': new Set([ + 'minimax/minimax-m2.7', + 'z-ai/glm-5.1', + ]), // File exploration agents 'file-picker': new Set(['google/gemini-2.5-flash-lite']), @@ -41,10 +44,16 @@ export const FREE_MODE_AGENT_MODELS: Record> = { 'basher': new Set(['google/gemini-3.1-flash-lite-preview']), // Editor for free mode - 'editor-lite': new Set(['minimax/minimax-m2.7', 'z-ai/glm-5.1']), + 'editor-lite': new Set([ + 'minimax/minimax-m2.7', + 'z-ai/glm-5.1', + ]), // Code reviewer for free mode - 'code-reviewer-lite': new Set(['minimax/minimax-m2.7', 'z-ai/glm-5.1']), + 'code-reviewer-lite': new Set([ + 'minimax/minimax-m2.7', + 'z-ai/glm-5.1', + ]), } /** diff --git a/common/src/constants/freebuff-models.ts b/common/src/constants/freebuff-models.ts index d71ebd619..2e1ef8d8e 100644 --- a/common/src/constants/freebuff-models.ts +++ b/common/src/constants/freebuff-models.ts @@ -13,24 +13,43 @@ export interface FreebuffModelOption { displayName: string /** One-line description shown next to the label. */ tagline: string + /** Availability policy for the selector and server-side admission. 
*/ + availability: 'always' | 'deployment_hours' } +export const FREEBUFF_DEPLOYMENT_HOURS_LABEL = '9am ET-5pm PT' +export const FREEBUFF_GLM_MODEL_ID = 'z-ai/glm-5.1' +export const FREEBUFF_MINIMAX_MODEL_ID = 'minimax/minimax-m2.7' + export const FREEBUFF_MODELS = [ { - id: 'z-ai/glm-5.1', - displayName: 'GLM 5.1', - tagline: 'Smartest', - }, - { - id: 'minimax/minimax-m2.7', + id: FREEBUFF_MINIMAX_MODEL_ID, displayName: 'MiniMax M2.7', tagline: 'Fastest', + availability: 'always', + }, + { + id: FREEBUFF_GLM_MODEL_ID, + displayName: 'GLM 5.1', + tagline: 'Smartest', + availability: 'deployment_hours', }, ] as const satisfies readonly FreebuffModelOption[] export type FreebuffModelId = (typeof FREEBUFF_MODELS)[number]['id'] -export const DEFAULT_FREEBUFF_MODEL_ID: FreebuffModelId = FREEBUFF_MODELS[0].id +/** What new freebuff users see selected in the picker. May not be currently + * available (GLM is closed outside deployment hours); callers that need an + * always-available id for resolution / auto-fallbacks should use + * FALLBACK_FREEBUFF_MODEL_ID instead. */ +export const DEFAULT_FREEBUFF_MODEL_ID: FreebuffModelId = FREEBUFF_GLM_MODEL_ID + +/** Always-available fallback used when the requested model can't be served + * right now (unknown id, deployment hours closed, etc.). Kept distinct from + * DEFAULT_FREEBUFF_MODEL_ID so a new user's "preferred default" can be the + * smartest model without auto-flipping anyone to a closed deployment. */ +export const FALLBACK_FREEBUFF_MODEL_ID: FreebuffModelId = + FREEBUFF_MINIMAX_MODEL_ID export function isFreebuffModelId( id: string | null | undefined, @@ -42,12 +61,58 @@ export function isFreebuffModelId( export function resolveFreebuffModel( id: string | null | undefined, ): FreebuffModelId { - return isFreebuffModelId(id) ? id : DEFAULT_FREEBUFF_MODEL_ID + return isFreebuffModelId(id) ? 
id : FALLBACK_FREEBUFF_MODEL_ID } export function getFreebuffModel(id: string): FreebuffModelOption { return ( FREEBUFF_MODELS.find((m) => m.id === id) ?? - FREEBUFF_MODELS.find((m) => m.id === DEFAULT_FREEBUFF_MODEL_ID)! + FREEBUFF_MODELS.find((m) => m.id === FALLBACK_FREEBUFF_MODEL_ID)! ) } + +function getZonedParts( + date: Date, + timeZone: string, +): { weekday: string; minutes: number } { + const parts = new Intl.DateTimeFormat('en-US', { + timeZone, + weekday: 'short', + hour: '2-digit', + minute: '2-digit', + hourCycle: 'h23', + }).formatToParts(date) + const value = (type: string) => parts.find((part) => part.type === type)?.value + const hour = Number(value('hour') ?? 0) + const minute = Number(value('minute') ?? 0) + return { + weekday: value('weekday') ?? '', + minutes: hour * 60 + minute, + } +} + +export function isFreebuffDeploymentHours(now: Date = new Date()): boolean { + const eastern = getZonedParts(now, 'America/New_York') + const pacific = getZonedParts(now, 'America/Los_Angeles') + if (eastern.weekday === 'Sat' || eastern.weekday === 'Sun') return false + return eastern.minutes >= 9 * 60 && pacific.minutes < 17 * 60 +} + +export function isFreebuffModelAvailable( + id: string, + now: Date = new Date(), +): boolean { + const model = FREEBUFF_MODELS.find((m) => m.id === id) + if (!model) return false + return model.availability === 'always' || isFreebuffDeploymentHours(now) +} + +export function resolveAvailableFreebuffModel( + id: string | null | undefined, + now: Date = new Date(), +): FreebuffModelId { + const resolved = resolveFreebuffModel(id) + return isFreebuffModelAvailable(resolved, now) + ? 
resolved + : FALLBACK_FREEBUFF_MODEL_ID +} diff --git a/common/src/templates/initial-agents-dir/types/agent-definition.ts b/common/src/templates/initial-agents-dir/types/agent-definition.ts index b28a77c31..3608f3631 100644 --- a/common/src/templates/initial-agents-dir/types/agent-definition.ts +++ b/common/src/templates/initial-agents-dir/types/agent-definition.ts @@ -423,8 +423,6 @@ export type ModelName = // Other open source models | 'moonshotai/kimi-k2' | 'moonshotai/kimi-k2:nitro' - | 'moonshotai/kimi-k2.5' - | 'moonshotai/kimi-k2.5:nitro' | 'z-ai/glm-5' | 'z-ai/glm-5.1' | 'z-ai/glm-4.6' diff --git a/common/src/types/freebuff-session.ts b/common/src/types/freebuff-session.ts index e42d9f0be..7789c91f2 100644 --- a/common/src/types/freebuff-session.ts +++ b/common/src/types/freebuff-session.ts @@ -5,6 +5,22 @@ * * The CLI uses these shapes directly; there are no client-only states. */ + +/** + * Per-model usage counter surfaced to the CLI so the waiting-room UI can + * render "N of M sessions used" alongside queue/active state. Present when + * the joined model has a rate limit applied (today: GLM 5.1 with 5 admits + * per 20-hour window). `recentCount` is the number of admissions inside + * `windowHours` at the time the response was produced — see also the + * standalone `rate_limited` status for the reject path. + */ +export interface FreebuffSessionRateLimit { + model: string + limit: number + windowHours: number + recentCount: number +} + export type FreebuffSessionServerResponse = | { /** Waiting room is globally off; free-mode requests flow through @@ -38,6 +54,10 @@ export type FreebuffSessionServerResponse = queueDepthByModel: Record estimatedWaitMs: number queuedAt: string + /** Rate-limit quota for rate-limited models (GLM 5.1 today). Absent + * for unlimited models or when the status was produced outside the + * rate-limit check path (e.g. pure read via GET). 
*/ + rateLimit?: FreebuffSessionRateLimit } | { status: 'active' @@ -47,6 +67,10 @@ export type FreebuffSessionServerResponse = admittedAt: string expiresAt: string remainingMs: number + /** Rate-limit quota for rate-limited models (GLM 5.1 today). Absent + * for unlimited models or when the status was produced outside the + * rate-limit check path (e.g. pure read via GET). */ + rateLimit?: FreebuffSessionRateLimit } | { /** Session is over. While `instanceId` is present we're inside the @@ -92,6 +116,12 @@ export type FreebuffSessionServerResponse = currentModel: string requestedModel: string } + | { + /** Requested model is valid but not selectable right now. */ + status: 'model_unavailable' + requestedModel: string + availableHours: string + } | { /** Account is banned. Returned from every endpoint so banned bots can't * join the queue at all (otherwise they inflate `queueDepth` until the @@ -99,3 +129,24 @@ export type FreebuffSessionServerResponse = * stops polling and shows a banned message. */ status: 'banned' } + | { + /** User has used up their per-model admission quota in the rolling + * window (GLM 5.1: 5 one-hour sessions per 20h). Returned from POST + * /session before the user is placed in the queue. `retryAfterMs` is + * the time until the oldest admission inside the window falls off + * and one quota slot opens up — clients should show the user when + * they can try again. Terminal for the CLI's current poll session; + * the user can exit and come back later. */ + status: 'rate_limited' + /** The freebuff model the user tried to join. */ + model: string + /** Max admissions permitted per window (e.g. 5). */ + limit: number + /** Rolling window size in hours (e.g. 20). */ + windowHours: number + /** Admission count inside the window at check time — will be ≥ limit. */ + recentCount: number + /** Milliseconds from now until the oldest admission in the window + * exits and the user regains one quota slot. 
*/ + retryAfterMs: number + } diff --git a/docs/freebuff-waiting-room.md b/docs/freebuff-waiting-room.md index b1384d7b6..353bfb046 100644 --- a/docs/freebuff-waiting-room.md +++ b/docs/freebuff-waiting-room.md @@ -5,7 +5,7 @@ The waiting room is the admission control layer for **free-mode** requests against the freebuff Fireworks deployments. It has three jobs: 1. **Drip-admit users per model** — each selectable freebuff model has its own FIFO queue. Admission runs one tick (default `ADMISSION_TICK_MS`, 15s) that tries to admit one user per model, so heavier models can sit cold without starving lighter ones. -2. **Gate on per-deployment health** — a single fleet probe per tick (`getFleetHealth` in `web/src/server/free-session/fireworks-health.ts`) hits the Fireworks metrics endpoint and classifies each dedicated deployment as `healthy | degraded | unhealthy`. Only models whose deployment is `healthy` admit that tick; a degraded minimax-m2.7 no longer stalls glm-5.1 admissions. +2. **Gate on per-deployment health and hours** — a single fleet probe per tick (`getFleetHealth` in `web/src/server/free-session/fireworks-health.ts`) hits the Fireworks metrics endpoint and classifies each dedicated deployment as `healthy | degraded | unhealthy`. Only models whose deployment is `healthy` and currently available admit that tick; GLM 5.1 is available during 9am ET-5pm PT on weekdays, while MiniMax M2.7 is serverless and always available. 3. **One instance per account** — prevent a single user from running N concurrent freebuff CLIs to get N× throughput. Users who cannot be admitted immediately are placed in the queue for their chosen model and given an estimated wait time. Admitted users get a fixed-length session (default 1h) bound to the model they were admitted on; chat completions use that model for the life of the session. 
@@ -149,8 +149,8 @@ The final tick result carries a `queueDepthByModel` map and a single `skipped` r | Constant | Location | Default | Purpose | |---|---|---|---| | `ADMISSION_TICK_MS` | `config.ts` | 15000 | How often the ticker fires. Up to one user is admitted per model per tick. | -| `FREEBUFF_MODELS` | `common/src/constants/freebuff-models.ts` | `glm-5.1`, `minimax-m2.7` | Selectable models; each gets its own queue and admission slot. | -| `FIREWORKS_DEPLOYMENT_MAP` | `web/src/llm-api/fireworks-config.ts` | glm-5.1 only | Models with dedicated Fireworks deployments. Models not listed are treated as `healthy` (serverless fallback) — drop this default when they migrate to their own deployments. | +| `FREEBUFF_MODELS` | `common/src/constants/freebuff-models.ts` | `minimax-m2.7`, `glm-5.1` | Selectable models; each gets its own queue and admission slot. | +| `FIREWORKS_DEPLOYMENT_MAP` | `web/src/llm-api/fireworks-config.ts` | `glm-5.1` | Models with dedicated Fireworks deployments. Models not listed are treated as `healthy` (serverless fallback) — drop this default when they migrate to their own deployments. | | `HEALTH_CACHE_TTL_MS` | `fireworks-health.ts` | 25000 | Fleet probe cache TTL. Sits just under the Fireworks 30s exporter cadence and 6 req/min rate limit. | | `FREEBUFF_SESSION_LENGTH_MS` | env | 3_600_000 | Session lifetime | | `FREEBUFF_SESSION_GRACE_MS` | env | 1_800_000 | Drain window after expiry — gate still admits requests so an in-flight agent can finish, but the CLI is expected to block new prompts. Hard cutoff at `expires_at + grace`. | @@ -180,12 +180,12 @@ Response shapes: { "status": "queued", "instanceId": "e47…", - "model": "z-ai/glm-5.1", + "model": "minimax/minimax-m2.7", "position": 17, // 1-indexed within this model's queue "queueDepth": 43, // size of this model's queue "queueDepthByModel": { // snapshot of every model's queue — powers the - "z-ai/glm-5.1": 43, // "N ahead" hint in the selector. 
Missing - "minimax/minimax-m2.7": 4 // entries should be treated as 0. + "minimax/minimax-m2.7": 43, // "N ahead" hint in the selector. Missing + "z-ai/glm-5.1": 4 // entries should be treated as 0. }, "estimatedWaitMs": 384000, "queuedAt": "2026-04-17T12:00:00Z" @@ -195,7 +195,7 @@ Response shapes: { "status": "active", "instanceId": "e47…", - "model": "z-ai/glm-5.1", + "model": "minimax/minimax-m2.7", "admittedAt": "2026-04-17T12:00:00Z", "expiresAt": "2026-04-17T13:00:00Z", "remainingMs": 3600000 @@ -219,7 +219,7 @@ Response shapes: // to actually switch. { "status": "model_locked", - "currentModel": "z-ai/glm-5.1", - "requestedModel": "minimax/minimax-m2.7" + "currentModel": "minimax/minimax-m2.7", + "requestedModel": "z-ai/glm-5.1" } ``` @@ -285,7 +285,7 @@ waitMs = (position - 1) * 24_000 - Position 1 → 0 (next tick admits you) - Position 2 → 24s, and so on. -`position` is scoped to this model's queue — a user at position 1 in the `minimax/minimax-m2.7` queue is not affected by the depth of the `z-ai/glm-5.1` queue. The estimate is intentionally decoupled from the admission tick — it's a human-friendly rule-of-thumb for the UI, not a precise projection. Actual wait depends on admission-tick cadence and health-gated pauses (during a per-deployment Fireworks incident only the affected model's queue stalls; healthy models keep draining), so the real wait can be longer or shorter. +`position` is scoped to this model's queue — a user at position 1 in the `minimax/minimax-m2.7` queue is not affected by the depth of the `z-ai/glm-5.1` queue. The estimate is intentionally decoupled from the admission tick — it's a human-friendly rule-of-thumb for the UI, not a precise projection. Actual wait depends on admission-tick cadence, health-gated pauses, and deployment-hours availability (during a GLM Fireworks incident or outside 9am ET-5pm PT, only GLM's queue stalls; MiniMax keeps draining), so the real wait can be longer or shorter. 
## CLI Integration (frontend-side contract) @@ -324,7 +324,7 @@ The `disabled` response means the server has the waiting room turned off. CLI tr | Spamming POST/GET to starve admission tick | Admission uses per-model Postgres advisory locks; DDoS protection is upstream (Next's global rate limits). Consider adding a per-user limiter on `/session` if traffic warrants. | | Repeatedly POSTing different models to get across every queue | Single row per user (PK on `user_id`); switching models moves the row, never clones it. A user holds exactly one queue slot at any time. | | Fireworks metrics endpoint down / slow | `getFleetHealth()` fails closed (timeout, non-OK, or missing API key) → every dedicated-deployment model is flagged `unhealthy` and its queue pauses. | -| One deployment degraded while others are fine | Health is classified per-deployment; only the affected model's queue pauses, so a degraded minimax-m2.7 doesn't block glm-5.1 admissions. | +| One deployment degraded while others are fine | Health is classified per-deployment; only the affected model's queue pauses, so a degraded GLM deployment doesn't block MiniMax admissions. 
| | Zombie expired sessions holding capacity | Swept on every admission tick, even when upstream is unhealthy | ## Testing diff --git a/freebuff/cli/release/package.json b/freebuff/cli/release/package.json index 6426fac98..a597e0852 100644 --- a/freebuff/cli/release/package.json +++ b/freebuff/cli/release/package.json @@ -1,6 +1,6 @@ { "name": "freebuff", - "version": "0.0.46", + "version": "0.0.48", "description": "The world's strongest free coding agent", "license": "MIT", "bin": { diff --git a/packages/internal/src/db/migrations/0046_cloudy_firedrake.sql b/packages/internal/src/db/migrations/0046_cloudy_firedrake.sql new file mode 100644 index 000000000..53a24ec98 --- /dev/null +++ b/packages/internal/src/db/migrations/0046_cloudy_firedrake.sql @@ -0,0 +1,9 @@ +CREATE TABLE "free_session_admit" ( + "id" text PRIMARY KEY NOT NULL, + "user_id" text NOT NULL, + "model" text NOT NULL, + "admitted_at" timestamp with time zone DEFAULT now() NOT NULL +); +--> statement-breakpoint +ALTER TABLE "free_session_admit" ADD CONSTRAINT "free_session_admit_user_id_user_id_fk" FOREIGN KEY ("user_id") REFERENCES "public"."user"("id") ON DELETE cascade ON UPDATE no action;--> statement-breakpoint +CREATE INDEX "idx_free_session_admit_user_model_time" ON "free_session_admit" USING btree ("user_id","model","admitted_at"); \ No newline at end of file diff --git a/packages/internal/src/db/migrations/meta/0046_snapshot.json b/packages/internal/src/db/migrations/meta/0046_snapshot.json new file mode 100644 index 000000000..48747dd94 --- /dev/null +++ b/packages/internal/src/db/migrations/meta/0046_snapshot.json @@ -0,0 +1,3307 @@ +{ + "id": "3bf6a16c-2fd6-4c9d-a395-f4ca2c080a3c", + "prevId": "76196ef1-2384-4edd-b832-c9ff8085d809", + "version": "7", + "dialect": "postgresql", + "tables": { + "public.account": { + "name": "account", + "schema": "", + "columns": { + "userId": { + "name": "userId", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "type": { + "name": "type", 
+ "type": "text", + "primaryKey": false, + "notNull": true + }, + "provider": { + "name": "provider", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "providerAccountId": { + "name": "providerAccountId", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "refresh_token": { + "name": "refresh_token", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "access_token": { + "name": "access_token", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "expires_at": { + "name": "expires_at", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "token_type": { + "name": "token_type", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "scope": { + "name": "scope", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "id_token": { + "name": "id_token", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "session_state": { + "name": "session_state", + "type": "text", + "primaryKey": false, + "notNull": false + } + }, + "indexes": {}, + "foreignKeys": { + "account_userId_user_id_fk": { + "name": "account_userId_user_id_fk", + "tableFrom": "account", + "tableTo": "user", + "columnsFrom": [ + "userId" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": { + "account_provider_providerAccountId_pk": { + "name": "account_provider_providerAccountId_pk", + "columns": [ + "provider", + "providerAccountId" + ] + } + }, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.ad_impression": { + "name": "ad_impression", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "user_id": { + "name": "user_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "provider": { + "name": "provider", + "type": "text", + "primaryKey": false, + "notNull": true, + "default": 
"'gravity'" + }, + "ad_text": { + "name": "ad_text", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "title": { + "name": "title", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "cta": { + "name": "cta", + "type": "text", + "primaryKey": false, + "notNull": true, + "default": "''" + }, + "url": { + "name": "url", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "favicon": { + "name": "favicon", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "click_url": { + "name": "click_url", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "imp_url": { + "name": "imp_url", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "extra_pixels": { + "name": "extra_pixels", + "type": "text[]", + "primaryKey": false, + "notNull": false + }, + "payout": { + "name": "payout", + "type": "numeric(10, 6)", + "primaryKey": false, + "notNull": false + }, + "credits_granted": { + "name": "credits_granted", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "grant_operation_id": { + "name": "grant_operation_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "served_at": { + "name": "served_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "impression_fired_at": { + "name": "impression_fired_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "clicked_at": { + "name": "clicked_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + } + }, + "indexes": { + "idx_ad_impression_user": { + "name": "idx_ad_impression_user", + "columns": [ + { + "expression": "user_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "served_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + 
"idx_ad_impression_imp_url": { + "name": "idx_ad_impression_imp_url", + "columns": [ + { + "expression": "imp_url", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "ad_impression_user_id_user_id_fk": { + "name": "ad_impression_user_id_user_id_fk", + "tableFrom": "ad_impression", + "tableTo": "user", + "columnsFrom": [ + "user_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": { + "ad_impression_imp_url_unique": { + "name": "ad_impression_imp_url_unique", + "nullsNotDistinct": false, + "columns": [ + "imp_url" + ] + } + }, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.agent_config": { + "name": "agent_config", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "version": { + "name": "version", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "publisher_id": { + "name": "publisher_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "major": { + "name": "major", + "type": "integer", + "primaryKey": false, + "notNull": false, + "generated": { + "as": "CAST(SPLIT_PART(\"agent_config\".\"version\", '.', 1) AS INTEGER)", + "type": "stored" + } + }, + "minor": { + "name": "minor", + "type": "integer", + "primaryKey": false, + "notNull": false, + "generated": { + "as": "CAST(SPLIT_PART(\"agent_config\".\"version\", '.', 2) AS INTEGER)", + "type": "stored" + } + }, + "patch": { + "name": "patch", + "type": "integer", + "primaryKey": false, + "notNull": false, + "generated": { + "as": "CAST(SPLIT_PART(\"agent_config\".\"version\", '.', 3) AS INTEGER)", + "type": "stored" + } + }, + "data": { + "name": "data", + "type": "jsonb", + "primaryKey": false, + "notNull": true + }, + "created_at": { + "name": 
"created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": { + "idx_agent_config_publisher": { + "name": "idx_agent_config_publisher", + "columns": [ + { + "expression": "publisher_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "agent_config_publisher_id_publisher_id_fk": { + "name": "agent_config_publisher_id_publisher_id_fk", + "tableFrom": "agent_config", + "tableTo": "publisher", + "columnsFrom": [ + "publisher_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": { + "agent_config_publisher_id_id_version_pk": { + "name": "agent_config_publisher_id_id_version_pk", + "columns": [ + "publisher_id", + "id", + "version" + ] + } + }, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.agent_run": { + "name": "agent_run", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "user_id": { + "name": "user_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "agent_id": { + "name": "agent_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "publisher_id": { + "name": "publisher_id", + "type": "text", + "primaryKey": false, + "notNull": false, + "generated": { + "as": "CASE\n WHEN agent_id ~ '^[^/@]+/[^/@]+@[^/@]+$'\n THEN split_part(agent_id, '/', 1)\n ELSE NULL\n END", + "type": "stored" + } + }, + "agent_name": { + "name": "agent_name", + "type": "text", + "primaryKey": false, + "notNull": false, + "generated": { + "as": "CASE\n WHEN agent_id ~ '^[^/@]+/[^/@]+@[^/@]+$'\n THEN 
split_part(split_part(agent_id, '/', 2), '@', 1)\n ELSE agent_id\n END", + "type": "stored" + } + }, + "agent_version": { + "name": "agent_version", + "type": "text", + "primaryKey": false, + "notNull": false, + "generated": { + "as": "CASE\n WHEN agent_id ~ '^[^/@]+/[^/@]+@[^/@]+$'\n THEN split_part(agent_id, '@', 2)\n ELSE NULL\n END", + "type": "stored" + } + }, + "ancestor_run_ids": { + "name": "ancestor_run_ids", + "type": "text[]", + "primaryKey": false, + "notNull": false + }, + "root_run_id": { + "name": "root_run_id", + "type": "text", + "primaryKey": false, + "notNull": false, + "generated": { + "as": "CASE WHEN array_length(ancestor_run_ids, 1) >= 1 THEN ancestor_run_ids[1] ELSE id END", + "type": "stored" + } + }, + "parent_run_id": { + "name": "parent_run_id", + "type": "text", + "primaryKey": false, + "notNull": false, + "generated": { + "as": "CASE WHEN array_length(ancestor_run_ids, 1) >= 1 THEN ancestor_run_ids[array_length(ancestor_run_ids, 1)] ELSE NULL END", + "type": "stored" + } + }, + "depth": { + "name": "depth", + "type": "integer", + "primaryKey": false, + "notNull": false, + "generated": { + "as": "COALESCE(array_length(ancestor_run_ids, 1), 1)", + "type": "stored" + } + }, + "duration_ms": { + "name": "duration_ms", + "type": "integer", + "primaryKey": false, + "notNull": false, + "generated": { + "as": "CASE WHEN completed_at IS NOT NULL THEN EXTRACT(EPOCH FROM (completed_at - created_at)) * 1000 ELSE NULL END::integer", + "type": "stored" + } + }, + "total_steps": { + "name": "total_steps", + "type": "integer", + "primaryKey": false, + "notNull": false, + "default": 0 + }, + "direct_credits": { + "name": "direct_credits", + "type": "numeric(10, 6)", + "primaryKey": false, + "notNull": false, + "default": "'0'" + }, + "total_credits": { + "name": "total_credits", + "type": "numeric(10, 6)", + "primaryKey": false, + "notNull": false, + "default": "'0'" + }, + "status": { + "name": "status", + "type": "agent_run_status", + "typeSchema": 
"public", + "primaryKey": false, + "notNull": true, + "default": "'running'" + }, + "error_message": { + "name": "error_message", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "completed_at": { + "name": "completed_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + } + }, + "indexes": { + "idx_agent_run_user_id": { + "name": "idx_agent_run_user_id", + "columns": [ + { + "expression": "user_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "created_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_agent_run_parent": { + "name": "idx_agent_run_parent", + "columns": [ + { + "expression": "parent_run_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_agent_run_root": { + "name": "idx_agent_run_root", + "columns": [ + { + "expression": "root_run_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_agent_run_agent_id": { + "name": "idx_agent_run_agent_id", + "columns": [ + { + "expression": "agent_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "created_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_agent_run_publisher": { + "name": "idx_agent_run_publisher", + "columns": [ + { + "expression": "publisher_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "created_at", + "isExpression": false, + "asc": true, + 
"nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_agent_run_status": { + "name": "idx_agent_run_status", + "columns": [ + { + "expression": "status", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"agent_run\".\"status\" = 'running'", + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_agent_run_ancestors_gin": { + "name": "idx_agent_run_ancestors_gin", + "columns": [ + { + "expression": "ancestor_run_ids", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "gin", + "with": {} + }, + "idx_agent_run_completed_publisher_agent": { + "name": "idx_agent_run_completed_publisher_agent", + "columns": [ + { + "expression": "publisher_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "agent_name", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"agent_run\".\"status\" = 'completed'", + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_agent_run_completed_recent": { + "name": "idx_agent_run_completed_recent", + "columns": [ + { + "expression": "created_at", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "publisher_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "agent_name", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"agent_run\".\"status\" = 'completed'", + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_agent_run_completed_version": { + "name": "idx_agent_run_completed_version", + "columns": [ + { + "expression": "publisher_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "agent_name", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + 
"expression": "agent_version", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "created_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"agent_run\".\"status\" = 'completed'", + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_agent_run_completed_user": { + "name": "idx_agent_run_completed_user", + "columns": [ + { + "expression": "user_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"agent_run\".\"status\" = 'completed'", + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "agent_run_user_id_user_id_fk": { + "name": "agent_run_user_id_user_id_fk", + "tableFrom": "agent_run", + "tableTo": "user", + "columnsFrom": [ + "user_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.agent_step": { + "name": "agent_step", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "agent_run_id": { + "name": "agent_run_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "step_number": { + "name": "step_number", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "duration_ms": { + "name": "duration_ms", + "type": "integer", + "primaryKey": false, + "notNull": false, + "generated": { + "as": "CASE WHEN completed_at IS NOT NULL THEN EXTRACT(EPOCH FROM (completed_at - created_at)) * 1000 ELSE NULL END::integer", + "type": "stored" + } + }, + "credits": { + "name": "credits", + "type": "numeric(10, 6)", + "primaryKey": false, + "notNull": true, + "default": "'0'" + }, + "child_run_ids": { + "name": "child_run_ids", + "type": "text[]", + "primaryKey": false, + "notNull": false + }, + 
"spawned_count": { + "name": "spawned_count", + "type": "integer", + "primaryKey": false, + "notNull": false, + "generated": { + "as": "array_length(child_run_ids, 1)", + "type": "stored" + } + }, + "message_id": { + "name": "message_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "status": { + "name": "status", + "type": "agent_step_status", + "typeSchema": "public", + "primaryKey": false, + "notNull": true, + "default": "'completed'" + }, + "error_message": { + "name": "error_message", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "completed_at": { + "name": "completed_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": { + "unique_step_number_per_run": { + "name": "unique_step_number_per_run", + "columns": [ + { + "expression": "agent_run_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "step_number", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_agent_step_run_id": { + "name": "idx_agent_step_run_id", + "columns": [ + { + "expression": "agent_run_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_agent_step_children_gin": { + "name": "idx_agent_step_children_gin", + "columns": [ + { + "expression": "child_run_ids", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "gin", + "with": {} + } + }, + "foreignKeys": { + "agent_step_agent_run_id_agent_run_id_fk": { + "name": "agent_step_agent_run_id_agent_run_id_fk", + "tableFrom": "agent_step", + "tableTo": "agent_run", + 
"columnsFrom": [ + "agent_run_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.credit_ledger": { + "name": "credit_ledger", + "schema": "", + "columns": { + "operation_id": { + "name": "operation_id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "user_id": { + "name": "user_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "principal": { + "name": "principal", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "balance": { + "name": "balance", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "type": { + "name": "type", + "type": "grant_type", + "typeSchema": "public", + "primaryKey": false, + "notNull": true + }, + "description": { + "name": "description", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "priority": { + "name": "priority", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "expires_at": { + "name": "expires_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "org_id": { + "name": "org_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "stripe_subscription_id": { + "name": "stripe_subscription_id", + "type": "text", + "primaryKey": false, + "notNull": false + } + }, + "indexes": { + "idx_credit_ledger_active_balance": { + "name": "idx_credit_ledger_active_balance", + "columns": [ + { + "expression": "user_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "balance", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "expires_at", + "isExpression": false, + "asc": true, + 
"nulls": "last" + }, + { + "expression": "priority", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "created_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"credit_ledger\".\"balance\" != 0 AND \"credit_ledger\".\"expires_at\" IS NULL", + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_credit_ledger_org": { + "name": "idx_credit_ledger_org", + "columns": [ + { + "expression": "org_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_credit_ledger_subscription": { + "name": "idx_credit_ledger_subscription", + "columns": [ + { + "expression": "user_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "type", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "created_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "credit_ledger_user_id_user_id_fk": { + "name": "credit_ledger_user_id_user_id_fk", + "tableFrom": "credit_ledger", + "tableTo": "user", + "columnsFrom": [ + "user_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + }, + "credit_ledger_org_id_org_id_fk": { + "name": "credit_ledger_org_id_org_id_fk", + "tableFrom": "credit_ledger", + "tableTo": "org", + "columnsFrom": [ + "org_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.encrypted_api_keys": { + "name": "encrypted_api_keys", + "schema": "", + "columns": { + "user_id": { + "name": "user_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + 
"type": { + "name": "type", + "type": "api_key_type", + "typeSchema": "public", + "primaryKey": false, + "notNull": true + }, + "api_key": { + "name": "api_key", + "type": "text", + "primaryKey": false, + "notNull": true + } + }, + "indexes": {}, + "foreignKeys": { + "encrypted_api_keys_user_id_user_id_fk": { + "name": "encrypted_api_keys_user_id_user_id_fk", + "tableFrom": "encrypted_api_keys", + "tableTo": "user", + "columnsFrom": [ + "user_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": { + "encrypted_api_keys_user_id_type_pk": { + "name": "encrypted_api_keys_user_id_type_pk", + "columns": [ + "user_id", + "type" + ] + } + }, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.fingerprint": { + "name": "fingerprint", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "sig_hash": { + "name": "sig_hash", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.free_session": { + "name": "free_session", + "schema": "", + "columns": { + "user_id": { + "name": "user_id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "status": { + "name": "status", + "type": "free_session_status", + "typeSchema": "public", + "primaryKey": false, + "notNull": true + }, + "active_instance_id": { + "name": "active_instance_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "model": { + "name": "model", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "queued_at": { + "name": "queued_at", + "type": "timestamp with 
time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "admitted_at": { + "name": "admitted_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "expires_at": { + "name": "expires_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": { + "idx_free_session_queue": { + "name": "idx_free_session_queue", + "columns": [ + { + "expression": "status", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "model", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "queued_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_free_session_expiry": { + "name": "idx_free_session_expiry", + "columns": [ + { + "expression": "expires_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "free_session_user_id_user_id_fk": { + "name": "free_session_user_id_user_id_fk", + "tableFrom": "free_session", + "tableTo": "user", + "columnsFrom": [ + "user_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.free_session_admit": { + "name": "free_session_admit", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "user_id": { + "name": 
"user_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "model": { + "name": "model", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "admitted_at": { + "name": "admitted_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": { + "idx_free_session_admit_user_model_time": { + "name": "idx_free_session_admit_user_model_time", + "columns": [ + { + "expression": "user_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "model", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "admitted_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "free_session_admit_user_id_user_id_fk": { + "name": "free_session_admit_user_id_user_id_fk", + "tableFrom": "free_session_admit", + "tableTo": "user", + "columnsFrom": [ + "user_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.git_eval_results": { + "name": "git_eval_results", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "cost_mode": { + "name": "cost_mode", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "reasoner_model": { + "name": "reasoner_model", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "agent_model": { + "name": "agent_model", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "metadata": { + "name": "metadata", + "type": "jsonb", + "primaryKey": false, + "notNull": false + }, + "cost": { + "name": "cost", + "type": "integer", + "primaryKey": false, + "notNull": true, + "default": 0 + }, + "is_public": 
{ + "name": "is_public", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.limit_override": { + "name": "limit_override", + "schema": "", + "columns": { + "user_id": { + "name": "user_id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "credits_per_block": { + "name": "credits_per_block", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "block_duration_hours": { + "name": "block_duration_hours", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "weekly_credit_limit": { + "name": "weekly_credit_limit", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": {}, + "foreignKeys": { + "limit_override_user_id_user_id_fk": { + "name": "limit_override_user_id_user_id_fk", + "tableFrom": "limit_override", + "tableTo": "user", + "columnsFrom": [ + "user_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.message": { + "name": "message", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "finished_at": { + "name": "finished_at", + "type": "timestamp", + "primaryKey": false, + "notNull": 
true + }, + "client_id": { + "name": "client_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "client_request_id": { + "name": "client_request_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "model": { + "name": "model", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "agent_id": { + "name": "agent_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "request": { + "name": "request", + "type": "jsonb", + "primaryKey": false, + "notNull": false + }, + "last_message": { + "name": "last_message", + "type": "jsonb", + "primaryKey": false, + "notNull": false, + "generated": { + "as": "\"message\".\"request\" -> -1", + "type": "stored" + } + }, + "reasoning_text": { + "name": "reasoning_text", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "response": { + "name": "response", + "type": "jsonb", + "primaryKey": false, + "notNull": true + }, + "input_tokens": { + "name": "input_tokens", + "type": "integer", + "primaryKey": false, + "notNull": true, + "default": 0 + }, + "cache_creation_input_tokens": { + "name": "cache_creation_input_tokens", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "cache_read_input_tokens": { + "name": "cache_read_input_tokens", + "type": "integer", + "primaryKey": false, + "notNull": true, + "default": 0 + }, + "reasoning_tokens": { + "name": "reasoning_tokens", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "output_tokens": { + "name": "output_tokens", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "cost": { + "name": "cost", + "type": "numeric(100, 20)", + "primaryKey": false, + "notNull": true + }, + "credits": { + "name": "credits", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "byok": { + "name": "byok", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "latency_ms": { + "name": "latency_ms", + "type": "integer", 
+ "primaryKey": false, + "notNull": false + }, + "ttft_ms": { + "name": "ttft_ms", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "user_id": { + "name": "user_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "org_id": { + "name": "org_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "repo_url": { + "name": "repo_url", + "type": "text", + "primaryKey": false, + "notNull": false + } + }, + "indexes": { + "message_user_id_idx": { + "name": "message_user_id_idx", + "columns": [ + { + "expression": "user_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "message_finished_at_user_id_idx": { + "name": "message_finished_at_user_id_idx", + "columns": [ + { + "expression": "finished_at", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "user_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "message_org_id_idx": { + "name": "message_org_id_idx", + "columns": [ + { + "expression": "org_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "message_org_id_finished_at_idx": { + "name": "message_org_id_finished_at_idx", + "columns": [ + { + "expression": "org_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "finished_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "message_user_id_user_id_fk": { + "name": "message_user_id_user_id_fk", + "tableFrom": "message", + "tableTo": "user", + "columnsFrom": [ + "user_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no 
action" + }, + "message_org_id_org_id_fk": { + "name": "message_org_id_org_id_fk", + "tableFrom": "message", + "tableTo": "org", + "columnsFrom": [ + "org_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.org": { + "name": "org", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "name": { + "name": "name", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "slug": { + "name": "slug", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "description": { + "name": "description", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "owner_id": { + "name": "owner_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "stripe_customer_id": { + "name": "stripe_customer_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "stripe_subscription_id": { + "name": "stripe_subscription_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "current_period_start": { + "name": "current_period_start", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "current_period_end": { + "name": "current_period_end", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "auto_topup_enabled": { + "name": "auto_topup_enabled", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "auto_topup_threshold": { + "name": "auto_topup_threshold", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "auto_topup_amount": { + "name": "auto_topup_amount", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "credit_limit": { + "name": "credit_limit", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + 
"billing_alerts": { + "name": "billing_alerts", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": true + }, + "usage_alerts": { + "name": "usage_alerts", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": true + }, + "weekly_reports": { + "name": "weekly_reports", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": {}, + "foreignKeys": { + "org_owner_id_user_id_fk": { + "name": "org_owner_id_user_id_fk", + "tableFrom": "org", + "tableTo": "user", + "columnsFrom": [ + "owner_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": { + "org_slug_unique": { + "name": "org_slug_unique", + "nullsNotDistinct": false, + "columns": [ + "slug" + ] + }, + "org_stripe_customer_id_unique": { + "name": "org_stripe_customer_id_unique", + "nullsNotDistinct": false, + "columns": [ + "stripe_customer_id" + ] + } + }, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.org_feature": { + "name": "org_feature", + "schema": "", + "columns": { + "org_id": { + "name": "org_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "feature": { + "name": "feature", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "config": { + "name": "config", + "type": "jsonb", + "primaryKey": false, + "notNull": false + }, + "is_active": { + "name": "is_active", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": 
true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": { + "idx_org_feature_active": { + "name": "idx_org_feature_active", + "columns": [ + { + "expression": "org_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "is_active", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "org_feature_org_id_org_id_fk": { + "name": "org_feature_org_id_org_id_fk", + "tableFrom": "org_feature", + "tableTo": "org", + "columnsFrom": [ + "org_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": { + "org_feature_org_id_feature_pk": { + "name": "org_feature_org_id_feature_pk", + "columns": [ + "org_id", + "feature" + ] + } + }, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.org_invite": { + "name": "org_invite", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "org_id": { + "name": "org_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "email": { + "name": "email", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "role": { + "name": "role", + "type": "org_role", + "typeSchema": "public", + "primaryKey": false, + "notNull": true + }, + "token": { + "name": "token", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "invited_by": { + "name": "invited_by", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "expires_at": { + "name": "expires_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + 
"primaryKey": false, + "notNull": true, + "default": "now()" + }, + "accepted_at": { + "name": "accepted_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "accepted_by": { + "name": "accepted_by", + "type": "text", + "primaryKey": false, + "notNull": false + } + }, + "indexes": { + "idx_org_invite_token": { + "name": "idx_org_invite_token", + "columns": [ + { + "expression": "token", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_org_invite_email": { + "name": "idx_org_invite_email", + "columns": [ + { + "expression": "org_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "email", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_org_invite_expires": { + "name": "idx_org_invite_expires", + "columns": [ + { + "expression": "expires_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "org_invite_org_id_org_id_fk": { + "name": "org_invite_org_id_org_id_fk", + "tableFrom": "org_invite", + "tableTo": "org", + "columnsFrom": [ + "org_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + }, + "org_invite_invited_by_user_id_fk": { + "name": "org_invite_invited_by_user_id_fk", + "tableFrom": "org_invite", + "tableTo": "user", + "columnsFrom": [ + "invited_by" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + }, + "org_invite_accepted_by_user_id_fk": { + "name": "org_invite_accepted_by_user_id_fk", + "tableFrom": "org_invite", + "tableTo": "user", + "columnsFrom": [ + "accepted_by" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + } + }, + 
"compositePrimaryKeys": {}, + "uniqueConstraints": { + "org_invite_token_unique": { + "name": "org_invite_token_unique", + "nullsNotDistinct": false, + "columns": [ + "token" + ] + } + }, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.org_member": { + "name": "org_member", + "schema": "", + "columns": { + "org_id": { + "name": "org_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "user_id": { + "name": "user_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "role": { + "name": "role", + "type": "org_role", + "typeSchema": "public", + "primaryKey": false, + "notNull": true + }, + "joined_at": { + "name": "joined_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": {}, + "foreignKeys": { + "org_member_org_id_org_id_fk": { + "name": "org_member_org_id_org_id_fk", + "tableFrom": "org_member", + "tableTo": "org", + "columnsFrom": [ + "org_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + }, + "org_member_user_id_user_id_fk": { + "name": "org_member_user_id_user_id_fk", + "tableFrom": "org_member", + "tableTo": "user", + "columnsFrom": [ + "user_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": { + "org_member_org_id_user_id_pk": { + "name": "org_member_org_id_user_id_pk", + "columns": [ + "org_id", + "user_id" + ] + } + }, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.org_repo": { + "name": "org_repo", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "org_id": { + "name": "org_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "repo_url": { + "name": "repo_url", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "repo_name": { + "name": 
"repo_name", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "repo_owner": { + "name": "repo_owner", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "approved_by": { + "name": "approved_by", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "approved_at": { + "name": "approved_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "is_active": { + "name": "is_active", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": true + } + }, + "indexes": { + "idx_org_repo_active": { + "name": "idx_org_repo_active", + "columns": [ + { + "expression": "org_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "is_active", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_org_repo_unique": { + "name": "idx_org_repo_unique", + "columns": [ + { + "expression": "org_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "repo_url", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "org_repo_org_id_org_id_fk": { + "name": "org_repo_org_id_org_id_fk", + "tableFrom": "org_repo", + "tableTo": "org", + "columnsFrom": [ + "org_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + }, + "org_repo_approved_by_user_id_fk": { + "name": "org_repo_approved_by_user_id_fk", + "tableFrom": "org_repo", + "tableTo": "user", + "columnsFrom": [ + "approved_by" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.publisher": { + "name": "publisher", + 
"schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "name": { + "name": "name", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "email": { + "name": "email", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "verified": { + "name": "verified", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "bio": { + "name": "bio", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "avatar_url": { + "name": "avatar_url", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "user_id": { + "name": "user_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "org_id": { + "name": "org_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "created_by": { + "name": "created_by", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": {}, + "foreignKeys": { + "publisher_user_id_user_id_fk": { + "name": "publisher_user_id_user_id_fk", + "tableFrom": "publisher", + "tableTo": "user", + "columnsFrom": [ + "user_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + }, + "publisher_org_id_org_id_fk": { + "name": "publisher_org_id_org_id_fk", + "tableFrom": "publisher", + "tableTo": "org", + "columnsFrom": [ + "org_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + }, + "publisher_created_by_user_id_fk": { + "name": "publisher_created_by_user_id_fk", + "tableFrom": "publisher", + "tableTo": "user", + "columnsFrom": [ + "created_by" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + 
"onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": { + "publisher_single_owner": { + "name": "publisher_single_owner", + "value": "(\"publisher\".\"user_id\" IS NOT NULL AND \"publisher\".\"org_id\" IS NULL) OR\n (\"publisher\".\"user_id\" IS NULL AND \"publisher\".\"org_id\" IS NOT NULL)" + } + }, + "isRLSEnabled": false + }, + "public.referral": { + "name": "referral", + "schema": "", + "columns": { + "referrer_id": { + "name": "referrer_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "referred_id": { + "name": "referred_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "status": { + "name": "status", + "type": "referral_status", + "typeSchema": "public", + "primaryKey": false, + "notNull": true, + "default": "'pending'" + }, + "credits": { + "name": "credits", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "is_legacy": { + "name": "is_legacy", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "completed_at": { + "name": "completed_at", + "type": "timestamp", + "primaryKey": false, + "notNull": false + } + }, + "indexes": {}, + "foreignKeys": { + "referral_referrer_id_user_id_fk": { + "name": "referral_referrer_id_user_id_fk", + "tableFrom": "referral", + "tableTo": "user", + "columnsFrom": [ + "referrer_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + }, + "referral_referred_id_user_id_fk": { + "name": "referral_referred_id_user_id_fk", + "tableFrom": "referral", + "tableTo": "user", + "columnsFrom": [ + "referred_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": { + "referral_referrer_id_referred_id_pk": { + "name": 
"referral_referrer_id_referred_id_pk", + "columns": [ + "referrer_id", + "referred_id" + ] + } + }, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.session": { + "name": "session", + "schema": "", + "columns": { + "sessionToken": { + "name": "sessionToken", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "userId": { + "name": "userId", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "expires": { + "name": "expires", + "type": "timestamp", + "primaryKey": false, + "notNull": true + }, + "fingerprint_id": { + "name": "fingerprint_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "type": { + "name": "type", + "type": "session_type", + "typeSchema": "public", + "primaryKey": false, + "notNull": true, + "default": "'web'" + }, + "created_at": { + "name": "created_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": {}, + "foreignKeys": { + "session_userId_user_id_fk": { + "name": "session_userId_user_id_fk", + "tableFrom": "session", + "tableTo": "user", + "columnsFrom": [ + "userId" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + }, + "session_fingerprint_id_fingerprint_id_fk": { + "name": "session_fingerprint_id_fingerprint_id_fk", + "tableFrom": "session", + "tableTo": "fingerprint", + "columnsFrom": [ + "fingerprint_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.subscription": { + "name": "subscription", + "schema": "", + "columns": { + "stripe_subscription_id": { + "name": "stripe_subscription_id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "stripe_customer_id": { + "name": "stripe_customer_id", + "type": "text", + "primaryKey": false, + 
"notNull": true + }, + "user_id": { + "name": "user_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "stripe_price_id": { + "name": "stripe_price_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "tier": { + "name": "tier", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "scheduled_tier": { + "name": "scheduled_tier", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "status": { + "name": "status", + "type": "subscription_status", + "typeSchema": "public", + "primaryKey": false, + "notNull": true, + "default": "'active'" + }, + "billing_period_start": { + "name": "billing_period_start", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true + }, + "billing_period_end": { + "name": "billing_period_end", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true + }, + "cancel_at_period_end": { + "name": "cancel_at_period_end", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "canceled_at": { + "name": "canceled_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": { + "idx_subscription_customer": { + "name": "idx_subscription_customer", + "columns": [ + { + "expression": "stripe_customer_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_subscription_user": { + "name": "idx_subscription_user", + "columns": [ + { + "expression": "user_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": 
false, + "method": "btree", + "with": {} + }, + "idx_subscription_status": { + "name": "idx_subscription_status", + "columns": [ + { + "expression": "status", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"subscription\".\"status\" = 'active'", + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "subscription_user_id_user_id_fk": { + "name": "subscription_user_id_user_id_fk", + "tableFrom": "subscription", + "tableTo": "user", + "columnsFrom": [ + "user_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.sync_failure": { + "name": "sync_failure", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "provider": { + "name": "provider", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "last_attempt_at": { + "name": "last_attempt_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "retry_count": { + "name": "retry_count", + "type": "integer", + "primaryKey": false, + "notNull": true, + "default": 1 + }, + "last_error": { + "name": "last_error", + "type": "text", + "primaryKey": false, + "notNull": true + } + }, + "indexes": { + "idx_sync_failure_retry": { + "name": "idx_sync_failure_retry", + "columns": [ + { + "expression": "retry_count", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "last_attempt_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"sync_failure\".\"retry_count\" < 5", + "concurrently": false, + 
"method": "btree", + "with": {} + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.user": { + "name": "user", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "name": { + "name": "name", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "email": { + "name": "email", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "password": { + "name": "password", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "emailVerified": { + "name": "emailVerified", + "type": "timestamp", + "primaryKey": false, + "notNull": false + }, + "image": { + "name": "image", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "stripe_customer_id": { + "name": "stripe_customer_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "next_quota_reset": { + "name": "next_quota_reset", + "type": "timestamp", + "primaryKey": false, + "notNull": false, + "default": "now() + INTERVAL '1 month'" + }, + "created_at": { + "name": "created_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "referral_code": { + "name": "referral_code", + "type": "text", + "primaryKey": false, + "notNull": false, + "default": "'ref-' || gen_random_uuid()" + }, + "referral_limit": { + "name": "referral_limit", + "type": "integer", + "primaryKey": false, + "notNull": true, + "default": 5 + }, + "discord_id": { + "name": "discord_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "handle": { + "name": "handle", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "auto_topup_enabled": { + "name": "auto_topup_enabled", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "auto_topup_threshold": { + "name": "auto_topup_threshold", + "type": 
"integer", + "primaryKey": false, + "notNull": false + }, + "auto_topup_amount": { + "name": "auto_topup_amount", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "banned": { + "name": "banned", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "fallback_to_a_la_carte": { + "name": "fallback_to_a_la_carte", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": { + "user_email_unique": { + "name": "user_email_unique", + "nullsNotDistinct": false, + "columns": [ + "email" + ] + }, + "user_stripe_customer_id_unique": { + "name": "user_stripe_customer_id_unique", + "nullsNotDistinct": false, + "columns": [ + "stripe_customer_id" + ] + }, + "user_referral_code_unique": { + "name": "user_referral_code_unique", + "nullsNotDistinct": false, + "columns": [ + "referral_code" + ] + }, + "user_discord_id_unique": { + "name": "user_discord_id_unique", + "nullsNotDistinct": false, + "columns": [ + "discord_id" + ] + }, + "user_handle_unique": { + "name": "user_handle_unique", + "nullsNotDistinct": false, + "columns": [ + "handle" + ] + } + }, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.verificationToken": { + "name": "verificationToken", + "schema": "", + "columns": { + "identifier": { + "name": "identifier", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "token": { + "name": "token", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "expires": { + "name": "expires", + "type": "timestamp", + "primaryKey": false, + "notNull": true + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": { + "verificationToken_identifier_token_pk": { + "name": "verificationToken_identifier_token_pk", + "columns": [ + "identifier", + "token" + ] + } + }, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + 
"isRLSEnabled": false + } + }, + "enums": { + "public.referral_status": { + "name": "referral_status", + "schema": "public", + "values": [ + "pending", + "completed" + ] + }, + "public.agent_run_status": { + "name": "agent_run_status", + "schema": "public", + "values": [ + "running", + "completed", + "failed", + "cancelled" + ] + }, + "public.agent_step_status": { + "name": "agent_step_status", + "schema": "public", + "values": [ + "running", + "completed", + "skipped" + ] + }, + "public.api_key_type": { + "name": "api_key_type", + "schema": "public", + "values": [ + "anthropic", + "gemini", + "openai" + ] + }, + "public.free_session_status": { + "name": "free_session_status", + "schema": "public", + "values": [ + "queued", + "active" + ] + }, + "public.grant_type": { + "name": "grant_type", + "schema": "public", + "values": [ + "free", + "referral", + "referral_legacy", + "subscription", + "purchase", + "admin", + "organization", + "ad" + ] + }, + "public.org_role": { + "name": "org_role", + "schema": "public", + "values": [ + "owner", + "admin", + "member" + ] + }, + "public.session_type": { + "name": "session_type", + "schema": "public", + "values": [ + "web", + "pat", + "cli" + ] + }, + "public.subscription_status": { + "name": "subscription_status", + "schema": "public", + "values": [ + "incomplete", + "incomplete_expired", + "trialing", + "active", + "past_due", + "canceled", + "unpaid", + "paused" + ] + } + }, + "schemas": {}, + "sequences": {}, + "roles": {}, + "policies": {}, + "views": {}, + "_meta": { + "columns": {}, + "schemas": {}, + "tables": {} + } +} \ No newline at end of file diff --git a/packages/internal/src/db/migrations/meta/_journal.json b/packages/internal/src/db/migrations/meta/_journal.json index f67ef37dc..78747c831 100644 --- a/packages/internal/src/db/migrations/meta/_journal.json +++ b/packages/internal/src/db/migrations/meta/_journal.json @@ -323,6 +323,13 @@ "when": 1776813242936, "tag": "0045_mean_sleeper", "breakpoints": true + }, 
+ { + "idx": 46, + "version": "7", + "when": 1776898844362, + "tag": "0046_cloudy_firedrake", + "breakpoints": true } ] } \ No newline at end of file diff --git a/packages/internal/src/db/schema.ts b/packages/internal/src/db/schema.ts index b6f170d29..2ead1fc6d 100644 --- a/packages/internal/src/db/schema.ts +++ b/packages/internal/src/db/schema.ts @@ -870,3 +870,37 @@ export const freeSession = pgTable( index('idx_free_session_expiry').on(table.expires_at), ], ) + +/** + * Audit log of every admission — one row per queued→active transition. Used + * to rate-limit heavy users (e.g. no more than 5 GLM sessions per 20h). + * + * Separate from `free_session` because that table is one-row-per-user (state, + * not history); the UPSERT path there would otherwise destroy prior admissions. + */ +export const freeSessionAdmit = pgTable( + 'free_session_admit', + { + id: text('id') + .primaryKey() + .$defaultFn(() => crypto.randomUUID()), + user_id: text('user_id') + .notNull() + .references(() => user.id, { onDelete: 'cascade' }), + model: text('model').notNull(), + admitted_at: timestamp('admitted_at', { + mode: 'date', + withTimezone: true, + }) + .notNull() + .defaultNow(), + }, + (table) => [ + // Rate-limit lookup: WHERE user_id=$1 AND model=$2 AND admitted_at > $cutoff + index('idx_free_session_admit_user_model_time').on( + table.user_id, + table.model, + table.admitted_at, + ), + ], +) diff --git a/scripts/test-fireworks-cache-intervals.ts b/scripts/test-fireworks-cache-intervals.ts index 0ed71193f..8d4e86740 100644 --- a/scripts/test-fireworks-cache-intervals.ts +++ b/scripts/test-fireworks-cache-intervals.ts @@ -13,7 +13,6 @@ * * Models: * glm-5.1 (default) — z-ai/glm-5.1 - * kimi-k2.5 — moonshotai/kimi-k2.5 * minimax — minimax/minimax-m2.5 * * Flags: @@ -39,7 +38,7 @@ const FIREWORKS_BASE_URL = 'https://api.fireworks.ai/inference/v1' type ModelConfig = { id: string standardModel: string - deploymentModel: string + deploymentModel?: string inputCostPerToken: number 
cachedInputCostPerToken: number outputCostPerToken: number @@ -54,14 +53,6 @@ const MODEL_CONFIGS: Record = { cachedInputCostPerToken: 0.26 / 1_000_000, outputCostPerToken: 4.4 / 1_000_000, }, - 'kimi-k2.5': { - id: 'moonshotai/kimi-k2.5', - standardModel: 'accounts/fireworks/models/kimi-k2p5', - deploymentModel: 'accounts/james-65d217/deployments/mx8l5rq2', - inputCostPerToken: 0.6 / 1_000_000, - cachedInputCostPerToken: 0.1 / 1_000_000, - outputCostPerToken: 3.0 / 1_000_000, - }, minimax: { id: 'minimax/minimax-m2.5', standardModel: 'accounts/fireworks/models/minimax-m2p5', @@ -117,8 +108,12 @@ function parseArgs(): { const { modelKey, useDeployment: USE_DEPLOYMENT, intervals: INTERVALS_SEC } = parseArgs() const MODEL = MODEL_CONFIGS[modelKey] +if (USE_DEPLOYMENT && !MODEL.deploymentModel) { + console.error(`❌ No custom deployment configured for ${MODEL.id}`) + process.exit(1) +} const FIREWORKS_MODEL = USE_DEPLOYMENT - ? MODEL.deploymentModel + ? MODEL.deploymentModel! : MODEL.standardModel const INPUT_COST_PER_TOKEN = MODEL.inputCostPerToken const CACHED_INPUT_COST_PER_TOKEN = MODEL.cachedInputCostPerToken diff --git a/scripts/test-fireworks-long.ts b/scripts/test-fireworks-long.ts index 67028228d..a1e4950f8 100644 --- a/scripts/test-fireworks-long.ts +++ b/scripts/test-fireworks-long.ts @@ -12,12 +12,17 @@ * Models: * glm-5.1 (default) — z-ai/glm-5.1 * minimax — minimax/minimax-m2.5 + * minimax-m2.7 — minimax/minimax-m2.7 * * Flags: * --deployment Use custom deployment instead of serverless (standard API) * Serverless is the default + * Examples: + * bun scripts/test-fireworks-long.ts glm-5.1 --deployment */ +import { FIREWORKS_DEPLOYMENT_MAP } from '../web/src/llm-api/fireworks-config' + export { } const FIREWORKS_BASE_URL = 'https://api.fireworks.ai/inference/v1' @@ -25,7 +30,7 @@ const FIREWORKS_BASE_URL = 'https://api.fireworks.ai/inference/v1' type ModelConfig = { id: string // OpenRouter-style ID (for display) standardModel: string // Fireworks standard 
API model ID - deploymentModel: string // Fireworks custom deployment model ID + deploymentModel?: string // Fireworks custom deployment model ID inputCostPerToken: number cachedInputCostPerToken: number outputCostPerToken: number @@ -35,19 +40,11 @@ const MODEL_CONFIGS: Record = { 'glm-5.1': { id: 'z-ai/glm-5.1', standardModel: 'accounts/fireworks/models/glm-5p1', - deploymentModel: 'accounts/james-65d217/deployments/mjb4i7ea', + deploymentModel: FIREWORKS_DEPLOYMENT_MAP['z-ai/glm-5.1'], inputCostPerToken: 1.40 / 1_000_000, cachedInputCostPerToken: 0.26 / 1_000_000, outputCostPerToken: 4.40 / 1_000_000, }, - 'kimi-k2.5': { - id: 'moonshotai/kimi-k2.5', - standardModel: 'accounts/fireworks/models/kimi-k2p5', - deploymentModel: 'accounts/james-65d217/deployments/mx8l5rq2', - inputCostPerToken: 0.60 / 1_000_000, - cachedInputCostPerToken: 0.10 / 1_000_000, - outputCostPerToken: 3.00 / 1_000_000, - }, minimax: { id: 'minimax/minimax-m2.5', standardModel: 'accounts/fireworks/models/minimax-m2p5', @@ -67,9 +64,16 @@ const MODEL_CONFIGS: Record = { } const DEFAULT_MODEL = 'glm-5.1' +const MODEL_ALIASES: Record = { + glm: 'glm-5.1', + 'z-ai/glm-5.1': 'glm-5.1', + 'minimax/minimax-m2.5': 'minimax', + 'minimax/minimax-m2.7': 'minimax-m2.7', +} function getModelConfig(modelArg?: string): ModelConfig { - const key = modelArg ?? DEFAULT_MODEL + const rawKey = modelArg ?? DEFAULT_MODEL + const key = MODEL_ALIASES[rawKey] ?? rawKey const config = MODEL_CONFIGS[key] if (!config) { console.error(`❌ Unknown model: "${key}". Available models: ${Object.keys(MODEL_CONFIGS).join(', ')}`) @@ -83,7 +87,11 @@ const modelArg = process.argv.find((a, i) => i > 1 && !a.startsWith('-') && a != const MODEL = getModelConfig(modelArg) // Default to serverless (standard API); use --deployment for custom deployment -const FIREWORKS_MODEL = USE_DEPLOYMENT ? 
MODEL.deploymentModel : MODEL.standardModel +if (USE_DEPLOYMENT && !MODEL.deploymentModel) { + console.error(`❌ No custom deployment configured for ${MODEL.id}`) + process.exit(1) +} +const FIREWORKS_MODEL = USE_DEPLOYMENT ? MODEL.deploymentModel! : MODEL.standardModel const INPUT_COST_PER_TOKEN = MODEL.inputCostPerToken const CACHED_INPUT_COST_PER_TOKEN = MODEL.cachedInputCostPerToken const OUTPUT_COST_PER_TOKEN = MODEL.outputCostPerToken @@ -455,4 +463,4 @@ async function main() { console.log('Done!') } -main() \ No newline at end of file +main() diff --git a/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts b/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts index 51a3eb46b..1aac8800c 100644 --- a/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts +++ b/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts @@ -1,6 +1,7 @@ import { afterEach, beforeEach, describe, expect, mock, it } from 'bun:test' import { NextRequest } from 'next/server' +import { isFreebuffDeploymentHours } from '@codebuff/common/constants/freebuff-models' import { formatQuotaResetCountdown, postChatCompletions } from '../_post' import type { TrackEventFn } from '@codebuff/common/types/contracts/analytics' @@ -528,7 +529,7 @@ describe('/api/v1/chat/completions POST endpoint', () => { method: 'POST', headers: { Authorization: 'Bearer test-api-key-new-free' }, body: JSON.stringify({ - model: 'z-ai/glm-5.1', + model: 'minimax/minimax-m2.7', stream: false, codebuff_metadata: { run_id: 'run-free', @@ -555,6 +556,76 @@ describe('/api/v1/chat/completions POST endpoint', () => { expect(response.status).toBe(200) }) + it('lets freebuff use GLM 5.1 through Fireworks availability rules', async () => { + const fetchedBodies: Record[] = [] + const fetchViaFireworks = mock( + async (_url: string | URL | Request, init?: RequestInit) => { + fetchedBodies.push(JSON.parse(init?.body as string)) + return new Response( + JSON.stringify({ + id: 'test-id', 
+ model: 'accounts/james-65d217/deployments/mjb4i7ea', + choices: [{ message: { content: 'test response' } }], + usage: { + prompt_tokens: 10, + completion_tokens: 20, + total_tokens: 30, + }, + }), + { + status: 200, + headers: { 'Content-Type': 'application/json' }, + }, + ) + }, + ) as unknown as typeof globalThis.fetch + + const req = new NextRequest( + 'http://localhost:3000/api/v1/chat/completions', + { + method: 'POST', + headers: { Authorization: 'Bearer test-api-key-new-free' }, + body: JSON.stringify({ + model: 'z-ai/glm-5.1', + stream: false, + codebuff_metadata: { + run_id: 'run-free', + client_id: 'test-client-id-123', + cost_mode: 'free', + }, + }), + }, + ) + + const response = await postChatCompletions({ + req, + getUserInfoFromApiKey: mockGetUserInfoFromApiKey, + logger: mockLogger, + trackEvent: mockTrackEvent, + getUserUsageData: mockGetUserUsageData, + getAgentRunFromId: mockGetAgentRunFromId, + fetch: fetchViaFireworks, + insertMessageBigquery: mockInsertMessageBigquery, + loggerWithContext: mockLoggerWithContext, + checkSessionAdmissible: mockCheckSessionAdmissibleAllow, + }) + + const body = await response.json() + if (isFreebuffDeploymentHours()) { + expect(response.status).toBe(200) + expect(fetchedBodies).toHaveLength(1) + expect(fetchedBodies[0].model).toBe( + 'accounts/james-65d217/deployments/mjb4i7ea', + ) + expect(body.model).toBe('z-ai/glm-5.1') + expect(body.provider).toBe('Fireworks') + } else { + expect(response.status).toBe(503) + expect(fetchedBodies).toHaveLength(0) + expect(body.error.code).toBe('DEPLOYMENT_OUTSIDE_HOURS') + } + }) + it('skips credit check when in FREE mode even with 0 credits', async () => { const req = new NextRequest( 'http://localhost:3000/api/v1/chat/completions', @@ -562,7 +633,7 @@ describe('/api/v1/chat/completions POST endpoint', () => { method: 'POST', headers: { Authorization: 'Bearer test-api-key-no-credits' }, body: JSON.stringify({ - model: 'z-ai/glm-5.1', + model: 'minimax/minimax-m2.7', stream: 
false, codebuff_metadata: { run_id: 'run-free', @@ -671,7 +742,7 @@ describe('/api/v1/chat/completions POST endpoint', () => { method: 'POST', headers: { Authorization: 'Bearer test-api-key-new-free' }, body: JSON.stringify({ - model: 'z-ai/glm-5.1', + model: 'minimax/minimax-m2.7', stream: true, codebuff_metadata: { run_id: 'run-123', @@ -853,7 +924,7 @@ describe('/api/v1/chat/completions POST endpoint', () => { method: 'POST', headers: { Authorization: 'Bearer test-api-key-123' }, body: JSON.stringify({ - model: 'z-ai/glm-5.1', + model: 'minimax/minimax-m2.7', stream: false, codebuff_metadata: { run_id: 'run-free', diff --git a/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts b/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts index 657c17f6d..e4675e488 100644 --- a/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts +++ b/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts @@ -3,6 +3,7 @@ import { describe, expect, test } from 'bun:test' import { deleteFreebuffSession, FREEBUFF_INSTANCE_HEADER, + FREEBUFF_MODEL_HEADER, getFreebuffSession, postFreebuffSession, } from '../_handlers' @@ -12,16 +13,17 @@ import type { SessionDeps } from '@/server/free-session/public-api' import type { InternalSessionRow } from '@/server/free-session/types' import type { NextRequest } from 'next/server' -const DEFAULT_MODEL = 'z-ai/glm-5.1' +const DEFAULT_MODEL = 'minimax/minimax-m2.7' function makeReq( apiKey: string | null, - opts: { instanceId?: string; cfCountry?: string } = {}, + opts: { instanceId?: string; cfCountry?: string; model?: string } = {}, ): NextRequest { const headers = new Headers() if (apiKey) headers.set('Authorization', `Bearer ${apiKey}`) if (opts.instanceId) headers.set(FREEBUFF_INSTANCE_HEADER, opts.instanceId) if (opts.cfCountry) headers.set('cf-ipcountry', opts.cfCountry) + if (opts.model) headers.set(FREEBUFF_MODEL_HEADER, opts.model) return { headers, } as unknown as NextRequest @@ -44,6 +46,9 @@ function 
makeSessionDeps(overrides: Partial = {}): SessionDeps & { getInstantAdmitCapacity: () => 0, activeCountForModel: async () => 0, promoteQueuedUser: async () => null, + // No admits in handler tests — the rate-limit check reads empty and + // every request falls through to the queue. + listRecentAdmits: async () => [], now: () => now, getSessionRow: async (userId) => rows.get(userId) ?? null, queueDepthsByModel: async () => { @@ -153,6 +158,19 @@ describe('POST /api/v1/freebuff/session', () => { expect(body.status).toBe('queued') }) + test('returns model_unavailable for GLM outside deployment hours', async () => { + const sessionDeps = makeSessionDeps() + const resp = await postFreebuffSession( + makeReq('ok', { model: 'z-ai/glm-5.1' }), + makeDeps(sessionDeps, 'u1'), + ) + expect(resp.status).toBe(409) + const body = await resp.json() + expect(body.status).toBe('model_unavailable') + expect(body.availableHours).toBe('9am ET-5pm PT') + expect(sessionDeps.rows.size).toBe(0) + }) + // Banned bots with valid API keys were POSTing every few seconds and // inflating queueDepth between the 15s admission-tick sweeps. Rejecting at // the HTTP layer with 403 (terminal, like country_blocked) keeps them out diff --git a/web/src/app/api/v1/freebuff/session/_handlers.ts b/web/src/app/api/v1/freebuff/session/_handlers.ts index ec17568a3..9a2d61899 100644 --- a/web/src/app/api/v1/freebuff/session/_handlers.ts +++ b/web/src/app/api/v1/freebuff/session/_handlers.ts @@ -138,12 +138,21 @@ export async function postFreebuffSession( model: requestedModel, deps: deps.sessionDeps, }) - // model_locked is a 409 so it's distinguishable from a normal queued/active - // response on the client. banned is a 403 (terminal, mirrors country_blocked) - // so older CLIs that don't know the status fall into their `!resp.ok` error - // path and back off instead of tight-polling on the unrecognized 200 body. 
+ // model_locked / model_unavailable are 409 so they're distinguishable + // from normal queued/active responses on the client. banned is a 403 + // (terminal, mirrors country_blocked) so older CLIs that don't know the + // status fall into their `!resp.ok` error path and back off instead of + // tight-polling on the unrecognized 200 body. rate_limited uses 429 for + // the same reason as banned — older CLIs back off, newer CLIs parse the + // structured body. const status = - state.status === 'model_locked' ? 409 : state.status === 'banned' ? 403 : 200 + state.status === 'model_locked' || state.status === 'model_unavailable' + ? 409 + : state.status === 'banned' + ? 403 + : state.status === 'rate_limited' + ? 429 + : 200 return NextResponse.json(state, { status }) } catch (error) { return serverError(deps, 'POST', auth.userId, error) diff --git a/web/src/llm-api/__tests__/fireworks-deployment.test.ts b/web/src/llm-api/__tests__/fireworks-deployment.test.ts index 9ed91fd0a..be17a6e2e 100644 --- a/web/src/llm-api/__tests__/fireworks-deployment.test.ts +++ b/web/src/llm-api/__tests__/fireworks-deployment.test.ts @@ -3,7 +3,7 @@ import { afterEach, beforeEach, describe, expect, it, mock } from 'bun:test' import { createFireworksRequestWithFallback, DEPLOYMENT_COOLDOWN_MS, - FireworksError, + isDeploymentHours, isDeploymentCoolingDown, markDeploymentScalingUp, resetDeploymentCooldown, @@ -13,6 +13,11 @@ import type { Logger } from '@codebuff/common/types/contracts/logger' const STANDARD_MODEL_ID = 'accounts/fireworks/models/glm-5p1' const DEPLOYMENT_MODEL_ID = 'accounts/james-65d217/deployments/mjb4i7ea' +const IN_DEPLOYMENT_HOURS = new Date('2026-04-17T16:00:00Z') // Friday, 12pm ET / 9am PT +const BEFORE_DEPLOYMENT_HOURS = new Date('2026-04-17T12:59:00Z') // Friday, 8:59am ET +const AFTER_DEPLOYMENT_HOURS = new Date('2026-04-18T00:00:00Z') // Friday, 5pm PT +const WEEKDAY_AFTER_DEPLOYMENT_HOURS = new Date('2026-04-21T00:01:00Z') // Monday, 5:01pm PT +const 
WEEKEND_DEPLOYMENT_HOURS = new Date('2026-04-18T16:00:00Z') // Saturday function createMockLogger(): Logger { return { @@ -23,18 +28,20 @@ function createMockLogger(): Logger { } } -// Helper: create a Date at a specific ET hour using a known EDT date (June 2025, UTC-4) -function dateAtEtHour(hour: number): Date { - // June 15, 2025 is EDT (UTC-4), so ET hour H = UTC hour H+4 - const utcHour = hour + 4 - if (utcHour < 24) { - return new Date(`2025-06-15T${String(utcHour).padStart(2, '0')}:30:00Z`) - } - // Wraps to next day - return new Date(`2025-06-16T${String(utcHour - 24).padStart(2, '0')}:30:00Z`) -} - describe('Fireworks deployment routing', () => { + describe('deployment hours', () => { + it('is active from 9am ET until before 5pm PT on weekdays', () => { + expect(isDeploymentHours(BEFORE_DEPLOYMENT_HOURS)).toBe(false) + expect(isDeploymentHours(IN_DEPLOYMENT_HOURS)).toBe(true) + expect(isDeploymentHours(AFTER_DEPLOYMENT_HOURS)).toBe(false) + expect(isDeploymentHours(WEEKDAY_AFTER_DEPLOYMENT_HOURS)).toBe(false) + }) + + it('is inactive on weekends', () => { + expect(isDeploymentHours(WEEKEND_DEPLOYMENT_HOURS)).toBe(false) + }) + }) + describe('deployment cooldown', () => { beforeEach(() => { resetDeploymentCooldown() @@ -81,27 +88,9 @@ describe('Fireworks deployment routing', () => { model: 'z-ai/glm-5.1', messages: [{ role: 'user' as const, content: 'test' }], } - - function spyDeploymentHours(inHours: boolean) { - // Control isDeploymentHours by mocking Date.prototype.toLocaleString - // When called with the ET timezone options, return an hour inside or outside the window - const original = Date.prototype.toLocaleString - const spy = { - restore: () => { - Date.prototype.toLocaleString = original - }, - } - Date.prototype.toLocaleString = function ( - this: Date, - ...args: Parameters - ) { - const options = args[1] as Intl.DateTimeFormatOptions | undefined - if (options?.timeZone === 'America/New_York' && options?.hour === 'numeric') { - return inHours ? 
'14' : '3' - } - return original.apply(this, args) - } - return spy + const liteBody = { + ...minimalBody, + codebuff_metadata: { cost_mode: 'lite' }, } it('uses standard API when custom deployment is disabled', async () => { @@ -128,7 +117,6 @@ describe('Fireworks deployment routing', () => { }) it('tries custom deployment during deployment hours', async () => { - const spy = spyDeploymentHours(true) const fetchCalls: string[] = [] const mockFetch = mock(async (_url: string | URL | Request, init?: RequestInit) => { @@ -137,160 +125,115 @@ describe('Fireworks deployment routing', () => { return new Response(JSON.stringify({ ok: true }), { status: 200 }) }) as unknown as typeof globalThis.fetch - try { - const response = await createFireworksRequestWithFallback({ - body: minimalBody as never, - originalModel: 'z-ai/glm-5.1', - fetch: mockFetch, - logger, - useCustomDeployment: true, - sessionId: 'test-user-id', - }) - - expect(response.status).toBe(200) - expect(fetchCalls).toHaveLength(1) - expect(fetchCalls[0]).toBe(DEPLOYMENT_MODEL_ID) - } finally { - spy.restore() - } + const response = await createFireworksRequestWithFallback({ + body: minimalBody as never, + originalModel: 'z-ai/glm-5.1', + fetch: mockFetch, + logger, + useCustomDeployment: true, + sessionId: 'test-user-id', + now: IN_DEPLOYMENT_HOURS, + }) + + expect(response.status).toBe(200) + expect(fetchCalls).toHaveLength(1) + expect(fetchCalls[0]).toBe(DEPLOYMENT_MODEL_ID) }) - it('falls back to standard API on 503 DEPLOYMENT_SCALING_UP', async () => { - const spy = spyDeploymentHours(true) + it('returns deployment 503 on DEPLOYMENT_SCALING_UP without serverless fallback', async () => { const fetchCalls: string[] = [] - let callCount = 0 const mockFetch = mock(async (_url: string | URL | Request, init?: RequestInit) => { const body = JSON.parse(init?.body as string) fetchCalls.push(body.model) - callCount++ - - if (callCount === 1) { - return new Response( - JSON.stringify({ - error: { - message: 
'Deployment is currently scaled to zero and is scaling up. Please retry your request in a few minutes.', - code: 'DEPLOYMENT_SCALING_UP', - type: 'error', - }, - }), - { status: 503, statusText: 'Service Unavailable' }, - ) - } - - return new Response(JSON.stringify({ ok: true }), { status: 200 }) + return new Response( + JSON.stringify({ + error: { + message: 'Deployment is currently scaled to zero and is scaling up. Please retry your request in a few minutes.', + code: 'DEPLOYMENT_SCALING_UP', + type: 'error', + }, + }), + { status: 503, statusText: 'Service Unavailable' }, + ) }) as unknown as typeof globalThis.fetch - try { - const response = await createFireworksRequestWithFallback({ - body: minimalBody as never, - originalModel: 'z-ai/glm-5.1', - fetch: mockFetch, - logger, - useCustomDeployment: true, - sessionId: 'test-user-id', - }) - - expect(response.status).toBe(200) - expect(fetchCalls).toHaveLength(2) - expect(fetchCalls[0]).toBe(DEPLOYMENT_MODEL_ID) - expect(fetchCalls[1]).toBe(STANDARD_MODEL_ID) - // Verify cooldown was activated - expect(isDeploymentCoolingDown()).toBe(true) - } finally { - spy.restore() - } + const response = await createFireworksRequestWithFallback({ + body: minimalBody as never, + originalModel: 'z-ai/glm-5.1', + fetch: mockFetch, + logger, + useCustomDeployment: true, + sessionId: 'test-user-id', + now: IN_DEPLOYMENT_HOURS, + }) + + expect(response.status).toBe(503) + expect(fetchCalls).toEqual([DEPLOYMENT_MODEL_ID]) + expect(isDeploymentCoolingDown()).toBe(true) }) - it('falls back to standard API on non-scaling 503 from deployment', async () => { - const spy = spyDeploymentHours(true) + it('returns non-scaling deployment 503 without serverless fallback', async () => { const fetchCalls: string[] = [] - let callCount = 0 const mockFetch = mock(async (_url: string | URL | Request, init?: RequestInit) => { const body = JSON.parse(init?.body as string) fetchCalls.push(body.model) - callCount++ - - if (callCount === 1) { - return 
new Response( - JSON.stringify({ - error: { - message: 'Service temporarily unavailable', - code: 'SERVICE_UNAVAILABLE', - type: 'error', - }, - }), - { status: 503, statusText: 'Service Unavailable' }, - ) - } - - return new Response(JSON.stringify({ ok: true }), { status: 200 }) + return new Response( + JSON.stringify({ + error: { + message: 'Service temporarily unavailable', + code: 'SERVICE_UNAVAILABLE', + type: 'error', + }, + }), + { status: 503, statusText: 'Service Unavailable' }, + ) }) as unknown as typeof globalThis.fetch - try { - const response = await createFireworksRequestWithFallback({ - body: minimalBody as never, - originalModel: 'z-ai/glm-5.1', - fetch: mockFetch, - logger, - useCustomDeployment: true, - sessionId: 'test-user-id', - }) - - expect(response.status).toBe(200) - expect(fetchCalls).toHaveLength(2) - expect(fetchCalls[0]).toBe(DEPLOYMENT_MODEL_ID) - expect(fetchCalls[1]).toBe(STANDARD_MODEL_ID) - // Non-scaling 503 should NOT activate the cooldown - expect(isDeploymentCoolingDown()).toBe(false) - } finally { - spy.restore() - } + const response = await createFireworksRequestWithFallback({ + body: minimalBody as never, + originalModel: 'z-ai/glm-5.1', + fetch: mockFetch, + logger, + useCustomDeployment: true, + sessionId: 'test-user-id', + now: IN_DEPLOYMENT_HOURS, + }) + + expect(response.status).toBe(503) + expect(fetchCalls).toEqual([DEPLOYMENT_MODEL_ID]) + expect(isDeploymentCoolingDown()).toBe(false) }) - it('falls back to standard API on 500 Internal Error from deployment', async () => { - const spy = spyDeploymentHours(true) + it('returns 500 Internal Error from deployment without serverless fallback', async () => { const fetchCalls: string[] = [] - let callCount = 0 const mockFetch = mock(async (_url: string | URL | Request, init?: RequestInit) => { const body = JSON.parse(init?.body as string) fetchCalls.push(body.model) - callCount++ - - if (callCount === 1) { - return new Response( - JSON.stringify({ error: 'Internal error' 
}), - { status: 500, statusText: 'Internal Server Error' }, - ) - } - - return new Response(JSON.stringify({ ok: true }), { status: 200 }) + return new Response( + JSON.stringify({ error: 'Internal error' }), + { status: 500, statusText: 'Internal Server Error' }, + ) }) as unknown as typeof globalThis.fetch - try { - const response = await createFireworksRequestWithFallback({ - body: minimalBody as never, - originalModel: 'z-ai/glm-5.1', - fetch: mockFetch, - logger, - useCustomDeployment: true, - sessionId: 'test-user-id', - }) - - expect(response.status).toBe(200) - expect(fetchCalls).toHaveLength(2) - expect(fetchCalls[0]).toBe(DEPLOYMENT_MODEL_ID) - expect(fetchCalls[1]).toBe(STANDARD_MODEL_ID) - expect(isDeploymentCoolingDown()).toBe(false) - } finally { - spy.restore() - } + const response = await createFireworksRequestWithFallback({ + body: minimalBody as never, + originalModel: 'z-ai/glm-5.1', + fetch: mockFetch, + logger, + useCustomDeployment: true, + sessionId: 'test-user-id', + now: IN_DEPLOYMENT_HOURS, + }) + + expect(response.status).toBe(500) + expect(fetchCalls).toEqual([DEPLOYMENT_MODEL_ID]) + expect(isDeploymentCoolingDown()).toBe(false) }) - it('skips deployment during cooldown and goes straight to standard API', async () => { - const spy = spyDeploymentHours(true) + it('returns cooldown error without serverless fallback', async () => { markDeploymentScalingUp() const fetchCalls: string[] = [] @@ -300,26 +243,21 @@ describe('Fireworks deployment routing', () => { return new Response(JSON.stringify({ ok: true }), { status: 200 }) }) as unknown as typeof globalThis.fetch - try { - const response = await createFireworksRequestWithFallback({ - body: minimalBody as never, - originalModel: 'z-ai/glm-5.1', - fetch: mockFetch, - logger, - useCustomDeployment: true, - sessionId: 'test-user-id', - }) - - expect(response.status).toBe(200) - expect(fetchCalls).toHaveLength(1) - expect(fetchCalls[0]).toBe(STANDARD_MODEL_ID) - } finally { - spy.restore() - } 
+ const response = await createFireworksRequestWithFallback({ + body: minimalBody as never, + originalModel: 'z-ai/glm-5.1', + fetch: mockFetch, + logger, + useCustomDeployment: true, + sessionId: 'test-user-id', + now: IN_DEPLOYMENT_HOURS, + }) + + expect(response.status).toBe(503) + expect(fetchCalls).toHaveLength(0) }) it('uses standard API for models without a custom deployment', async () => { - const spy = spyDeploymentHours(true) const fetchCalls: string[] = [] const mockFetch = mock(async (_url: string | URL | Request, init?: RequestInit) => { @@ -328,27 +266,66 @@ describe('Fireworks deployment routing', () => { return new Response(JSON.stringify({ ok: true }), { status: 200 }) }) as unknown as typeof globalThis.fetch - try { - const response = await createFireworksRequestWithFallback({ - body: { ...minimalBody, model: 'some-other/model' } as never, - originalModel: 'some-other/model', - fetch: mockFetch, - logger, - useCustomDeployment: true, - sessionId: 'test-user-id', - }) - - expect(response.status).toBe(200) - expect(fetchCalls).toHaveLength(1) - // Model without mapping falls through to the original model - expect(fetchCalls[0]).toBe('some-other/model') - } finally { - spy.restore() - } + const response = await createFireworksRequestWithFallback({ + body: { ...minimalBody, model: 'some-other/model' } as never, + originalModel: 'some-other/model', + fetch: mockFetch, + logger, + useCustomDeployment: true, + sessionId: 'test-user-id', + now: BEFORE_DEPLOYMENT_HOURS, + }) + + expect(response.status).toBe(200) + expect(fetchCalls).toHaveLength(1) + // Model without mapping falls through to the original model + expect(fetchCalls[0]).toBe('some-other/model') + }) + + it('returns an availability error for deployment models outside hours', async () => { + const mockFetch = mock(async () => { + throw new Error('should not fetch outside deployment hours') + }) as unknown as typeof globalThis.fetch + + const response = await createFireworksRequestWithFallback({ 
+ body: minimalBody as never, + originalModel: 'z-ai/glm-5.1', + fetch: mockFetch, + logger, + useCustomDeployment: true, + sessionId: 'test-user-id', + now: BEFORE_DEPLOYMENT_HOURS, + }) + + expect(response.status).toBe(503) + const body = await response.json() + expect(body.error.code).toBe('DEPLOYMENT_OUTSIDE_HOURS') + }) + + it('falls back to the standard Fireworks API in lite mode outside deployment hours', async () => { + const fetchCalls: string[] = [] + + const mockFetch = mock(async (_url: string | URL | Request, init?: RequestInit) => { + const body = JSON.parse(init?.body as string) + fetchCalls.push(body.model) + return new Response(JSON.stringify({ ok: true }), { status: 200 }) + }) as unknown as typeof globalThis.fetch + + const response = await createFireworksRequestWithFallback({ + body: liteBody as never, + originalModel: 'z-ai/glm-5.1', + fetch: mockFetch, + logger, + useCustomDeployment: true, + sessionId: 'test-user-id', + now: BEFORE_DEPLOYMENT_HOURS, + }) + + expect(response.status).toBe(200) + expect(fetchCalls).toEqual([STANDARD_MODEL_ID]) }) it('returns non-5xx responses from deployment without fallback (e.g. 
429)', async () => { - const spy = spyDeploymentHours(true) const fetchCalls: string[] = [] const mockFetch = mock(async (_url: string | URL | Request, init?: RequestInit) => { @@ -360,23 +337,20 @@ describe('Fireworks deployment routing', () => { ) }) as unknown as typeof globalThis.fetch - try { - const response = await createFireworksRequestWithFallback({ - body: minimalBody as never, - originalModel: 'z-ai/glm-5.1', - fetch: mockFetch, - logger, - useCustomDeployment: true, - sessionId: 'test-user-id', - }) - - // Non-5xx errors from deployment are returned as-is (caller handles them) - expect(response.status).toBe(429) - expect(fetchCalls).toHaveLength(1) - expect(fetchCalls[0]).toBe(DEPLOYMENT_MODEL_ID) - } finally { - spy.restore() - } + const response = await createFireworksRequestWithFallback({ + body: minimalBody as never, + originalModel: 'z-ai/glm-5.1', + fetch: mockFetch, + logger, + useCustomDeployment: true, + sessionId: 'test-user-id', + now: IN_DEPLOYMENT_HOURS, + }) + + // Non-5xx errors from deployment are returned as-is (caller handles them) + expect(response.status).toBe(429) + expect(fetchCalls).toHaveLength(1) + expect(fetchCalls[0]).toBe(DEPLOYMENT_MODEL_ID) }) it('transforms reasoning to reasoning_effort (defaults to medium)', async () => { @@ -535,17 +509,44 @@ describe('Fireworks deployment routing', () => { expect(fetchedBodies[0].reasoning_effort).toBe('low') }) - it('logs when trying deployment and when falling back on 5xx', async () => { - const spy = spyDeploymentHours(true) - let callCount = 0 - + it('logs when trying deployment and when deployment returns 5xx', async () => { const mockFetch = mock(async () => { - callCount++ - if (callCount === 1) { + return new Response( + JSON.stringify({ + error: { + message: 'Scaling up', + code: 'DEPLOYMENT_SCALING_UP', + type: 'error', + }, + }), + { status: 503, statusText: 'Service Unavailable' }, + ) + }) as unknown as typeof globalThis.fetch + + await createFireworksRequestWithFallback({ 
+ body: minimalBody as never, + originalModel: 'z-ai/glm-5.1', + fetch: mockFetch, + logger, + useCustomDeployment: true, + sessionId: 'test-user-id', + now: IN_DEPLOYMENT_HOURS, + }) + + expect(logger.info).toHaveBeenCalledTimes(2) + }) + + it('falls back to the standard Fireworks API in lite mode after deployment scaling 503', async () => { + const fetchCalls: string[] = [] + + const mockFetch = mock(async (_url: string | URL | Request, init?: RequestInit) => { + const body = JSON.parse(init?.body as string) + fetchCalls.push(body.model) + if (fetchCalls.length === 1) { return new Response( JSON.stringify({ error: { - message: 'Scaling up', + message: 'Deployment is currently scaled to zero and is scaling up. Please retry your request in a few minutes.', code: 'DEPLOYMENT_SCALING_UP', type: 'error', }, @@ -556,20 +557,70 @@ describe('Fireworks deployment routing', () => { return new Response(JSON.stringify({ ok: true }), { status: 200 }) }) as unknown as typeof globalThis.fetch - try { - await createFireworksRequestWithFallback({ - body: minimalBody as never, - originalModel: 'z-ai/glm-5.1', - fetch: mockFetch, - logger, - useCustomDeployment: true, - sessionId: 'test-user-id', - }) - - expect(logger.info).toHaveBeenCalledTimes(2) - } finally { - spy.restore() - } + const response = await createFireworksRequestWithFallback({ + body: liteBody as never, + originalModel: 'z-ai/glm-5.1', + fetch: mockFetch, + logger, + useCustomDeployment: true, + sessionId: 'test-user-id', + now: IN_DEPLOYMENT_HOURS, + }) + + expect(response.status).toBe(200) + expect(fetchCalls).toEqual([DEPLOYMENT_MODEL_ID, STANDARD_MODEL_ID]) + expect(isDeploymentCoolingDown()).toBe(true) + }) + + it('falls back to the standard Fireworks API in lite mode during deployment cooldown', async () => { + markDeploymentScalingUp() + + const fetchCalls: string[] = [] + const mockFetch = mock(async (_url: string | URL | Request, init?: RequestInit) => { + const body = JSON.parse(init?.body as string) + 
fetchCalls.push(body.model) + return new Response(JSON.stringify({ ok: true }), { status: 200 }) + }) as unknown as typeof globalThis.fetch + + const response = await createFireworksRequestWithFallback({ + body: liteBody as never, + originalModel: 'z-ai/glm-5.1', + fetch: mockFetch, + logger, + useCustomDeployment: true, + sessionId: 'test-user-id', + now: IN_DEPLOYMENT_HOURS, + }) + + expect(response.status).toBe(200) + expect(fetchCalls).toEqual([STANDARD_MODEL_ID]) + }) + + it('falls back to the standard Fireworks API in lite mode when the deployment request throws', async () => { + const fetchCalls: string[] = [] + + const mockFetch = mock(async (_url: string | URL | Request, init?: RequestInit) => { + const body = JSON.parse(init?.body as string) + fetchCalls.push(body.model) + if (fetchCalls.length === 1) { + throw new Error('socket hang up') + } + return new Response(JSON.stringify({ ok: true }), { status: 200 }) + }) as unknown as typeof globalThis.fetch + + const response = await createFireworksRequestWithFallback({ + body: liteBody as never, + originalModel: 'z-ai/glm-5.1', + fetch: mockFetch, + logger, + useCustomDeployment: true, + sessionId: 'test-user-id', + now: IN_DEPLOYMENT_HOURS, + }) + + expect(response.status).toBe(200) + expect(fetchCalls).toEqual([DEPLOYMENT_MODEL_ID, STANDARD_MODEL_ID]) + expect(logger.warn).toHaveBeenCalledTimes(1) }) }) }) diff --git a/web/src/llm-api/fireworks-config.ts b/web/src/llm-api/fireworks-config.ts index fb6d59580..566728250 100644 --- a/web/src/llm-api/fireworks-config.ts +++ b/web/src/llm-api/fireworks-config.ts @@ -10,7 +10,6 @@ export const FIREWORKS_ACCOUNT_ID = 'james-65d217' export const FIREWORKS_DEPLOYMENT_MAP: Record = { // 'minimax/minimax-m2.5': 'accounts/james-65d217/deployments/lnfid5h9', - // 'moonshotai/kimi-k2.5': 'accounts/james-65d217/deployments/mx8l5rq2', - // 'minimax/minimax-m2.7': 'accounts/james-65d217/deployments/nrdudqxd', 'z-ai/glm-5.1': 'accounts/james-65d217/deployments/mjb4i7ea', + 
// 'minimax/minimax-m2.7': 'accounts/james-65d217/deployments/nrdudqxd', } diff --git a/web/src/llm-api/fireworks.ts b/web/src/llm-api/fireworks.ts index 6e304638d..a2f4f80a8 100644 --- a/web/src/llm-api/fireworks.ts +++ b/web/src/llm-api/fireworks.ts @@ -1,5 +1,9 @@ import { Agent } from 'undici' +import { + FREEBUFF_DEPLOYMENT_HOURS_LABEL, + isFreebuffDeploymentHours, +} from '@codebuff/common/constants/freebuff-models' import { PROFIT_MARGIN } from '@codebuff/common/constants/limits' import { getErrorObject } from '@codebuff/common/util/error' import { env } from '@codebuff/internal/env' @@ -32,15 +36,14 @@ const FIREWORKS_MODEL_MAP: Record = { 'minimax/minimax-m2.5': 'accounts/fireworks/models/minimax-m2p5', 'minimax/minimax-m2.7': 'accounts/fireworks/models/minimax-m2p7', 'z-ai/glm-5.1': 'accounts/fireworks/models/glm-5p1', - 'moonshotai/kimi-k2.5': 'accounts/fireworks/models/kimi-k2p5', } /** Flag to enable custom Fireworks deployments (set to false to use global API only) */ const FIREWORKS_USE_CUSTOM_DEPLOYMENT = true -/** Check if current time is within deployment hours (always enabled) */ -export function isDeploymentHours(_now: Date = new Date()): boolean { - return true +/** Check if current time is within deployment hours: Mon-Fri, 9am ET to 5pm PT. */ +export function isDeploymentHours(now: Date = new Date()): boolean { + return isFreebuffDeploymentHours(now) } /** @@ -93,7 +96,7 @@ function createFireworksRequest(params: { // Transform OpenRouter-style `reasoning` object into Fireworks' `reasoning_effort`. // Unlike OpenAI, Fireworks supports reasoning_effort together with function tools - // (e.g. GLM-4.5/5.1 and Kimi K2 are designed for interleaved reasoning + tool use). + // (e.g. GLM-4.5/5.1 are designed for interleaved reasoning + tool use). 
if (fireworksBody.reasoning && typeof fireworksBody.reasoning === 'object') { const reasoning = fireworksBody.reasoning as { enabled?: boolean @@ -165,15 +168,10 @@ const FIREWORKS_PRICING_MAP: Record = { cachedInputCostPerToken: 0.26 / 1_000_000, outputCostPerToken: 4.40 / 1_000_000, }, - 'moonshotai/kimi-k2.5': { - inputCostPerToken: 0.60 / 1_000_000, - cachedInputCostPerToken: 0.10 / 1_000_000, - outputCostPerToken: 3.00 / 1_000_000, - }, } function getFireworksPricing(model: string): FireworksPricing { - return FIREWORKS_PRICING_MAP[model] ?? FIREWORKS_MODEL_MAP['z-ai/glm-5.1'] + return FIREWORKS_PRICING_MAP[model] ?? FIREWORKS_PRICING_MAP['z-ai/glm-5.1'] } function extractUsageAndCost(usage: Record | undefined | null, model: string): UsageData { @@ -708,9 +706,10 @@ async function parseFireworksError(response: Response): Promise } /** - * Tries the custom Fireworks deployment during business hours (10am–8pm ET), - * falling back to the standard API if the deployment returns 503 DEPLOYMENT_SCALING_UP. - * Outside deployment hours or during cooldown, goes straight to the standard API. + * Uses custom Fireworks deployments only during deployment hours. Deployment + * mapped models never fall back to the serverless API outside hours, during + * cooldown, or after deployment 5xxs; those states surface as provider errors + * so freebuff can offer MiniMax as the always-on option. */ export async function createFireworksRequestWithFallback(params: { body: ChatCompletionRequestBody @@ -719,45 +718,109 @@ export async function createFireworksRequestWithFallback(params: { logger: Logger useCustomDeployment?: boolean sessionId: string + now?: Date }): Promise { const { body, originalModel, fetch, logger, sessionId } = params + const now = params.now ?? new Date() const useCustomDeployment = params.useCustomDeployment ?? 
FIREWORKS_USE_CUSTOM_DEPLOYMENT const deploymentModelId = FIREWORKS_DEPLOYMENT_MAP[originalModel] - const shouldTryDeployment = - useCustomDeployment && - deploymentModelId && - isDeploymentHours() && - !isDeploymentCoolingDown() + const hasDeployment = useCustomDeployment && Boolean(deploymentModelId) + const shouldFallbackToStandardApi = body.codebuff_metadata?.cost_mode === 'lite' + + const createStandardApiRequest = () => + createFireworksRequest({ body, originalModel, fetch, sessionId }) + + if (hasDeployment && !isDeploymentHours(now)) { + if (shouldFallbackToStandardApi) { + logger.info( + { model: originalModel }, + 'Falling back to Fireworks standard API outside deployment hours', + ) + return createStandardApiRequest() + } + return new Response( + JSON.stringify({ + error: { + message: `${originalModel} is only available during ${FREEBUFF_DEPLOYMENT_HOURS_LABEL}. Use minimax/minimax-m2.7 outside those hours.`, + code: 'DEPLOYMENT_OUTSIDE_HOURS', + type: 'availability_error', + }, + }), + { status: 503, statusText: 'Service Unavailable' }, + ) + } + + if (hasDeployment && isDeploymentCoolingDown()) { + if (shouldFallbackToStandardApi) { + logger.info( + { model: originalModel }, + 'Falling back to Fireworks standard API during deployment cooldown', + ) + return createStandardApiRequest() + } + return new Response( + JSON.stringify({ + error: { + message: `${originalModel} deployment is temporarily unavailable. 
Use minimax/minimax-m2.7 while it recovers.`, + code: 'DEPLOYMENT_COOLDOWN', + type: 'availability_error', + }, + }), + { status: 503, statusText: 'Service Unavailable' }, + ) + } - if (shouldTryDeployment) { + if (hasDeployment && deploymentModelId) { logger.info( { model: originalModel, deploymentModel: deploymentModelId }, 'Trying Fireworks custom deployment', ) - const response = await createFireworksRequest({ - body, - originalModel, - fetch, - modelIdOverride: deploymentModelId, - sessionId, - }) + let response: Response + try { + response = await createFireworksRequest({ + body, + originalModel, + fetch, + modelIdOverride: deploymentModelId, + sessionId, + }) + } catch (error) { + if (shouldFallbackToStandardApi) { + logger.warn( + { model: originalModel, error: getErrorObject(error) }, + 'Fireworks custom deployment request failed, falling back to standard API', + ) + return createStandardApiRequest() + } + throw error + } if (response.status >= 500) { const errorText = await response.text() logger.info( { model: originalModel, status: response.status, errorText: errorText.slice(0, 200) }, - 'Fireworks custom deployment returned 5xx, falling back to standard API', + 'Fireworks custom deployment returned 5xx', ) if (errorText.includes('DEPLOYMENT_SCALING_UP')) { markDeploymentScalingUp() } - // Fall through to standard API request below - } else { - return response + if (shouldFallbackToStandardApi) { + logger.info( + { model: originalModel, status: response.status }, + 'Falling back to Fireworks standard API after deployment 5xx', + ) + return createStandardApiRequest() + } + return new Response(errorText, { + status: response.status, + statusText: response.statusText, + headers: response.headers, + }) } + return response } - return createFireworksRequest({ body, originalModel, fetch, sessionId }) + return createStandardApiRequest() } function creditsToFakeCost(credits: number): number { diff --git a/web/src/server/free-session/__tests__/config.test.ts 
b/web/src/server/free-session/__tests__/config.test.ts new file mode 100644 index 000000000..93f5fdcf0 --- /dev/null +++ b/web/src/server/free-session/__tests__/config.test.ts @@ -0,0 +1,13 @@ +import { describe, expect, test } from 'bun:test' + +import { FREEBUFF_MODELS } from '@codebuff/common/constants/freebuff-models' + +import { getInstantAdmitCapacity } from '../config' + +describe('free session config', () => { + test('every selectable freebuff model has instant-admit capacity', () => { + for (const model of FREEBUFF_MODELS) { + expect(getInstantAdmitCapacity(model.id)).toBeGreaterThan(0) + } + }) +}) diff --git a/web/src/server/free-session/__tests__/public-api.test.ts b/web/src/server/free-session/__tests__/public-api.test.ts index a824f6d22..8b08d63df 100644 --- a/web/src/server/free-session/__tests__/public-api.test.ts +++ b/web/src/server/free-session/__tests__/public-api.test.ts @@ -13,14 +13,22 @@ import type { InternalSessionRow } from '../types' const SESSION_LEN = 60 * 60 * 1000 const GRACE_MS = 30 * 60 * 1000 -const DEFAULT_MODEL = 'z-ai/glm-5.1' +const DEFAULT_MODEL = 'minimax/minimax-m2.7' + +interface AdmitRecord { + user_id: string + model: string + admitted_at: Date +} function makeDeps(overrides: Partial = {}): SessionDeps & { rows: Map + admits: AdmitRecord[] _tick: (n: Date) => void _now: () => Date } { const rows = new Map() + const admits: AdmitRecord[] = [] let currentNow = new Date('2026-04-17T12:00:00Z') let instanceCounter = 0 @@ -28,10 +36,12 @@ function makeDeps(overrides: Partial = {}): SessionDeps & { const deps: SessionDeps & { rows: Map + admits: AdmitRecord[] _tick: (n: Date) => void _now: () => Date } = { rows, + admits, _tick: (n: Date) => { currentNow = n }, @@ -50,6 +60,18 @@ function makeDeps(overrides: Partial = {}): SessionDeps & { } return n }, + listRecentAdmits: async ({ userId, model, since, limit }) => { + return admits + .filter( + (a) => + a.user_id === userId && + a.model === model && + a.admitted_at.getTime() 
>= since.getTime(), + ) + .sort((a, b) => a.admitted_at.getTime() - b.admitted_at.getTime()) + .slice(0, limit) + .map((a) => a.admitted_at) + }, promoteQueuedUser: async ({ userId, model, sessionLengthMs, now }) => { const row = rows.get(userId) if (!row || row.status !== 'queued' || row.model !== model) return null @@ -57,6 +79,7 @@ function makeDeps(overrides: Partial = {}): SessionDeps & { row.admitted_at = now row.expires_at = new Date(now.getTime() + sessionLengthMs) row.updated_at = now + admits.push({ user_id: userId, model, admitted_at: now }) return row }, now: () => currentNow, @@ -177,19 +200,34 @@ describe('requestSession', () => { expect(state.instanceId).toBe('inst-1') }) + test('deployment-hours-only model is unavailable outside deployment hours', async () => { + const state = await requestSession({ + userId: 'u1', + model: 'z-ai/glm-5.1', + deps, + }) + expect(state).toEqual({ + status: 'model_unavailable', + requestedModel: 'z-ai/glm-5.1', + availableHours: '9am ET-5pm PT', + }) + expect(deps.rows.size).toBe(0) + }) + test('queued response includes a per-model depth snapshot for the selector', async () => { - // Seed 2 users in glm + 1 in minimax so the returned map captures both. + deps._tick(new Date('2026-04-17T16:00:00Z')) + // Seed 2 users in MiniMax + 1 in GLM so the returned map captures both. 
await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps }) deps._tick(new Date(deps._now().getTime() + 1000)) await requestSession({ userId: 'u2', model: DEFAULT_MODEL, deps }) deps._tick(new Date(deps._now().getTime() + 1000)) - await requestSession({ userId: 'u3', model: 'minimax/minimax-m2.7', deps }) + await requestSession({ userId: 'u3', model: 'z-ai/glm-5.1', deps }) const state = await getSessionState({ userId: 'u1', deps }) if (state.status !== 'queued') throw new Error('unreachable') expect(state.queueDepthByModel).toEqual({ [DEFAULT_MODEL]: 2, - 'minimax/minimax-m2.7': 1, + 'z-ai/glm-5.1': 1, }) }) @@ -264,11 +302,12 @@ describe('requestSession', () => { }) test('instant-admit: per-model capacities are independent', async () => { - // GLM saturated at 1 active, MiniMax still has room. + // MiniMax saturated at 1 active, GLM still has room. const admitDeps = makeDeps({ getInstantAdmitCapacity: (model) => model === DEFAULT_MODEL ? 1 : 10, }) + admitDeps._tick(new Date('2026-04-17T16:00:00Z')) await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps: admitDeps }) const s2 = await requestSession({ userId: 'u2', @@ -277,12 +316,264 @@ describe('requestSession', () => { }) const s3 = await requestSession({ userId: 'u3', - model: 'minimax/minimax-m2.7', + model: 'z-ai/glm-5.1', deps: admitDeps, }) expect(s2.status).toBe('queued') expect(s3.status).toBe('active') }) + + // Per-user rate limit (5 GLM admissions per 20h) — the wire limit is + // hard-coded in public-api.ts, so tests seed the fake admit log directly + // rather than configuring it. GLM also has deployment-hours gating, so + // these tests bump `now` into the open window (12pm ET on a weekday) + // before issuing the request. 
+ const GLM_MODEL = 'z-ai/glm-5.1' + const GLM_LIMIT = 5 + const GLM_WINDOW_HOURS = 20 + const GLM_OPEN_TIME = new Date('2026-04-17T16:00:00Z') + + test('rate_limited: 5th GLM admit in window blocks the 6th attempt', async () => { + deps._tick(GLM_OPEN_TIME) + // Seed 5 admits inside the 20h window, spaced so we can verify retryAfter + // points at the oldest one sliding off. + const now = deps._now() + // Oldest: 19h ago (still in window). Next 4: 1h, 2h, 3h, 4h ago. + const ages = [19, 4, 3, 2, 1] + for (const hoursAgo of ages) { + deps.admits.push({ + user_id: 'u1', + model: GLM_MODEL, + admitted_at: new Date(now.getTime() - hoursAgo * 60 * 60 * 1000), + }) + } + + const state = await requestSession({ + userId: 'u1', + model: GLM_MODEL, + deps, + }) + expect(state.status).toBe('rate_limited') + if (state.status !== 'rate_limited') throw new Error('unreachable') + expect(state.model).toBe(GLM_MODEL) + expect(state.limit).toBe(GLM_LIMIT) + expect(state.windowHours).toBe(GLM_WINDOW_HOURS) + expect(state.recentCount).toBe(GLM_LIMIT) + // Oldest admit is 19h ago; slot opens when it hits 20h, i.e. in 1h. + expect(state.retryAfterMs).toBe(60 * 60 * 1000) + // Blocked before any row is written — the user doesn't take a queue slot. + expect(deps.rows.has('u1')).toBe(false) + }) + + test('rate_limited: admits outside the 20h window do not count', async () => { + deps._tick(GLM_OPEN_TIME) + // 5 admits, each just over 20h old → all fall off the window. 
+ const now = deps._now() + for (let i = 0; i < 5; i++) { + deps.admits.push({ + user_id: 'u1', + model: GLM_MODEL, + admitted_at: new Date( + now.getTime() - (GLM_WINDOW_HOURS * 60 * 60 * 1000 + 60_000 + i), + ), + }) + } + const state = await requestSession({ + userId: 'u1', + model: GLM_MODEL, + deps, + }) + expect(state.status).toBe('queued') + if (state.status !== 'queued') throw new Error('unreachable') + expect(state.rateLimit?.recentCount).toBe(0) + }) + + test('rate_limited: Minimax is unlimited even with many recent admits', async () => { + const now = deps._now() + for (let i = 0; i < 20; i++) { + deps.admits.push({ + user_id: 'u1', + model: DEFAULT_MODEL, + admitted_at: new Date(now.getTime() - i * 60_000), + }) + } + const state = await requestSession({ + userId: 'u1', + model: DEFAULT_MODEL, + deps, + }) + expect(state.status).toBe('queued') + if (state.status !== 'queued') throw new Error('unreachable') + // No rate-limit info for unrated models — the CLI skips the quota line. + expect(state.rateLimit).toBeUndefined() + }) + + test('queued GLM response carries the current admit count', async () => { + deps._tick(GLM_OPEN_TIME) + const now = deps._now() + // 2 admits in the window — under the limit so the user still queues. + deps.admits.push({ + user_id: 'u1', + model: GLM_MODEL, + admitted_at: new Date(now.getTime() - 60 * 60 * 1000), + }) + deps.admits.push({ + user_id: 'u1', + model: GLM_MODEL, + admitted_at: new Date(now.getTime() - 30 * 60 * 1000), + }) + const state = await requestSession({ + userId: 'u1', + model: GLM_MODEL, + deps, + }) + if (state.status !== 'queued') throw new Error('unreachable') + expect(state.rateLimit).toEqual({ + model: GLM_MODEL, + limit: GLM_LIMIT, + windowHours: GLM_WINDOW_HOURS, + recentCount: 2, + }) + }) + + test('rate_limited: takeover of an active GLM row is allowed even when at cap', async () => { + // Reclaim path: user has an active+unexpired GLM session and restarts + // the CLI. 
POST must rotate their instance id (takeover) and NOT reject + // with rate_limited — otherwise they'd be stranded with a live session + // they can't reconnect to. The 5th admission is already in the log, so + // this also exercises "at the cap" rather than "over the cap". + deps._tick(GLM_OPEN_TIME) + const now = deps._now() + // Seed 5 prior admits (the cap), with the latest one matching the + // active row we're about to install. + const ages = [19, 4, 3, 2, 0] + for (const hoursAgo of ages) { + deps.admits.push({ + user_id: 'u1', + model: GLM_MODEL, + admitted_at: new Date(now.getTime() - hoursAgo * 60 * 60 * 1000), + }) + } + // Install the active row directly (skipping the normal request path so + // we don't have to unwind the rate-limit gate to set up the fixture). + const admittedAt = new Date(now.getTime() - 30 * 60 * 1000) + deps.rows.set('u1', { + user_id: 'u1', + status: 'active', + active_instance_id: 'inst-pre', + model: GLM_MODEL, + queued_at: admittedAt, + admitted_at: admittedAt, + expires_at: new Date(admittedAt.getTime() + SESSION_LEN), + created_at: admittedAt, + updated_at: admittedAt, + }) + + const state = await requestSession({ + userId: 'u1', + model: GLM_MODEL, + deps, + }) + expect(state.status).toBe('active') + if (state.status !== 'active') throw new Error('unreachable') + // Instance id rotated; quota snapshot still reflects the full window. + expect(state.instanceId).not.toBe('inst-pre') + expect(state.rateLimit?.recentCount).toBe(GLM_LIMIT) + }) + + test('rate_limited: reclaim of a queued GLM row is allowed even when at cap', async () => { + // Same reclaim exception for queued rows: if a user has already queued + // (say they slipped in just before their 5th admit landed), a subsequent + // POST from the same CLI must preserve their queue position instead of + // flipping to rate_limited. 
+ deps._tick(GLM_OPEN_TIME) + const now = deps._now() + for (let i = 0; i < GLM_LIMIT; i++) { + deps.admits.push({ + user_id: 'u1', + model: GLM_MODEL, + admitted_at: new Date(now.getTime() - (i + 1) * 60 * 60 * 1000), + }) + } + const queuedAt = new Date(now.getTime() - 5 * 60 * 1000) + deps.rows.set('u1', { + user_id: 'u1', + status: 'queued', + active_instance_id: 'inst-pre', + model: GLM_MODEL, + queued_at: queuedAt, + admitted_at: null, + expires_at: null, + created_at: queuedAt, + updated_at: queuedAt, + }) + + const state = await requestSession({ + userId: 'u1', + model: GLM_MODEL, + deps, + }) + expect(state.status).toBe('queued') + if (state.status !== 'queued') throw new Error('unreachable') + // Same position (1) since we preserved queued_at and nobody else is + // ahead; the instance id rotated so any prior CLI is superseded. + expect(state.instanceId).not.toBe('inst-pre') + expect(state.rateLimit?.recentCount).toBe(GLM_LIMIT) + }) + + test('rate_limited: expired GLM row is not a reclaim — quota still applies', async () => { + // The stored row's expires_at is in the past, so it doesn't represent + // an in-flight session. This POST is effectively a fresh request and + // must be blocked by the quota. 
+ deps._tick(GLM_OPEN_TIME) + const now = deps._now() + const ages = [19, 4, 3, 2, 1] + for (const hoursAgo of ages) { + deps.admits.push({ + user_id: 'u1', + model: GLM_MODEL, + admitted_at: new Date(now.getTime() - hoursAgo * 60 * 60 * 1000), + }) + } + const admittedAt = new Date(now.getTime() - 2 * SESSION_LEN) + deps.rows.set('u1', { + user_id: 'u1', + status: 'active', + active_instance_id: 'inst-pre', + model: GLM_MODEL, + queued_at: admittedAt, + admitted_at: admittedAt, + expires_at: new Date(admittedAt.getTime() + SESSION_LEN), + created_at: admittedAt, + updated_at: admittedAt, + }) + const state = await requestSession({ + userId: 'u1', + model: GLM_MODEL, + deps, + }) + expect(state.status).toBe('rate_limited') + }) + + test('instant-admit bumps the quota count for the freshly-written admit row', async () => { + const admitDeps = makeDeps({ getInstantAdmitCapacity: () => 3 }) + admitDeps._tick(GLM_OPEN_TIME) + // 1 existing admit in the window; this new call should instant-admit and + // write a second row, so the response's recentCount reflects 2. + const now = admitDeps._now() + admitDeps.admits.push({ + user_id: 'u1', + model: GLM_MODEL, + admitted_at: new Date(now.getTime() - 30 * 60 * 1000), + }) + const state = await requestSession({ + userId: 'u1', + model: GLM_MODEL, + deps: admitDeps, + }) + if (state.status !== 'active') throw new Error('unreachable') + expect(state.rateLimit?.recentCount).toBe(2) + }) }) describe('getSessionState', () => { @@ -341,6 +632,39 @@ describe('getSessionState', () => { expect(state).toEqual({ status: 'superseded' }) }) + test('getSessionState surfaces rateLimit on queued/active polls', async () => { + // Regression: the POST response attached rateLimit, but GET polls did + // not — so the "Sessions N/M used" line flashed once then disappeared on + // the next 5s poll. GET must attach the same quota snapshot. 
Rate + // limits only apply to GLM, so this test uses GLM explicitly (inside + // deployment hours) rather than the Minimax DEFAULT_MODEL. + deps._tick(new Date('2026-04-17T16:00:00Z')) + const now = deps._now() + deps.admits.push({ + user_id: 'u1', + model: 'z-ai/glm-5.1', + admitted_at: new Date(now.getTime() - 60 * 60 * 1000), + }) + await requestSession({ userId: 'u1', model: 'z-ai/glm-5.1', deps }) + const row = deps.rows.get('u1')! + row.status = 'active' + row.admitted_at = now + row.expires_at = new Date(now.getTime() + SESSION_LEN) + + const state = await getSessionState({ + userId: 'u1', + claimedInstanceId: row.active_instance_id, + deps, + }) + if (state.status !== 'active') throw new Error('unreachable') + expect(state.rateLimit).toEqual({ + model: 'z-ai/glm-5.1', + limit: 5, + windowHours: 20, + recentCount: 1, + }) + }) + test('omitted claimedInstanceId on active session returns active (read-only)', async () => { // Polling without an id (e.g. very first GET before POST has resolved) // must not be classified as superseded — only an explicit mismatch is. diff --git a/web/src/server/free-session/admission.ts b/web/src/server/free-session/admission.ts index 3f3c051d2..9f0b74c9f 100644 --- a/web/src/server/free-session/admission.ts +++ b/web/src/server/free-session/admission.ts @@ -1,4 +1,7 @@ -import { FREEBUFF_MODELS } from '@codebuff/common/constants/freebuff-models' +import { + FREEBUFF_MODELS, + isFreebuffModelAvailable, +} from '@codebuff/common/constants/freebuff-models' import { ADMISSION_TICK_MS, @@ -111,7 +114,10 @@ export async function runAdmissionTick( // advisory locks and a single update each. const perModel = await Promise.all( models.map(async (model) => { - const health = fleet[model] ?? 'healthy' + const isRegisteredModel = FREEBUFF_MODELS.some((m) => m.id === model) + const health = !isRegisteredModel || isFreebuffModelAvailable(model, now) + ? fleet[model] ?? 
'healthy' + : 'unhealthy' const { admitted, skipped } = await deps.admitFromQueue({ model, sessionLengthMs: deps.sessionLengthMs, diff --git a/web/src/server/free-session/public-api.ts b/web/src/server/free-session/public-api.ts index 450540443..02c5c05c9 100644 --- a/web/src/server/free-session/public-api.ts +++ b/web/src/server/free-session/public-api.ts @@ -1,4 +1,6 @@ import { + FREEBUFF_DEPLOYMENT_HOURS_LABEL, + isFreebuffModelAvailable, isFreebuffModelId as isSelectableFreebuffModel, resolveFreebuffModel, } from '@codebuff/common/constants/freebuff-models' @@ -16,15 +18,65 @@ import { FreeSessionModelLockedError, getSessionRow, joinOrTakeOver, + listRecentAdmits, promoteQueuedUser, queueDepthsByModel, queuePositionFor, } from './store' import { toSessionStateResponse } from './session-view' -import type { FreebuffSessionServerResponse } from '@codebuff/common/types/freebuff-session' +import type { + FreebuffSessionRateLimit, + FreebuffSessionServerResponse, +} from '@codebuff/common/types/freebuff-session' import type { InternalSessionRow, SessionStateResponse } from './types' +/** + * Per-model admission rate limits. Keyed by freebuff model id; a model not + * in the map has no rate limit applied. Today only GLM 5.1 is limited + * (Minimax is cheap enough to leave unlimited). + * + * Hard-coded rather than env-driven: the values need to be observable in the + * code review, and the CLI already renders the numbers via `rateLimit` on + * queued/active responses — changing them is a deliberate, typed edit. + */ +const RATE_LIMITS: Record = { + 'z-ai/glm-5.1': { limit: 5, windowHours: 20 }, +} + +/** Fetch the caller's current quota snapshot for `model`, or undefined if the + * model isn't rate-limited. Used by both POST (after admit) and GET polls so + * the CLI's "N of M sessions used" line stays live instead of disappearing + * after the first poll. Also returns the oldest admit in-window so callers + * that need `retryAfterMs` don't have to re-query. 
*/ +async function fetchRateLimitSnapshot( + userId: string, + model: string, + deps: SessionDeps, +): Promise< + { info: FreebuffSessionRateLimit; oldest: Date | null } | undefined +> { + const cfg = RATE_LIMITS[model] + if (!cfg) return undefined + const now = nowOf(deps) + const since = new Date(now.getTime() - cfg.windowHours * 60 * 60 * 1000) + const admits = await deps.listRecentAdmits({ + userId, + model, + since, + limit: cfg.limit, + }) + return { + info: { + model, + limit: cfg.limit, + windowHours: cfg.windowHours, + recentCount: admits.length, + }, + oldest: admits[0] ?? null, + } +} + export interface SessionDeps { getSessionRow: (userId: string) => Promise joinOrTakeOver: (params: { @@ -43,6 +95,15 @@ export interface SessionDeps { * bound to a given model. Compared against the model's configured * `instantAdmitCapacity` to decide whether a new joiner skips the queue. */ activeCountForModel: (model: string) => Promise + /** Rate-limit helper: oldest-first admission timestamps for (userId, model) + * inside the window. The caller uses `rows.length` as the count (capped + * at `limit`) and `rows[0]` as the oldest for `retryAfterMs`. */ + listRecentAdmits: (params: { + userId: string + model: string + since: Date + limit: number + }) => Promise /** Instant-admit promotion: flips a specific queued row to active. Returns * the updated row or null if the row wasn't in a queued state. */ promoteQueuedUser: (params: { @@ -71,6 +132,7 @@ const defaultDeps: SessionDeps = { queueDepthsByModel, queuePositionFor, activeCountForModel, + listRecentAdmits, promoteQueuedUser, getInstantAdmitCapacity, isWaitingRoomEnabled, @@ -122,6 +184,21 @@ export type RequestSessionResult = currentModel: string requestedModel: string } + | { + /** User has hit the per-model admission quota in the rolling window. + * See `FreebuffSessionServerResponse`'s `rate_limited` variant. 
*/ + status: 'rate_limited' + model: string + limit: number + windowHours: number + recentCount: number + retryAfterMs: number + } + | { + status: 'model_unavailable' + requestedModel: string + availableHours: string + } /** * Client calls this on CLI startup with the model they want to use. @@ -152,6 +229,7 @@ export async function requestSession(params: { }): Promise { const deps = params.deps ?? defaultDeps const model = resolveFreebuffModel(params.model) + const now = nowOf(deps) if (params.userBanned) { return { status: 'banned' } } @@ -161,13 +239,60 @@ export async function requestSession(params: { ) { return { status: 'disabled' } } + if (!isFreebuffModelAvailable(model, now)) { + return { + status: 'model_unavailable', + requestedModel: model, + availableHours: FREEBUFF_DEPLOYMENT_HOURS_LABEL, + } + } + + // Rate-limit check runs before joinOrTakeOver so heavy users never even + // create a queued row. Only models listed in RATE_LIMITS are gated; others + // (Minimax today) fall through unchanged. + // + // Takeover/reclaim exception: a user who already holds a queued or + // active+unexpired row on this same model is re-anchoring (CLI restart, + // same-account tab switch) rather than starting a new session. Admit + // counts are written at promotion time, so the quota only needs to gate + // fresh admissions — blocking a reclaim here would strand a user with an + // active 5th session unable to reconnect after a CLI restart. + const existing = await deps.getSessionRow(params.userId) + const isReclaim = + !!existing && + existing.model === model && + (existing.status === 'queued' || + (existing.status === 'active' && + !!existing.expires_at && + existing.expires_at.getTime() > now.getTime())) + + if (!isReclaim) { + const snapshot = await fetchRateLimitSnapshot(params.userId, model, deps) + if (snapshot && snapshot.info.recentCount >= snapshot.info.limit) { + // Oldest admit's window-anniversary is when one slot opens back up. 
+ // Clamped at 0 so a clock skew can't surface a negative retry-after. + const windowMs = snapshot.info.windowHours * 60 * 60 * 1000 + const retryAfterMs = Math.max( + 0, + (snapshot.oldest?.getTime() ?? 0) + windowMs - now.getTime(), + ) + return { + status: 'rate_limited', + model, + limit: snapshot.info.limit, + windowHours: snapshot.info.windowHours, + recentCount: snapshot.info.recentCount, + retryAfterMs, + } + } + } let row: InternalSessionRow try { row = await deps.joinOrTakeOver({ userId: params.userId, model, - now: nowOf(deps), + now, }) } catch (err) { if (err instanceof FreeSessionModelLockedError) { @@ -199,7 +324,7 @@ export async function requestSession(params: { userId: params.userId, model, sessionLengthMs: deps.sessionLengthMs, - now: nowOf(deps), + now, }) if (promoted) row = promoted } @@ -212,7 +337,21 @@ export async function requestSession(params: { `joinOrTakeOver returned a row that maps to no view (user=${params.userId})`, ) } - return view + return attachRateLimit(params.userId, view, deps) +} + +/** Thread the current quota snapshot onto queued/active views so the CLI can + * render "N of M sessions used". Other statuses pass through unchanged. + * Called on both POST and GET so the line stays live across polls. 
*/ +async function attachRateLimit( + userId: string, + view: SessionStateResponse, + deps: SessionDeps, +): Promise { + if (view.status !== 'queued' && view.status !== 'active') return view + const snapshot = await fetchRateLimitSnapshot(userId, view.model, deps) + if (!snapshot) return view + return { ...view, rateLimit: snapshot.info } } /** @@ -267,7 +406,7 @@ export async function getSessionState(params: { const view = await viewForRow(params.userId, deps, row) if (!view) return noneResponse() - return view + return attachRateLimit(params.userId, view, deps) } export async function endUserSession(params: { diff --git a/web/src/server/free-session/store.ts b/web/src/server/free-session/store.ts index b3bd2bc48..e84331b69 100644 --- a/web/src/server/free-session/store.ts +++ b/web/src/server/free-session/store.ts @@ -1,7 +1,7 @@ import { db } from '@codebuff/internal/db' import { coerceBool } from '@codebuff/internal/db/advisory-lock' import * as schema from '@codebuff/internal/db/schema' -import { and, asc, count, eq, lt, sql } from 'drizzle-orm' +import { and, asc, count, eq, gte, lt, sql } from 'drizzle-orm' import { FREEBUFF_ADMISSION_LOCK_ID } from './config' @@ -369,6 +369,16 @@ export async function admitFromQueue(params: { ) .returning() + if (admitted.length > 0) { + await tx.insert(schema.freeSessionAdmit).values( + admitted.map((r) => ({ + user_id: r.user_id, + model: r.model, + admitted_at: now, + })), + ) + } + return { admitted: admitted as InternalSessionRow[], skipped: null } }) } @@ -391,23 +401,63 @@ export async function promoteQueuedUser(params: { }): Promise { const { userId, model, sessionLengthMs, now } = params const expiresAt = new Date(now.getTime() + sessionLengthMs) - const [row] = await db - .update(schema.freeSession) - .set({ - status: 'active', + return db.transaction(async (tx) => { + const [row] = await tx + .update(schema.freeSession) + .set({ + status: 'active', + admitted_at: now, + expires_at: expiresAt, + updated_at: now, + 
}) + .where( + and( + eq(schema.freeSession.user_id, userId), + eq(schema.freeSession.status, 'queued'), + eq(schema.freeSession.model, model), + ), + ) + .returning() + if (!row) return null + await tx.insert(schema.freeSessionAdmit).values({ + user_id: userId, + model, admitted_at: now, - expires_at: expiresAt, - updated_at: now, }) + return row as InternalSessionRow + }) +} + +/** + * List admissions for `userId` on `model` whose `admitted_at` is within the + * window `[since, ∞)`, ordered oldest-first. Caller gets both the count + * (array length, capped at `limit`) and the oldest timestamp (`rows[0]`) — + * the oldest is needed to compute `retryAfterMs` when the window is full, + * so one query covers both the check and the reject path. + * + * Drives the per-user, per-model rate limit (e.g. at most 5 GLM sessions in + * the last 20h) enforced before `joinOrTakeOver`. + */ +export async function listRecentAdmits(params: { + userId: string + model: string + since: Date + limit: number +}): Promise { + const { userId, model, since, limit } = params + const rows = await db + .select({ admitted_at: schema.freeSessionAdmit.admitted_at }) + .from(schema.freeSessionAdmit) .where( and( - eq(schema.freeSession.user_id, userId), - eq(schema.freeSession.status, 'queued'), - eq(schema.freeSession.model, model), + eq(schema.freeSessionAdmit.user_id, userId), + eq(schema.freeSessionAdmit.model, model), + gte(schema.freeSessionAdmit.admitted_at, since), ), ) - .returning() - return (row as InternalSessionRow | undefined) ?? null + .orderBy(asc(schema.freeSessionAdmit.admitted_at)) + .limit(limit) + return rows.map((r) => r.admitted_at) } /** Stable 31-bit hash so model-keyed advisory lock ids don't overflow int4. */