diff --git a/cli/src/commands/command-registry.ts b/cli/src/commands/command-registry.ts index 8b6c431ba..5c7b63928 100644 --- a/cli/src/commands/command-registry.ts +++ b/cli/src/commands/command-registry.ts @@ -3,13 +3,14 @@ import { CLAUDE_OAUTH_ENABLED } from '@codebuff/common/constants/claude-oauth' import { safeOpen } from '../utils/open-url' import { handleAdsEnable, handleAdsDisable } from './ads' -import { buildInterviewPrompt, buildPlanPrompt, buildReviewPromptFromArgs } from './prompt-builders' -import { useThemeStore } from '../hooks/use-theme' import { handleHelpCommand } from './help' import { handleImageCommand } from './image' import { handleInitializationFlowLocally } from './init' +import { buildInterviewPrompt, buildPlanPrompt, buildReviewPromptFromArgs } from './prompt-builders' import { runBashCommand } from './router' import { handleUsageCommand } from './usage' +import { endAndRejoinFreebuffSession } from '../hooks/use-freebuff-session' +import { useThemeStore } from '../hooks/use-theme' import { WEBSITE_URL } from '../login/constants' import { useChatStore } from '../state/chat-store' import { useFeedbackStore } from '../state/feedback-store' @@ -178,6 +179,7 @@ const FREEBUFF_REMOVED_COMMANDS = new Set([ const FREEBUFF_ONLY_COMMANDS = new Set([ 'connect', 'plan', + 'end-session', ]) const ALL_COMMANDS: CommandDefinition[] = [ @@ -611,6 +613,25 @@ const ALL_COMMANDS: CommandDefinition[] = [ clearInput(params) }, }), + // /end-session (freebuff-only) — end the active session early and re-queue. The + // hook flips status from 'active' → 'queued', which unmounts and + // mounts , where the user can pick a different model. 
+ defineCommand({ + name: 'end-session', + handler: (params) => { + params.setMessages((prev) => [ + ...prev, + getUserMessage(params.inputValue.trim()), + getSystemMessage('Ending session and returning to the waiting room…'), + ]) + params.saveToHistory(params.inputValue.trim()) + clearInput(params) + endAndRejoinFreebuffSession().catch(() => { + // The hook surfaces poll errors via the session store; nothing to do + // here beyond letting the chat history reflect the attempt. + }) + }, + }), ] export const COMMAND_REGISTRY: CommandDefinition[] = IS_FREEBUFF diff --git a/cli/src/components/freebuff-model-selector.tsx b/cli/src/components/freebuff-model-selector.tsx new file mode 100644 index 000000000..63099ec1f --- /dev/null +++ b/cli/src/components/freebuff-model-selector.tsx @@ -0,0 +1,130 @@ +import { TextAttributes } from '@opentui/core' +import { useKeyboard } from '@opentui/react' +import React, { useCallback, useMemo, useState } from 'react' + +import { Button } from './button' +import { FREEBUFF_MODELS } from '@codebuff/common/constants/freebuff-models' + +import { switchFreebuffModel } from '../hooks/use-freebuff-session' +import { useFreebuffModelStore } from '../state/freebuff-model-store' +import { useFreebuffSessionStore } from '../state/freebuff-session-store' +import { useTheme } from '../hooks/use-theme' + +import type { KeyEvent } from '@opentui/core' + +/** + * Lets the user pick which model's queue they're in. Tapping (or pressing the + * row's number key) on a different model triggers a re-POST: the server moves + * them to the back of the new model's queue. + * + * Each row shows a live "N ahead" count sourced from the server's + * `queueDepthByModel` snapshot so the choice is informed (e.g. "3 ahead" vs + * "12 ahead") rather than a blind preference toggle. 
+ */ +export const FreebuffModelSelector: React.FC = () => { + const theme = useTheme() + const selectedModel = useFreebuffModelStore((s) => s.selectedModel) + const session = useFreebuffSessionStore((s) => s.session) + const [pending, setPending] = useState(null) + const [hoveredId, setHoveredId] = useState(null) + + // For the user's current queue, "ahead" is `position - 1` (themselves don't + // count). For every other queue, switching would land them at the back, so + // it's that queue's full depth. Null before the first queued snapshot so + // the UI doesn't flash misleading zeros. + const aheadByModel = useMemo | null>(() => { + if (session?.status !== 'queued') return null + const depths = session.queueDepthByModel ?? {} + const out: Record = {} + for (const { id } of FREEBUFF_MODELS) { + out[id] = + id === session.model ? Math.max(0, session.position - 1) : depths[id] ?? 0 + } + return out + }, [session]) + + const pick = useCallback( + (modelId: string) => { + if (pending) return + if (modelId === selectedModel) return + setPending(modelId) + switchFreebuffModel(modelId).finally(() => setPending(null)) + }, + [pending, selectedModel], + ) + + // Number-key shortcuts (1-9) so keyboard-only users can switch without + // hunting for a clickable region. + useKeyboard( + useCallback( + (key: KeyEvent) => { + if (pending) return + const name = key.name ?? '' + if (!/^[1-9]$/.test(name)) return + const digit = Number(name) + if (digit > FREEBUFF_MODELS.length) return + const target = FREEBUFF_MODELS[digit - 1] + if (target && target.id !== selectedModel) { + key.preventDefault?.() + pick(target.id) + } + }, + [pending, pick, selectedModel], + ), + ) + + return ( + + + Model — tap or press 1-{FREEBUFF_MODELS.length} to switch + + {FREEBUFF_MODELS.map((model, idx) => { + const isSelected = model.id === selectedModel + const isPending = pending === model.id + const isHovered = hoveredId === model.id + const indicator = isSelected ? 
'●' : '○' + const indicatorColor = isSelected ? theme.primary : theme.muted + const labelColor = isSelected ? theme.foreground : theme.muted + const interactable = !pending && !isSelected + const ahead = aheadByModel?.[model.id] + const hint = + ahead === undefined + ? model.tagline + : ahead === 0 + ? 'No wait' + : `${ahead} ahead` + return ( + + ) + })} + + ) +} diff --git a/cli/src/components/status-bar.tsx b/cli/src/components/status-bar.tsx index 857854b85..e8f29fe26 100644 --- a/cli/src/components/status-bar.tsx +++ b/cli/src/components/status-bar.tsx @@ -1,3 +1,4 @@ +import { getFreebuffModel } from '@codebuff/common/constants/freebuff-models' import { TextAttributes } from '@opentui/core' import React, { useEffect, useState } from 'react' @@ -143,9 +144,14 @@ export const StatusBar = ({ case 'idle': if (sessionProgress !== null) { const isUrgent = sessionProgress.remainingMs < COUNTDOWN_VISIBLE_MS + const modelName = + freebuffSession?.status === 'active' + ? getFreebuffModel(freebuffSession.model).displayName + : null return ( - Free session · {formatSessionRemaining(sessionProgress.remainingMs)} + {modelName ? 
`${modelName} · ` : ''}Free session ·{' '} + {formatSessionRemaining(sessionProgress.remainingMs)} ) } diff --git a/cli/src/components/waiting-room-screen.tsx b/cli/src/components/waiting-room-screen.tsx index 08e967d28..5ee240299 100644 --- a/cli/src/components/waiting-room-screen.tsx +++ b/cli/src/components/waiting-room-screen.tsx @@ -5,6 +5,7 @@ import React, { useMemo, useState } from 'react' import { AdBanner } from './ad-banner' import { Button } from './button' import { ChoiceAdBanner } from './choice-ad-banner' +import { FreebuffModelSelector } from './freebuff-model-selector' import { ShimmerText } from './shimmer-text' import { useFreebuffCtrlCExit } from '../hooks/use-freebuff-ctrl-c-exit' import { useGravityAd } from '../hooks/use-gravity-ad' @@ -200,6 +201,10 @@ export const WaitingRoomScreen: React.FC = ({ {formatElapsed(elapsedMs)} + + + + )} diff --git a/cli/src/data/slash-commands.ts b/cli/src/data/slash-commands.ts index bd67811d3..fd2454087 100644 --- a/cli/src/data/slash-commands.ts +++ b/cli/src/data/slash-commands.ts @@ -47,6 +47,7 @@ const FREEBUFF_REMOVED_COMMAND_IDS = new Set([ const FREEBUFF_ONLY_COMMAND_IDS = new Set([ 'connect', 'plan', + 'end-session', ]) const ALL_SLASH_COMMANDS: SlashCommand[] = [ @@ -184,6 +185,11 @@ const ALL_SLASH_COMMANDS: SlashCommand[] = [ label: 'theme:toggle', description: 'Toggle between light and dark mode', }, + { + id: 'end-session', + label: 'end-session', + description: 'End your free session and return to the waiting room (lets you switch model)', + }, { id: 'logout', label: 'logout', diff --git a/cli/src/hooks/use-freebuff-session.ts b/cli/src/hooks/use-freebuff-session.ts index 06db946be..077382009 100644 --- a/cli/src/hooks/use-freebuff-session.ts +++ b/cli/src/hooks/use-freebuff-session.ts @@ -1,6 +1,10 @@ import { env } from '@codebuff/common/env' import { useEffect } from 'react' +import { + getSelectedFreebuffModel, + useFreebuffModelStore, +} from '../state/freebuff-model-store' import { 
useFreebuffSessionStore } from '../state/freebuff-session-store' import { getAuthTokenDetails } from '../utils/auth' import { IS_FREEBUFF } from '../utils/constants' @@ -16,6 +20,9 @@ const POLL_INTERVAL_ERROR_MS = 10_000 * account has rotated the id and respond with `{ status: 'superseded' }`. */ const FREEBUFF_INSTANCE_HEADER = 'x-freebuff-instance-id' +/** Header sent on POST telling the server which model's queue to join. */ +const FREEBUFF_MODEL_HEADER = 'x-freebuff-model' + /** Play the terminal bell so users get an audible notification on admission. */ const playAdmissionSound = () => { try { @@ -33,12 +40,15 @@ const sessionEndpoint = (): string => { async function callSession( method: 'POST' | 'GET' | 'DELETE', token: string, - opts: { instanceId?: string; signal?: AbortSignal } = {}, + opts: { instanceId?: string; model?: string; signal?: AbortSignal } = {}, ): Promise { const headers: Record = { Authorization: `Bearer ${token}` } if (method === 'GET' && opts.instanceId) { headers[FREEBUFF_INSTANCE_HEADER] = opts.instanceId } + if (method === 'POST' && opts.model) { + headers[FREEBUFF_MODEL_HEADER] = opts.model + } const resp = await fetch(sessionEndpoint(), { method, headers, @@ -64,6 +74,17 @@ async function callSession( return body } } + // 409 from POST means the user picked a different model than their active + // session is bound to. Surface as a non-throw `model_locked` so the UI can + // show a confirmation prompt (DELETE then re-POST to switch). 
+ if (resp.status === 409 && method === 'POST') { + const body = (await resp.json().catch(() => null)) as + | FreebuffSessionResponse + | null + if (body && body.status === 'model_locked') { + return body + } + } if (!resp.ok) { const text = await resp.text().catch(() => '') throw new Error( @@ -95,6 +116,7 @@ function nextDelayMs(next: FreebuffSessionResponse): number | null { case 'disabled': case 'superseded': case 'country_blocked': + case 'model_locked': return null } } @@ -145,6 +167,41 @@ export async function refreshFreebuffSession(opts: { resetChat?: boolean } = {}) await controller?.refresh() } +/** + * User picked a different model in the waiting room. Persist the choice and + * re-POST so the server moves them to the back of the new model's queue. If + * the server has already admitted them on a different model, it responds + * with `model_locked`; the tick loop silently reverts the local selection to + * the locked model so the active session stays intact. Users who really want + * to switch can /end-session deliberately. + */ +export async function switchFreebuffModel(model: string): Promise { + if (!IS_FREEBUFF) return + const { setSelectedModel } = useFreebuffModelStore.getState() + setSelectedModel(model) + await controller?.refresh() +} + +/** + * End the current session and immediately rejoin the queue. Used by the + * "switch model" confirmation flow when the server returned `model_locked`, + * and by any UI that lets the user exit an active session early. + */ +export async function endAndRejoinFreebuffSession(): Promise { + if (!IS_FREEBUFF) return + const { token } = getAuthTokenDetails() + if (!token) return + try { + await callSession('DELETE', token) + } catch { + // Best-effort — even if DELETE fails the re-POST below will eventually + // succeed once the server-side sweep catches up. 
+ } + const { useChatStore } = await import('../state/chat-store') + useChatStore.getState().reset() + await controller?.refresh() +} + export function markFreebuffSessionSuperseded(): void { if (!IS_FREEBUFF) return controller?.abort() @@ -159,6 +216,21 @@ export function markFreebuffSessionEnded(): void { controller?.apply({ status: 'ended' }) } +/** True when the session row represents a server-side slot the caller is + * holding (queued, active, or in the post-expiry grace window with a live + * instance id). DELETE only matters in those states; otherwise we'd fire a + * spurious request the server has nothing to act on. */ +function shouldReleaseSlot( + current: FreebuffSessionResponse | null, +): boolean { + if (!current) return false + return ( + current.status === 'queued' || + current.status === 'active' || + (current.status === 'ended' && Boolean(current.instanceId)) + ) +} + /** * Best-effort DELETE of the caller's session row. Used by exit paths that * skip React unmount (process.exit on Ctrl+C) so the seat frees up quickly @@ -167,13 +239,7 @@ export function markFreebuffSessionEnded(): void { export async function endFreebuffSessionBestEffort(): Promise { if (!IS_FREEBUFF) return const current = useFreebuffSessionStore.getState().session - if (!current) return - // Only fire DELETE if we actually held a slot. - const heldSlot = - current.status === 'queued' || - current.status === 'active' || - (current.status === 'ended' && Boolean(current.instanceId)) - if (!heldSlot) return + if (!shouldReleaseSlot(current)) return const { token } = getAuthTokenDetails() if (!token) return try { @@ -250,14 +316,27 @@ export function useFreebuffSession(): UseFreebuffSessionResult { // re-POST out from under an in-flight agent. const method: 'POST' | 'GET' = hasPosted ? 
'GET' : 'POST' const instanceId = getFreebuffInstanceId() + const model = getSelectedFreebuffModel() try { const next = await callSession(method, token, { signal: abortController.signal, instanceId, + model, }) if (cancelled) return hasPosted = true + // Race recovery: user picked a different model in the waiting room at + // the exact moment the server admitted them with the original model. + // Silently revert the local selection and re-tick so the next call + // (a GET) lands the actual active session. Users who really want to + // switch can /end-session deliberately. + if (next.status === 'model_locked') { + useFreebuffModelStore.getState().setSelectedModel(next.currentModel) + schedule(0) + return + } + if (previousStatus === 'queued' && next.status === 'active') { playAdmissionSound() } @@ -319,12 +398,7 @@ export function useFreebuffSession(): UseFreebuffSessionResult { // Fire-and-forget DELETE. Only release if we actually held a slot so // we don't generate spurious DELETEs (e.g. HMR before POST completes). - if ( - current && - (current.status === 'queued' || - current.status === 'active' || - (current.status === 'ended' && current.instanceId)) - ) { + if (shouldReleaseSlot(current)) { callSession('DELETE', token).catch(() => {}) } setSession(null) diff --git a/cli/src/state/freebuff-model-store.ts b/cli/src/state/freebuff-model-store.ts new file mode 100644 index 000000000..182a38831 --- /dev/null +++ b/cli/src/state/freebuff-model-store.ts @@ -0,0 +1,41 @@ +import { + DEFAULT_FREEBUFF_MODEL_ID, + resolveFreebuffModel, +} from '@codebuff/common/constants/freebuff-models' +import { create } from 'zustand' + +import { + loadFreebuffModelPreference, + saveFreebuffModelPreference, +} from '../utils/settings' + +/** + * Holds the user's currently-selected freebuff model. Initialized from the + * persisted settings file so freebuff defaults to whatever model the user + * last picked. 
Writing through `setSelectedModel` also persists to disk so + * the next launch picks it up without an explicit save call. + * + * Components in the waiting room read this to highlight the current row in + * the model picker; the session hook reads it to decide which queue to join. + */ +interface FreebuffModelStore { + selectedModel: string + setSelectedModel: (model: string) => void +} + +export const useFreebuffModelStore = create((set) => ({ + selectedModel: resolveFreebuffModel( + loadFreebuffModelPreference() ?? DEFAULT_FREEBUFF_MODEL_ID, + ), + setSelectedModel: (model) => { + const resolved = resolveFreebuffModel(model) + saveFreebuffModelPreference(resolved) + set({ selectedModel: resolved }) + }, +})) + +/** Imperative read for non-React callers (the session hook's tick loop and + * the chat-completions metadata builder). */ +export function getSelectedFreebuffModel(): string { + return useFreebuffModelStore.getState().selectedModel +} diff --git a/cli/src/utils/local-agent-registry.ts b/cli/src/utils/local-agent-registry.ts index 203a9f7a9..59206eb84 100644 --- a/cli/src/utils/local-agent-registry.ts +++ b/cli/src/utils/local-agent-registry.ts @@ -7,11 +7,26 @@ import { loadLocalAgents as sdkLoadLocalAgents, loadMCPConfigSync } from '@codeb import type { MCPConfig } from '@codebuff/common/types/mcp' +import { FREE_MODE_AGENT_MODELS } from '@codebuff/common/constants/free-agents' +import { FREEBUFF_MODELS } from '@codebuff/common/constants/freebuff-models' + +import { getSelectedFreebuffModel } from '../state/freebuff-model-store' import { getProjectRoot } from '../project-files' -import { AGENT_MODE_TO_ID, type AgentMode } from './constants' +import { AGENT_MODE_TO_ID, IS_FREEBUFF, type AgentMode } from './constants' import { logger } from './logger' import * as bundledAgentsModule from '../agents/bundled-agents.generated' +/** Agents whose hardcoded model gets swapped out for the user's currently + * selected freebuff model. 
Derived from the server's + * `FREE_MODE_AGENT_MODELS` — any agent whose allowlist contains every + * freebuff model is safe to retarget client-side without tripping the + * server's `free_mode_invalid_agent_model` rejection. */ +const FREEBUFF_MODEL_OVERRIDABLE_AGENT_IDS: ReadonlySet = new Set( + Object.entries(FREE_MODE_AGENT_MODELS) + .filter(([, allowed]) => FREEBUFF_MODELS.every((m) => allowed.has(m.id))) + .map(([agentId]) => agentId), +) + import type { AgentDefinition } from '@codebuff/common/templates/initial-agents-dir/types/agent-definition' // ============================================================================ @@ -354,6 +369,20 @@ export const loadAgentDefinitions = (): AgentDefinition[] => { } } + // Override the model of free-mode agents to match the user's pick from the + // freebuff waiting room. Bundled definitions hardcode glm-5.1; we swap in + // whatever the user chose so the chat-completions request body carries the + // matching model and the server-side session gate doesn't reject it as a + // model mismatch. + if (IS_FREEBUFF) { + const selectedModel = getSelectedFreebuffModel() + for (const def of definitions) { + if (FREEBUFF_MODEL_OVERRIDABLE_AGENT_IDS.has(def.id)) { + def.model = selectedModel + } + } + } + return definitions } diff --git a/cli/src/utils/settings.ts b/cli/src/utils/settings.ts index c469ae273..5dc901e69 100644 --- a/cli/src/utils/settings.ts +++ b/cli/src/utils/settings.ts @@ -1,6 +1,8 @@ import fs from 'fs' import path from 'path' +import { isFreebuffModelId } from '@codebuff/common/constants/freebuff-models' + import { getConfigDir } from './auth' import { AGENT_MODES } from './constants' import { logger } from './logger' @@ -20,6 +22,10 @@ const DEFAULT_SETTINGS: Settings = { export interface Settings { mode?: AgentMode adsEnabled?: boolean + /** Last model the user picked in the freebuff model selector. 
Restored on + * next freebuff launch so users land in the queue for their preferred + * model without re-picking. Persisted as the canonical model id. */ + freebuffModel?: string /** @deprecated Use server-side fallbackToALaCarte setting instead */ alwaysUseALaCarte?: boolean /** @deprecated Use server-side fallbackToALaCarte setting instead */ @@ -96,6 +102,12 @@ const validateSettings = (parsed: unknown): Settings => { settings.adsEnabled = obj.adsEnabled } + // Validate freebuffModel — drop unknown ids so a removed model doesn't + // strand the user on a non-existent queue. + if (typeof obj.freebuffModel === 'string' && isFreebuffModelId(obj.freebuffModel)) { + settings.freebuffModel = obj.freebuffModel + } + // Validate alwaysUseALaCarte (legacy) if (typeof obj.alwaysUseALaCarte === 'boolean') { settings.alwaysUseALaCarte = obj.alwaysUseALaCarte @@ -149,3 +161,19 @@ export const saveModePreference = (mode: AgentMode): void => { saveSettings({ mode }) } +/** + * Load the saved freebuff model preference. Returns undefined if none is + * saved yet — callers should fall back to DEFAULT_FREEBUFF_MODEL_ID. + */ +export const loadFreebuffModelPreference = (): string | undefined => { + return loadSettings().freebuffModel +} + +/** + * Save the freebuff model preference. Called whenever the user picks a model + * in the waiting room so the next launch defaults to it. + */ +export const saveFreebuffModelPreference = (model: string): void => { + saveSettings({ freebuffModel: model }) +} + diff --git a/common/src/constants/freebuff-models.ts b/common/src/constants/freebuff-models.ts new file mode 100644 index 000000000..d71ebd619 --- /dev/null +++ b/common/src/constants/freebuff-models.ts @@ -0,0 +1,53 @@ +/** + * Models a freebuff user can pick between in the waiting-room model selector. + * + * Each model has its own queue (server keys queue position by `model`), so the + * list here is effectively the set of separate waiting lines. 
Order is the + * order shown in the UI. + */ +export interface FreebuffModelOption { + /** Stable ID used in the wire protocol and DB. Matches the model id passed + * to the chat-completions endpoint. */ + id: string + /** Short label for the selector UI. */ + displayName: string + /** One-line description shown next to the label. */ + tagline: string +} + +export const FREEBUFF_MODELS = [ + { + id: 'z-ai/glm-5.1', + displayName: 'GLM 5.1', + tagline: 'Smartest', + }, + { + id: 'minimax/minimax-m2.7', + displayName: 'MiniMax M2.7', + tagline: 'Fastest', + }, +] as const satisfies readonly FreebuffModelOption[] + +export type FreebuffModelId = (typeof FREEBUFF_MODELS)[number]['id'] + +export const DEFAULT_FREEBUFF_MODEL_ID: FreebuffModelId = FREEBUFF_MODELS[0].id + +export function isFreebuffModelId( + id: string | null | undefined, +): id is FreebuffModelId { + if (!id) return false + return FREEBUFF_MODELS.some((m) => m.id === id) +} + +export function resolveFreebuffModel( + id: string | null | undefined, +): FreebuffModelId { + return isFreebuffModelId(id) ? id : DEFAULT_FREEBUFF_MODEL_ID +} + +export function getFreebuffModel(id: string): FreebuffModelOption { + return ( + FREEBUFF_MODELS.find((m) => m.id === id) ?? + FREEBUFF_MODELS.find((m) => m.id === DEFAULT_FREEBUFF_MODEL_ID)! + ) +} diff --git a/common/src/types/freebuff-session.ts b/common/src/types/freebuff-session.ts index b2a6dabff..bb8936b41 100644 --- a/common/src/types/freebuff-session.ts +++ b/common/src/types/freebuff-session.ts @@ -21,15 +21,24 @@ export type FreebuffSessionServerResponse = | { status: 'queued' instanceId: string - /** 1-indexed position in the FIFO queue. */ + /** Model the user is queued for. Each model has its own queue. */ + model: string + /** 1-indexed position in the queue for `model`. */ position: number queueDepth: number + /** Current depth of every model's queue, so the CLI can show a live + * "N ahead" hint on each row of the model selector. 
Models with no + * queued rows at snapshot time may be absent; the CLI should treat a + * missing entry as 0. */ + queueDepthByModel: Record estimatedWaitMs: number queuedAt: string } | { status: 'active' instanceId: string + /** Model the active session is bound to — cannot change mid-session. */ + model: string admittedAt: string expiresAt: string remainingMs: number @@ -68,3 +77,13 @@ export type FreebuffSessionServerResponse = status: 'country_blocked' countryCode: string } + | { + /** User has an active session bound to a different model. Returned + * from POST /session when they pick a new model without ending their + * current session first. The CLI shows a confirmation prompt: "End + * your active GLM session to switch?" → on confirm, DELETE then + * re-POST with the new model. */ + status: 'model_locked' + currentModel: string + requestedModel: string + } diff --git a/docs/freebuff-waiting-room.md b/docs/freebuff-waiting-room.md index 604046715..b1384d7b6 100644 --- a/docs/freebuff-waiting-room.md +++ b/docs/freebuff-waiting-room.md @@ -2,13 +2,13 @@ ## Overview -The waiting room is the admission control layer for **free-mode** requests against the freebuff Fireworks deployment. It has three jobs: +The waiting room is the admission control layer for **free-mode** requests against the freebuff Fireworks deployments. It has three jobs: -1. **Drip-admit users** — admit at a steady trickle (default 1 per `ADMISSION_TICK_MS`, currently 15s) so load ramps up gradually rather than stampeding the deployment when the queue is long. -2. **Gate on upstream health** — before each admission tick, probe the Fireworks metrics endpoint with a short timeout (`isFireworksAdmissible` in `web/src/server/free-session/admission.ts`). If it doesn't respond OK, admission halts until it does — this is the primary concurrency control, not a static cap. +1. **Drip-admit users per model** — each selectable freebuff model has its own FIFO queue. 
Admission runs one tick (default `ADMISSION_TICK_MS`, 15s) that tries to admit one user per model, so heavier models can sit cold without starving lighter ones. +2. **Gate on per-deployment health** — a single fleet probe per tick (`getFleetHealth` in `web/src/server/free-session/fireworks-health.ts`) hits the Fireworks metrics endpoint and classifies each dedicated deployment as `healthy | degraded | unhealthy`. Only models whose deployment is `healthy` admit that tick; a degraded minimax-m2.7 no longer stalls glm-5.1 admissions. 3. **One instance per account** — prevent a single user from running N concurrent freebuff CLIs to get N× throughput. -Users who cannot be admitted immediately are placed in a FIFO queue and given an estimated wait time. Admitted users get a fixed-length session (default 1h) during which they can make free-mode requests subject to the existing per-user rate limits. +Users who cannot be admitted immediately are placed in the queue for their chosen model and given an estimated wait time. Admitted users get a fixed-length session (default 1h) bound to the model they were admitted on; chat completions use that model for the life of the session. The entire system is gated by the env flag `FREEBUFF_WAITING_ROOM_ENABLED`. When `false`, the gate is a no-op and the admission ticker does not start; free-mode traffic flows through unchanged. @@ -33,28 +33,30 @@ flowchart LR SessionAPI["/api/v1/freebuff/session
(GET, POST, DELETE)"] ChatAPI["/api/v1/chat/completions"] Gate[checkSessionAdmissible] - Ticker[Admission Ticker
every 5s, 1 pod] + Ticker["Admission Ticker
every ADMISSION_TICK_MS
(all pods, per-model locks)"] Store[(free_session
Postgres)] - Probe[isFireworksAdmissible
Fireworks metrics GET] + Probe["getFleetHealth
Fireworks metrics GET
(cached ~25s)"] - CLI -- "POST on startup
(gets instance_id)" --> SessionAPI + CLI -- "POST on startup
(model + gets instance_id)" --> SessionAPI CLI -- "GET to poll state" --> SessionAPI CLI -- "chat requests
include instance_id" --> ChatAPI SessionAPI --> Store ChatAPI --> Gate Gate --> Store - Ticker --> Store + Ticker -- "per-model admit" --> Store Ticker --> Probe ``` ### Components -- **`free_session` table** (Postgres) — single source of truth for queue + active-session state. One row per user (PK on `user_id`). -- **Public API** (`web/src/server/free-session/public-api.ts`) — `requestSession`, `getSessionState`, `endUserSession`, `checkSessionAdmissible`. Pure business logic; DI-friendly. -- **Store** (`web/src/server/free-session/store.ts`) — all DB ops. Transaction boundaries and advisory locks live here. -- **Admission ticker** (`web/src/server/free-session/admission.ts`) — self-scheduling timer that runs every 5s, sweeps expired rows, and admits queued users up to capacity. +- **`free_session` table** (Postgres) — single source of truth for queue + active-session state. One row per user (PK on `user_id`), with a `model` column recording which queue the row belongs to. +- **Model registry** (`common/src/constants/freebuff-models.ts`) — `FREEBUFF_MODELS` is the authoritative list of selectable models. Adding a new freebuff model means adding an entry here; the admission ticker iterates this list every tick. +- **Public API** (`web/src/server/free-session/public-api.ts`) — `requestSession`, `getSessionState`, `endUserSession`, `checkSessionAdmissible`. Pure business logic; DI-friendly. `requestSession` accepts the user's chosen `model` and can return `model_locked` when a session is already active on a different model. +- **Store** (`web/src/server/free-session/store.ts`) — all DB ops. Transaction boundaries and per-model advisory locks live here. +- **Fleet health probe** (`web/src/server/free-session/fireworks-health.ts`) — `getFleetHealth()` does a single HTTP GET against the Fireworks metrics endpoint and returns a per-model `Record` of `healthy | degraded | unhealthy` states. Cached ~25s (under the Fireworks 30s exporter cadence and 6 req/min rate limit). 
Models without a dedicated deployment in `FIREWORKS_DEPLOYMENT_MAP` (e.g. serverless) are absent from the map and treated as `healthy` at call sites. +- **Admission ticker** (`web/src/server/free-session/admission.ts`) — self-scheduling timer that runs every `ADMISSION_TICK_MS`. Each tick sweeps expired rows once, resolves fleet health once, then admits one queued user per model in parallel (each guarded by a model-keyed advisory lock). - **HTTP routes** (`web/src/app/api/v1/freebuff/session/`) — thin wrappers that resolve the API key → `userId` and delegate to the public API. -- **Chat-completions gate** (`web/src/app/api/v1/chat/completions/_post.ts`) — for free-mode requests, calls `checkSessionAdmissible(userId, claimedInstanceId)` after the rate-limit check and rejects non-admissible requests with a structured error. +- **Chat-completions gate** (`web/src/app/api/v1/chat/completions/_post.ts`) — for free-mode requests, calls `checkSessionAdmissible(userId, claimedInstanceId)` after the rate-limit check and rejects non-admissible requests with a structured error. The admitted session's `model` is what gets sent to the upstream. ## Database Schema @@ -65,6 +67,7 @@ CREATE TABLE free_session ( user_id text PRIMARY KEY REFERENCES "user"(id) ON DELETE CASCADE, status free_session_status NOT NULL, active_instance_id text NOT NULL, + model text NOT NULL, queued_at timestamptz NOT NULL DEFAULT now(), admitted_at timestamptz, expires_at timestamptz, @@ -72,16 +75,18 @@ CREATE TABLE free_session ( updated_at timestamptz NOT NULL DEFAULT now() ); -CREATE INDEX idx_free_session_queue ON free_session (status, queued_at); +-- Per-model dequeue: WHERE status='queued' AND model=$1 ORDER BY queued_at +CREATE INDEX idx_free_session_queue ON free_session (status, model, queued_at); CREATE INDEX idx_free_session_expiry ON free_session (expires_at); ``` -Migration: `packages/internal/src/db/migrations/0043_vengeful_boomer.sql`. 
+Migrations: `packages/internal/src/db/migrations/0043_vengeful_boomer.sql` (initial table) and `0044_violet_stingray.sql` (added the `model` column and rebuilt the queue index). **Design notes** - **PK on `user_id`** is the structural enforcement of "one session per account". No app-logic race can produce two rows for one user. - **`active_instance_id`** rotates on every `POST /session` call. This is how we enforce one-CLI-at-a-time (see [Single-instance enforcement](#single-instance-enforcement)). +- **`model` column.** Populated by the POST handler; determines which queue the row belongs to while queued and is fixed for the life of an active session. Switching models while an active session is live is rejected (`model_locked`, 409). - **All timestamps server-supplied.** The client never sends `queued_at`, `admitted_at`, or `expires_at` — they are either `DEFAULT now()` or computed server-side during admission. - **FK CASCADE on user delete** keeps the table clean without a background job. @@ -127,18 +132,26 @@ The rotation is important: it happens even if the caller is already in the `acti ## Admission Loop -One pod runs the admission loop at a time, coordinated via Postgres advisory lock. All pods start a ticker on boot, but each tick acquires `pg_try_advisory_xact_lock(FREEBUFF_ADMISSION_LOCK_ID)` inside a transaction; if already held, the tick is a no-op on that pod. The lock is automatically released when the transaction commits. +All pods start a ticker on boot. Coordination is by **per-model** Postgres advisory locks: the lock id is `FREEBUFF_ADMISSION_LOCK_ID + hashStringToInt32(model)`, so different models can admit concurrently across pods while a single model is still serialized. Each per-model attempt takes the lock inside a transaction via `pg_try_advisory_xact_lock`; if the lock is held by another pod, that model is a no-op on this pod for this tick. The lock is released automatically when the transaction commits. Each tick does (in order): -1. 
**Sweep expired.** `DELETE FROM free_session WHERE status='active' AND expires_at < now() - grace`. Runs regardless of upstream health so zombie sessions are cleaned up even during an outage. -2. **Admit.** `admitFromQueue()` first calls `isFireworksAdmissible()` (short-timeout GET against the Fireworks metrics endpoint). If the probe fails, returns `{ skipped: 'health' }` — admission pauses and the queue grows until recovery. Otherwise opens a transaction, takes `pg_try_advisory_xact_lock(FREEBUFF_ADMISSION_LOCK_ID)`, and `SELECT ... WHERE status='queued' ORDER BY queued_at, user_id LIMIT 1 FOR UPDATE SKIP LOCKED` → `UPDATE` the row to `status='active'` with `admitted_at=now()`, `expires_at=now()+sessionLength`. One admit per tick keeps Fireworks from a thundering herd of newly-admitted CLIs. +1. **Sweep expired.** `DELETE FROM free_session WHERE status='active' AND expires_at < now() - grace`. Runs once per tick regardless of upstream health so zombie sessions are cleaned up even during an outage. +2. **Fleet health probe.** `getFleetHealth()` returns a `Record`. At most one HTTP call per tick (the result is cached ~25s per pod) covers every model. A model absent from the fleet map (serverless, no dedicated deployment) defaults to `healthy` at the call site. +3. **Admit per model, in parallel.** For each model in `FREEBUFF_MODELS`, call `admitFromQueue({ model, health, sessionLengthMs, now })`: + - If `health !== 'healthy'`, returns `{ admitted: [], skipped: health }` without touching Postgres — the model's queue pauses and grows until recovery. + - Otherwise opens a transaction, takes the per-model advisory lock, and `SELECT ... WHERE status='queued' AND model=$1 ORDER BY queued_at, user_id LIMIT 1 FOR UPDATE SKIP LOCKED` → `UPDATE` the row to `status='active'` with `admitted_at=now()`, `expires_at=now()+sessionLength`. One admit per model per tick keeps Fireworks from a thundering herd of newly-admitted CLIs.
+ +The final tick result carries a `queueDepthByModel` map and a single `skipped` reason (the first non-null skip across models) for observability. ### Tunables | Constant | Location | Default | Purpose | |---|---|---|---| -| `ADMISSION_TICK_MS` | `config.ts` | 15000 | How often the ticker fires. One user is admitted per tick. | +| `ADMISSION_TICK_MS` | `config.ts` | 15000 | How often the ticker fires. Up to one user is admitted per model per tick. | +| `FREEBUFF_MODELS` | `common/src/constants/freebuff-models.ts` | `glm-5.1`, `minimax-m2.7` | Selectable models; each gets its own queue and admission slot. | +| `FIREWORKS_DEPLOYMENT_MAP` | `web/src/llm-api/fireworks-config.ts` | glm-5.1 only | Models with dedicated Fireworks deployments. Models not listed are treated as `healthy` (serverless fallback) — drop this default when they migrate to their own deployments. | +| `HEALTH_CACHE_TTL_MS` | `fireworks-health.ts` | 25000 | Fleet probe cache TTL. Sits just under the Fireworks 30s exporter cadence and 6 req/min rate limit. | | `FREEBUFF_SESSION_LENGTH_MS` | env | 3_600_000 | Session lifetime | | `FREEBUFF_SESSION_GRACE_MS` | env | 1_800_000 | Drain window after expiry — gate still admits requests so an in-flight agent can finish, but the CLI is expected to block new prompts. Hard cutoff at `expires_at + grace`. | @@ -148,12 +161,14 @@ All endpoints authenticate via the standard `Authorization: Bearer ` or ### `POST /api/v1/freebuff/session` -**Called by the CLI on startup.** Idempotent. Semantics: +**Called by the CLI on startup and whenever the user picks a different model in the waiting room.** Body: `{ "model": "" }` (optional; falls back to the default model if omitted or unknown). Idempotent. Semantics: -- No existing row → create with `status='queued'`, fresh `active_instance_id`, `queued_at=now()`. -- Existing queued row → rotate `active_instance_id`, preserve `queued_at` (no queue jump). 
-- Existing active+unexpired row → rotate `active_instance_id`, preserve `status`/`admitted_at`/`expires_at`. -- Existing active+expired row → reset to queued with fresh `queued_at` (re-queue at back). +- No existing row → create with `status='queued'`, `model` = requested, fresh `active_instance_id`, `queued_at=now()`. +- Existing queued row, **same model** → rotate `active_instance_id`, preserve `queued_at` (no queue jump). +- Existing queued row, **different model** → switch `model` and reset `queued_at=now()` (move to back of the new model's queue). Also rotates `active_instance_id`. +- Existing active+unexpired row, **same model** → rotate `active_instance_id`, preserve `status`/`admitted_at`/`expires_at`. +- Existing active+unexpired row, **different model** → reject with `model_locked` (HTTP 409); `active_instance_id` is **not** rotated so the other CLI stays valid. Client must DELETE the session before switching. +- Existing active+expired row → reset to queued with fresh `queued_at` and the requested `model` (re-queue at back). Response shapes: @@ -165,9 +180,14 @@ Response shapes: { "status": "queued", "instanceId": "e47…", - "position": 17, // 1-indexed - "queueDepth": 43, - "estimatedWaitMs": 3600000, + "model": "z-ai/glm-5.1", + "position": 17, // 1-indexed within this model's queue + "queueDepth": 43, // size of this model's queue + "queueDepthByModel": { // snapshot of every model's queue — powers the + "z-ai/glm-5.1": 43, // "N ahead" hint in the selector. Missing + "minimax/minimax-m2.7": 4 // entries should be treated as 0.
+ }, + "estimatedWaitMs": 384000, "queuedAt": "2026-04-17T12:00:00Z" } @@ -175,6 +195,7 @@ Response shapes: { "status": "active", "instanceId": "e47…", + "model": "z-ai/glm-5.1", "admittedAt": "2026-04-17T12:00:00Z", "expiresAt": "2026-04-17T13:00:00Z", "remainingMs": 3600000 @@ -192,6 +213,15 @@ Response shapes: "gracePeriodEndsAt": "2026-04-17T13:30:00Z", "gracePeriodRemainingMs": 1800000 } + +// POST only: user asked for a different model while an active session is +// bound to `currentModel`. HTTP 409. CLI must DELETE /session and re-POST +// to actually switch. +{ + "status": "model_locked", + "currentModel": "z-ai/glm-5.1", + "requestedModel": "minimax/minimax-m2.7" +} ``` ### `GET /api/v1/freebuff/session` @@ -246,29 +276,30 @@ This is a **trust-the-client** design: the server still admits requests during t ## Estimated Wait Time -Computed in `session-view.ts` as a rough one-minute-per-spot-ahead estimate: +Computed in `session-view.ts` (`WAIT_MS_PER_SPOT_AHEAD = 24_000`) as a rough per-spot estimate within the user's own model queue: ``` -waitMs = (position - 1) * 60_000 +waitMs = (position - 1) * 24_000 ``` - Position 1 → 0 (next tick admits you) -- Position 2 → one minute, and so on. +- Position 2 → 24s, and so on. -This estimate is intentionally decoupled from the admission tick — it's a human-friendly rule-of-thumb for the UI, not a precise projection. Actual wait depends on admission-tick cadence and health-gated pauses (during a Fireworks incident admission halts entirely), so the real wait can be longer or shorter. +`position` is scoped to this model's queue — a user at position 1 in the `minimax/minimax-m2.7` queue is not affected by the depth of the `z-ai/glm-5.1` queue. The estimate is intentionally decoupled from the admission tick — it's a human-friendly rule-of-thumb for the UI, not a precise projection. 
Actual wait depends on admission-tick cadence and health-gated pauses (during a per-deployment Fireworks incident only the affected model's queue stalls; healthy models keep draining), so the real wait can be longer or shorter. ## CLI Integration (frontend-side contract) The CLI: -1. **On startup**, calls `POST /api/v1/freebuff/session`. Stores `instanceId` in memory (not on disk — startup must re-admit). -2. **Loops while `status === 'queued'`:** polls `GET /api/v1/freebuff/session` (with `X-Freebuff-Instance-Id`) every ~5s and renders `position / queueDepth / estimatedWaitMs`. -3. **When `status === 'active'`**, renders `remainingMs` as a countdown. Re-polls GET every ~30s to stay honest with server-side state. -4. **When `status === 'ended'`** (the server-side draining/grace shape, with `instanceId`), hides the input and shows the Enter-to-rejoin banner while still forwarding the instance id on outgoing chat requests so in-flight agent work can finish. -5. **When `status === 'superseded'`**, stops polling and shows the "close the other CLI" screen. -6. **On every chat request**, includes `codebuff_metadata.freebuff_instance_id: `. -7. **Handles chat-gate errors:** the same statuses are reachable via the gate's 409/410/428/429 for fast in-flight feedback, and the CLI calls the matching `markFreebuff*` helper to flip local state without waiting for the next poll. -8. **On clean exit**, calls `DELETE /api/v1/freebuff/session` so the next user can be admitted sooner. +1. **On startup**, calls `POST /api/v1/freebuff/session` with the user's persisted model choice. Stores `instanceId` in memory (not on disk — startup must re-admit). +2. **Loops while `status === 'queued'`:** polls `GET /api/v1/freebuff/session` (with `X-Freebuff-Instance-Id`) every ~5s and renders `position / queueDepth / estimatedWaitMs` alongside the selected model. +3. **Model switch from the waiting room** → re-POSTs with the new model id. Server moves the row to the back of the new model's queue. 
If the server responds `model_locked` (we already got admitted on the old model in the meantime), the tick loop silently reverts the local selection to the locked model rather than interrupting the active session — users who really want to switch can `/end-session` deliberately. +4. **When `status === 'active'`**, renders `remainingMs` as a countdown. Re-polls GET every ~30s to stay honest with server-side state. Chat completions use the admitted session's model for the rest of the session. +5. **When `status === 'ended'`** (the server-side draining/grace shape, with `instanceId`), hides the input and shows the Enter-to-rejoin banner while still forwarding the instance id on outgoing chat requests so in-flight agent work can finish. +6. **When `status === 'superseded'`**, stops polling and shows the "close the other CLI" screen. +7. **On every chat request**, includes `codebuff_metadata.freebuff_instance_id: `. +8. **Handles chat-gate errors:** the same statuses are reachable via the gate's 409/410/428/429 for fast in-flight feedback, and the CLI calls the matching `markFreebuff*` helper to flip local state without waiting for the next poll. +9. **On clean exit**, calls `DELETE /api/v1/freebuff/session` so the next user can be admitted sooner. The `disabled` response means the server has the waiting room turned off. CLI treats it identically to `active` with infinite remaining time — no countdown, and chat requests can omit `freebuff_instance_id` entirely. @@ -276,7 +307,8 @@ The `disabled` response means the server has the waiting room turned off. CLI tr - **`/api/v1/freebuff/session` routes** are stateless per pod; all state lives in Postgres. Any pod can serve any request. - **Chat completions gate** is a single `SELECT` per free-mode request. At high QPS this is the hottest path — the `user_id` PK lookup is O(1). If it ever becomes a problem, the obvious fix is to cache the session row for ~1s per pod. 
-- **Admission loop** runs on every pod but is serialized by `pg_try_advisory_xact_lock`. At any given tick, exactly one pod actually admits; the rest early-return. +- **Admission loop** runs on every pod. Per-model advisory locks serialize admission *within* each model while allowing different models to admit on different pods concurrently. At any given tick, at most one pod actually admits for each model; the rest early-return on that model's lock. +- **Fleet health probe** is cached per-pod (`HEALTH_CACHE_TTL_MS`, 25s). Each pod hits the Fireworks metrics endpoint at most ~2.4/min. Because the cache is per-pod while the 6 req/min rate limit is account-wide, aggregate probe traffic scales with pod count (≈2.4 req/min × pods), so the margin under the limit only holds for a small fleet (two pods ≈ 4.8 req/min). ## Abuse Resistance Summary @@ -288,9 +320,11 @@ The `disabled` response means the server has the waiting room turned off. CLI tr | Client-forged timestamps | All timestamps server-supplied (`DEFAULT now()` or explicit) | | Queue jumping via timestamp manipulation | `queued_at` is server-supplied; FIFO order is server-determined | | Repeatedly calling POST to reset queue position | POST preserves `queued_at` for already-queued users | -| Two pods admitting the same user | `SELECT ... FOR UPDATE SKIP LOCKED` + advisory xact lock | -| Spamming POST/GET to starve admission tick | Admission uses Postgres advisory lock; DDoS protection is upstream (Next's global rate limits). Consider adding a per-user limiter on `/session` if traffic warrants. | +| Two pods admitting the same user | Per-model `SELECT ... FOR UPDATE SKIP LOCKED` + per-model advisory xact lock | +| Spamming POST/GET to starve admission tick | Admission uses per-model Postgres advisory locks; DDoS protection is upstream (Next's global rate limits). Consider adding a per-user limiter on `/session` if traffic warrants.
| +| Repeatedly POSTing different models to get across every queue | Single row per user (PK on `user_id`); switching models moves the row, never clones it. A user holds exactly one queue slot at any time. | +| Fireworks metrics endpoint down / slow | `getFleetHealth()` fails closed (timeout, non-OK, or missing API key) → every dedicated-deployment model is flagged `unhealthy` and its queue pauses. | +| One deployment degraded while others are fine | Health is classified per-deployment; only the affected model's queue pauses, so a degraded glm-5.1 deployment doesn't block minimax-m2.7 admissions. | | Zombie expired sessions holding capacity | Swept on every admission tick, even when upstream is unhealthy | ## Testing @@ -298,8 +332,9 @@ The `disabled` response means the server has the waiting room turned off. CLI tr Pure logic covered by `web/src/server/free-session/__tests__/*.test.ts`: - `session-view.test.ts` — wait-time estimation, row→response mapping -- `public-api.test.ts` — all status transitions via in-memory DI store -- `admission.test.ts` — tick behaviour with mocked store + health checks +- `public-api.test.ts` — all status transitions via in-memory DI store (including `model_locked` and cross-model switching) +- `admission.test.ts` — tick behaviour with mocked store + per-model health (healthy/degraded/unhealthy, absent-entry-defaults-to-healthy for serverless models) +- `fireworks-health.test.ts` — `classifyOne` decision table: KV-blocks thresholds, 5xx fraction, prefill queue p90 histogram, per-deployment independence Handler tests in `web/src/app/api/v1/freebuff/session/__tests__/session.test.ts` cover auth + request routing with a mocked `SessionDeps`.
diff --git a/packages/internal/src/db/migrations/0044_violet_stingray.sql b/packages/internal/src/db/migrations/0044_violet_stingray.sql new file mode 100644 index 000000000..e6942d1d9 --- /dev/null +++ b/packages/internal/src/db/migrations/0044_violet_stingray.sql @@ -0,0 +1,7 @@ +DROP INDEX "idx_free_session_queue";--> statement-breakpoint +-- Backfill any in-flight rows with the previous sole free-mode model. The +-- column is supposed to be required going forward, so we set a temporary +-- default to ride out the migration and drop it immediately after. +ALTER TABLE "free_session" ADD COLUMN "model" text NOT NULL DEFAULT 'z-ai/glm-5.1';--> statement-breakpoint +ALTER TABLE "free_session" ALTER COLUMN "model" DROP DEFAULT;--> statement-breakpoint +CREATE INDEX "idx_free_session_queue" ON "free_session" USING btree ("status","model","queued_at"); \ No newline at end of file diff --git a/packages/internal/src/db/migrations/meta/0044_snapshot.json b/packages/internal/src/db/migrations/meta/0044_snapshot.json new file mode 100644 index 000000000..847f32bba --- /dev/null +++ b/packages/internal/src/db/migrations/meta/0044_snapshot.json @@ -0,0 +1,3214 @@ +{ + "id": "108f2bd2-7ddc-4c15-b351-28f2b55d5348", + "prevId": "7c9172ed-5f73-4bf8-93cc-2c7e6d82a9ad", + "version": "7", + "dialect": "postgresql", + "tables": { + "public.account": { + "name": "account", + "schema": "", + "columns": { + "userId": { + "name": "userId", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "type": { + "name": "type", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "provider": { + "name": "provider", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "providerAccountId": { + "name": "providerAccountId", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "refresh_token": { + "name": "refresh_token", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "access_token": { + "name": "access_token", + "type": "text", + 
"primaryKey": false, + "notNull": false + }, + "expires_at": { + "name": "expires_at", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "token_type": { + "name": "token_type", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "scope": { + "name": "scope", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "id_token": { + "name": "id_token", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "session_state": { + "name": "session_state", + "type": "text", + "primaryKey": false, + "notNull": false + } + }, + "indexes": {}, + "foreignKeys": { + "account_userId_user_id_fk": { + "name": "account_userId_user_id_fk", + "tableFrom": "account", + "tableTo": "user", + "columnsFrom": [ + "userId" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": { + "account_provider_providerAccountId_pk": { + "name": "account_provider_providerAccountId_pk", + "columns": [ + "provider", + "providerAccountId" + ] + } + }, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.ad_impression": { + "name": "ad_impression", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "user_id": { + "name": "user_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "ad_text": { + "name": "ad_text", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "title": { + "name": "title", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "cta": { + "name": "cta", + "type": "text", + "primaryKey": false, + "notNull": true, + "default": "''" + }, + "url": { + "name": "url", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "favicon": { + "name": "favicon", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "click_url": { + "name": "click_url", + "type": "text", + "primaryKey": 
false, + "notNull": true + }, + "imp_url": { + "name": "imp_url", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "payout": { + "name": "payout", + "type": "numeric(10, 6)", + "primaryKey": false, + "notNull": true + }, + "credits_granted": { + "name": "credits_granted", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "grant_operation_id": { + "name": "grant_operation_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "served_at": { + "name": "served_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "impression_fired_at": { + "name": "impression_fired_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "clicked_at": { + "name": "clicked_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + } + }, + "indexes": { + "idx_ad_impression_user": { + "name": "idx_ad_impression_user", + "columns": [ + { + "expression": "user_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "served_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_ad_impression_imp_url": { + "name": "idx_ad_impression_imp_url", + "columns": [ + { + "expression": "imp_url", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "ad_impression_user_id_user_id_fk": { + "name": "ad_impression_user_id_user_id_fk", + "tableFrom": "ad_impression", + "tableTo": "user", + "columnsFrom": [ + "user_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": { + "ad_impression_imp_url_unique": { + "name": "ad_impression_imp_url_unique", + "nullsNotDistinct": false, + 
"columns": [ + "imp_url" + ] + } + }, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.agent_config": { + "name": "agent_config", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "version": { + "name": "version", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "publisher_id": { + "name": "publisher_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "major": { + "name": "major", + "type": "integer", + "primaryKey": false, + "notNull": false, + "generated": { + "as": "CAST(SPLIT_PART(\"agent_config\".\"version\", '.', 1) AS INTEGER)", + "type": "stored" + } + }, + "minor": { + "name": "minor", + "type": "integer", + "primaryKey": false, + "notNull": false, + "generated": { + "as": "CAST(SPLIT_PART(\"agent_config\".\"version\", '.', 2) AS INTEGER)", + "type": "stored" + } + }, + "patch": { + "name": "patch", + "type": "integer", + "primaryKey": false, + "notNull": false, + "generated": { + "as": "CAST(SPLIT_PART(\"agent_config\".\"version\", '.', 3) AS INTEGER)", + "type": "stored" + } + }, + "data": { + "name": "data", + "type": "jsonb", + "primaryKey": false, + "notNull": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": { + "idx_agent_config_publisher": { + "name": "idx_agent_config_publisher", + "columns": [ + { + "expression": "publisher_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "agent_config_publisher_id_publisher_id_fk": { + "name": "agent_config_publisher_id_publisher_id_fk", + "tableFrom": "agent_config", + 
"tableTo": "publisher", + "columnsFrom": [ + "publisher_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": { + "agent_config_publisher_id_id_version_pk": { + "name": "agent_config_publisher_id_id_version_pk", + "columns": [ + "publisher_id", + "id", + "version" + ] + } + }, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.agent_run": { + "name": "agent_run", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "user_id": { + "name": "user_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "agent_id": { + "name": "agent_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "publisher_id": { + "name": "publisher_id", + "type": "text", + "primaryKey": false, + "notNull": false, + "generated": { + "as": "CASE\n WHEN agent_id ~ '^[^/@]+/[^/@]+@[^/@]+$'\n THEN split_part(agent_id, '/', 1)\n ELSE NULL\n END", + "type": "stored" + } + }, + "agent_name": { + "name": "agent_name", + "type": "text", + "primaryKey": false, + "notNull": false, + "generated": { + "as": "CASE\n WHEN agent_id ~ '^[^/@]+/[^/@]+@[^/@]+$'\n THEN split_part(split_part(agent_id, '/', 2), '@', 1)\n ELSE agent_id\n END", + "type": "stored" + } + }, + "agent_version": { + "name": "agent_version", + "type": "text", + "primaryKey": false, + "notNull": false, + "generated": { + "as": "CASE\n WHEN agent_id ~ '^[^/@]+/[^/@]+@[^/@]+$'\n THEN split_part(agent_id, '@', 2)\n ELSE NULL\n END", + "type": "stored" + } + }, + "ancestor_run_ids": { + "name": "ancestor_run_ids", + "type": "text[]", + "primaryKey": false, + "notNull": false + }, + "root_run_id": { + "name": "root_run_id", + "type": "text", + "primaryKey": false, + "notNull": false, + "generated": { + "as": "CASE WHEN array_length(ancestor_run_ids, 1) >= 1 THEN ancestor_run_ids[1] ELSE id END", + "type": "stored" + } 
+ }, + "parent_run_id": { + "name": "parent_run_id", + "type": "text", + "primaryKey": false, + "notNull": false, + "generated": { + "as": "CASE WHEN array_length(ancestor_run_ids, 1) >= 1 THEN ancestor_run_ids[array_length(ancestor_run_ids, 1)] ELSE NULL END", + "type": "stored" + } + }, + "depth": { + "name": "depth", + "type": "integer", + "primaryKey": false, + "notNull": false, + "generated": { + "as": "COALESCE(array_length(ancestor_run_ids, 1), 1)", + "type": "stored" + } + }, + "duration_ms": { + "name": "duration_ms", + "type": "integer", + "primaryKey": false, + "notNull": false, + "generated": { + "as": "CASE WHEN completed_at IS NOT NULL THEN EXTRACT(EPOCH FROM (completed_at - created_at)) * 1000 ELSE NULL END::integer", + "type": "stored" + } + }, + "total_steps": { + "name": "total_steps", + "type": "integer", + "primaryKey": false, + "notNull": false, + "default": 0 + }, + "direct_credits": { + "name": "direct_credits", + "type": "numeric(10, 6)", + "primaryKey": false, + "notNull": false, + "default": "'0'" + }, + "total_credits": { + "name": "total_credits", + "type": "numeric(10, 6)", + "primaryKey": false, + "notNull": false, + "default": "'0'" + }, + "status": { + "name": "status", + "type": "agent_run_status", + "typeSchema": "public", + "primaryKey": false, + "notNull": true, + "default": "'running'" + }, + "error_message": { + "name": "error_message", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "completed_at": { + "name": "completed_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + } + }, + "indexes": { + "idx_agent_run_user_id": { + "name": "idx_agent_run_user_id", + "columns": [ + { + "expression": "user_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "created_at", + "isExpression": false, + 
"asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_agent_run_parent": { + "name": "idx_agent_run_parent", + "columns": [ + { + "expression": "parent_run_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_agent_run_root": { + "name": "idx_agent_run_root", + "columns": [ + { + "expression": "root_run_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_agent_run_agent_id": { + "name": "idx_agent_run_agent_id", + "columns": [ + { + "expression": "agent_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "created_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_agent_run_publisher": { + "name": "idx_agent_run_publisher", + "columns": [ + { + "expression": "publisher_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "created_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_agent_run_status": { + "name": "idx_agent_run_status", + "columns": [ + { + "expression": "status", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"agent_run\".\"status\" = 'running'", + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_agent_run_ancestors_gin": { + "name": "idx_agent_run_ancestors_gin", + "columns": [ + { + "expression": "ancestor_run_ids", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "gin", + "with": {} + }, + 
"idx_agent_run_completed_publisher_agent": { + "name": "idx_agent_run_completed_publisher_agent", + "columns": [ + { + "expression": "publisher_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "agent_name", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"agent_run\".\"status\" = 'completed'", + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_agent_run_completed_recent": { + "name": "idx_agent_run_completed_recent", + "columns": [ + { + "expression": "created_at", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "publisher_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "agent_name", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"agent_run\".\"status\" = 'completed'", + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_agent_run_completed_version": { + "name": "idx_agent_run_completed_version", + "columns": [ + { + "expression": "publisher_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "agent_name", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "agent_version", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "created_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"agent_run\".\"status\" = 'completed'", + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_agent_run_completed_user": { + "name": "idx_agent_run_completed_user", + "columns": [ + { + "expression": "user_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"agent_run\".\"status\" = 'completed'", + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + 
"agent_run_user_id_user_id_fk": { + "name": "agent_run_user_id_user_id_fk", + "tableFrom": "agent_run", + "tableTo": "user", + "columnsFrom": [ + "user_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.agent_step": { + "name": "agent_step", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "agent_run_id": { + "name": "agent_run_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "step_number": { + "name": "step_number", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "duration_ms": { + "name": "duration_ms", + "type": "integer", + "primaryKey": false, + "notNull": false, + "generated": { + "as": "CASE WHEN completed_at IS NOT NULL THEN EXTRACT(EPOCH FROM (completed_at - created_at)) * 1000 ELSE NULL END::integer", + "type": "stored" + } + }, + "credits": { + "name": "credits", + "type": "numeric(10, 6)", + "primaryKey": false, + "notNull": true, + "default": "'0'" + }, + "child_run_ids": { + "name": "child_run_ids", + "type": "text[]", + "primaryKey": false, + "notNull": false + }, + "spawned_count": { + "name": "spawned_count", + "type": "integer", + "primaryKey": false, + "notNull": false, + "generated": { + "as": "array_length(child_run_ids, 1)", + "type": "stored" + } + }, + "message_id": { + "name": "message_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "status": { + "name": "status", + "type": "agent_step_status", + "typeSchema": "public", + "primaryKey": false, + "notNull": true, + "default": "'completed'" + }, + "error_message": { + "name": "error_message", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": 
true, + "default": "now()" + }, + "completed_at": { + "name": "completed_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": { + "unique_step_number_per_run": { + "name": "unique_step_number_per_run", + "columns": [ + { + "expression": "agent_run_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "step_number", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_agent_step_run_id": { + "name": "idx_agent_step_run_id", + "columns": [ + { + "expression": "agent_run_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_agent_step_children_gin": { + "name": "idx_agent_step_children_gin", + "columns": [ + { + "expression": "child_run_ids", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "gin", + "with": {} + } + }, + "foreignKeys": { + "agent_step_agent_run_id_agent_run_id_fk": { + "name": "agent_step_agent_run_id_agent_run_id_fk", + "tableFrom": "agent_step", + "tableTo": "agent_run", + "columnsFrom": [ + "agent_run_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.credit_ledger": { + "name": "credit_ledger", + "schema": "", + "columns": { + "operation_id": { + "name": "operation_id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "user_id": { + "name": "user_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "principal": { + "name": "principal", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "balance": { + "name": "balance", + 
"type": "integer", + "primaryKey": false, + "notNull": true + }, + "type": { + "name": "type", + "type": "grant_type", + "typeSchema": "public", + "primaryKey": false, + "notNull": true + }, + "description": { + "name": "description", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "priority": { + "name": "priority", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "expires_at": { + "name": "expires_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "org_id": { + "name": "org_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "stripe_subscription_id": { + "name": "stripe_subscription_id", + "type": "text", + "primaryKey": false, + "notNull": false + } + }, + "indexes": { + "idx_credit_ledger_active_balance": { + "name": "idx_credit_ledger_active_balance", + "columns": [ + { + "expression": "user_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "balance", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "expires_at", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "priority", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "created_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"credit_ledger\".\"balance\" != 0 AND \"credit_ledger\".\"expires_at\" IS NULL", + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_credit_ledger_org": { + "name": "idx_credit_ledger_org", + "columns": [ + { + "expression": "org_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_credit_ledger_subscription": { + "name": 
"idx_credit_ledger_subscription", + "columns": [ + { + "expression": "user_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "type", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "created_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "credit_ledger_user_id_user_id_fk": { + "name": "credit_ledger_user_id_user_id_fk", + "tableFrom": "credit_ledger", + "tableTo": "user", + "columnsFrom": [ + "user_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + }, + "credit_ledger_org_id_org_id_fk": { + "name": "credit_ledger_org_id_org_id_fk", + "tableFrom": "credit_ledger", + "tableTo": "org", + "columnsFrom": [ + "org_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.encrypted_api_keys": { + "name": "encrypted_api_keys", + "schema": "", + "columns": { + "user_id": { + "name": "user_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "type": { + "name": "type", + "type": "api_key_type", + "typeSchema": "public", + "primaryKey": false, + "notNull": true + }, + "api_key": { + "name": "api_key", + "type": "text", + "primaryKey": false, + "notNull": true + } + }, + "indexes": {}, + "foreignKeys": { + "encrypted_api_keys_user_id_user_id_fk": { + "name": "encrypted_api_keys_user_id_user_id_fk", + "tableFrom": "encrypted_api_keys", + "tableTo": "user", + "columnsFrom": [ + "user_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": { + "encrypted_api_keys_user_id_type_pk": { + "name": "encrypted_api_keys_user_id_type_pk", + "columns": [ + "user_id", + "type" + ] + } + 
}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.fingerprint": { + "name": "fingerprint", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "sig_hash": { + "name": "sig_hash", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.free_session": { + "name": "free_session", + "schema": "", + "columns": { + "user_id": { + "name": "user_id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "status": { + "name": "status", + "type": "free_session_status", + "typeSchema": "public", + "primaryKey": false, + "notNull": true + }, + "active_instance_id": { + "name": "active_instance_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "model": { + "name": "model", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "queued_at": { + "name": "queued_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "admitted_at": { + "name": "admitted_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "expires_at": { + "name": "expires_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": { + "idx_free_session_queue": { + "name": 
"idx_free_session_queue", + "columns": [ + { + "expression": "status", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "model", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "queued_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_free_session_expiry": { + "name": "idx_free_session_expiry", + "columns": [ + { + "expression": "expires_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "free_session_user_id_user_id_fk": { + "name": "free_session_user_id_user_id_fk", + "tableFrom": "free_session", + "tableTo": "user", + "columnsFrom": [ + "user_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.git_eval_results": { + "name": "git_eval_results", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "cost_mode": { + "name": "cost_mode", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "reasoner_model": { + "name": "reasoner_model", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "agent_model": { + "name": "agent_model", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "metadata": { + "name": "metadata", + "type": "jsonb", + "primaryKey": false, + "notNull": false + }, + "cost": { + "name": "cost", + "type": "integer", + "primaryKey": false, + "notNull": true, + "default": 0 + }, + "is_public": { + "name": "is_public", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "created_at": { + "name": "created_at", + "type": 
"timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.limit_override": { + "name": "limit_override", + "schema": "", + "columns": { + "user_id": { + "name": "user_id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "credits_per_block": { + "name": "credits_per_block", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "block_duration_hours": { + "name": "block_duration_hours", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "weekly_credit_limit": { + "name": "weekly_credit_limit", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": {}, + "foreignKeys": { + "limit_override_user_id_user_id_fk": { + "name": "limit_override_user_id_user_id_fk", + "tableFrom": "limit_override", + "tableTo": "user", + "columnsFrom": [ + "user_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.message": { + "name": "message", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "finished_at": { + "name": "finished_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true + }, + "client_id": { + "name": "client_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "client_request_id": { + "name": 
"client_request_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "model": { + "name": "model", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "agent_id": { + "name": "agent_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "request": { + "name": "request", + "type": "jsonb", + "primaryKey": false, + "notNull": false + }, + "last_message": { + "name": "last_message", + "type": "jsonb", + "primaryKey": false, + "notNull": false, + "generated": { + "as": "\"message\".\"request\" -> -1", + "type": "stored" + } + }, + "reasoning_text": { + "name": "reasoning_text", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "response": { + "name": "response", + "type": "jsonb", + "primaryKey": false, + "notNull": true + }, + "input_tokens": { + "name": "input_tokens", + "type": "integer", + "primaryKey": false, + "notNull": true, + "default": 0 + }, + "cache_creation_input_tokens": { + "name": "cache_creation_input_tokens", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "cache_read_input_tokens": { + "name": "cache_read_input_tokens", + "type": "integer", + "primaryKey": false, + "notNull": true, + "default": 0 + }, + "reasoning_tokens": { + "name": "reasoning_tokens", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "output_tokens": { + "name": "output_tokens", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "cost": { + "name": "cost", + "type": "numeric(100, 20)", + "primaryKey": false, + "notNull": true + }, + "credits": { + "name": "credits", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "byok": { + "name": "byok", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "latency_ms": { + "name": "latency_ms", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "ttft_ms": { + "name": "ttft_ms", + "type": "integer", + "primaryKey": false, + "notNull": false + }, 
+ "user_id": { + "name": "user_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "org_id": { + "name": "org_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "repo_url": { + "name": "repo_url", + "type": "text", + "primaryKey": false, + "notNull": false + } + }, + "indexes": { + "message_user_id_idx": { + "name": "message_user_id_idx", + "columns": [ + { + "expression": "user_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "message_finished_at_user_id_idx": { + "name": "message_finished_at_user_id_idx", + "columns": [ + { + "expression": "finished_at", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "user_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "message_org_id_idx": { + "name": "message_org_id_idx", + "columns": [ + { + "expression": "org_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "message_org_id_finished_at_idx": { + "name": "message_org_id_finished_at_idx", + "columns": [ + { + "expression": "org_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "finished_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "message_user_id_user_id_fk": { + "name": "message_user_id_user_id_fk", + "tableFrom": "message", + "tableTo": "user", + "columnsFrom": [ + "user_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + }, + "message_org_id_org_id_fk": { + "name": "message_org_id_org_id_fk", + "tableFrom": "message", + "tableTo": "org", + "columnsFrom": [ + 
"org_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.org": { + "name": "org", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "name": { + "name": "name", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "slug": { + "name": "slug", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "description": { + "name": "description", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "owner_id": { + "name": "owner_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "stripe_customer_id": { + "name": "stripe_customer_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "stripe_subscription_id": { + "name": "stripe_subscription_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "current_period_start": { + "name": "current_period_start", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "current_period_end": { + "name": "current_period_end", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "auto_topup_enabled": { + "name": "auto_topup_enabled", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "auto_topup_threshold": { + "name": "auto_topup_threshold", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "auto_topup_amount": { + "name": "auto_topup_amount", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "credit_limit": { + "name": "credit_limit", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "billing_alerts": { + "name": "billing_alerts", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": true + }, + "usage_alerts": { + 
"name": "usage_alerts", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": true + }, + "weekly_reports": { + "name": "weekly_reports", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": {}, + "foreignKeys": { + "org_owner_id_user_id_fk": { + "name": "org_owner_id_user_id_fk", + "tableFrom": "org", + "tableTo": "user", + "columnsFrom": [ + "owner_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": { + "org_slug_unique": { + "name": "org_slug_unique", + "nullsNotDistinct": false, + "columns": [ + "slug" + ] + }, + "org_stripe_customer_id_unique": { + "name": "org_stripe_customer_id_unique", + "nullsNotDistinct": false, + "columns": [ + "stripe_customer_id" + ] + } + }, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.org_feature": { + "name": "org_feature", + "schema": "", + "columns": { + "org_id": { + "name": "org_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "feature": { + "name": "feature", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "config": { + "name": "config", + "type": "jsonb", + "primaryKey": false, + "notNull": false + }, + "is_active": { + "name": "is_active", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + 
"default": "now()" + } + }, + "indexes": { + "idx_org_feature_active": { + "name": "idx_org_feature_active", + "columns": [ + { + "expression": "org_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "is_active", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "org_feature_org_id_org_id_fk": { + "name": "org_feature_org_id_org_id_fk", + "tableFrom": "org_feature", + "tableTo": "org", + "columnsFrom": [ + "org_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": { + "org_feature_org_id_feature_pk": { + "name": "org_feature_org_id_feature_pk", + "columns": [ + "org_id", + "feature" + ] + } + }, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.org_invite": { + "name": "org_invite", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "org_id": { + "name": "org_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "email": { + "name": "email", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "role": { + "name": "role", + "type": "org_role", + "typeSchema": "public", + "primaryKey": false, + "notNull": true + }, + "token": { + "name": "token", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "invited_by": { + "name": "invited_by", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "expires_at": { + "name": "expires_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "accepted_at": { + "name": "accepted_at", + "type": "timestamp with time zone", + "primaryKey": 
false, + "notNull": false + }, + "accepted_by": { + "name": "accepted_by", + "type": "text", + "primaryKey": false, + "notNull": false + } + }, + "indexes": { + "idx_org_invite_token": { + "name": "idx_org_invite_token", + "columns": [ + { + "expression": "token", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_org_invite_email": { + "name": "idx_org_invite_email", + "columns": [ + { + "expression": "org_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "email", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_org_invite_expires": { + "name": "idx_org_invite_expires", + "columns": [ + { + "expression": "expires_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "org_invite_org_id_org_id_fk": { + "name": "org_invite_org_id_org_id_fk", + "tableFrom": "org_invite", + "tableTo": "org", + "columnsFrom": [ + "org_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + }, + "org_invite_invited_by_user_id_fk": { + "name": "org_invite_invited_by_user_id_fk", + "tableFrom": "org_invite", + "tableTo": "user", + "columnsFrom": [ + "invited_by" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + }, + "org_invite_accepted_by_user_id_fk": { + "name": "org_invite_accepted_by_user_id_fk", + "tableFrom": "org_invite", + "tableTo": "user", + "columnsFrom": [ + "accepted_by" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": { + "org_invite_token_unique": { + "name": "org_invite_token_unique", + "nullsNotDistinct": false, + "columns": 
[ + "token" + ] + } + }, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.org_member": { + "name": "org_member", + "schema": "", + "columns": { + "org_id": { + "name": "org_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "user_id": { + "name": "user_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "role": { + "name": "role", + "type": "org_role", + "typeSchema": "public", + "primaryKey": false, + "notNull": true + }, + "joined_at": { + "name": "joined_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": {}, + "foreignKeys": { + "org_member_org_id_org_id_fk": { + "name": "org_member_org_id_org_id_fk", + "tableFrom": "org_member", + "tableTo": "org", + "columnsFrom": [ + "org_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + }, + "org_member_user_id_user_id_fk": { + "name": "org_member_user_id_user_id_fk", + "tableFrom": "org_member", + "tableTo": "user", + "columnsFrom": [ + "user_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": { + "org_member_org_id_user_id_pk": { + "name": "org_member_org_id_user_id_pk", + "columns": [ + "org_id", + "user_id" + ] + } + }, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.org_repo": { + "name": "org_repo", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "org_id": { + "name": "org_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "repo_url": { + "name": "repo_url", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "repo_name": { + "name": "repo_name", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "repo_owner": { + "name": "repo_owner", + "type": "text", + "primaryKey": false, + 
"notNull": false + }, + "approved_by": { + "name": "approved_by", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "approved_at": { + "name": "approved_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "is_active": { + "name": "is_active", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": true + } + }, + "indexes": { + "idx_org_repo_active": { + "name": "idx_org_repo_active", + "columns": [ + { + "expression": "org_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "is_active", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_org_repo_unique": { + "name": "idx_org_repo_unique", + "columns": [ + { + "expression": "org_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "repo_url", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "org_repo_org_id_org_id_fk": { + "name": "org_repo_org_id_org_id_fk", + "tableFrom": "org_repo", + "tableTo": "org", + "columnsFrom": [ + "org_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + }, + "org_repo_approved_by_user_id_fk": { + "name": "org_repo_approved_by_user_id_fk", + "tableFrom": "org_repo", + "tableTo": "user", + "columnsFrom": [ + "approved_by" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.publisher": { + "name": "publisher", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "name": { + "name": "name", + "type": "text", 
+ "primaryKey": false, + "notNull": true + }, + "email": { + "name": "email", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "verified": { + "name": "verified", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "bio": { + "name": "bio", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "avatar_url": { + "name": "avatar_url", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "user_id": { + "name": "user_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "org_id": { + "name": "org_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "created_by": { + "name": "created_by", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": {}, + "foreignKeys": { + "publisher_user_id_user_id_fk": { + "name": "publisher_user_id_user_id_fk", + "tableFrom": "publisher", + "tableTo": "user", + "columnsFrom": [ + "user_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + }, + "publisher_org_id_org_id_fk": { + "name": "publisher_org_id_org_id_fk", + "tableFrom": "publisher", + "tableTo": "org", + "columnsFrom": [ + "org_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + }, + "publisher_created_by_user_id_fk": { + "name": "publisher_created_by_user_id_fk", + "tableFrom": "publisher", + "tableTo": "user", + "columnsFrom": [ + "created_by" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": { + "publisher_single_owner": { + 
"name": "publisher_single_owner", + "value": "(\"publisher\".\"user_id\" IS NOT NULL AND \"publisher\".\"org_id\" IS NULL) OR\n (\"publisher\".\"user_id\" IS NULL AND \"publisher\".\"org_id\" IS NOT NULL)" + } + }, + "isRLSEnabled": false + }, + "public.referral": { + "name": "referral", + "schema": "", + "columns": { + "referrer_id": { + "name": "referrer_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "referred_id": { + "name": "referred_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "status": { + "name": "status", + "type": "referral_status", + "typeSchema": "public", + "primaryKey": false, + "notNull": true, + "default": "'pending'" + }, + "credits": { + "name": "credits", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "is_legacy": { + "name": "is_legacy", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "completed_at": { + "name": "completed_at", + "type": "timestamp", + "primaryKey": false, + "notNull": false + } + }, + "indexes": {}, + "foreignKeys": { + "referral_referrer_id_user_id_fk": { + "name": "referral_referrer_id_user_id_fk", + "tableFrom": "referral", + "tableTo": "user", + "columnsFrom": [ + "referrer_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + }, + "referral_referred_id_user_id_fk": { + "name": "referral_referred_id_user_id_fk", + "tableFrom": "referral", + "tableTo": "user", + "columnsFrom": [ + "referred_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": { + "referral_referrer_id_referred_id_pk": { + "name": "referral_referrer_id_referred_id_pk", + "columns": [ + "referrer_id", + "referred_id" + ] + } + }, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + 
"isRLSEnabled": false + }, + "public.session": { + "name": "session", + "schema": "", + "columns": { + "sessionToken": { + "name": "sessionToken", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "userId": { + "name": "userId", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "expires": { + "name": "expires", + "type": "timestamp", + "primaryKey": false, + "notNull": true + }, + "fingerprint_id": { + "name": "fingerprint_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "type": { + "name": "type", + "type": "session_type", + "typeSchema": "public", + "primaryKey": false, + "notNull": true, + "default": "'web'" + }, + "created_at": { + "name": "created_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": {}, + "foreignKeys": { + "session_userId_user_id_fk": { + "name": "session_userId_user_id_fk", + "tableFrom": "session", + "tableTo": "user", + "columnsFrom": [ + "userId" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + }, + "session_fingerprint_id_fingerprint_id_fk": { + "name": "session_fingerprint_id_fingerprint_id_fk", + "tableFrom": "session", + "tableTo": "fingerprint", + "columnsFrom": [ + "fingerprint_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.subscription": { + "name": "subscription", + "schema": "", + "columns": { + "stripe_subscription_id": { + "name": "stripe_subscription_id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "stripe_customer_id": { + "name": "stripe_customer_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "user_id": { + "name": "user_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "stripe_price_id": { + "name": "stripe_price_id", + 
"type": "text", + "primaryKey": false, + "notNull": true + }, + "tier": { + "name": "tier", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "scheduled_tier": { + "name": "scheduled_tier", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "status": { + "name": "status", + "type": "subscription_status", + "typeSchema": "public", + "primaryKey": false, + "notNull": true, + "default": "'active'" + }, + "billing_period_start": { + "name": "billing_period_start", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true + }, + "billing_period_end": { + "name": "billing_period_end", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true + }, + "cancel_at_period_end": { + "name": "cancel_at_period_end", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "canceled_at": { + "name": "canceled_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": { + "idx_subscription_customer": { + "name": "idx_subscription_customer", + "columns": [ + { + "expression": "stripe_customer_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_subscription_user": { + "name": "idx_subscription_user", + "columns": [ + { + "expression": "user_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_subscription_status": { + "name": "idx_subscription_status", + "columns": [ + { + "expression": "status", + "isExpression": 
false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"subscription\".\"status\" = 'active'", + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "subscription_user_id_user_id_fk": { + "name": "subscription_user_id_user_id_fk", + "tableFrom": "subscription", + "tableTo": "user", + "columnsFrom": [ + "user_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.sync_failure": { + "name": "sync_failure", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "provider": { + "name": "provider", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "last_attempt_at": { + "name": "last_attempt_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "retry_count": { + "name": "retry_count", + "type": "integer", + "primaryKey": false, + "notNull": true, + "default": 1 + }, + "last_error": { + "name": "last_error", + "type": "text", + "primaryKey": false, + "notNull": true + } + }, + "indexes": { + "idx_sync_failure_retry": { + "name": "idx_sync_failure_retry", + "columns": [ + { + "expression": "retry_count", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "last_attempt_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"sync_failure\".\"retry_count\" < 5", + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": 
false + }, + "public.user": { + "name": "user", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "name": { + "name": "name", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "email": { + "name": "email", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "password": { + "name": "password", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "emailVerified": { + "name": "emailVerified", + "type": "timestamp", + "primaryKey": false, + "notNull": false + }, + "image": { + "name": "image", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "stripe_customer_id": { + "name": "stripe_customer_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "next_quota_reset": { + "name": "next_quota_reset", + "type": "timestamp", + "primaryKey": false, + "notNull": false, + "default": "now() + INTERVAL '1 month'" + }, + "created_at": { + "name": "created_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "referral_code": { + "name": "referral_code", + "type": "text", + "primaryKey": false, + "notNull": false, + "default": "'ref-' || gen_random_uuid()" + }, + "referral_limit": { + "name": "referral_limit", + "type": "integer", + "primaryKey": false, + "notNull": true, + "default": 5 + }, + "discord_id": { + "name": "discord_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "handle": { + "name": "handle", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "auto_topup_enabled": { + "name": "auto_topup_enabled", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "auto_topup_threshold": { + "name": "auto_topup_threshold", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "auto_topup_amount": { + "name": "auto_topup_amount", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + 
"banned": { + "name": "banned", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "fallback_to_a_la_carte": { + "name": "fallback_to_a_la_carte", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": { + "user_email_unique": { + "name": "user_email_unique", + "nullsNotDistinct": false, + "columns": [ + "email" + ] + }, + "user_stripe_customer_id_unique": { + "name": "user_stripe_customer_id_unique", + "nullsNotDistinct": false, + "columns": [ + "stripe_customer_id" + ] + }, + "user_referral_code_unique": { + "name": "user_referral_code_unique", + "nullsNotDistinct": false, + "columns": [ + "referral_code" + ] + }, + "user_discord_id_unique": { + "name": "user_discord_id_unique", + "nullsNotDistinct": false, + "columns": [ + "discord_id" + ] + }, + "user_handle_unique": { + "name": "user_handle_unique", + "nullsNotDistinct": false, + "columns": [ + "handle" + ] + } + }, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.verificationToken": { + "name": "verificationToken", + "schema": "", + "columns": { + "identifier": { + "name": "identifier", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "token": { + "name": "token", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "expires": { + "name": "expires", + "type": "timestamp", + "primaryKey": false, + "notNull": true + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": { + "verificationToken_identifier_token_pk": { + "name": "verificationToken_identifier_token_pk", + "columns": [ + "identifier", + "token" + ] + } + }, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + } + }, + "enums": { + "public.referral_status": { + "name": "referral_status", + "schema": "public", + "values": [ + "pending", + "completed" + ] + }, + 
"public.agent_run_status": { + "name": "agent_run_status", + "schema": "public", + "values": [ + "running", + "completed", + "failed", + "cancelled" + ] + }, + "public.agent_step_status": { + "name": "agent_step_status", + "schema": "public", + "values": [ + "running", + "completed", + "skipped" + ] + }, + "public.api_key_type": { + "name": "api_key_type", + "schema": "public", + "values": [ + "anthropic", + "gemini", + "openai" + ] + }, + "public.free_session_status": { + "name": "free_session_status", + "schema": "public", + "values": [ + "queued", + "active" + ] + }, + "public.grant_type": { + "name": "grant_type", + "schema": "public", + "values": [ + "free", + "referral", + "referral_legacy", + "subscription", + "purchase", + "admin", + "organization", + "ad" + ] + }, + "public.org_role": { + "name": "org_role", + "schema": "public", + "values": [ + "owner", + "admin", + "member" + ] + }, + "public.session_type": { + "name": "session_type", + "schema": "public", + "values": [ + "web", + "pat", + "cli" + ] + }, + "public.subscription_status": { + "name": "subscription_status", + "schema": "public", + "values": [ + "incomplete", + "incomplete_expired", + "trialing", + "active", + "past_due", + "canceled", + "unpaid", + "paused" + ] + } + }, + "schemas": {}, + "sequences": {}, + "roles": {}, + "policies": {}, + "views": {}, + "_meta": { + "columns": {}, + "schemas": {}, + "tables": {} + } +} \ No newline at end of file diff --git a/packages/internal/src/db/migrations/meta/_journal.json b/packages/internal/src/db/migrations/meta/_journal.json index 137086659..bba4ab5ed 100644 --- a/packages/internal/src/db/migrations/meta/_journal.json +++ b/packages/internal/src/db/migrations/meta/_journal.json @@ -309,6 +309,13 @@ "when": 1776461642346, "tag": "0043_vengeful_boomer", "breakpoints": true + }, + { + "idx": 44, + "version": "7", + "when": 1776719872222, + "tag": "0044_violet_stingray", + "breakpoints": true } ] } \ No newline at end of file diff --git 
a/packages/internal/src/db/schema.ts b/packages/internal/src/db/schema.ts index cd7762eee..ba481c89a 100644 --- a/packages/internal/src/db/schema.ts +++ b/packages/internal/src/db/schema.ts @@ -823,6 +823,10 @@ export const freeSession = pgTable( .references(() => user.id, { onDelete: 'cascade' }), status: freeSessionStatusEnum('status').notNull(), active_instance_id: text('active_instance_id').notNull(), + /** Which freebuff model this row is queued for / locked to. Each model has + * its own queue (admission picks one queued user per model per tick) and + * the model is fixed for the life of an active session. */ + model: text('model').notNull(), queued_at: timestamp('queued_at', { mode: 'date', withTimezone: true, @@ -851,8 +855,8 @@ export const freeSession = pgTable( .defaultNow(), }, (table) => [ - // Dequeue: SELECT ... WHERE status='queued' ORDER BY queued_at LIMIT N - index('idx_free_session_queue').on(table.status, table.queued_at), + // Per-model dequeue: WHERE status='queued' AND model=$1 ORDER BY queued_at + index('idx_free_session_queue').on(table.status, table.model, table.queued_at), // Expiry sweep: SELECT ... 
WHERE status='active' AND expires_at < now() index('idx_free_session_expiry').on(table.expires_at), ], diff --git a/web/src/app/api/v1/chat/completions/_post.ts b/web/src/app/api/v1/chat/completions/_post.ts index f3640f4a3..8809697f3 100644 --- a/web/src/app/api/v1/chat/completions/_post.ts +++ b/web/src/app/api/v1/chat/completions/_post.ts @@ -124,6 +124,7 @@ const STATUS_BY_GATE_CODE = { waiting_room_queued: 429, session_superseded: 409, session_expired: 410, + session_model_mismatch: 409, freebuff_update_required: 426, } satisfies Record @@ -394,6 +395,7 @@ export async function postChatCompletions(params: { userId, userEmail: userInfo.email, claimedInstanceId, + requestedModel: typedBody.model, }) if (!gate.ok) { trackEvent({ diff --git a/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts b/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts index eef464fee..3b9db7a49 100644 --- a/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts +++ b/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts @@ -12,6 +12,8 @@ import type { SessionDeps } from '@/server/free-session/public-api' import type { InternalSessionRow } from '@/server/free-session/types' import type { NextRequest } from 'next/server' +const DEFAULT_MODEL = 'z-ai/glm-5.1' + function makeReq( apiKey: string | null, opts: { instanceId?: string; cfCountry?: string } = {}, @@ -37,16 +39,24 @@ function makeSessionDeps(overrides: Partial = {}): SessionDeps & { graceMs: 30 * 60 * 1000, now: () => now, getSessionRow: async (userId) => rows.get(userId) ?? null, - queueDepth: async () => [...rows.values()].filter((r) => r.status === 'queued').length, + queueDepthsByModel: async () => { + const out: Record = {} + for (const r of rows.values()) { + if (r.status !== 'queued') continue + out[r.model] = (out[r.model] ?? 
0) + 1 + } + return out + }, queuePositionFor: async () => 1, endSession: async (userId) => { rows.delete(userId) }, - joinOrTakeOver: async ({ userId, now }) => { + joinOrTakeOver: async ({ userId, model, now }) => { const r: InternalSessionRow = { user_id: userId, status: 'queued', active_instance_id: `inst-${++instanceCounter}`, + model, queued_at: now, admitted_at: null, expires_at: null, @@ -157,6 +167,7 @@ describe('GET /api/v1/freebuff/session', () => { user_id: 'u1', status: 'active', active_instance_id: 'real-id', + model: DEFAULT_MODEL, queued_at: new Date(), admitted_at: new Date(), expires_at: new Date(Date.now() + 60_000), @@ -180,6 +191,7 @@ describe('DELETE /api/v1/freebuff/session', () => { user_id: 'u1', status: 'active', active_instance_id: 'x', + model: DEFAULT_MODEL, queued_at: new Date(), admitted_at: new Date(), expires_at: new Date(Date.now() + 60_000), diff --git a/web/src/app/api/v1/freebuff/session/_handlers.ts b/web/src/app/api/v1/freebuff/session/_handlers.ts index 6f1ae0664..073e7522f 100644 --- a/web/src/app/api/v1/freebuff/session/_handlers.ts +++ b/web/src/app/api/v1/freebuff/session/_handlers.ts @@ -39,6 +39,8 @@ function countryBlockedResponse(req: NextRequest): NextResponse | null { /** Header the CLI uses to identify which instance is polling. Used by GET to * detect when another CLI on the same account has rotated the id. */ export const FREEBUFF_INSTANCE_HEADER = 'x-freebuff-instance-id' +/** Header the CLI sends on POST to pick which model's queue to join. */ +export const FREEBUFF_MODEL_HEADER = 'x-freebuff-model' export interface FreebuffSessionDeps { getUserInfoFromApiKey: GetUserInfoFromApiKeyFn @@ -122,13 +124,20 @@ export async function postFreebuffSession( const blocked = countryBlockedResponse(req) if (blocked) return blocked + const requestedModel = req.headers.get(FREEBUFF_MODEL_HEADER) ?? 
'' + try { const state = await requestSession({ userId: auth.userId, userEmail: auth.userEmail, + model: requestedModel, deps: deps.sessionDeps, }) - return NextResponse.json(state, { status: 200 }) + // model_locked is a 409 so it's distinguishable from a normal queued/active + // response on the client. The CLI translates it into a "switch model?" + // confirmation prompt. + const status = state.status === 'model_locked' ? 409 : 200 + return NextResponse.json(state, { status }) } catch (error) { return serverError(deps, 'POST', auth.userId, error) } diff --git a/web/src/server/free-session/__tests__/admission.test.ts b/web/src/server/free-session/__tests__/admission.test.ts index a10a29713..43fe11a4c 100644 --- a/web/src/server/free-session/__tests__/admission.test.ts +++ b/web/src/server/free-session/__tests__/admission.test.ts @@ -3,9 +3,10 @@ import { describe, expect, test } from 'bun:test' import { runAdmissionTick } from '../admission' import type { AdmissionDeps } from '../admission' -import type { FireworksHealth } from '../fireworks-health' +import type { FireworksHealth, FleetHealth } from '../fireworks-health' const NOW = new Date('2026-04-17T12:00:00Z') +const TEST_MODEL = 'test-model' function makeAdmissionDeps(overrides: Partial = {}): AdmissionDeps & { calls: { admit: number } @@ -16,10 +17,9 @@ function makeAdmissionDeps(overrides: Partial = {}): AdmissionDep sweepExpired: async () => 0, queueDepth: async () => 0, activeCount: async () => 0, - getFireworksHealth: async () => 'healthy', - admitFromQueue: async ({ getFireworksHealth }) => { + getFleetHealth: async () => ({}), + admitFromQueue: async ({ health }) => { calls.admit += 1 - const health = await getFireworksHealth() if (health !== 'healthy') { return { admitted: [], skipped: health } } @@ -28,11 +28,18 @@ function makeAdmissionDeps(overrides: Partial = {}): AdmissionDep sessionLengthMs: 60 * 60 * 1000, graceMs: 30 * 60 * 1000, now: () => NOW, + // Default to a single model so per-tick 
assertions (admitted: 1) stay + // crisp regardless of how many production models are registered. + models: [TEST_MODEL], ...overrides, } return deps } +function fleet(health: FireworksHealth, model: string = TEST_MODEL): FleetHealth { + return { [model]: health } +} + describe('runAdmissionTick', () => { test('admits one user per tick when healthy', async () => { const deps = makeAdmissionDeps() @@ -41,18 +48,18 @@ describe('runAdmissionTick', () => { expect(result.skipped).toBeNull() }) - test('skips admission when Fireworks is degraded', async () => { + test('skips admission when the model deployment is degraded', async () => { const deps = makeAdmissionDeps({ - getFireworksHealth: async () => 'degraded' as FireworksHealth, + getFleetHealth: async () => fleet('degraded'), }) const result = await runAdmissionTick(deps) expect(result.admitted).toBe(0) expect(result.skipped).toBe('degraded') }) - test('skips admission when Fireworks is unhealthy', async () => { + test('skips admission when the model deployment is unhealthy', async () => { const deps = makeAdmissionDeps({ - getFireworksHealth: async () => 'unhealthy' as FireworksHealth, + getFleetHealth: async () => fleet('unhealthy'), }) const result = await runAdmissionTick(deps) expect(result.admitted).toBe(0) @@ -66,13 +73,38 @@ describe('runAdmissionTick', () => { swept = 3 return 3 }, - getFireworksHealth: async () => 'unhealthy' as FireworksHealth, + getFleetHealth: async () => fleet('unhealthy'), }) const result = await runAdmissionTick(deps) expect(swept).toBe(3) expect(result.expired).toBe(3) }) + test('admits per-model based on per-deployment health', async () => { + // Two models: 'good' is healthy, 'bad' is degraded. A single tick should + // admit 1 from 'good' and skip 'bad', surfacing the worst skip reason. 
+ const deps = makeAdmissionDeps({ + models: ['good', 'bad'], + getFleetHealth: async () => ({ good: 'healthy', bad: 'degraded' }), + }) + const result = await runAdmissionTick(deps) + expect(result.admitted).toBe(1) + expect(result.skipped).toBe('degraded') + }) + + test('absent fleet entry defaults to healthy (serverless model)', async () => { + // Model isn't in the fleet map (e.g. served via Fireworks serverless). + // Admission should proceed rather than stall waiting for a probe that + // will never include this deployment. + const deps = makeAdmissionDeps({ + models: ['serverless-model'], + getFleetHealth: async () => ({}), + }) + const result = await runAdmissionTick(deps) + expect(result.admitted).toBe(1) + expect(result.skipped).toBeNull() + }) + test('propagates expiry count and admit count together', async () => { const deps = makeAdmissionDeps({ sweepExpired: async () => 2, diff --git a/web/src/server/free-session/__tests__/fireworks-health.test.ts b/web/src/server/free-session/__tests__/fireworks-health.test.ts index 3475769cd..b05fe8df9 100644 --- a/web/src/server/free-session/__tests__/fireworks-health.test.ts +++ b/web/src/server/free-session/__tests__/fireworks-health.test.ts @@ -4,7 +4,7 @@ import { KV_BLOCKS_DEGRADED_FRACTION, KV_BLOCKS_UNHEALTHY_FRACTION, PREFILL_QUEUE_P90_DEGRADED_MS, - classify, + classifyOne, } from '../fireworks-health' type PromSample = { name: string; labels: Record; value: number } @@ -57,7 +57,7 @@ function errors(code: string, rate: number): PromSample { describe('fireworks health classifier', () => { test('healthy when queue well under the threshold', () => { const samples: PromSample[] = [kvBlocks(0.5), ...prefillQueueBuckets(150)] - expect(classify(samples, [DEPLOY])).toBe('healthy') + expect(classifyOne(samples, DEPLOY)).toBe('healthy') }) test('degraded when prefill queue p90 exceeds the threshold', () => { @@ -65,7 +65,7 @@ describe('fireworks health classifier', () => { kvBlocks(0.5), 
...prefillQueueBuckets(PREFILL_QUEUE_P90_DEGRADED_MS + 500), ] - expect(classify(samples, [DEPLOY])).toBe('degraded') + expect(classifyOne(samples, DEPLOY)).toBe('degraded') }) test('degraded when KV blocks cross the soft threshold (leading indicator)', () => { @@ -73,7 +73,7 @@ describe('fireworks health classifier', () => { kvBlocks(KV_BLOCKS_DEGRADED_FRACTION + 0.01), ...prefillQueueBuckets(300), ] - expect(classify(samples, [DEPLOY])).toBe('degraded') + expect(classifyOne(samples, DEPLOY)).toBe('degraded') }) test('unhealthy when KV blocks exceed the backstop', () => { @@ -81,7 +81,7 @@ describe('fireworks health classifier', () => { kvBlocks(KV_BLOCKS_UNHEALTHY_FRACTION + 0.005), ...prefillQueueBuckets(300), ] - expect(classify(samples, [DEPLOY])).toBe('unhealthy') + expect(classifyOne(samples, DEPLOY)).toBe('unhealthy') }) test('unhealthy when 5xx error fraction exceeds the threshold', () => { @@ -91,7 +91,7 @@ describe('fireworks health classifier', () => { requests(1), errors('500', 0.2), ] - expect(classify(samples, [DEPLOY])).toBe('unhealthy') + expect(classifyOne(samples, DEPLOY)).toBe('unhealthy') }) test('ignores high error fraction when traffic is too low to be meaningful', () => { @@ -101,14 +101,17 @@ describe('fireworks health classifier', () => { requests(0.05), errors('500', 0.05), ] - expect(classify(samples, [DEPLOY])).toBe('healthy') + expect(classifyOne(samples, DEPLOY)).toBe('healthy') }) test('healthy with no data yet (new deployment, no events)', () => { - expect(classify([], [DEPLOY])).toBe('healthy') + expect(classifyOne([], DEPLOY)).toBe('healthy') }) - test('worst-of across multiple deployments — unhealthy wins over degraded', () => { + test('classifies deployments independently — one bad deployment does not affect another', () => { + // The fleet probe builds the result by classifying each deployment + // separately, so a saturated 'other' deployment leaves DEPLOY's + // (only-degraded) verdict intact. 
const other = 'other123' const samples: PromSample[] = [ kvBlocks(0.5), @@ -119,6 +122,7 @@ describe('fireworks health classifier', () => { value: KV_BLOCKS_UNHEALTHY_FRACTION + 0.005, }, ] - expect(classify(samples, [DEPLOY, other])).toBe('unhealthy') + expect(classifyOne(samples, DEPLOY)).toBe('degraded') + expect(classifyOne(samples, other)).toBe('unhealthy') }) }) diff --git a/web/src/server/free-session/__tests__/public-api.test.ts b/web/src/server/free-session/__tests__/public-api.test.ts index b19f24ea0..7585d8927 100644 --- a/web/src/server/free-session/__tests__/public-api.test.ts +++ b/web/src/server/free-session/__tests__/public-api.test.ts @@ -6,12 +6,14 @@ import { getSessionState, requestSession, } from '../public-api' +import { FreeSessionModelLockedError } from '../store' import type { SessionDeps } from '../public-api' import type { InternalSessionRow } from '../types' const SESSION_LEN = 60 * 60 * 1000 const GRACE_MS = 30 * 60 * 1000 +const DEFAULT_MODEL = 'z-ai/glm-5.1' function makeDeps(overrides: Partial = {}): SessionDeps & { rows: Map @@ -41,15 +43,18 @@ function makeDeps(overrides: Partial = {}): SessionDeps & { endSession: async (userId) => { rows.delete(userId) }, - queueDepth: async () => { - let n = 0 - for (const r of rows.values()) if (r.status === 'queued') n++ - return n + queueDepthsByModel: async () => { + const out: Record = {} + for (const r of rows.values()) { + if (r.status !== 'queued') continue + out[r.model] = (out[r.model] ?? 
0) + 1 + } + return out }, - queuePositionFor: async ({ userId, queuedAt }) => { + queuePositionFor: async ({ userId, model, queuedAt }) => { let pos = 0 for (const r of rows.values()) { - if (r.status !== 'queued') continue + if (r.status !== 'queued' || r.model !== model) continue if ( r.queued_at.getTime() < queuedAt.getTime() || (r.queued_at.getTime() === queuedAt.getTime() && r.user_id <= userId) @@ -59,7 +64,7 @@ function makeDeps(overrides: Partial = {}): SessionDeps & { } return pos }, - joinOrTakeOver: async ({ userId, now }) => { + joinOrTakeOver: async ({ userId, model, now }) => { const existing = rows.get(userId) const nextInstance = newInstanceId() if (!existing) { @@ -67,6 +72,7 @@ function makeDeps(overrides: Partial = {}): SessionDeps & { user_id: userId, status: 'queued', active_instance_id: nextInstance, + model, queued_at: now, admitted_at: null, expires_at: null, @@ -81,17 +87,25 @@ function makeDeps(overrides: Partial = {}): SessionDeps & { existing.expires_at && existing.expires_at.getTime() > now.getTime() ) { + if (existing.model !== model) { + throw new FreeSessionModelLockedError(existing.model) + } existing.active_instance_id = nextInstance existing.updated_at = now return existing } if (existing.status === 'queued') { existing.active_instance_id = nextInstance + if (existing.model !== model) { + existing.model = model + existing.queued_at = now + } existing.updated_at = now return existing } existing.status = 'queued' existing.active_instance_id = nextInstance + existing.model = model existing.queued_at = now existing.admitted_at = null existing.expires_at = null @@ -111,13 +125,17 @@ describe('requestSession', () => { test('disabled flag returns { status: disabled } and does not touch DB', async () => { const offDeps = makeDeps({ isWaitingRoomEnabled: () => false }) - const state = await requestSession({ userId: 'u1', deps: offDeps }) + const state = await requestSession({ + userId: 'u1', + model: DEFAULT_MODEL, + deps: offDeps, + }) 
expect(state).toEqual({ status: 'disabled' }) expect(offDeps.rows.size).toBe(0) }) test('first call puts user in queue at position 1', async () => { - const state = await requestSession({ userId: 'u1', deps }) + const state = await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps }) expect(state.status).toBe('queued') if (state.status !== 'queued') throw new Error('unreachable') expect(state.position).toBe(1) @@ -125,18 +143,34 @@ describe('requestSession', () => { expect(state.instanceId).toBe('inst-1') }) + test('queued response includes a per-model depth snapshot for the selector', async () => { + // Seed 2 users in glm + 1 in minimax so the returned map captures both. + await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps }) + deps._tick(new Date(deps._now().getTime() + 1000)) + await requestSession({ userId: 'u2', model: DEFAULT_MODEL, deps }) + deps._tick(new Date(deps._now().getTime() + 1000)) + await requestSession({ userId: 'u3', model: 'minimax/minimax-m2.7', deps }) + + const state = await getSessionState({ userId: 'u1', deps }) + if (state.status !== 'queued') throw new Error('unreachable') + expect(state.queueDepthByModel).toEqual({ + [DEFAULT_MODEL]: 2, + 'minimax/minimax-m2.7': 1, + }) + }) + test('second call from same user rotates instance id, keeps queue position', async () => { - await requestSession({ userId: 'u1', deps }) - const second = await requestSession({ userId: 'u1', deps }) + await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps }) + const second = await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps }) if (second.status !== 'queued') throw new Error('unreachable') expect(second.position).toBe(1) expect(second.instanceId).toBe('inst-2') }) test('multiple users queue in FIFO order', async () => { - await requestSession({ userId: 'u1', deps }) + await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps }) deps._tick(new Date(deps._now().getTime() + 1000)) - await requestSession({ userId: 
'u2', deps }) + await requestSession({ userId: 'u2', model: DEFAULT_MODEL, deps }) const s1 = await getSessionState({ userId: 'u1', deps }) const s2 = await getSessionState({ userId: 'u2', deps }) @@ -147,13 +181,13 @@ describe('requestSession', () => { test('active unexpired session → rotate instance id, preserve active state', async () => { // Prime a user into active state manually. - await requestSession({ userId: 'u1', deps }) + await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps }) const row = deps.rows.get('u1')! row.status = 'active' row.admitted_at = deps._now() row.expires_at = new Date(deps._now().getTime() + SESSION_LEN) - const second = await requestSession({ userId: 'u1', deps }) + const second = await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps }) expect(second.status).toBe('active') if (second.status !== 'active') throw new Error('unreachable') expect(second.instanceId).not.toBe('inst-1') // rotated @@ -178,7 +212,7 @@ describe('getSessionState', () => { }) test('active session with matching instance id returns active', async () => { - await requestSession({ userId: 'u1', deps }) + await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps }) const row = deps.rows.get('u1')! row.status = 'active' row.admitted_at = deps._now() @@ -193,7 +227,7 @@ describe('getSessionState', () => { }) test('active session with mismatched instance id returns superseded', async () => { - await requestSession({ userId: 'u1', deps }) + await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps }) const row = deps.rows.get('u1')! row.status = 'active' row.admitted_at = deps._now() @@ -210,7 +244,7 @@ describe('getSessionState', () => { test('omitted claimedInstanceId on active session returns active (read-only)', async () => { // Polling without an id (e.g. very first GET before POST has resolved) // must not be classified as superseded — only an explicit mismatch is. 
- await requestSession({ userId: 'u1', deps }) + await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps }) const row = deps.rows.get('u1')! row.status = 'active' row.admitted_at = deps._now() @@ -221,7 +255,7 @@ describe('getSessionState', () => { }) test('row inside grace window returns ended (with instanceId)', async () => { - await requestSession({ userId: 'u1', deps }) + await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps }) const row = deps.rows.get('u1')! row.status = 'active' row.admitted_at = new Date(deps._now().getTime() - SESSION_LEN - 60_000) @@ -239,7 +273,7 @@ describe('getSessionState', () => { }) test('row past grace window returns none', async () => { - await requestSession({ userId: 'u1', deps }) + await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps }) const row = deps.rows.get('u1')! row.status = 'active' row.admitted_at = new Date(deps._now().getTime() - 2 * SESSION_LEN) @@ -305,7 +339,7 @@ describe('checkSessionAdmissible', () => { }) test('queued session → waiting_room_queued', async () => { - await requestSession({ userId: 'u1', deps }) + await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps }) const result = await checkSessionAdmissible({ userId: 'u1', claimedInstanceId: 'inst-1', @@ -316,7 +350,7 @@ describe('checkSessionAdmissible', () => { }) test('active + matching instance id → ok', async () => { - await requestSession({ userId: 'u1', deps }) + await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps }) const row = deps.rows.get('u1')! row.status = 'active' row.admitted_at = deps._now() @@ -333,7 +367,7 @@ describe('checkSessionAdmissible', () => { }) test('active + wrong instance id → session_superseded', async () => { - await requestSession({ userId: 'u1', deps }) + await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps }) const row = deps.rows.get('u1')! 
row.status = 'active' row.admitted_at = deps._now() @@ -351,7 +385,7 @@ describe('checkSessionAdmissible', () => { test('missing instance id → freebuff_update_required (pre-waiting-room CLI)', async () => { // Classified up front regardless of row state: old clients never send an // id, so we surface a distinct code that maps to 426 Upgrade Required. - await requestSession({ userId: 'u1', deps }) + await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps }) const row = deps.rows.get('u1')! row.status = 'active' row.admitted_at = deps._now() @@ -367,7 +401,7 @@ describe('checkSessionAdmissible', () => { }) test('active inside grace window → ok with reason=draining', async () => { - await requestSession({ userId: 'u1', deps }) + await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps }) const row = deps.rows.get('u1')! row.status = 'active' row.admitted_at = new Date(deps._now().getTime() - SESSION_LEN - 60_000) @@ -385,7 +419,7 @@ describe('checkSessionAdmissible', () => { }) test('active past the grace window → session_expired', async () => { - await requestSession({ userId: 'u1', deps }) + await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps }) const row = deps.rows.get('u1')! row.status = 'active' row.admitted_at = new Date(deps._now().getTime() - 2 * SESSION_LEN) @@ -401,7 +435,7 @@ describe('checkSessionAdmissible', () => { }) test('draining + wrong instance id still rejects with session_superseded', async () => { - await requestSession({ userId: 'u1', deps }) + await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps }) const row = deps.rows.get('u1')! 
row.status = 'active' row.admitted_at = new Date(deps._now().getTime() - SESSION_LEN - 60_000) @@ -420,7 +454,7 @@ describe('checkSessionAdmissible', () => { describe('endUserSession', () => { test('removes row', async () => { const deps = makeDeps() - await requestSession({ userId: 'u1', deps }) + await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps }) expect(deps.rows.has('u1')).toBe(true) await endUserSession({ userId: 'u1', deps }) expect(deps.rows.has('u1')).toBe(false) @@ -432,6 +466,7 @@ describe('endUserSession', () => { user_id: 'u1', status: 'active', active_instance_id: 'x', + model: DEFAULT_MODEL, queued_at: new Date(), admitted_at: null, expires_at: null, diff --git a/web/src/server/free-session/__tests__/session-view.test.ts b/web/src/server/free-session/__tests__/session-view.test.ts index 681072b30..52dc82c12 100644 --- a/web/src/server/free-session/__tests__/session-view.test.ts +++ b/web/src/server/free-session/__tests__/session-view.test.ts @@ -7,12 +7,15 @@ import type { InternalSessionRow } from '../types' const WAIT_PER_SPOT_MS = 24_000 const GRACE_MS = 30 * 60_000 +const TEST_MODEL = 'z-ai/glm-5.1' + function row(overrides: Partial = {}): InternalSessionRow { const now = new Date('2026-04-17T12:00:00Z') return { user_id: 'u1', status: 'queued', active_instance_id: 'inst-1', + model: TEST_MODEL, queued_at: now, admitted_at: null, expires_at: null, @@ -41,13 +44,13 @@ describe('toSessionStateResponse', () => { const now = new Date('2026-04-17T12:00:00Z') const baseArgs = { graceMs: GRACE_MS, + queueDepthByModel: {}, } test('returns null when row is null', () => { const view = toSessionStateResponse({ row: null, position: 0, - queueDepth: 0, ...baseArgs, now, }) @@ -58,15 +61,17 @@ describe('toSessionStateResponse', () => { const view = toSessionStateResponse({ row: row({ status: 'queued' }), position: 3, - queueDepth: 10, ...baseArgs, + queueDepthByModel: { [TEST_MODEL]: 10, 'minimax/minimax-m2.7': 4 }, now, }) expect(view).toEqual({ 
status: 'queued', instanceId: 'inst-1', + model: TEST_MODEL, position: 3, queueDepth: 10, + queueDepthByModel: { [TEST_MODEL]: 10, 'minimax/minimax-m2.7': 4 }, estimatedWaitMs: 2 * WAIT_PER_SPOT_MS, queuedAt: now.toISOString(), }) @@ -78,13 +83,13 @@ describe('toSessionStateResponse', () => { const view = toSessionStateResponse({ row: row({ status: 'active', admitted_at: admittedAt, expires_at: expiresAt }), position: 0, - queueDepth: 0, ...baseArgs, now, }) expect(view).toEqual({ status: 'active', instanceId: 'inst-1', + model: TEST_MODEL, admittedAt: admittedAt.toISOString(), expiresAt: expiresAt.toISOString(), remainingMs: 50 * 60_000, @@ -97,7 +102,6 @@ describe('toSessionStateResponse', () => { const view = toSessionStateResponse({ row: row({ status: 'active', admitted_at: admittedAt, expires_at: expiresAt }), position: 0, - queueDepth: 0, ...baseArgs, now, }) @@ -119,7 +123,6 @@ describe('toSessionStateResponse', () => { expires_at: new Date(now.getTime() - GRACE_MS - 1), }), position: 0, - queueDepth: 0, ...baseArgs, now, }) diff --git a/web/src/server/free-session/admission.ts b/web/src/server/free-session/admission.ts index 7c0097c70..4ec532daf 100644 --- a/web/src/server/free-session/admission.ts +++ b/web/src/server/free-session/admission.ts @@ -1,29 +1,34 @@ +import { FREEBUFF_MODELS } from '@codebuff/common/constants/freebuff-models' + import { ADMISSION_TICK_MS, getSessionGraceMs, getSessionLengthMs, isWaitingRoomEnabled, } from './config' -import { getFireworksHealth } from './fireworks-health' +import { getFleetHealth } from './fireworks-health' import { activeCount, admitFromQueue, queueDepth, sweepExpired } from './store' -import type { FireworksHealth } from './fireworks-health' +import type { FireworksHealth, FleetHealth } from './fireworks-health' import { logger } from '@/util/logger' export interface AdmissionDeps { sweepExpired: (now: Date, graceMs: number) => Promise - queueDepth: () => Promise + queueDepth: (params: { model: string }) => 
Promise activeCount: () => Promise admitFromQueue: (params: { + model: string sessionLengthMs: number now: Date - getFireworksHealth: () => Promise + health: FireworksHealth }) => Promise<{ admitted: { user_id: string }[]; skipped: FireworksHealth | null }> - getFireworksHealth: () => Promise + getFleetHealth: () => Promise /** Plain values, not thunks — these never change at runtime. */ sessionLengthMs: number graceMs: number + /** Models to run admission ticks for. Defaults to the full model registry. */ + models?: readonly string[] now?: () => Date } @@ -33,11 +38,13 @@ const defaultDeps: AdmissionDeps = { activeCount, admitFromQueue, // FREEBUFF_DEV_FORCE_ADMIT lets local `dev:freebuff` drive the full - // waiting-room → admitted → ended flow without a real upstream. - getFireworksHealth: + // waiting-room → admitted → ended flow without a real upstream. Returning + // an empty fleet means every model resolves to the absence-default of + // 'healthy' below. + getFleetHealth: process.env.FREEBUFF_DEV_FORCE_ADMIT === 'true' - ? async () => 'healthy' - : getFireworksHealth, + ? async () => ({}) + : getFleetHealth, get sessionLengthMs() { return getSessionLengthMs() }, @@ -49,7 +56,8 @@ const defaultDeps: AdmissionDeps = { export interface AdmissionTickResult { expired: number admitted: number - queueDepth: number + /** Per-model queue depth at the end of the tick. */ + queueDepthByModel: Record activeCount: number skipped: FireworksHealth | null } @@ -57,16 +65,15 @@ export interface AdmissionTickResult { /** * Run a single admission tick: * 1. Expire sessions past their expires_at + grace. - * 2. Attempt to admit one queued user. Admission proceeds only when the - * upstream health probe reports `healthy`; `degraded` and `unhealthy` - * both pause admission so the deployment can catch up. + * 2. For each model, attempt to admit one queued user. 
Admission proceeds + * only when the upstream health probe reports `healthy`; `degraded` and + * `unhealthy` both pause admission so the deployment can catch up. * - * Admission drips at (1 / ADMISSION_TICK_MS), which drives utilization up - * slowly; once the probe stops returning `healthy`, step 2 halts admission - * until the upstream recovers. + * Per-model admission means heavier models can sit cold without starving + * lighter ones. Admission still drips at (1 / ADMISSION_TICK_MS) per model. * * Returns counts for observability. Safe to call concurrently across pods — - * admitFromQueue takes an advisory xact lock. + * admitFromQueue takes a per-model advisory xact lock. */ export async function runAdmissionTick( deps: AdmissionDeps = defaultDeps, @@ -74,20 +81,42 @@ export async function runAdmissionTick( const now = (deps.now ?? (() => new Date()))() const expired = await deps.sweepExpired(now, deps.graceMs) - const { admitted, skipped } = await deps.admitFromQueue({ - sessionLengthMs: deps.sessionLengthMs, - now, - getFireworksHealth: deps.getFireworksHealth, - }) + const models = deps.models ?? FREEBUFF_MODELS.map((m) => m.id) + + // One probe per tick covers every model — the Fireworks metrics endpoint + // returns all deployments in a single response. Models without a dedicated + // deployment (e.g. serverless) aren't in the map; treat their absence as + // 'healthy' so admission continues. TODO: when those models move to their + // own deployments, drop the absence-default and require an explicit entry. + const fleet = await deps.getFleetHealth() + + // Run per-model admission in parallel — they only contend on independent + // advisory locks and a single update each. + const perModel = await Promise.all( + models.map(async (model) => { + const health = fleet[model] ?? 
'healthy' + const { admitted, skipped } = await deps.admitFromQueue({ + model, + sessionLengthMs: deps.sessionLengthMs, + now, + health, + }) + const depth = await deps.queueDepth({ model }) + return { model, admittedCount: admitted.length, depth, skipped } + }), + ) + + const active = await deps.activeCount() + const totalAdmitted = perModel.reduce((s, r) => s + r.admittedCount, 0) + const queueDepthByModel = Object.fromEntries( + perModel.map((r) => [r.model, r.depth]), + ) + const skipped = perModel.find((r) => r.skipped)?.skipped ?? null - const [depth, active] = await Promise.all([ - deps.queueDepth(), - deps.activeCount(), - ]) return { expired, - admitted: admitted.length, - queueDepth: depth, + admitted: totalAdmitted, + queueDepthByModel, activeCount: active, skipped, } @@ -109,7 +138,7 @@ function runTick() { metric: 'freebuff_waiting_room', admitted: result.admitted, expired: result.expired, - queueDepth: result.queueDepth, + queueDepthByModel: result.queueDepthByModel, activeCount: result.activeCount, skipped: result.skipped, }, diff --git a/web/src/server/free-session/fireworks-health.ts b/web/src/server/free-session/fireworks-health.ts index cef6be01c..15f1bb124 100644 --- a/web/src/server/free-session/fireworks-health.ts +++ b/web/src/server/free-session/fireworks-health.ts @@ -52,25 +52,35 @@ const HEALTH_CHECK_TIMEOUT_MS = 5_000 * pod hits the endpoint at most ~2.4/min. */ const HEALTH_CACHE_TTL_MS = 25_000 -type CacheEntry = { expiresAt: number; health: FireworksHealth } +/** Map of model id → FireworksHealth. Only includes models that have a + * dedicated Fireworks deployment in `FIREWORKS_DEPLOYMENT_MAP`. Models served + * via the Fireworks serverless API (no deployment id) are not present — + * callers should treat their absence as 'healthy' for now. + * TODO: when serverless models move to dedicated deployments, drop the + * absence-means-healthy fallback at the call site. 
*/ +export type FleetHealth = Record + +type CacheEntry = { expiresAt: number; fleet: FleetHealth } let cache: CacheEntry | null = null export function __resetFireworksHealthCacheForTests(): void { cache = null } -export async function getFireworksHealth(): Promise { +export async function getFleetHealth(): Promise { const now = Date.now() - if (cache && cache.expiresAt > now) return cache.health + if (cache && cache.expiresAt > now) return cache.fleet - const health = await probe() - cache = { expiresAt: now + HEALTH_CACHE_TTL_MS, health } - return health + const fleet = await probe() + cache = { expiresAt: now + HEALTH_CACHE_TTL_MS, fleet } + return fleet } -async function probe(): Promise { +async function probe(): Promise { const apiKey = env.FIREWORKS_API_KEY - if (!apiKey) return 'unhealthy' + // Mark every deployment-mapped model unhealthy when we can't authenticate + // the probe. Serverless models (absent from the map) keep their default. + if (!apiKey) return allDeploymentsAt('unhealthy') const controller = new AbortController() const timeout = setTimeout(() => controller.abort(), HEALTH_CHECK_TIMEOUT_MS) @@ -81,18 +91,15 @@ async function probe(): Promise { headers: { Authorization: `Bearer ${apiKey}` }, signal: controller.signal, }) - if (!response.ok) return 'unhealthy' + if (!response.ok) return allDeploymentsAt('unhealthy') body = await response.text() } catch { - return 'unhealthy' + return allDeploymentsAt('unhealthy') } finally { clearTimeout(timeout) } - const deploymentIds = Object.values(FIREWORKS_DEPLOYMENT_MAP).map( - (name) => name.split('/').pop()!, - ) - if (deploymentIds.length === 0) return 'healthy' + if (Object.keys(FIREWORKS_DEPLOYMENT_MAP).length === 0) return {} const { samples, newestTimestampMs } = parsePrometheus(body) @@ -104,27 +111,26 @@ async function probe(): Promise { { ageMs: Date.now() - newestTimestampMs }, '[FireworksHealth] unhealthy: metrics snapshot is stale', ) - return 'unhealthy' + return 
allDeploymentsAt('unhealthy') } - return classify(samples, deploymentIds) + const fleet: FleetHealth = {} + for (const [modelId, deploymentName] of Object.entries(FIREWORKS_DEPLOYMENT_MAP)) { + const deploymentId = deploymentName.split('/').pop()! + fleet[modelId] = classifyOne(samples, deploymentId) + } + return fleet } -/** Treat the whole fleet as degraded/unhealthy if any single deployment is. */ -export function classify( - samples: PromSample[], - deploymentIds: string[], -): FireworksHealth { - let worst: FireworksHealth = 'healthy' - for (const deploymentId of deploymentIds) { - const h = classifyOne(samples, deploymentId) - if (h === 'unhealthy') return 'unhealthy' - if (h === 'degraded') worst = 'degraded' +function allDeploymentsAt(health: FireworksHealth): FleetHealth { + const out: FleetHealth = {} + for (const modelId of Object.keys(FIREWORKS_DEPLOYMENT_MAP)) { + out[modelId] = health } - return worst + return out } -function classifyOne(samples: PromSample[], deploymentId: string): FireworksHealth { +export function classifyOne(samples: PromSample[], deploymentId: string): FireworksHealth { const kvBlocks = scalarFor( samples, 'generator_kv_blocks_fraction:avg_by_deployment', diff --git a/web/src/server/free-session/public-api.ts b/web/src/server/free-session/public-api.ts index 74af009cc..be4506eb1 100644 --- a/web/src/server/free-session/public-api.ts +++ b/web/src/server/free-session/public-api.ts @@ -1,3 +1,8 @@ +import { + isFreebuffModelId as isSelectableFreebuffModel, + resolveFreebuffModel, +} from '@codebuff/common/constants/freebuff-models' + import { getSessionGraceMs, isWaitingRoomBypassedForEmail, @@ -5,9 +10,10 @@ import { } from './config' import { endSession, + FreeSessionModelLockedError, getSessionRow, joinOrTakeOver, - queueDepth, + queueDepthsByModel, queuePositionFor, } from './store' import { toSessionStateResponse } from './session-view' @@ -17,10 +23,18 @@ import type { InternalSessionRow, SessionStateResponse } from './types' 
export interface SessionDeps { getSessionRow: (userId: string) => Promise - joinOrTakeOver: (params: { userId: string; now: Date }) => Promise + joinOrTakeOver: (params: { + userId: string + model: string + now: Date + }) => Promise endSession: (userId: string) => Promise - queueDepth: () => Promise - queuePositionFor: (params: { userId: string; queuedAt: Date }) => Promise + queueDepthsByModel: () => Promise> + queuePositionFor: (params: { + userId: string + model: string + queuedAt: Date + }) => Promise isWaitingRoomEnabled: () => boolean /** Plain values, not getters: these never change at runtime. The deps * interface uses values rather than thunks so tests can pass numbers @@ -33,7 +47,7 @@ const defaultDeps: SessionDeps = { getSessionRow, joinOrTakeOver, endSession, - queueDepth, + queueDepthsByModel, queuePositionFor, isWaitingRoomEnabled, get graceMs() { @@ -51,39 +65,62 @@ async function viewForRow( deps: SessionDeps, row: InternalSessionRow, ): Promise { - const [position, depth] = + const [position, depthsByModel] = row.status === 'queued' ? await Promise.all([ - deps.queuePositionFor({ userId, queuedAt: row.queued_at }), - deps.queueDepth(), + deps.queuePositionFor({ + userId, + model: row.model, + queuedAt: row.queued_at, + }), + deps.queueDepthsByModel(), ]) - : [0, 0] + : [0, {}] return toSessionStateResponse({ row, position, - queueDepth: depth, + queueDepthByModel: depthsByModel, graceMs: deps.graceMs, now: nowOf(deps), }) } +export type RequestSessionResult = + | SessionStateResponse + | { + /** User asked to queue/switch to a different model while their active + * session is still bound to another. The CLI must end the existing + * session first (DELETE /session) before re-queueing. */ + status: 'model_locked' + currentModel: string + requestedModel: string + } + /** - * Client calls this on CLI startup. 
Semantics: - * - Waiting room disabled → { status: 'disabled' } - * - No existing session → create queued row, fresh instance_id - * - Existing active (unexpired) → rotate instance_id (takeover), preserve state - * - Existing queued → rotate instance_id, preserve queue position - * - Existing expired → re-queue at the back with fresh instance_id + * Client calls this on CLI startup with the model they want to use. + * Semantics: + * - Waiting room disabled → { status: 'disabled' } (model still respected + * downstream by chat-completions) + * - No existing session → create queued row for `model`, fresh instance_id + * - Existing active (unexpired), same model → rotate instance_id (takeover) + * - Existing active (unexpired), different model → { status: 'model_locked' } + * - Existing queued, same model → rotate instance_id, preserve position + * - Existing queued, different model → switch to new model and join the + * back of that model's queue + * - Existing expired → re-queue at the back of `model`'s queue with fresh + * instance_id * - * `joinOrTakeOver` always returns a row that maps to a non-null view (queued - * or active-unexpired), so the cast below is sound. + * `joinOrTakeOver` (when it doesn't throw) always returns a row that maps to + * a non-null view (queued or active-unexpired), so the cast below is sound. */ export async function requestSession(params: { userId: string + model: string userEmail?: string | null | undefined deps?: SessionDeps -}): Promise { +}): Promise { const deps = params.deps ?? 
defaultDeps + const model = resolveFreebuffModel(params.model) if ( !deps.isWaitingRoomEnabled() || isWaitingRoomBypassedForEmail(params.userEmail) @@ -91,7 +128,23 @@ export async function requestSession(params: { return { status: 'disabled' } } - const row = await deps.joinOrTakeOver({ userId: params.userId, now: nowOf(deps) }) + let row: InternalSessionRow + try { + row = await deps.joinOrTakeOver({ + userId: params.userId, + model, + now: nowOf(deps), + }) + } catch (err) { + if (err instanceof FreeSessionModelLockedError) { + return { + status: 'model_locked', + currentModel: err.currentModel, + requestedModel: model, + } + } + throw err + } const view = await viewForRow(params.userId, deps, row) if (!view) { throw new Error( @@ -171,6 +224,9 @@ export type SessionGateResult = | { ok: false; code: 'waiting_room_queued'; message: string } | { ok: false; code: 'session_superseded'; message: string } | { ok: false; code: 'session_expired'; message: string } + /** Active session locked to a different model than the one requested. The + * CLI should restart its session (DELETE then POST) to switch models. */ + | { ok: false; code: 'session_model_mismatch'; message: string } /** Pre-waiting-room CLI that never sends an instance id. Surfaced as a * distinct code so the caller can prompt the user to restart. */ | { ok: false; code: 'freebuff_update_required'; message: string } @@ -190,6 +246,10 @@ export async function checkSessionAdmissible(params: { userId: string userEmail?: string | null | undefined claimedInstanceId: string | null | undefined + /** Model the chat-completions request is for. When provided, the gate + * rejects requests whose model doesn't match the active session's model + * so a stale CLI tab can't slip a request through under the wrong model. */ + requestedModel?: string | null | undefined deps?: SessionDeps }): Promise { const deps = params.deps ?? 
defaultDeps @@ -254,6 +314,23 @@ export async function checkSessionAdmissible(params: { } } + // Reject requests for a model the session isn't bound to. Sub-agents may + // legitimately use other models (Gemini Flash etc.) so we only enforce this + // when the caller provides a requestedModel — and only against the set of + // selectable freebuff models (resolveFreebuffModel returns the canonical id + // or the default for anything outside the registry). + if ( + params.requestedModel && + isSelectableFreebuffModel(params.requestedModel) && + params.requestedModel !== row.model + ) { + return { + ok: false, + code: 'session_model_mismatch', + message: `This session is bound to ${row.model}; restart freebuff to switch models.`, + } + } + if (expiresAtMs > nowMs) { return { ok: true, diff --git a/web/src/server/free-session/session-view.ts b/web/src/server/free-session/session-view.ts index 582e78814..599b44911 100644 --- a/web/src/server/free-session/session-view.ts +++ b/web/src/server/free-session/session-view.ts @@ -12,11 +12,13 @@ import type { InternalSessionRow, SessionStateResponse } from './types' export function toSessionStateResponse(params: { row: InternalSessionRow | null position: number - queueDepth: number + /** Snapshot of every model's queue depth at response time. Only consumed + * by the `queued` variant — active/ended don't need the selector. */ + queueDepthByModel: Record graceMs: number now: Date }): SessionStateResponse | null { - const { row, position, queueDepth, graceMs, now } = params + const { row, position, queueDepthByModel, graceMs, now } = params if (!row) return null if (row.status === 'active' && row.expires_at) { @@ -26,6 +28,7 @@ export function toSessionStateResponse(params: { return { status: 'active', instanceId: row.active_instance_id, + model: row.model, admittedAt: (row.admitted_at ?? 
row.created_at).toISOString(), expiresAt: row.expires_at.toISOString(), remainingMs: expiresAtMs - nowMs, @@ -48,8 +51,10 @@ export function toSessionStateResponse(params: { return { status: 'queued', instanceId: row.active_instance_id, + model: row.model, position, - queueDepth, + queueDepth: queueDepthByModel[row.model] ?? 0, + queueDepthByModel, estimatedWaitMs: estimateWaitMs({ position }), queuedAt: row.queued_at.toISOString(), } diff --git a/web/src/server/free-session/store.ts b/web/src/server/free-session/store.ts index 34f4ad712..b0cd22b97 100644 --- a/web/src/server/free-session/store.ts +++ b/web/src/server/free-session/store.ts @@ -26,21 +26,37 @@ export async function getSessionRow( * Join the queue (or take over an existing row with a new instance_id). * * Semantics: - * - If no row exists: insert status=queued, fresh instance_id, queued_at=now. - * - If row exists and active+unexpired: rotate instance_id (takeover), - * preserve status/admitted_at/expires_at. - * - If row exists and expired: reset to queued with fresh instance_id - * and fresh queued_at — effectively re-queue at the back. - * - If row exists and already queued: rotate instance_id, preserve - * queued_at so user keeps their place in line. + * - If no row exists: insert status=queued for `model`, fresh instance_id, + * queued_at=now. + * - If row exists and active+unexpired and model matches: rotate + * instance_id (takeover), preserve status/admitted_at/expires_at. + * - If row exists and active+unexpired but the user picked a different + * model: reject with `model_locked` — the active session is bound to the + * model it was admitted with. The CLI should end the session first. + * - If row exists and expired: reset to queued with fresh instance_id, + * fresh queued_at, and the requested model — effectively re-queue at + * the back of the new model's queue. 
+ * - If row exists and already queued: if model matches, rotate + * instance_id and preserve queued_at; if model differs, switch model + * and reset queued_at to now (move to back of the new queue). * * Never trusts client-supplied timestamps or instance ids. */ +export class FreeSessionModelLockedError extends Error { + constructor(public readonly currentModel: string) { + super( + `Active session is locked to model ${currentModel}; end the session before switching.`, + ) + this.name = 'FreeSessionModelLockedError' + } +} + export async function joinOrTakeOver(params: { userId: string + model: string now: Date }): Promise { - const { userId, now } = params + const { userId, model, now } = params const nextInstanceId = newInstanceId() // postgres-js does NOT coerce raw JS Date values when they're interpolated @@ -54,12 +70,21 @@ export async function joinOrTakeOver(params: { // column references resolve to the existing row. // // Decision table (pre-update state → post-update state): - // no row → INSERT: status=queued, queued_at=now - // active & expires_at > now → rotate instance_id only (takeover) - // queued → rotate instance_id, preserve queued_at + // no row → INSERT: status=queued, queued_at=now, + // model=$model + // active & expires_at > now → + // same model: rotate instance_id only (takeover) + // diff model: throw FreeSessionModelLockedError post-fetch (we can't + // easily express the reject-without-update branch in a single UPSERT; + // see below) + // queued, same model → rotate instance_id, preserve queued_at + // queued, diff model → switch model, reset queued_at=now + // (move to back of new queue) // active & expired → re-queue at back: status=queued, - // queued_at=now, admitted_at/expires_at=null + // queued_at=now, model=$model, + // admitted_at/expires_at=null const activeUnexpired = sql`${schema.freeSession.status} = 'active' AND ${schema.freeSession.expires_at} > ${nowIso}` + const sameModel = sql`${schema.freeSession.model} = ${model}` 
const [row] = await db .insert(schema.freeSession) @@ -67,6 +92,7 @@ export async function joinOrTakeOver(params: { user_id: userId, status: 'queued', active_instance_id: nextInstanceId, + model, queued_at: now, created_at: now, updated_at: now, @@ -74,12 +100,24 @@ export async function joinOrTakeOver(params: { .onConflictDoUpdate({ target: schema.freeSession.user_id, set: { - active_instance_id: nextInstanceId, + // For active+unexpired rows the instance_id only rotates if the model + // matches; otherwise we keep the existing id so the active session + // stays valid for the other CLI/tab. We then detect the mismatch + // post-update and throw, so the caller can return a clean error. + active_instance_id: sql`CASE + WHEN ${activeUnexpired} AND NOT (${sameModel}) THEN ${schema.freeSession.active_instance_id} + ELSE ${nextInstanceId} + END`, updated_at: now, status: sql`CASE WHEN ${activeUnexpired} THEN 'active'::free_session_status ELSE 'queued'::free_session_status END`, + // Keep model when active+unexpired (locked); switch otherwise. + model: sql`CASE + WHEN ${activeUnexpired} THEN ${schema.freeSession.model} + ELSE ${model} + END`, queued_at: sql`CASE - WHEN ${schema.freeSession.status} = 'queued' THEN ${schema.freeSession.queued_at} WHEN ${activeUnexpired} THEN ${schema.freeSession.queued_at} + WHEN ${schema.freeSession.status} = 'queued' AND ${sameModel} THEN ${schema.freeSession.queued_at} ELSE ${nowIso} END`, admitted_at: sql`CASE WHEN ${activeUnexpired} THEN ${schema.freeSession.admitted_at} ELSE NULL END`, @@ -91,6 +129,13 @@ export async function joinOrTakeOver(params: { if (!row) { throw new Error(`joinOrTakeOver returned no row for user=${userId}`) } + + // Active sessions are locked to their original model — surface a typed + // error so the public API can translate it into a structured response. 
+ if (row.status === 'active' && row.model !== model) { + throw new FreeSessionModelLockedError(row.model) + } + return row as InternalSessionRow } @@ -100,14 +145,37 @@ export async function endSession(userId: string): Promise { .where(eq(schema.freeSession.user_id, userId)) } -export async function queueDepth(): Promise { +export async function queueDepth(params: { model: string }): Promise { const rows = await db .select({ n: count() }) .from(schema.freeSession) - .where(eq(schema.freeSession.status, 'queued')) + .where( + and( + eq(schema.freeSession.status, 'queued'), + eq(schema.freeSession.model, params.model), + ), + ) return Number(rows[0]?.n ?? 0) } +/** + * Single-query read of queued-row counts bucketed by model. Powers the + * per-model "N ahead" hint in the waiting-room model selector — one round-trip + * covers every model's queue depth, so the UI stays cheap to refresh. + * Models with no queued rows are absent from the map; callers should default + * missing keys to 0. + */ +export async function queueDepthsByModel(): Promise> { + const rows = await db + .select({ model: schema.freeSession.model, n: count() }) + .from(schema.freeSession) + .where(eq(schema.freeSession.status, 'queued')) + .groupBy(schema.freeSession.model) + const out: Record = {} + for (const row of rows) out[row.model] = Number(row.n) + return out +} + export async function activeCount(): Promise { const rows = await db .select({ n: count() }) @@ -118,6 +186,7 @@ export async function activeCount(): Promise { export async function queuePositionFor(params: { userId: string + model: string queuedAt: Date }): Promise { const rows = await db @@ -126,6 +195,7 @@ export async function queuePositionFor(params: { .where( and( eq(schema.freeSession.status, 'queued'), + eq(schema.freeSession.model, params.model), sql`(${schema.freeSession.queued_at}, ${schema.freeSession.user_id}) <= (${params.queuedAt.toISOString()}::timestamptz, ${params.userId})`, ), ) @@ -152,34 +222,42 @@ export async 
function sweepExpired(now: Date, graceMs: number): Promise } /** - * Atomically admit one queued user, gated by the upstream health probe and - * guarded by an advisory xact lock so only one pod admits per tick. + * Atomically admit one queued user for a specific model, gated by the + * upstream health for that model's deployment and guarded by an advisory + * xact lock so only one pod admits per tick (per model). + * + * Each model has its own queue; this admits the longest-waiting user from + * the given model's queue. Health is passed in (resolved by the caller from + * a single fleet probe) rather than fetched here, so a slow probe doesn't + * hold a Postgres connection open. * * Return semantics: * - `{ admitted: [row], skipped: null }` — admitted one user * - `{ admitted: [], skipped: null }` — empty queue or another pod held the lock - * - `{ admitted: [], skipped: 'degraded' | 'unhealthy' }` — probe blocked admission + * - `{ admitted: [], skipped: 'degraded' | 'unhealthy' }` — health blocked admission * * Only `healthy` admits; `degraded` and `unhealthy` both pause admission (the * distinction is for observability — degraded means "upstream loaded", - * unhealthy means "upstream unreachable or saturated"). The probe runs before - * the transaction so a slow probe doesn't hold a Postgres connection open. + * unhealthy means "upstream unreachable or saturated"). 
*/ export async function admitFromQueue(params: { + model: string sessionLengthMs: number now: Date - getFireworksHealth: () => Promise + health: FireworksHealth }): Promise<{ admitted: InternalSessionRow[]; skipped: FireworksHealth | null }> { - const { sessionLengthMs, now, getFireworksHealth } = params + const { model, sessionLengthMs, now, health } = params - const health = await getFireworksHealth() if (health !== 'healthy') { return { admitted: [], skipped: health } } return db.transaction(async (tx) => { + // Per-model lock: hashing the model into the lock id lets distinct model + // queues admit concurrently while still serializing within a single queue. + const modelLockId = FREEBUFF_ADMISSION_LOCK_ID + hashStringToInt32(model) const lockResult = await tx.execute<{ acquired: unknown }>( - sql`SELECT pg_try_advisory_xact_lock(${FREEBUFF_ADMISSION_LOCK_ID}) AS acquired`, + sql`SELECT pg_try_advisory_xact_lock(${modelLockId}) AS acquired`, ) if ( !coerceBool( @@ -192,7 +270,12 @@ export async function admitFromQueue(params: { const candidates = await tx .select({ user_id: schema.freeSession.user_id }) .from(schema.freeSession) - .where(eq(schema.freeSession.status, 'queued')) + .where( + and( + eq(schema.freeSession.status, 'queued'), + eq(schema.freeSession.model, model), + ), + ) .orderBy(asc(schema.freeSession.queued_at), asc(schema.freeSession.user_id)) .limit(1) .for('update', { skipLocked: true }) @@ -220,3 +303,12 @@ export async function admitFromQueue(params: { return { admitted: admitted as InternalSessionRow[], skipped: null } }) } + +/** Stable 31-bit hash so model-keyed advisory lock ids don't overflow int4. 
*/ +function hashStringToInt32(s: string): number { + let h = 0 + for (let i = 0; i < s.length; i++) { + h = (h * 31 + s.charCodeAt(i)) | 0 + } + return Math.abs(h) % 0x40000000 +} diff --git a/web/src/server/free-session/types.ts b/web/src/server/free-session/types.ts index 2f56e2c4d..f46a3ad52 100644 --- a/web/src/server/free-session/types.ts +++ b/web/src/server/free-session/types.ts @@ -15,6 +15,8 @@ export interface InternalSessionRow { user_id: string status: FreeSessionStatus active_instance_id: string + /** Freebuff model id this row is queued for (or locked to, once active). */ + model: string queued_at: Date admitted_at: Date | null expires_at: Date | null