From ac1f6be77ed8e68b241523ca3299f958ae74f8f9 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Mon, 20 Apr 2026 14:44:44 -0700 Subject: [PATCH 01/10] Add model selector to freebuff with per-model queues Lets users pick between glm-5.1 and minimax-m2.7 from the waiting room. Each model has its own FIFO queue so wait times scale independently. Selection persists locally; switching mid-queue moves you to the back of the new queue and switching mid-session is blocked. Adds /queue command to end the current session and rejoin (allowing model switch). Co-Authored-By: Claude Opus 4.7 --- cli/src/commands/command-registry.ts | 21 + .../components/freebuff-model-selector.tsx | 111 + cli/src/components/waiting-room-screen.tsx | 5 + cli/src/data/slash-commands.ts | 7 + cli/src/hooks/use-freebuff-session.ts | 61 +- cli/src/state/freebuff-model-store.ts | 41 + cli/src/utils/local-agent-registry.ts | 28 +- cli/src/utils/settings.ts | 28 + common/src/constants/freebuff-models.ts | 47 + common/src/types/freebuff-session.ts | 16 +- .../db/migrations/0044_violet_stingray.sql | 7 + .../src/db/migrations/meta/0044_snapshot.json | 3214 +++++++++++++++++ .../src/db/migrations/meta/_journal.json | 7 + packages/internal/src/db/schema.ts | 8 +- web/src/app/api/v1/chat/completions/_post.ts | 2 + .../session/__tests__/session.test.ts | 7 +- .../app/api/v1/freebuff/session/_handlers.ts | 14 +- .../free-session/__tests__/public-api.test.ts | 70 +- .../__tests__/session-view.test.ts | 5 + web/src/server/free-session/admission.ts | 63 +- web/src/server/free-session/public-api.ts | 107 +- web/src/server/free-session/session-view.ts | 2 + web/src/server/free-session/store.ts | 116 +- web/src/server/free-session/types.ts | 2 + 24 files changed, 3899 insertions(+), 90 deletions(-) create mode 100644 cli/src/components/freebuff-model-selector.tsx create mode 100644 cli/src/state/freebuff-model-store.ts create mode 100644 common/src/constants/freebuff-models.ts create mode 100644 
packages/internal/src/db/migrations/0044_violet_stingray.sql create mode 100644 packages/internal/src/db/migrations/meta/0044_snapshot.json diff --git a/cli/src/commands/command-registry.ts b/cli/src/commands/command-registry.ts index 8b6c431baf..81a779c76e 100644 --- a/cli/src/commands/command-registry.ts +++ b/cli/src/commands/command-registry.ts @@ -4,6 +4,7 @@ import { safeOpen } from '../utils/open-url' import { handleAdsEnable, handleAdsDisable } from './ads' import { buildInterviewPrompt, buildPlanPrompt, buildReviewPromptFromArgs } from './prompt-builders' +import { endAndRejoinFreebuffSession } from '../hooks/use-freebuff-session' import { useThemeStore } from '../hooks/use-theme' import { handleHelpCommand } from './help' import { handleImageCommand } from './image' @@ -611,6 +612,26 @@ const ALL_COMMANDS: CommandDefinition[] = [ clearInput(params) }, }), + // /queue (freebuff-only) — end the active session early and re-queue. The + // hook flips status from 'active' → 'queued', which unmounts the chat UI and + // mounts the waiting room screen, where the user can pick a different model. + defineCommand({ + name: 'queue', + aliases: ['rejoin', 'switch'], + handler: (params) => { + params.setMessages((prev) => [ + ...prev, + getUserMessage(params.inputValue.trim()), + getSystemMessage('Ending session and returning to the waiting room…'), + ]) + params.saveToHistory(params.inputValue.trim()) + clearInput(params) + endAndRejoinFreebuffSession().catch(() => { + // The hook surfaces poll errors via the session store; nothing to do + // here beyond letting the chat history reflect the attempt. 
+ }) + }, + }), ] export const COMMAND_REGISTRY: CommandDefinition[] = IS_FREEBUFF diff --git a/cli/src/components/freebuff-model-selector.tsx b/cli/src/components/freebuff-model-selector.tsx new file mode 100644 index 0000000000..a3bbb3e040 --- /dev/null +++ b/cli/src/components/freebuff-model-selector.tsx @@ -0,0 +1,111 @@ +import { TextAttributes } from '@opentui/core' +import { useKeyboard } from '@opentui/react' +import React, { useCallback, useState } from 'react' + +import { Button } from './button' +import { FREEBUFF_MODELS } from '@codebuff/common/constants/freebuff-models' + +import { switchFreebuffModel } from '../hooks/use-freebuff-session' +import { useFreebuffModelStore } from '../state/freebuff-model-store' +import { useTheme } from '../hooks/use-theme' + +import type { KeyEvent } from '@opentui/core' + +interface FreebuffModelSelectorProps { + /** Disables interaction while a switch / refresh is mid-flight so the user + * can't queue up a second switch and double-bounce themselves to the back + * of yet another queue. */ + disabled?: boolean +} + +/** + * Lets the user pick which model's queue they're in. Tapping (or pressing the + * row's number key) on a different model triggers a re-POST: the server moves + * them to the back of the new model's queue. + */ +export const FreebuffModelSelector: React.FC = ({ + disabled = false, +}) => { + const theme = useTheme() + const selectedModel = useFreebuffModelStore((s) => s.selectedModel) + const [pending, setPending] = useState(null) + const [hoveredId, setHoveredId] = useState(null) + + const pick = useCallback( + (modelId: string) => { + if (disabled || pending) return + if (modelId === selectedModel) return + setPending(modelId) + switchFreebuffModel(modelId).finally(() => setPending(null)) + }, + [disabled, pending, selectedModel], + ) + + // Number-key shortcuts (1-9) so keyboard-only users can switch without + // hunting for a clickable region. 
+ useKeyboard( + useCallback( + (key: KeyEvent) => { + if (disabled || pending) return + const digit = parseInt(key.name ?? '', 10) + if (!Number.isFinite(digit) || digit < 1 || digit > FREEBUFF_MODELS.length) { + return + } + const target = FREEBUFF_MODELS[digit - 1] + if (target && target.id !== selectedModel) { + key.preventDefault?.() + pick(target.id) + } + }, + [disabled, pending, pick, selectedModel], + ), + ) + + return ( + + + Model — tap or press 1-{FREEBUFF_MODELS.length} to switch + + {FREEBUFF_MODELS.map((model, idx) => { + const isSelected = model.id === selectedModel + const isPending = pending === model.id + const isHovered = hoveredId === model.id + const indicator = isSelected ? '●' : '○' + const indicatorColor = isSelected ? theme.primary : theme.muted + const labelColor = isSelected ? theme.foreground : theme.muted + const interactable = !disabled && !pending && !isSelected + return ( + + ) + })} + + ) +} diff --git a/cli/src/components/waiting-room-screen.tsx b/cli/src/components/waiting-room-screen.tsx index 08e967d28b..5ee2402994 100644 --- a/cli/src/components/waiting-room-screen.tsx +++ b/cli/src/components/waiting-room-screen.tsx @@ -5,6 +5,7 @@ import React, { useMemo, useState } from 'react' import { AdBanner } from './ad-banner' import { Button } from './button' import { ChoiceAdBanner } from './choice-ad-banner' +import { FreebuffModelSelector } from './freebuff-model-selector' import { ShimmerText } from './shimmer-text' import { useFreebuffCtrlCExit } from '../hooks/use-freebuff-ctrl-c-exit' import { useGravityAd } from '../hooks/use-gravity-ad' @@ -200,6 +201,10 @@ export const WaitingRoomScreen: React.FC = ({ {formatElapsed(elapsedMs)} + + + + )} diff --git a/cli/src/data/slash-commands.ts b/cli/src/data/slash-commands.ts index bd67811d32..198a383f2a 100644 --- a/cli/src/data/slash-commands.ts +++ b/cli/src/data/slash-commands.ts @@ -47,6 +47,7 @@ const FREEBUFF_REMOVED_COMMAND_IDS = new Set([ const FREEBUFF_ONLY_COMMAND_IDS = new 
Set([ 'connect', 'plan', + 'queue', ]) const ALL_SLASH_COMMANDS: SlashCommand[] = [ @@ -184,6 +185,12 @@ const ALL_SLASH_COMMANDS: SlashCommand[] = [ label: 'theme:toggle', description: 'Toggle between light and dark mode', }, + { + id: 'queue', + label: 'queue', + description: 'End your free session and return to the waiting room (lets you switch model)', + aliases: ['rejoin', 'switch'], + }, { id: 'logout', label: 'logout', diff --git a/cli/src/hooks/use-freebuff-session.ts b/cli/src/hooks/use-freebuff-session.ts index 06db946be7..5c1954c3db 100644 --- a/cli/src/hooks/use-freebuff-session.ts +++ b/cli/src/hooks/use-freebuff-session.ts @@ -1,6 +1,10 @@ import { env } from '@codebuff/common/env' import { useEffect } from 'react' +import { + getSelectedFreebuffModel, + useFreebuffModelStore, +} from '../state/freebuff-model-store' import { useFreebuffSessionStore } from '../state/freebuff-session-store' import { getAuthTokenDetails } from '../utils/auth' import { IS_FREEBUFF } from '../utils/constants' @@ -16,6 +20,11 @@ const POLL_INTERVAL_ERROR_MS = 10_000 * account has rotated the id and respond with `{ status: 'superseded' }`. */ const FREEBUFF_INSTANCE_HEADER = 'x-freebuff-instance-id' +/** Header sent on POST/GET telling the server which model's queue we want. + * POST uses it to (re-)join that model's queue; GET uses it only for the + * rare GET-before-POST edge where there's no row yet. */ +const FREEBUFF_MODEL_HEADER = 'x-freebuff-model' + /** Play the terminal bell so users get an audible notification on admission. 
*/ const playAdmissionSound = () => { try { @@ -33,12 +42,15 @@ const sessionEndpoint = (): string => { async function callSession( method: 'POST' | 'GET' | 'DELETE', token: string, - opts: { instanceId?: string; signal?: AbortSignal } = {}, + opts: { instanceId?: string; model?: string; signal?: AbortSignal } = {}, ): Promise { const headers: Record = { Authorization: `Bearer ${token}` } if (method === 'GET' && opts.instanceId) { headers[FREEBUFF_INSTANCE_HEADER] = opts.instanceId } + if ((method === 'POST' || method === 'GET') && opts.model) { + headers[FREEBUFF_MODEL_HEADER] = opts.model + } const resp = await fetch(sessionEndpoint(), { method, headers, @@ -64,6 +76,17 @@ async function callSession( return body } } + // 409 from POST means the user picked a different model than their active + // session is bound to. Surface as a non-throw `model_locked` so the UI can + // show a confirmation prompt (DELETE then re-POST to switch). + if (resp.status === 409 && method === 'POST') { + const body = (await resp.json().catch(() => null)) as + | FreebuffSessionResponse + | null + if (body && body.status === 'model_locked') { + return body + } + } if (!resp.ok) { const text = await resp.text().catch(() => '') throw new Error( @@ -95,6 +118,7 @@ function nextDelayMs(next: FreebuffSessionResponse): number | null { case 'disabled': case 'superseded': case 'country_blocked': + case 'model_locked': return null } } @@ -145,6 +169,39 @@ export async function refreshFreebuffSession(opts: { resetChat?: boolean } = {}) await controller?.refresh() } +/** + * User picked a different model in the waiting room. Persist the choice and + * re-POST so the server moves them to the back of the new model's queue. If + * the user has an active session bound to a different model, the server + * responds with `model_locked` and the UI prompts them to end first. 
+ */ +export async function switchFreebuffModel(model: string): Promise { + if (!IS_FREEBUFF) return + const { setSelectedModel } = useFreebuffModelStore.getState() + setSelectedModel(model) + await controller?.refresh() +} + +/** + * End the current session and immediately rejoin the queue. Used by the + * "switch model" confirmation flow when the server returned `model_locked`, + * and by any UI that lets the user exit an active session early. + */ +export async function endAndRejoinFreebuffSession(): Promise { + if (!IS_FREEBUFF) return + const { token } = getAuthTokenDetails() + if (!token) return + try { + await callSession('DELETE', token) + } catch { + // Best-effort — even if DELETE fails the re-POST below will eventually + // succeed once the server-side sweep catches up. + } + const { useChatStore } = await import('../state/chat-store') + useChatStore.getState().reset() + await controller?.refresh() +} + export function markFreebuffSessionSuperseded(): void { if (!IS_FREEBUFF) return controller?.abort() @@ -250,10 +307,12 @@ export function useFreebuffSession(): UseFreebuffSessionResult { // re-POST out from under an in-flight agent. const method: 'POST' | 'GET' = hasPosted ? 'GET' : 'POST' const instanceId = getFreebuffInstanceId() + const model = getSelectedFreebuffModel() try { const next = await callSession(method, token, { signal: abortController.signal, instanceId, + model, }) if (cancelled) return hasPosted = true diff --git a/cli/src/state/freebuff-model-store.ts b/cli/src/state/freebuff-model-store.ts new file mode 100644 index 0000000000..182a38831f --- /dev/null +++ b/cli/src/state/freebuff-model-store.ts @@ -0,0 +1,41 @@ +import { + DEFAULT_FREEBUFF_MODEL_ID, + resolveFreebuffModel, +} from '@codebuff/common/constants/freebuff-models' +import { create } from 'zustand' + +import { + loadFreebuffModelPreference, + saveFreebuffModelPreference, +} from '../utils/settings' + +/** + * Holds the user's currently-selected freebuff model. 
Initialized from the + * persisted settings file so freebuff defaults to whatever model the user + * last picked. Writing through `setSelectedModel` also persists to disk so + * the next launch picks it up without an explicit save call. + * + * Components in the waiting room read this to highlight the current row in + * the model picker; the session hook reads it to decide which queue to join. + */ +interface FreebuffModelStore { + selectedModel: string + setSelectedModel: (model: string) => void +} + +export const useFreebuffModelStore = create((set) => ({ + selectedModel: resolveFreebuffModel( + loadFreebuffModelPreference() ?? DEFAULT_FREEBUFF_MODEL_ID, + ), + setSelectedModel: (model) => { + const resolved = resolveFreebuffModel(model) + saveFreebuffModelPreference(resolved) + set({ selectedModel: resolved }) + }, +})) + +/** Imperative read for non-React callers (the session hook's tick loop and + * the chat-completions metadata builder). */ +export function getSelectedFreebuffModel(): string { + return useFreebuffModelStore.getState().selectedModel +} diff --git a/cli/src/utils/local-agent-registry.ts b/cli/src/utils/local-agent-registry.ts index 203a9f7a90..af11b13dc6 100644 --- a/cli/src/utils/local-agent-registry.ts +++ b/cli/src/utils/local-agent-registry.ts @@ -7,11 +7,23 @@ import { loadLocalAgents as sdkLoadLocalAgents, loadMCPConfigSync } from '@codeb import type { MCPConfig } from '@codebuff/common/types/mcp' +import { getSelectedFreebuffModel } from '../state/freebuff-model-store' import { getProjectRoot } from '../project-files' -import { AGENT_MODE_TO_ID, type AgentMode } from './constants' +import { AGENT_MODE_TO_ID, IS_FREEBUFF, type AgentMode } from './constants' import { logger } from './logger' import * as bundledAgentsModule from '../agents/bundled-agents.generated' +/** Agents whose hardcoded model gets swapped out for the user's currently + * selected freebuff model. 
Each entry must also be allowlisted under the + * matching id in `FREE_MODE_AGENT_MODELS` (server-side check) for both + * glm-5.1 and minimax-m2.7 — otherwise the chat-completions endpoint will + * reject the request with `free_mode_invalid_agent_model`. */ +const FREEBUFF_MODEL_OVERRIDABLE_AGENT_IDS = new Set([ + 'base2-free', + 'editor-lite', + 'code-reviewer-lite', +]) + import type { AgentDefinition } from '@codebuff/common/templates/initial-agents-dir/types/agent-definition' // ============================================================================ @@ -354,6 +366,20 @@ export const loadAgentDefinitions = (): AgentDefinition[] => { } } + // Override the model of free-mode agents to match the user's pick from the + // freebuff waiting room. Bundled definitions hardcode glm-5.1; we swap in + // whatever the user chose so the chat-completions request body carries the + // matching model and the server-side session gate doesn't reject it as a + // model mismatch. + if (IS_FREEBUFF) { + const selectedModel = getSelectedFreebuffModel() + for (const def of definitions) { + if (FREEBUFF_MODEL_OVERRIDABLE_AGENT_IDS.has(def.id)) { + def.model = selectedModel + } + } + } + return definitions } diff --git a/cli/src/utils/settings.ts b/cli/src/utils/settings.ts index c469ae273e..5dc901e69d 100644 --- a/cli/src/utils/settings.ts +++ b/cli/src/utils/settings.ts @@ -1,6 +1,8 @@ import fs from 'fs' import path from 'path' +import { isFreebuffModelId } from '@codebuff/common/constants/freebuff-models' + import { getConfigDir } from './auth' import { AGENT_MODES } from './constants' import { logger } from './logger' @@ -20,6 +22,10 @@ const DEFAULT_SETTINGS: Settings = { export interface Settings { mode?: AgentMode adsEnabled?: boolean + /** Last model the user picked in the freebuff model selector. Restored on + * next freebuff launch so users land in the queue for their preferred + * model without re-picking. Persisted as the canonical model id. 
*/ + freebuffModel?: string /** @deprecated Use server-side fallbackToALaCarte setting instead */ alwaysUseALaCarte?: boolean /** @deprecated Use server-side fallbackToALaCarte setting instead */ @@ -96,6 +102,12 @@ const validateSettings = (parsed: unknown): Settings => { settings.adsEnabled = obj.adsEnabled } + // Validate freebuffModel — drop unknown ids so a removed model doesn't + // strand the user on a non-existent queue. + if (typeof obj.freebuffModel === 'string' && isFreebuffModelId(obj.freebuffModel)) { + settings.freebuffModel = obj.freebuffModel + } + // Validate alwaysUseALaCarte (legacy) if (typeof obj.alwaysUseALaCarte === 'boolean') { settings.alwaysUseALaCarte = obj.alwaysUseALaCarte @@ -149,3 +161,19 @@ export const saveModePreference = (mode: AgentMode): void => { saveSettings({ mode }) } +/** + * Load the saved freebuff model preference. Returns undefined if none is + * saved yet — callers should fall back to DEFAULT_FREEBUFF_MODEL_ID. + */ +export const loadFreebuffModelPreference = (): string | undefined => { + return loadSettings().freebuffModel +} + +/** + * Save the freebuff model preference. Called whenever the user picks a model + * in the waiting room so the next launch defaults to it. + */ +export const saveFreebuffModelPreference = (model: string): void => { + saveSettings({ freebuffModel: model }) +} + diff --git a/common/src/constants/freebuff-models.ts b/common/src/constants/freebuff-models.ts new file mode 100644 index 0000000000..02b2e9d689 --- /dev/null +++ b/common/src/constants/freebuff-models.ts @@ -0,0 +1,47 @@ +/** + * Models a freebuff user can pick between in the waiting-room model selector. + * + * Each model has its own queue (server keys queue position by `model`), so the + * list here is effectively the set of separate waiting lines. Order is the + * order shown in the UI. + */ +export interface FreebuffModelOption { + /** Stable ID used in the wire protocol and DB. 
Matches the model id passed + * to the chat-completions endpoint. */ + id: string + /** Short label for the selector UI. */ + displayName: string + /** One-line description shown next to the label. */ + tagline: string +} + +export const FREEBUFF_MODELS: readonly FreebuffModelOption[] = [ + { + id: 'z-ai/glm-5.1', + displayName: 'GLM 5.1', + tagline: 'Strong all-rounder.', + }, + { + id: 'minimax/minimax-m2.7', + displayName: 'MiniMax M2.7', + tagline: 'Fast, lighter wait.', + }, +] as const + +export const DEFAULT_FREEBUFF_MODEL_ID: string = FREEBUFF_MODELS[0].id + +export function isFreebuffModelId(id: string | null | undefined): id is string { + if (!id) return false + return FREEBUFF_MODELS.some((m) => m.id === id) +} + +export function resolveFreebuffModel(id: string | null | undefined): string { + return isFreebuffModelId(id) ? id : DEFAULT_FREEBUFF_MODEL_ID +} + +export function getFreebuffModel(id: string): FreebuffModelOption { + return ( + FREEBUFF_MODELS.find((m) => m.id === id) ?? + FREEBUFF_MODELS.find((m) => m.id === DEFAULT_FREEBUFF_MODEL_ID)! + ) +} diff --git a/common/src/types/freebuff-session.ts b/common/src/types/freebuff-session.ts index b2a6dabff3..b9d72ff5c6 100644 --- a/common/src/types/freebuff-session.ts +++ b/common/src/types/freebuff-session.ts @@ -21,7 +21,9 @@ export type FreebuffSessionServerResponse = | { status: 'queued' instanceId: string - /** 1-indexed position in the FIFO queue. */ + /** Model the user is queued for. Each model has its own queue. */ + model: string + /** 1-indexed position in the queue for `model`. */ position: number queueDepth: number estimatedWaitMs: number @@ -30,6 +32,8 @@ export type FreebuffSessionServerResponse = | { status: 'active' instanceId: string + /** Model the active session is bound to — cannot change mid-session. 
*/ + model: string admittedAt: string expiresAt: string remainingMs: number @@ -68,3 +72,13 @@ export type FreebuffSessionServerResponse = status: 'country_blocked' countryCode: string } + | { + /** User has an active session bound to a different model. Returned + * from POST /session when they pick a new model without ending their + * current session first. The CLI shows a confirmation prompt: "End + * your active GLM session to switch?" → on confirm, DELETE then + * re-POST with the new model. */ + status: 'model_locked' + currentModel: string + requestedModel: string + } diff --git a/packages/internal/src/db/migrations/0044_violet_stingray.sql b/packages/internal/src/db/migrations/0044_violet_stingray.sql new file mode 100644 index 0000000000..e6942d1d92 --- /dev/null +++ b/packages/internal/src/db/migrations/0044_violet_stingray.sql @@ -0,0 +1,7 @@ +DROP INDEX "idx_free_session_queue";--> statement-breakpoint +-- Backfill any in-flight rows with the previous sole free-mode model. The +-- column is supposed to be required going forward, so we set a temporary +-- default to ride out the migration and drop it immediately after. 
+ALTER TABLE "free_session" ADD COLUMN "model" text NOT NULL DEFAULT 'z-ai/glm-5.1';--> statement-breakpoint +ALTER TABLE "free_session" ALTER COLUMN "model" DROP DEFAULT;--> statement-breakpoint +CREATE INDEX "idx_free_session_queue" ON "free_session" USING btree ("status","model","queued_at"); \ No newline at end of file diff --git a/packages/internal/src/db/migrations/meta/0044_snapshot.json b/packages/internal/src/db/migrations/meta/0044_snapshot.json new file mode 100644 index 0000000000..847f32bba0 --- /dev/null +++ b/packages/internal/src/db/migrations/meta/0044_snapshot.json @@ -0,0 +1,3214 @@ +{ + "id": "108f2bd2-7ddc-4c15-b351-28f2b55d5348", + "prevId": "7c9172ed-5f73-4bf8-93cc-2c7e6d82a9ad", + "version": "7", + "dialect": "postgresql", + "tables": { + "public.account": { + "name": "account", + "schema": "", + "columns": { + "userId": { + "name": "userId", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "type": { + "name": "type", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "provider": { + "name": "provider", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "providerAccountId": { + "name": "providerAccountId", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "refresh_token": { + "name": "refresh_token", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "access_token": { + "name": "access_token", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "expires_at": { + "name": "expires_at", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "token_type": { + "name": "token_type", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "scope": { + "name": "scope", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "id_token": { + "name": "id_token", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "session_state": { + "name": "session_state", + "type": "text", + "primaryKey": false, + 
"notNull": false + } + }, + "indexes": {}, + "foreignKeys": { + "account_userId_user_id_fk": { + "name": "account_userId_user_id_fk", + "tableFrom": "account", + "tableTo": "user", + "columnsFrom": [ + "userId" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": { + "account_provider_providerAccountId_pk": { + "name": "account_provider_providerAccountId_pk", + "columns": [ + "provider", + "providerAccountId" + ] + } + }, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.ad_impression": { + "name": "ad_impression", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "user_id": { + "name": "user_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "ad_text": { + "name": "ad_text", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "title": { + "name": "title", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "cta": { + "name": "cta", + "type": "text", + "primaryKey": false, + "notNull": true, + "default": "''" + }, + "url": { + "name": "url", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "favicon": { + "name": "favicon", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "click_url": { + "name": "click_url", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "imp_url": { + "name": "imp_url", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "payout": { + "name": "payout", + "type": "numeric(10, 6)", + "primaryKey": false, + "notNull": true + }, + "credits_granted": { + "name": "credits_granted", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "grant_operation_id": { + "name": "grant_operation_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "served_at": { + "name": "served_at", + "type": "timestamp with time zone", 
+ "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "impression_fired_at": { + "name": "impression_fired_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "clicked_at": { + "name": "clicked_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + } + }, + "indexes": { + "idx_ad_impression_user": { + "name": "idx_ad_impression_user", + "columns": [ + { + "expression": "user_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "served_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_ad_impression_imp_url": { + "name": "idx_ad_impression_imp_url", + "columns": [ + { + "expression": "imp_url", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "ad_impression_user_id_user_id_fk": { + "name": "ad_impression_user_id_user_id_fk", + "tableFrom": "ad_impression", + "tableTo": "user", + "columnsFrom": [ + "user_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": { + "ad_impression_imp_url_unique": { + "name": "ad_impression_imp_url_unique", + "nullsNotDistinct": false, + "columns": [ + "imp_url" + ] + } + }, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.agent_config": { + "name": "agent_config", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "version": { + "name": "version", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "publisher_id": { + "name": "publisher_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "major": { + "name": "major", + "type": "integer", + 
"primaryKey": false, + "notNull": false, + "generated": { + "as": "CAST(SPLIT_PART(\"agent_config\".\"version\", '.', 1) AS INTEGER)", + "type": "stored" + } + }, + "minor": { + "name": "minor", + "type": "integer", + "primaryKey": false, + "notNull": false, + "generated": { + "as": "CAST(SPLIT_PART(\"agent_config\".\"version\", '.', 2) AS INTEGER)", + "type": "stored" + } + }, + "patch": { + "name": "patch", + "type": "integer", + "primaryKey": false, + "notNull": false, + "generated": { + "as": "CAST(SPLIT_PART(\"agent_config\".\"version\", '.', 3) AS INTEGER)", + "type": "stored" + } + }, + "data": { + "name": "data", + "type": "jsonb", + "primaryKey": false, + "notNull": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": { + "idx_agent_config_publisher": { + "name": "idx_agent_config_publisher", + "columns": [ + { + "expression": "publisher_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "agent_config_publisher_id_publisher_id_fk": { + "name": "agent_config_publisher_id_publisher_id_fk", + "tableFrom": "agent_config", + "tableTo": "publisher", + "columnsFrom": [ + "publisher_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": { + "agent_config_publisher_id_id_version_pk": { + "name": "agent_config_publisher_id_id_version_pk", + "columns": [ + "publisher_id", + "id", + "version" + ] + } + }, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.agent_run": { + "name": "agent_run", + "schema": "", + "columns": { + "id": { + "name": "id", 
+ "type": "text", + "primaryKey": true, + "notNull": true + }, + "user_id": { + "name": "user_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "agent_id": { + "name": "agent_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "publisher_id": { + "name": "publisher_id", + "type": "text", + "primaryKey": false, + "notNull": false, + "generated": { + "as": "CASE\n WHEN agent_id ~ '^[^/@]+/[^/@]+@[^/@]+$'\n THEN split_part(agent_id, '/', 1)\n ELSE NULL\n END", + "type": "stored" + } + }, + "agent_name": { + "name": "agent_name", + "type": "text", + "primaryKey": false, + "notNull": false, + "generated": { + "as": "CASE\n WHEN agent_id ~ '^[^/@]+/[^/@]+@[^/@]+$'\n THEN split_part(split_part(agent_id, '/', 2), '@', 1)\n ELSE agent_id\n END", + "type": "stored" + } + }, + "agent_version": { + "name": "agent_version", + "type": "text", + "primaryKey": false, + "notNull": false, + "generated": { + "as": "CASE\n WHEN agent_id ~ '^[^/@]+/[^/@]+@[^/@]+$'\n THEN split_part(agent_id, '@', 2)\n ELSE NULL\n END", + "type": "stored" + } + }, + "ancestor_run_ids": { + "name": "ancestor_run_ids", + "type": "text[]", + "primaryKey": false, + "notNull": false + }, + "root_run_id": { + "name": "root_run_id", + "type": "text", + "primaryKey": false, + "notNull": false, + "generated": { + "as": "CASE WHEN array_length(ancestor_run_ids, 1) >= 1 THEN ancestor_run_ids[1] ELSE id END", + "type": "stored" + } + }, + "parent_run_id": { + "name": "parent_run_id", + "type": "text", + "primaryKey": false, + "notNull": false, + "generated": { + "as": "CASE WHEN array_length(ancestor_run_ids, 1) >= 1 THEN ancestor_run_ids[array_length(ancestor_run_ids, 1)] ELSE NULL END", + "type": "stored" + } + }, + "depth": { + "name": "depth", + "type": "integer", + "primaryKey": false, + "notNull": false, + "generated": { + "as": "COALESCE(array_length(ancestor_run_ids, 1), 1)", + "type": "stored" + } + }, + "duration_ms": { + "name": "duration_ms", + "type": "integer", 
+ "primaryKey": false, + "notNull": false, + "generated": { + "as": "CASE WHEN completed_at IS NOT NULL THEN EXTRACT(EPOCH FROM (completed_at - created_at)) * 1000 ELSE NULL END::integer", + "type": "stored" + } + }, + "total_steps": { + "name": "total_steps", + "type": "integer", + "primaryKey": false, + "notNull": false, + "default": 0 + }, + "direct_credits": { + "name": "direct_credits", + "type": "numeric(10, 6)", + "primaryKey": false, + "notNull": false, + "default": "'0'" + }, + "total_credits": { + "name": "total_credits", + "type": "numeric(10, 6)", + "primaryKey": false, + "notNull": false, + "default": "'0'" + }, + "status": { + "name": "status", + "type": "agent_run_status", + "typeSchema": "public", + "primaryKey": false, + "notNull": true, + "default": "'running'" + }, + "error_message": { + "name": "error_message", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "completed_at": { + "name": "completed_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + } + }, + "indexes": { + "idx_agent_run_user_id": { + "name": "idx_agent_run_user_id", + "columns": [ + { + "expression": "user_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "created_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_agent_run_parent": { + "name": "idx_agent_run_parent", + "columns": [ + { + "expression": "parent_run_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_agent_run_root": { + "name": "idx_agent_run_root", + "columns": [ + { + "expression": "root_run_id", + "isExpression": false, + "asc": true, + "nulls": "last" + 
} + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_agent_run_agent_id": { + "name": "idx_agent_run_agent_id", + "columns": [ + { + "expression": "agent_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "created_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_agent_run_publisher": { + "name": "idx_agent_run_publisher", + "columns": [ + { + "expression": "publisher_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "created_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_agent_run_status": { + "name": "idx_agent_run_status", + "columns": [ + { + "expression": "status", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"agent_run\".\"status\" = 'running'", + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_agent_run_ancestors_gin": { + "name": "idx_agent_run_ancestors_gin", + "columns": [ + { + "expression": "ancestor_run_ids", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "gin", + "with": {} + }, + "idx_agent_run_completed_publisher_agent": { + "name": "idx_agent_run_completed_publisher_agent", + "columns": [ + { + "expression": "publisher_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "agent_name", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"agent_run\".\"status\" = 'completed'", + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_agent_run_completed_recent": { + "name": "idx_agent_run_completed_recent", + "columns": [ + { + "expression": "created_at", + 
"isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "publisher_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "agent_name", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"agent_run\".\"status\" = 'completed'", + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_agent_run_completed_version": { + "name": "idx_agent_run_completed_version", + "columns": [ + { + "expression": "publisher_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "agent_name", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "agent_version", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "created_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"agent_run\".\"status\" = 'completed'", + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_agent_run_completed_user": { + "name": "idx_agent_run_completed_user", + "columns": [ + { + "expression": "user_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"agent_run\".\"status\" = 'completed'", + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "agent_run_user_id_user_id_fk": { + "name": "agent_run_user_id_user_id_fk", + "tableFrom": "agent_run", + "tableTo": "user", + "columnsFrom": [ + "user_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.agent_step": { + "name": "agent_step", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "agent_run_id": { + "name": "agent_run_id", + "type": "text", 
+ "primaryKey": false, + "notNull": true + }, + "step_number": { + "name": "step_number", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "duration_ms": { + "name": "duration_ms", + "type": "integer", + "primaryKey": false, + "notNull": false, + "generated": { + "as": "CASE WHEN completed_at IS NOT NULL THEN EXTRACT(EPOCH FROM (completed_at - created_at)) * 1000 ELSE NULL END::integer", + "type": "stored" + } + }, + "credits": { + "name": "credits", + "type": "numeric(10, 6)", + "primaryKey": false, + "notNull": true, + "default": "'0'" + }, + "child_run_ids": { + "name": "child_run_ids", + "type": "text[]", + "primaryKey": false, + "notNull": false + }, + "spawned_count": { + "name": "spawned_count", + "type": "integer", + "primaryKey": false, + "notNull": false, + "generated": { + "as": "array_length(child_run_ids, 1)", + "type": "stored" + } + }, + "message_id": { + "name": "message_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "status": { + "name": "status", + "type": "agent_step_status", + "typeSchema": "public", + "primaryKey": false, + "notNull": true, + "default": "'completed'" + }, + "error_message": { + "name": "error_message", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "completed_at": { + "name": "completed_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": { + "unique_step_number_per_run": { + "name": "unique_step_number_per_run", + "columns": [ + { + "expression": "agent_run_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "step_number", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_agent_step_run_id": { + "name": 
"idx_agent_step_run_id", + "columns": [ + { + "expression": "agent_run_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_agent_step_children_gin": { + "name": "idx_agent_step_children_gin", + "columns": [ + { + "expression": "child_run_ids", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "gin", + "with": {} + } + }, + "foreignKeys": { + "agent_step_agent_run_id_agent_run_id_fk": { + "name": "agent_step_agent_run_id_agent_run_id_fk", + "tableFrom": "agent_step", + "tableTo": "agent_run", + "columnsFrom": [ + "agent_run_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.credit_ledger": { + "name": "credit_ledger", + "schema": "", + "columns": { + "operation_id": { + "name": "operation_id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "user_id": { + "name": "user_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "principal": { + "name": "principal", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "balance": { + "name": "balance", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "type": { + "name": "type", + "type": "grant_type", + "typeSchema": "public", + "primaryKey": false, + "notNull": true + }, + "description": { + "name": "description", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "priority": { + "name": "priority", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "expires_at": { + "name": "expires_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + 
"primaryKey": false, + "notNull": true, + "default": "now()" + }, + "org_id": { + "name": "org_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "stripe_subscription_id": { + "name": "stripe_subscription_id", + "type": "text", + "primaryKey": false, + "notNull": false + } + }, + "indexes": { + "idx_credit_ledger_active_balance": { + "name": "idx_credit_ledger_active_balance", + "columns": [ + { + "expression": "user_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "balance", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "expires_at", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "priority", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "created_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"credit_ledger\".\"balance\" != 0 AND \"credit_ledger\".\"expires_at\" IS NULL", + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_credit_ledger_org": { + "name": "idx_credit_ledger_org", + "columns": [ + { + "expression": "org_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_credit_ledger_subscription": { + "name": "idx_credit_ledger_subscription", + "columns": [ + { + "expression": "user_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "type", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "created_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "credit_ledger_user_id_user_id_fk": { + "name": "credit_ledger_user_id_user_id_fk", + "tableFrom": "credit_ledger", + "tableTo": "user", + "columnsFrom": [ + "user_id" + 
], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + }, + "credit_ledger_org_id_org_id_fk": { + "name": "credit_ledger_org_id_org_id_fk", + "tableFrom": "credit_ledger", + "tableTo": "org", + "columnsFrom": [ + "org_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.encrypted_api_keys": { + "name": "encrypted_api_keys", + "schema": "", + "columns": { + "user_id": { + "name": "user_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "type": { + "name": "type", + "type": "api_key_type", + "typeSchema": "public", + "primaryKey": false, + "notNull": true + }, + "api_key": { + "name": "api_key", + "type": "text", + "primaryKey": false, + "notNull": true + } + }, + "indexes": {}, + "foreignKeys": { + "encrypted_api_keys_user_id_user_id_fk": { + "name": "encrypted_api_keys_user_id_user_id_fk", + "tableFrom": "encrypted_api_keys", + "tableTo": "user", + "columnsFrom": [ + "user_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": { + "encrypted_api_keys_user_id_type_pk": { + "name": "encrypted_api_keys_user_id_type_pk", + "columns": [ + "user_id", + "type" + ] + } + }, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.fingerprint": { + "name": "fingerprint", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "sig_hash": { + "name": "sig_hash", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + 
"policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.free_session": { + "name": "free_session", + "schema": "", + "columns": { + "user_id": { + "name": "user_id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "status": { + "name": "status", + "type": "free_session_status", + "typeSchema": "public", + "primaryKey": false, + "notNull": true + }, + "active_instance_id": { + "name": "active_instance_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "model": { + "name": "model", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "queued_at": { + "name": "queued_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "admitted_at": { + "name": "admitted_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "expires_at": { + "name": "expires_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": { + "idx_free_session_queue": { + "name": "idx_free_session_queue", + "columns": [ + { + "expression": "status", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "model", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "queued_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_free_session_expiry": { + "name": "idx_free_session_expiry", + "columns": [ + { + "expression": "expires_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + 
"concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "free_session_user_id_user_id_fk": { + "name": "free_session_user_id_user_id_fk", + "tableFrom": "free_session", + "tableTo": "user", + "columnsFrom": [ + "user_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.git_eval_results": { + "name": "git_eval_results", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "cost_mode": { + "name": "cost_mode", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "reasoner_model": { + "name": "reasoner_model", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "agent_model": { + "name": "agent_model", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "metadata": { + "name": "metadata", + "type": "jsonb", + "primaryKey": false, + "notNull": false + }, + "cost": { + "name": "cost", + "type": "integer", + "primaryKey": false, + "notNull": true, + "default": 0 + }, + "is_public": { + "name": "is_public", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.limit_override": { + "name": "limit_override", + "schema": "", + "columns": { + "user_id": { + "name": "user_id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "credits_per_block": { + "name": "credits_per_block", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "block_duration_hours": { + "name": 
"block_duration_hours", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "weekly_credit_limit": { + "name": "weekly_credit_limit", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": {}, + "foreignKeys": { + "limit_override_user_id_user_id_fk": { + "name": "limit_override_user_id_user_id_fk", + "tableFrom": "limit_override", + "tableTo": "user", + "columnsFrom": [ + "user_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.message": { + "name": "message", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "finished_at": { + "name": "finished_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true + }, + "client_id": { + "name": "client_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "client_request_id": { + "name": "client_request_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "model": { + "name": "model", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "agent_id": { + "name": "agent_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "request": { + "name": "request", + "type": "jsonb", + "primaryKey": false, + "notNull": false + }, + "last_message": { + "name": "last_message", + "type": "jsonb", + "primaryKey": false, + "notNull": false, + "generated": { + "as": "\"message\".\"request\" -> -1", + "type": "stored" + } + }, + "reasoning_text": { + "name": "reasoning_text", + 
"type": "text", + "primaryKey": false, + "notNull": false + }, + "response": { + "name": "response", + "type": "jsonb", + "primaryKey": false, + "notNull": true + }, + "input_tokens": { + "name": "input_tokens", + "type": "integer", + "primaryKey": false, + "notNull": true, + "default": 0 + }, + "cache_creation_input_tokens": { + "name": "cache_creation_input_tokens", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "cache_read_input_tokens": { + "name": "cache_read_input_tokens", + "type": "integer", + "primaryKey": false, + "notNull": true, + "default": 0 + }, + "reasoning_tokens": { + "name": "reasoning_tokens", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "output_tokens": { + "name": "output_tokens", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "cost": { + "name": "cost", + "type": "numeric(100, 20)", + "primaryKey": false, + "notNull": true + }, + "credits": { + "name": "credits", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "byok": { + "name": "byok", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "latency_ms": { + "name": "latency_ms", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "ttft_ms": { + "name": "ttft_ms", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "user_id": { + "name": "user_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "org_id": { + "name": "org_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "repo_url": { + "name": "repo_url", + "type": "text", + "primaryKey": false, + "notNull": false + } + }, + "indexes": { + "message_user_id_idx": { + "name": "message_user_id_idx", + "columns": [ + { + "expression": "user_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "message_finished_at_user_id_idx": { + "name": 
"message_finished_at_user_id_idx", + "columns": [ + { + "expression": "finished_at", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "user_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "message_org_id_idx": { + "name": "message_org_id_idx", + "columns": [ + { + "expression": "org_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "message_org_id_finished_at_idx": { + "name": "message_org_id_finished_at_idx", + "columns": [ + { + "expression": "org_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "finished_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "message_user_id_user_id_fk": { + "name": "message_user_id_user_id_fk", + "tableFrom": "message", + "tableTo": "user", + "columnsFrom": [ + "user_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + }, + "message_org_id_org_id_fk": { + "name": "message_org_id_org_id_fk", + "tableFrom": "message", + "tableTo": "org", + "columnsFrom": [ + "org_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.org": { + "name": "org", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "name": { + "name": "name", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "slug": { + "name": "slug", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "description": { + "name": "description", + "type": 
"text", + "primaryKey": false, + "notNull": false + }, + "owner_id": { + "name": "owner_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "stripe_customer_id": { + "name": "stripe_customer_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "stripe_subscription_id": { + "name": "stripe_subscription_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "current_period_start": { + "name": "current_period_start", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "current_period_end": { + "name": "current_period_end", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "auto_topup_enabled": { + "name": "auto_topup_enabled", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "auto_topup_threshold": { + "name": "auto_topup_threshold", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "auto_topup_amount": { + "name": "auto_topup_amount", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "credit_limit": { + "name": "credit_limit", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "billing_alerts": { + "name": "billing_alerts", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": true + }, + "usage_alerts": { + "name": "usage_alerts", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": true + }, + "weekly_reports": { + "name": "weekly_reports", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": {}, + "foreignKeys": { + "org_owner_id_user_id_fk": { + "name": 
"org_owner_id_user_id_fk", + "tableFrom": "org", + "tableTo": "user", + "columnsFrom": [ + "owner_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": { + "org_slug_unique": { + "name": "org_slug_unique", + "nullsNotDistinct": false, + "columns": [ + "slug" + ] + }, + "org_stripe_customer_id_unique": { + "name": "org_stripe_customer_id_unique", + "nullsNotDistinct": false, + "columns": [ + "stripe_customer_id" + ] + } + }, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.org_feature": { + "name": "org_feature", + "schema": "", + "columns": { + "org_id": { + "name": "org_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "feature": { + "name": "feature", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "config": { + "name": "config", + "type": "jsonb", + "primaryKey": false, + "notNull": false + }, + "is_active": { + "name": "is_active", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": { + "idx_org_feature_active": { + "name": "idx_org_feature_active", + "columns": [ + { + "expression": "org_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "is_active", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "org_feature_org_id_org_id_fk": { + "name": "org_feature_org_id_org_id_fk", + "tableFrom": "org_feature", + "tableTo": "org", + "columnsFrom": [ + "org_id" + ], + "columnsTo": [ + "id" + ], + 
"onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": { + "org_feature_org_id_feature_pk": { + "name": "org_feature_org_id_feature_pk", + "columns": [ + "org_id", + "feature" + ] + } + }, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.org_invite": { + "name": "org_invite", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "org_id": { + "name": "org_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "email": { + "name": "email", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "role": { + "name": "role", + "type": "org_role", + "typeSchema": "public", + "primaryKey": false, + "notNull": true + }, + "token": { + "name": "token", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "invited_by": { + "name": "invited_by", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "expires_at": { + "name": "expires_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "accepted_at": { + "name": "accepted_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "accepted_by": { + "name": "accepted_by", + "type": "text", + "primaryKey": false, + "notNull": false + } + }, + "indexes": { + "idx_org_invite_token": { + "name": "idx_org_invite_token", + "columns": [ + { + "expression": "token", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_org_invite_email": { + "name": "idx_org_invite_email", + "columns": [ + { + "expression": "org_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "email", + 
"isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_org_invite_expires": { + "name": "idx_org_invite_expires", + "columns": [ + { + "expression": "expires_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "org_invite_org_id_org_id_fk": { + "name": "org_invite_org_id_org_id_fk", + "tableFrom": "org_invite", + "tableTo": "org", + "columnsFrom": [ + "org_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + }, + "org_invite_invited_by_user_id_fk": { + "name": "org_invite_invited_by_user_id_fk", + "tableFrom": "org_invite", + "tableTo": "user", + "columnsFrom": [ + "invited_by" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + }, + "org_invite_accepted_by_user_id_fk": { + "name": "org_invite_accepted_by_user_id_fk", + "tableFrom": "org_invite", + "tableTo": "user", + "columnsFrom": [ + "accepted_by" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": { + "org_invite_token_unique": { + "name": "org_invite_token_unique", + "nullsNotDistinct": false, + "columns": [ + "token" + ] + } + }, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.org_member": { + "name": "org_member", + "schema": "", + "columns": { + "org_id": { + "name": "org_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "user_id": { + "name": "user_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "role": { + "name": "role", + "type": "org_role", + "typeSchema": "public", + "primaryKey": false, + "notNull": true + }, + "joined_at": { + "name": "joined_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": 
true, + "default": "now()" + } + }, + "indexes": {}, + "foreignKeys": { + "org_member_org_id_org_id_fk": { + "name": "org_member_org_id_org_id_fk", + "tableFrom": "org_member", + "tableTo": "org", + "columnsFrom": [ + "org_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + }, + "org_member_user_id_user_id_fk": { + "name": "org_member_user_id_user_id_fk", + "tableFrom": "org_member", + "tableTo": "user", + "columnsFrom": [ + "user_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": { + "org_member_org_id_user_id_pk": { + "name": "org_member_org_id_user_id_pk", + "columns": [ + "org_id", + "user_id" + ] + } + }, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.org_repo": { + "name": "org_repo", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "org_id": { + "name": "org_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "repo_url": { + "name": "repo_url", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "repo_name": { + "name": "repo_name", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "repo_owner": { + "name": "repo_owner", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "approved_by": { + "name": "approved_by", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "approved_at": { + "name": "approved_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "is_active": { + "name": "is_active", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": true + } + }, + "indexes": { + "idx_org_repo_active": { + "name": "idx_org_repo_active", + "columns": [ + { + "expression": "org_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": 
"is_active", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_org_repo_unique": { + "name": "idx_org_repo_unique", + "columns": [ + { + "expression": "org_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "repo_url", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "org_repo_org_id_org_id_fk": { + "name": "org_repo_org_id_org_id_fk", + "tableFrom": "org_repo", + "tableTo": "org", + "columnsFrom": [ + "org_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + }, + "org_repo_approved_by_user_id_fk": { + "name": "org_repo_approved_by_user_id_fk", + "tableFrom": "org_repo", + "tableTo": "user", + "columnsFrom": [ + "approved_by" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.publisher": { + "name": "publisher", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "name": { + "name": "name", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "email": { + "name": "email", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "verified": { + "name": "verified", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "bio": { + "name": "bio", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "avatar_url": { + "name": "avatar_url", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "user_id": { + "name": "user_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "org_id": { + "name": "org_id", + "type": 
"text", + "primaryKey": false, + "notNull": false + }, + "created_by": { + "name": "created_by", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": {}, + "foreignKeys": { + "publisher_user_id_user_id_fk": { + "name": "publisher_user_id_user_id_fk", + "tableFrom": "publisher", + "tableTo": "user", + "columnsFrom": [ + "user_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + }, + "publisher_org_id_org_id_fk": { + "name": "publisher_org_id_org_id_fk", + "tableFrom": "publisher", + "tableTo": "org", + "columnsFrom": [ + "org_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + }, + "publisher_created_by_user_id_fk": { + "name": "publisher_created_by_user_id_fk", + "tableFrom": "publisher", + "tableTo": "user", + "columnsFrom": [ + "created_by" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": { + "publisher_single_owner": { + "name": "publisher_single_owner", + "value": "(\"publisher\".\"user_id\" IS NOT NULL AND \"publisher\".\"org_id\" IS NULL) OR\n (\"publisher\".\"user_id\" IS NULL AND \"publisher\".\"org_id\" IS NOT NULL)" + } + }, + "isRLSEnabled": false + }, + "public.referral": { + "name": "referral", + "schema": "", + "columns": { + "referrer_id": { + "name": "referrer_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "referred_id": { + "name": "referred_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "status": { + "name": "status", + "type": "referral_status", + "typeSchema": 
"public", + "primaryKey": false, + "notNull": true, + "default": "'pending'" + }, + "credits": { + "name": "credits", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "is_legacy": { + "name": "is_legacy", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "completed_at": { + "name": "completed_at", + "type": "timestamp", + "primaryKey": false, + "notNull": false + } + }, + "indexes": {}, + "foreignKeys": { + "referral_referrer_id_user_id_fk": { + "name": "referral_referrer_id_user_id_fk", + "tableFrom": "referral", + "tableTo": "user", + "columnsFrom": [ + "referrer_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + }, + "referral_referred_id_user_id_fk": { + "name": "referral_referred_id_user_id_fk", + "tableFrom": "referral", + "tableTo": "user", + "columnsFrom": [ + "referred_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": { + "referral_referrer_id_referred_id_pk": { + "name": "referral_referrer_id_referred_id_pk", + "columns": [ + "referrer_id", + "referred_id" + ] + } + }, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.session": { + "name": "session", + "schema": "", + "columns": { + "sessionToken": { + "name": "sessionToken", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "userId": { + "name": "userId", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "expires": { + "name": "expires", + "type": "timestamp", + "primaryKey": false, + "notNull": true + }, + "fingerprint_id": { + "name": "fingerprint_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "type": { + "name": "type", + "type": "session_type", + "typeSchema": "public", + 
"primaryKey": false, + "notNull": true, + "default": "'web'" + }, + "created_at": { + "name": "created_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": {}, + "foreignKeys": { + "session_userId_user_id_fk": { + "name": "session_userId_user_id_fk", + "tableFrom": "session", + "tableTo": "user", + "columnsFrom": [ + "userId" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + }, + "session_fingerprint_id_fingerprint_id_fk": { + "name": "session_fingerprint_id_fingerprint_id_fk", + "tableFrom": "session", + "tableTo": "fingerprint", + "columnsFrom": [ + "fingerprint_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.subscription": { + "name": "subscription", + "schema": "", + "columns": { + "stripe_subscription_id": { + "name": "stripe_subscription_id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "stripe_customer_id": { + "name": "stripe_customer_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "user_id": { + "name": "user_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "stripe_price_id": { + "name": "stripe_price_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "tier": { + "name": "tier", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "scheduled_tier": { + "name": "scheduled_tier", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "status": { + "name": "status", + "type": "subscription_status", + "typeSchema": "public", + "primaryKey": false, + "notNull": true, + "default": "'active'" + }, + "billing_period_start": { + "name": "billing_period_start", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true + }, + "billing_period_end": 
{ + "name": "billing_period_end", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true + }, + "cancel_at_period_end": { + "name": "cancel_at_period_end", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "canceled_at": { + "name": "canceled_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": { + "idx_subscription_customer": { + "name": "idx_subscription_customer", + "columns": [ + { + "expression": "stripe_customer_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_subscription_user": { + "name": "idx_subscription_user", + "columns": [ + { + "expression": "user_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_subscription_status": { + "name": "idx_subscription_status", + "columns": [ + { + "expression": "status", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"subscription\".\"status\" = 'active'", + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "subscription_user_id_user_id_fk": { + "name": "subscription_user_id_user_id_fk", + "tableFrom": "subscription", + "tableTo": "user", + "columnsFrom": [ + "user_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + 
"public.sync_failure": { + "name": "sync_failure", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "provider": { + "name": "provider", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "last_attempt_at": { + "name": "last_attempt_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "retry_count": { + "name": "retry_count", + "type": "integer", + "primaryKey": false, + "notNull": true, + "default": 1 + }, + "last_error": { + "name": "last_error", + "type": "text", + "primaryKey": false, + "notNull": true + } + }, + "indexes": { + "idx_sync_failure_retry": { + "name": "idx_sync_failure_retry", + "columns": [ + { + "expression": "retry_count", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "last_attempt_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"sync_failure\".\"retry_count\" < 5", + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.user": { + "name": "user", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "name": { + "name": "name", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "email": { + "name": "email", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "password": { + "name": "password", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "emailVerified": { + "name": "emailVerified", + "type": "timestamp", + "primaryKey": false, + "notNull": false + }, + "image": { + "name": 
"image", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "stripe_customer_id": { + "name": "stripe_customer_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "next_quota_reset": { + "name": "next_quota_reset", + "type": "timestamp", + "primaryKey": false, + "notNull": false, + "default": "now() + INTERVAL '1 month'" + }, + "created_at": { + "name": "created_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "referral_code": { + "name": "referral_code", + "type": "text", + "primaryKey": false, + "notNull": false, + "default": "'ref-' || gen_random_uuid()" + }, + "referral_limit": { + "name": "referral_limit", + "type": "integer", + "primaryKey": false, + "notNull": true, + "default": 5 + }, + "discord_id": { + "name": "discord_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "handle": { + "name": "handle", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "auto_topup_enabled": { + "name": "auto_topup_enabled", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "auto_topup_threshold": { + "name": "auto_topup_threshold", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "auto_topup_amount": { + "name": "auto_topup_amount", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "banned": { + "name": "banned", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "fallback_to_a_la_carte": { + "name": "fallback_to_a_la_carte", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": { + "user_email_unique": { + "name": "user_email_unique", + "nullsNotDistinct": false, + "columns": [ + "email" + ] + }, + "user_stripe_customer_id_unique": { + "name": "user_stripe_customer_id_unique", + "nullsNotDistinct": false, + 
"columns": [ + "stripe_customer_id" + ] + }, + "user_referral_code_unique": { + "name": "user_referral_code_unique", + "nullsNotDistinct": false, + "columns": [ + "referral_code" + ] + }, + "user_discord_id_unique": { + "name": "user_discord_id_unique", + "nullsNotDistinct": false, + "columns": [ + "discord_id" + ] + }, + "user_handle_unique": { + "name": "user_handle_unique", + "nullsNotDistinct": false, + "columns": [ + "handle" + ] + } + }, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.verificationToken": { + "name": "verificationToken", + "schema": "", + "columns": { + "identifier": { + "name": "identifier", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "token": { + "name": "token", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "expires": { + "name": "expires", + "type": "timestamp", + "primaryKey": false, + "notNull": true + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": { + "verificationToken_identifier_token_pk": { + "name": "verificationToken_identifier_token_pk", + "columns": [ + "identifier", + "token" + ] + } + }, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + } + }, + "enums": { + "public.referral_status": { + "name": "referral_status", + "schema": "public", + "values": [ + "pending", + "completed" + ] + }, + "public.agent_run_status": { + "name": "agent_run_status", + "schema": "public", + "values": [ + "running", + "completed", + "failed", + "cancelled" + ] + }, + "public.agent_step_status": { + "name": "agent_step_status", + "schema": "public", + "values": [ + "running", + "completed", + "skipped" + ] + }, + "public.api_key_type": { + "name": "api_key_type", + "schema": "public", + "values": [ + "anthropic", + "gemini", + "openai" + ] + }, + "public.free_session_status": { + "name": "free_session_status", + "schema": "public", + "values": [ + "queued", + "active" + ] + }, + "public.grant_type": { + 
"name": "grant_type", + "schema": "public", + "values": [ + "free", + "referral", + "referral_legacy", + "subscription", + "purchase", + "admin", + "organization", + "ad" + ] + }, + "public.org_role": { + "name": "org_role", + "schema": "public", + "values": [ + "owner", + "admin", + "member" + ] + }, + "public.session_type": { + "name": "session_type", + "schema": "public", + "values": [ + "web", + "pat", + "cli" + ] + }, + "public.subscription_status": { + "name": "subscription_status", + "schema": "public", + "values": [ + "incomplete", + "incomplete_expired", + "trialing", + "active", + "past_due", + "canceled", + "unpaid", + "paused" + ] + } + }, + "schemas": {}, + "sequences": {}, + "roles": {}, + "policies": {}, + "views": {}, + "_meta": { + "columns": {}, + "schemas": {}, + "tables": {} + } +} \ No newline at end of file diff --git a/packages/internal/src/db/migrations/meta/_journal.json b/packages/internal/src/db/migrations/meta/_journal.json index 1370866594..bba4ab5edd 100644 --- a/packages/internal/src/db/migrations/meta/_journal.json +++ b/packages/internal/src/db/migrations/meta/_journal.json @@ -309,6 +309,13 @@ "when": 1776461642346, "tag": "0043_vengeful_boomer", "breakpoints": true + }, + { + "idx": 44, + "version": "7", + "when": 1776719872222, + "tag": "0044_violet_stingray", + "breakpoints": true } ] } \ No newline at end of file diff --git a/packages/internal/src/db/schema.ts b/packages/internal/src/db/schema.ts index cd7762eee1..ba481c89a5 100644 --- a/packages/internal/src/db/schema.ts +++ b/packages/internal/src/db/schema.ts @@ -823,6 +823,10 @@ export const freeSession = pgTable( .references(() => user.id, { onDelete: 'cascade' }), status: freeSessionStatusEnum('status').notNull(), active_instance_id: text('active_instance_id').notNull(), + /** Which freebuff model this row is queued for / locked to. 
Each model has + * its own queue (admission picks one queued user per model per tick) and + * the model is fixed for the life of an active session. */ + model: text('model').notNull(), queued_at: timestamp('queued_at', { mode: 'date', withTimezone: true, @@ -851,8 +855,8 @@ export const freeSession = pgTable( .defaultNow(), }, (table) => [ - // Dequeue: SELECT ... WHERE status='queued' ORDER BY queued_at LIMIT N - index('idx_free_session_queue').on(table.status, table.queued_at), + // Per-model dequeue: WHERE status='queued' AND model=$1 ORDER BY queued_at + index('idx_free_session_queue').on(table.status, table.model, table.queued_at), // Expiry sweep: SELECT ... WHERE status='active' AND expires_at < now() index('idx_free_session_expiry').on(table.expires_at), ], diff --git a/web/src/app/api/v1/chat/completions/_post.ts b/web/src/app/api/v1/chat/completions/_post.ts index f3640f4a3d..8809697f35 100644 --- a/web/src/app/api/v1/chat/completions/_post.ts +++ b/web/src/app/api/v1/chat/completions/_post.ts @@ -124,6 +124,7 @@ const STATUS_BY_GATE_CODE = { waiting_room_queued: 429, session_superseded: 409, session_expired: 410, + session_model_mismatch: 409, freebuff_update_required: 426, } satisfies Record @@ -394,6 +395,7 @@ export async function postChatCompletions(params: { userId, userEmail: userInfo.email, claimedInstanceId, + requestedModel: typedBody.model, }) if (!gate.ok) { trackEvent({ diff --git a/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts b/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts index eef464fee0..b3f80cabbe 100644 --- a/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts +++ b/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts @@ -12,6 +12,8 @@ import type { SessionDeps } from '@/server/free-session/public-api' import type { InternalSessionRow } from '@/server/free-session/types' import type { NextRequest } from 'next/server' +const DEFAULT_MODEL = 'z-ai/glm-5.1' + function makeReq( apiKey: 
string | null, opts: { instanceId?: string; cfCountry?: string } = {}, @@ -42,11 +44,12 @@ function makeSessionDeps(overrides: Partial = {}): SessionDeps & { endSession: async (userId) => { rows.delete(userId) }, - joinOrTakeOver: async ({ userId, now }) => { + joinOrTakeOver: async ({ userId, model, now }) => { const r: InternalSessionRow = { user_id: userId, status: 'queued', active_instance_id: `inst-${++instanceCounter}`, + model, queued_at: now, admitted_at: null, expires_at: null, @@ -157,6 +160,7 @@ describe('GET /api/v1/freebuff/session', () => { user_id: 'u1', status: 'active', active_instance_id: 'real-id', + model: DEFAULT_MODEL, queued_at: new Date(), admitted_at: new Date(), expires_at: new Date(Date.now() + 60_000), @@ -180,6 +184,7 @@ describe('DELETE /api/v1/freebuff/session', () => { user_id: 'u1', status: 'active', active_instance_id: 'x', + model: DEFAULT_MODEL, queued_at: new Date(), admitted_at: new Date(), expires_at: new Date(Date.now() + 60_000), diff --git a/web/src/app/api/v1/freebuff/session/_handlers.ts b/web/src/app/api/v1/freebuff/session/_handlers.ts index 6f1ae06647..ddb2ebcb0d 100644 --- a/web/src/app/api/v1/freebuff/session/_handlers.ts +++ b/web/src/app/api/v1/freebuff/session/_handlers.ts @@ -39,6 +39,11 @@ function countryBlockedResponse(req: NextRequest): NextResponse | null { /** Header the CLI uses to identify which instance is polling. Used by GET to * detect when another CLI on the same account has rotated the id. */ export const FREEBUFF_INSTANCE_HEADER = 'x-freebuff-instance-id' +/** Header the CLI uses to communicate which freebuff model it wants to be in + * the queue for. Used by both POST (join/switch) and GET (read-only — the + * server doesn't change the model on a GET, but uses the header for the + * rare GET-before-POST case where there's no row yet). 
*/ +export const FREEBUFF_MODEL_HEADER = 'x-freebuff-model' export interface FreebuffSessionDeps { getUserInfoFromApiKey: GetUserInfoFromApiKeyFn @@ -122,13 +127,20 @@ export async function postFreebuffSession( const blocked = countryBlockedResponse(req) if (blocked) return blocked + const requestedModel = req.headers.get(FREEBUFF_MODEL_HEADER) ?? '' + try { const state = await requestSession({ userId: auth.userId, userEmail: auth.userEmail, + model: requestedModel, deps: deps.sessionDeps, }) - return NextResponse.json(state, { status: 200 }) + // model_locked is a 409 so it's distinguishable from a normal queued/active + // response on the client. The CLI translates it into a "switch model?" + // confirmation prompt. + const status = state.status === 'model_locked' ? 409 : 200 + return NextResponse.json(state, { status }) } catch (error) { return serverError(deps, 'POST', auth.userId, error) } diff --git a/web/src/server/free-session/__tests__/public-api.test.ts b/web/src/server/free-session/__tests__/public-api.test.ts index b19f24ea03..5ba5a4747d 100644 --- a/web/src/server/free-session/__tests__/public-api.test.ts +++ b/web/src/server/free-session/__tests__/public-api.test.ts @@ -6,12 +6,14 @@ import { getSessionState, requestSession, } from '../public-api' +import { FreeSessionModelLockedError } from '../store' import type { SessionDeps } from '../public-api' import type { InternalSessionRow } from '../types' const SESSION_LEN = 60 * 60 * 1000 const GRACE_MS = 30 * 60 * 1000 +const DEFAULT_MODEL = 'z-ai/glm-5.1' function makeDeps(overrides: Partial = {}): SessionDeps & { rows: Map @@ -41,15 +43,17 @@ function makeDeps(overrides: Partial = {}): SessionDeps & { endSession: async (userId) => { rows.delete(userId) }, - queueDepth: async () => { + queueDepth: async ({ model }) => { let n = 0 - for (const r of rows.values()) if (r.status === 'queued') n++ + for (const r of rows.values()) { + if (r.status === 'queued' && r.model === model) n++ + } return n }, - 
queuePositionFor: async ({ userId, queuedAt }) => { + queuePositionFor: async ({ userId, model, queuedAt }) => { let pos = 0 for (const r of rows.values()) { - if (r.status !== 'queued') continue + if (r.status !== 'queued' || r.model !== model) continue if ( r.queued_at.getTime() < queuedAt.getTime() || (r.queued_at.getTime() === queuedAt.getTime() && r.user_id <= userId) @@ -59,7 +63,7 @@ function makeDeps(overrides: Partial = {}): SessionDeps & { } return pos }, - joinOrTakeOver: async ({ userId, now }) => { + joinOrTakeOver: async ({ userId, model, now }) => { const existing = rows.get(userId) const nextInstance = newInstanceId() if (!existing) { @@ -67,6 +71,7 @@ function makeDeps(overrides: Partial = {}): SessionDeps & { user_id: userId, status: 'queued', active_instance_id: nextInstance, + model, queued_at: now, admitted_at: null, expires_at: null, @@ -81,17 +86,25 @@ function makeDeps(overrides: Partial = {}): SessionDeps & { existing.expires_at && existing.expires_at.getTime() > now.getTime() ) { + if (existing.model !== model) { + throw new FreeSessionModelLockedError(existing.model) + } existing.active_instance_id = nextInstance existing.updated_at = now return existing } if (existing.status === 'queued') { existing.active_instance_id = nextInstance + if (existing.model !== model) { + existing.model = model + existing.queued_at = now + } existing.updated_at = now return existing } existing.status = 'queued' existing.active_instance_id = nextInstance + existing.model = model existing.queued_at = now existing.admitted_at = null existing.expires_at = null @@ -111,13 +124,17 @@ describe('requestSession', () => { test('disabled flag returns { status: disabled } and does not touch DB', async () => { const offDeps = makeDeps({ isWaitingRoomEnabled: () => false }) - const state = await requestSession({ userId: 'u1', deps: offDeps }) + const state = await requestSession({ + userId: 'u1', + model: DEFAULT_MODEL, + deps: offDeps, + }) expect(state).toEqual({ 
status: 'disabled' }) expect(offDeps.rows.size).toBe(0) }) test('first call puts user in queue at position 1', async () => { - const state = await requestSession({ userId: 'u1', deps }) + const state = await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps }) expect(state.status).toBe('queued') if (state.status !== 'queued') throw new Error('unreachable') expect(state.position).toBe(1) @@ -126,17 +143,17 @@ describe('requestSession', () => { }) test('second call from same user rotates instance id, keeps queue position', async () => { - await requestSession({ userId: 'u1', deps }) - const second = await requestSession({ userId: 'u1', deps }) + await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps }) + const second = await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps }) if (second.status !== 'queued') throw new Error('unreachable') expect(second.position).toBe(1) expect(second.instanceId).toBe('inst-2') }) test('multiple users queue in FIFO order', async () => { - await requestSession({ userId: 'u1', deps }) + await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps }) deps._tick(new Date(deps._now().getTime() + 1000)) - await requestSession({ userId: 'u2', deps }) + await requestSession({ userId: 'u2', model: DEFAULT_MODEL, deps }) const s1 = await getSessionState({ userId: 'u1', deps }) const s2 = await getSessionState({ userId: 'u2', deps }) @@ -147,13 +164,13 @@ describe('requestSession', () => { test('active unexpired session → rotate instance id, preserve active state', async () => { // Prime a user into active state manually. - await requestSession({ userId: 'u1', deps }) + await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps }) const row = deps.rows.get('u1')! 
row.status = 'active' row.admitted_at = deps._now() row.expires_at = new Date(deps._now().getTime() + SESSION_LEN) - const second = await requestSession({ userId: 'u1', deps }) + const second = await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps }) expect(second.status).toBe('active') if (second.status !== 'active') throw new Error('unreachable') expect(second.instanceId).not.toBe('inst-1') // rotated @@ -178,7 +195,7 @@ describe('getSessionState', () => { }) test('active session with matching instance id returns active', async () => { - await requestSession({ userId: 'u1', deps }) + await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps }) const row = deps.rows.get('u1')! row.status = 'active' row.admitted_at = deps._now() @@ -193,7 +210,7 @@ describe('getSessionState', () => { }) test('active session with mismatched instance id returns superseded', async () => { - await requestSession({ userId: 'u1', deps }) + await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps }) const row = deps.rows.get('u1')! row.status = 'active' row.admitted_at = deps._now() @@ -210,7 +227,7 @@ describe('getSessionState', () => { test('omitted claimedInstanceId on active session returns active (read-only)', async () => { // Polling without an id (e.g. very first GET before POST has resolved) // must not be classified as superseded — only an explicit mismatch is. - await requestSession({ userId: 'u1', deps }) + await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps }) const row = deps.rows.get('u1')! row.status = 'active' row.admitted_at = deps._now() @@ -221,7 +238,7 @@ describe('getSessionState', () => { }) test('row inside grace window returns ended (with instanceId)', async () => { - await requestSession({ userId: 'u1', deps }) + await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps }) const row = deps.rows.get('u1')! 
row.status = 'active' row.admitted_at = new Date(deps._now().getTime() - SESSION_LEN - 60_000) @@ -239,7 +256,7 @@ describe('getSessionState', () => { }) test('row past grace window returns none', async () => { - await requestSession({ userId: 'u1', deps }) + await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps }) const row = deps.rows.get('u1')! row.status = 'active' row.admitted_at = new Date(deps._now().getTime() - 2 * SESSION_LEN) @@ -305,7 +322,7 @@ describe('checkSessionAdmissible', () => { }) test('queued session → waiting_room_queued', async () => { - await requestSession({ userId: 'u1', deps }) + await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps }) const result = await checkSessionAdmissible({ userId: 'u1', claimedInstanceId: 'inst-1', @@ -316,7 +333,7 @@ describe('checkSessionAdmissible', () => { }) test('active + matching instance id → ok', async () => { - await requestSession({ userId: 'u1', deps }) + await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps }) const row = deps.rows.get('u1')! row.status = 'active' row.admitted_at = deps._now() @@ -333,7 +350,7 @@ describe('checkSessionAdmissible', () => { }) test('active + wrong instance id → session_superseded', async () => { - await requestSession({ userId: 'u1', deps }) + await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps }) const row = deps.rows.get('u1')! row.status = 'active' row.admitted_at = deps._now() @@ -351,7 +368,7 @@ describe('checkSessionAdmissible', () => { test('missing instance id → freebuff_update_required (pre-waiting-room CLI)', async () => { // Classified up front regardless of row state: old clients never send an // id, so we surface a distinct code that maps to 426 Upgrade Required. - await requestSession({ userId: 'u1', deps }) + await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps }) const row = deps.rows.get('u1')! 
row.status = 'active' row.admitted_at = deps._now() @@ -367,7 +384,7 @@ describe('checkSessionAdmissible', () => { }) test('active inside grace window → ok with reason=draining', async () => { - await requestSession({ userId: 'u1', deps }) + await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps }) const row = deps.rows.get('u1')! row.status = 'active' row.admitted_at = new Date(deps._now().getTime() - SESSION_LEN - 60_000) @@ -385,7 +402,7 @@ describe('checkSessionAdmissible', () => { }) test('active past the grace window → session_expired', async () => { - await requestSession({ userId: 'u1', deps }) + await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps }) const row = deps.rows.get('u1')! row.status = 'active' row.admitted_at = new Date(deps._now().getTime() - 2 * SESSION_LEN) @@ -401,7 +418,7 @@ describe('checkSessionAdmissible', () => { }) test('draining + wrong instance id still rejects with session_superseded', async () => { - await requestSession({ userId: 'u1', deps }) + await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps }) const row = deps.rows.get('u1')! 
row.status = 'active' row.admitted_at = new Date(deps._now().getTime() - SESSION_LEN - 60_000) @@ -420,7 +437,7 @@ describe('checkSessionAdmissible', () => { describe('endUserSession', () => { test('removes row', async () => { const deps = makeDeps() - await requestSession({ userId: 'u1', deps }) + await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps }) expect(deps.rows.has('u1')).toBe(true) await endUserSession({ userId: 'u1', deps }) expect(deps.rows.has('u1')).toBe(false) @@ -432,6 +449,7 @@ describe('endUserSession', () => { user_id: 'u1', status: 'active', active_instance_id: 'x', + model: DEFAULT_MODEL, queued_at: new Date(), admitted_at: null, expires_at: null, diff --git a/web/src/server/free-session/__tests__/session-view.test.ts b/web/src/server/free-session/__tests__/session-view.test.ts index 681072b30e..8a33e45add 100644 --- a/web/src/server/free-session/__tests__/session-view.test.ts +++ b/web/src/server/free-session/__tests__/session-view.test.ts @@ -7,12 +7,15 @@ import type { InternalSessionRow } from '../types' const WAIT_PER_SPOT_MS = 24_000 const GRACE_MS = 30 * 60_000 +const TEST_MODEL = 'z-ai/glm-5.1' + function row(overrides: Partial = {}): InternalSessionRow { const now = new Date('2026-04-17T12:00:00Z') return { user_id: 'u1', status: 'queued', active_instance_id: 'inst-1', + model: TEST_MODEL, queued_at: now, admitted_at: null, expires_at: null, @@ -65,6 +68,7 @@ describe('toSessionStateResponse', () => { expect(view).toEqual({ status: 'queued', instanceId: 'inst-1', + model: TEST_MODEL, position: 3, queueDepth: 10, estimatedWaitMs: 2 * WAIT_PER_SPOT_MS, @@ -85,6 +89,7 @@ describe('toSessionStateResponse', () => { expect(view).toEqual({ status: 'active', instanceId: 'inst-1', + model: TEST_MODEL, admittedAt: admittedAt.toISOString(), expiresAt: expiresAt.toISOString(), remainingMs: 50 * 60_000, diff --git a/web/src/server/free-session/admission.ts b/web/src/server/free-session/admission.ts index 7c0097c70d..bc187ee9cb 100644 --- 
a/web/src/server/free-session/admission.ts +++ b/web/src/server/free-session/admission.ts @@ -1,3 +1,5 @@ +import { FREEBUFF_MODELS } from '@codebuff/common/constants/freebuff-models' + import { ADMISSION_TICK_MS, getSessionGraceMs, @@ -13,9 +15,10 @@ import { logger } from '@/util/logger' export interface AdmissionDeps { sweepExpired: (now: Date, graceMs: number) => Promise - queueDepth: () => Promise + queueDepth: (params: { model: string }) => Promise activeCount: () => Promise admitFromQueue: (params: { + model: string sessionLengthMs: number now: Date getFireworksHealth: () => Promise @@ -24,6 +27,8 @@ export interface AdmissionDeps { /** Plain values, not thunks — these never change at runtime. */ sessionLengthMs: number graceMs: number + /** Models to run admission ticks for. Defaults to the full model registry. */ + models?: readonly string[] now?: () => Date } @@ -49,7 +54,8 @@ const defaultDeps: AdmissionDeps = { export interface AdmissionTickResult { expired: number admitted: number - queueDepth: number + /** Per-model queue depth at the end of the tick. */ + queueDepthByModel: Record activeCount: number skipped: FireworksHealth | null } @@ -57,16 +63,15 @@ export interface AdmissionTickResult { /** * Run a single admission tick: * 1. Expire sessions past their expires_at + grace. - * 2. Attempt to admit one queued user. Admission proceeds only when the - * upstream health probe reports `healthy`; `degraded` and `unhealthy` - * both pause admission so the deployment can catch up. + * 2. For each model, attempt to admit one queued user. Admission proceeds + * only when the upstream health probe reports `healthy`; `degraded` and + * `unhealthy` both pause admission so the deployment can catch up. * - * Admission drips at (1 / ADMISSION_TICK_MS), which drives utilization up - * slowly; once the probe stops returning `healthy`, step 2 halts admission - * until the upstream recovers. 
+ * Per-model admission means heavier models can sit cold without starving + * lighter ones. Admission still drips at (1 / ADMISSION_TICK_MS) per model. * * Returns counts for observability. Safe to call concurrently across pods — - * admitFromQueue takes an advisory xact lock. + * admitFromQueue takes a per-model advisory xact lock. */ export async function runAdmissionTick( deps: AdmissionDeps = defaultDeps, @@ -74,20 +79,36 @@ export async function runAdmissionTick( const now = (deps.now ?? (() => new Date()))() const expired = await deps.sweepExpired(now, deps.graceMs) - const { admitted, skipped } = await deps.admitFromQueue({ - sessionLengthMs: deps.sessionLengthMs, - now, - getFireworksHealth: deps.getFireworksHealth, - }) + const models = deps.models ?? FREEBUFF_MODELS.map((m) => m.id) + + // Run per-model admission in parallel — they only contend on independent + // advisory locks and a single update each. + const perModel = await Promise.all( + models.map(async (model) => { + const { admitted, skipped } = await deps.admitFromQueue({ + model, + sessionLengthMs: deps.sessionLengthMs, + now, + getFireworksHealth: deps.getFireworksHealth, + }) + const depth = await deps.queueDepth({ model }) + return { model, admittedCount: admitted.length, depth, skipped } + }), + ) + + const active = await deps.activeCount() + const totalAdmitted = perModel.reduce((s, r) => s + r.admittedCount, 0) + const queueDepthByModel = Object.fromEntries( + perModel.map((r) => [r.model, r.depth]), + ) + // Use the most-degraded skipped reason for the top-level result. They all + // come from the same shared probe so they'll usually agree anyway. + const skipped = perModel.find((r) => r.skipped)?.skipped ?? 
null - const [depth, active] = await Promise.all([ - deps.queueDepth(), - deps.activeCount(), - ]) return { expired, - admitted: admitted.length, - queueDepth: depth, + admitted: totalAdmitted, + queueDepthByModel, activeCount: active, skipped, } @@ -109,7 +130,7 @@ function runTick() { metric: 'freebuff_waiting_room', admitted: result.admitted, expired: result.expired, - queueDepth: result.queueDepth, + queueDepthByModel: result.queueDepthByModel, activeCount: result.activeCount, skipped: result.skipped, }, diff --git a/web/src/server/free-session/public-api.ts b/web/src/server/free-session/public-api.ts index 74af009cc9..f616a5165e 100644 --- a/web/src/server/free-session/public-api.ts +++ b/web/src/server/free-session/public-api.ts @@ -1,3 +1,8 @@ +import { + isFreebuffModelId as isSelectableFreebuffModel, + resolveFreebuffModel, +} from '@codebuff/common/constants/freebuff-models' + import { getSessionGraceMs, isWaitingRoomBypassedForEmail, @@ -5,6 +10,7 @@ import { } from './config' import { endSession, + FreeSessionModelLockedError, getSessionRow, joinOrTakeOver, queueDepth, @@ -17,10 +23,18 @@ import type { InternalSessionRow, SessionStateResponse } from './types' export interface SessionDeps { getSessionRow: (userId: string) => Promise - joinOrTakeOver: (params: { userId: string; now: Date }) => Promise + joinOrTakeOver: (params: { + userId: string + model: string + now: Date + }) => Promise endSession: (userId: string) => Promise - queueDepth: () => Promise - queuePositionFor: (params: { userId: string; queuedAt: Date }) => Promise + queueDepth: (params: { model: string }) => Promise + queuePositionFor: (params: { + userId: string + model: string + queuedAt: Date + }) => Promise isWaitingRoomEnabled: () => boolean /** Plain values, not getters: these never change at runtime. The deps * interface uses values rather than thunks so tests can pass numbers @@ -54,8 +68,12 @@ async function viewForRow( const [position, depth] = row.status === 'queued' ? 
await Promise.all([ - deps.queuePositionFor({ userId, queuedAt: row.queued_at }), - deps.queueDepth(), + deps.queuePositionFor({ + userId, + model: row.model, + queuedAt: row.queued_at, + }), + deps.queueDepth({ model: row.model }), ]) : [0, 0] return toSessionStateResponse({ @@ -67,23 +85,42 @@ async function viewForRow( }) } +export type RequestSessionResult = + | SessionStateResponse + | { + /** User asked to queue/switch to a different model while their active + * session is still bound to another. The CLI must end the existing + * session first (DELETE /session) before re-queueing. */ + status: 'model_locked' + currentModel: string + requestedModel: string + } + /** - * Client calls this on CLI startup. Semantics: - * - Waiting room disabled → { status: 'disabled' } - * - No existing session → create queued row, fresh instance_id - * - Existing active (unexpired) → rotate instance_id (takeover), preserve state - * - Existing queued → rotate instance_id, preserve queue position - * - Existing expired → re-queue at the back with fresh instance_id + * Client calls this on CLI startup with the model they want to use. + * Semantics: + * - Waiting room disabled → { status: 'disabled' } (model still respected + * downstream by chat-completions) + * - No existing session → create queued row for `model`, fresh instance_id + * - Existing active (unexpired), same model → rotate instance_id (takeover) + * - Existing active (unexpired), different model → { status: 'model_locked' } + * - Existing queued, same model → rotate instance_id, preserve position + * - Existing queued, different model → switch to new model and join the + * back of that model's queue + * - Existing expired → re-queue at the back of `model`'s queue with fresh + * instance_id * - * `joinOrTakeOver` always returns a row that maps to a non-null view (queued - * or active-unexpired), so the cast below is sound. 
+ * `joinOrTakeOver` (when it doesn't throw) always returns a row that maps to + * a non-null view (queued or active-unexpired), so the cast below is sound. */ export async function requestSession(params: { userId: string + model: string userEmail?: string | null | undefined deps?: SessionDeps -}): Promise { +}): Promise { const deps = params.deps ?? defaultDeps + const model = resolveFreebuffModel(params.model) if ( !deps.isWaitingRoomEnabled() || isWaitingRoomBypassedForEmail(params.userEmail) @@ -91,7 +128,23 @@ export async function requestSession(params: { return { status: 'disabled' } } - const row = await deps.joinOrTakeOver({ userId: params.userId, now: nowOf(deps) }) + let row: InternalSessionRow + try { + row = await deps.joinOrTakeOver({ + userId: params.userId, + model, + now: nowOf(deps), + }) + } catch (err) { + if (err instanceof FreeSessionModelLockedError) { + return { + status: 'model_locked', + currentModel: err.currentModel, + requestedModel: model, + } + } + throw err + } const view = await viewForRow(params.userId, deps, row) if (!view) { throw new Error( @@ -171,6 +224,9 @@ export type SessionGateResult = | { ok: false; code: 'waiting_room_queued'; message: string } | { ok: false; code: 'session_superseded'; message: string } | { ok: false; code: 'session_expired'; message: string } + /** Active session locked to a different model than the one requested. The + * CLI should restart its session (DELETE then POST) to switch models. */ + | { ok: false; code: 'session_model_mismatch'; message: string } /** Pre-waiting-room CLI that never sends an instance id. Surfaced as a * distinct code so the caller can prompt the user to restart. */ | { ok: false; code: 'freebuff_update_required'; message: string } @@ -190,6 +246,10 @@ export async function checkSessionAdmissible(params: { userId: string userEmail?: string | null | undefined claimedInstanceId: string | null | undefined + /** Model the chat-completions request is for. 
When provided, the gate + * rejects requests whose model doesn't match the active session's model + * so a stale CLI tab can't slip a request through under the wrong model. */ + requestedModel?: string | null | undefined deps?: SessionDeps }): Promise { const deps = params.deps ?? defaultDeps @@ -254,6 +314,23 @@ export async function checkSessionAdmissible(params: { } } + // Reject requests for a model the session isn't bound to. Sub-agents may + // legitimately use other models (Gemini Flash etc.) so we only enforce this + // when the caller provides a requestedModel — and only against the set of + // selectable freebuff models (resolveFreebuffModel returns the canonical id + // or the default for anything outside the registry). + if ( + params.requestedModel && + isSelectableFreebuffModel(params.requestedModel) && + params.requestedModel !== row.model + ) { + return { + ok: false, + code: 'session_model_mismatch', + message: `This session is bound to ${row.model}; restart freebuff to switch models.`, + } + } + if (expiresAtMs > nowMs) { return { ok: true, diff --git a/web/src/server/free-session/session-view.ts b/web/src/server/free-session/session-view.ts index 582e788148..2d12ce8e3f 100644 --- a/web/src/server/free-session/session-view.ts +++ b/web/src/server/free-session/session-view.ts @@ -26,6 +26,7 @@ export function toSessionStateResponse(params: { return { status: 'active', instanceId: row.active_instance_id, + model: row.model, admittedAt: (row.admitted_at ?? 
row.created_at).toISOString(), expiresAt: row.expires_at.toISOString(), remainingMs: expiresAtMs - nowMs, @@ -48,6 +49,7 @@ export function toSessionStateResponse(params: { return { status: 'queued', instanceId: row.active_instance_id, + model: row.model, position, queueDepth, estimatedWaitMs: estimateWaitMs({ position }), diff --git a/web/src/server/free-session/store.ts b/web/src/server/free-session/store.ts index 34f4ad7124..68b86cf2be 100644 --- a/web/src/server/free-session/store.ts +++ b/web/src/server/free-session/store.ts @@ -26,21 +26,37 @@ export async function getSessionRow( * Join the queue (or take over an existing row with a new instance_id). * * Semantics: - * - If no row exists: insert status=queued, fresh instance_id, queued_at=now. - * - If row exists and active+unexpired: rotate instance_id (takeover), - * preserve status/admitted_at/expires_at. - * - If row exists and expired: reset to queued with fresh instance_id - * and fresh queued_at — effectively re-queue at the back. - * - If row exists and already queued: rotate instance_id, preserve - * queued_at so user keeps their place in line. + * - If no row exists: insert status=queued for `model`, fresh instance_id, + * queued_at=now. + * - If row exists and active+unexpired and model matches: rotate + * instance_id (takeover), preserve status/admitted_at/expires_at. + * - If row exists and active+unexpired but the user picked a different + * model: reject with `model_locked` — the active session is bound to the + * model it was admitted with. The CLI should end the session first. + * - If row exists and expired: reset to queued with fresh instance_id, + * fresh queued_at, and the requested model — effectively re-queue at + * the back of the new model's queue. + * - If row exists and already queued: if model matches, rotate + * instance_id and preserve queued_at; if model differs, switch model + * and reset queued_at to now (move to back of the new queue). 
* * Never trusts client-supplied timestamps or instance ids. */ +export class FreeSessionModelLockedError extends Error { + constructor(public readonly currentModel: string) { + super( + `Active session is locked to model ${currentModel}; end the session before switching.`, + ) + this.name = 'FreeSessionModelLockedError' + } +} + export async function joinOrTakeOver(params: { userId: string + model: string now: Date }): Promise { - const { userId, now } = params + const { userId, model, now } = params const nextInstanceId = newInstanceId() // postgres-js does NOT coerce raw JS Date values when they're interpolated @@ -54,12 +70,21 @@ export async function joinOrTakeOver(params: { // column references resolve to the existing row. // // Decision table (pre-update state → post-update state): - // no row → INSERT: status=queued, queued_at=now - // active & expires_at > now → rotate instance_id only (takeover) - // queued → rotate instance_id, preserve queued_at + // no row → INSERT: status=queued, queued_at=now, + // model=$model + // active & expires_at > now → + // same model: rotate instance_id only (takeover) + // diff model: throw FreeSessionModelLockedError post-fetch (we can't + // easily express the reject-without-update branch in a single UPSERT; + // see below) + // queued, same model → rotate instance_id, preserve queued_at + // queued, diff model → switch model, reset queued_at=now + // (move to back of new queue) // active & expired → re-queue at back: status=queued, - // queued_at=now, admitted_at/expires_at=null + // queued_at=now, model=$model, + // admitted_at/expires_at=null const activeUnexpired = sql`${schema.freeSession.status} = 'active' AND ${schema.freeSession.expires_at} > ${nowIso}` + const sameModel = sql`${schema.freeSession.model} = ${model}` const [row] = await db .insert(schema.freeSession) @@ -67,6 +92,7 @@ export async function joinOrTakeOver(params: { user_id: userId, status: 'queued', active_instance_id: nextInstanceId, + model, 
queued_at: now, created_at: now, updated_at: now, @@ -74,12 +100,24 @@ export async function joinOrTakeOver(params: { .onConflictDoUpdate({ target: schema.freeSession.user_id, set: { - active_instance_id: nextInstanceId, + // For active+unexpired rows the instance_id only rotates if the model + // matches; otherwise we keep the existing id so the active session + // stays valid for the other CLI/tab. We then detect the mismatch + // post-update and throw, so the caller can return a clean error. + active_instance_id: sql`CASE + WHEN ${activeUnexpired} AND NOT (${sameModel}) THEN ${schema.freeSession.active_instance_id} + ELSE ${nextInstanceId} + END`, updated_at: now, status: sql`CASE WHEN ${activeUnexpired} THEN 'active'::free_session_status ELSE 'queued'::free_session_status END`, + // Keep model when active+unexpired (locked); switch otherwise. + model: sql`CASE + WHEN ${activeUnexpired} THEN ${schema.freeSession.model} + ELSE ${model} + END`, queued_at: sql`CASE - WHEN ${schema.freeSession.status} = 'queued' THEN ${schema.freeSession.queued_at} WHEN ${activeUnexpired} THEN ${schema.freeSession.queued_at} + WHEN ${schema.freeSession.status} = 'queued' AND ${sameModel} THEN ${schema.freeSession.queued_at} ELSE ${nowIso} END`, admitted_at: sql`CASE WHEN ${activeUnexpired} THEN ${schema.freeSession.admitted_at} ELSE NULL END`, @@ -91,6 +129,13 @@ export async function joinOrTakeOver(params: { if (!row) { throw new Error(`joinOrTakeOver returned no row for user=${userId}`) } + + // Active sessions are locked to their original model — surface a typed + // error so the public API can translate it into a structured response. 
+ if (row.status === 'active' && row.model !== model) { + throw new FreeSessionModelLockedError(row.model) + } + return row as InternalSessionRow } @@ -100,11 +145,16 @@ export async function endSession(userId: string): Promise { .where(eq(schema.freeSession.user_id, userId)) } -export async function queueDepth(): Promise { +export async function queueDepth(params: { model: string }): Promise { const rows = await db .select({ n: count() }) .from(schema.freeSession) - .where(eq(schema.freeSession.status, 'queued')) + .where( + and( + eq(schema.freeSession.status, 'queued'), + eq(schema.freeSession.model, params.model), + ), + ) return Number(rows[0]?.n ?? 0) } @@ -118,6 +168,7 @@ export async function activeCount(): Promise { export async function queuePositionFor(params: { userId: string + model: string queuedAt: Date }): Promise { const rows = await db @@ -126,6 +177,7 @@ export async function queuePositionFor(params: { .where( and( eq(schema.freeSession.status, 'queued'), + eq(schema.freeSession.model, params.model), sql`(${schema.freeSession.queued_at}, ${schema.freeSession.user_id}) <= (${params.queuedAt.toISOString()}::timestamptz, ${params.userId})`, ), ) @@ -152,8 +204,12 @@ export async function sweepExpired(now: Date, graceMs: number): Promise } /** - * Atomically admit one queued user, gated by the upstream health probe and - * guarded by an advisory xact lock so only one pod admits per tick. + * Atomically admit one queued user for a specific model, gated by the + * upstream health probe and guarded by an advisory xact lock so only one pod + * admits per tick (per model). + * + * Each model has its own queue; this admits the longest-waiting user from the + * given model's queue. * * Return semantics: * - `{ admitted: [row], skipped: null }` — admitted one user @@ -166,11 +222,12 @@ export async function sweepExpired(now: Date, graceMs: number): Promise * the transaction so a slow probe doesn't hold a Postgres connection open. 
*/ export async function admitFromQueue(params: { + model: string sessionLengthMs: number now: Date getFireworksHealth: () => Promise }): Promise<{ admitted: InternalSessionRow[]; skipped: FireworksHealth | null }> { - const { sessionLengthMs, now, getFireworksHealth } = params + const { model, sessionLengthMs, now, getFireworksHealth } = params const health = await getFireworksHealth() if (health !== 'healthy') { @@ -178,8 +235,11 @@ export async function admitFromQueue(params: { } return db.transaction(async (tx) => { + // Per-model lock: hashing the model into the lock id lets distinct model + // queues admit concurrently while still serializing within a single queue. + const modelLockId = FREEBUFF_ADMISSION_LOCK_ID + hashStringToInt32(model) const lockResult = await tx.execute<{ acquired: unknown }>( - sql`SELECT pg_try_advisory_xact_lock(${FREEBUFF_ADMISSION_LOCK_ID}) AS acquired`, + sql`SELECT pg_try_advisory_xact_lock(${modelLockId}) AS acquired`, ) if ( !coerceBool( @@ -192,7 +252,12 @@ export async function admitFromQueue(params: { const candidates = await tx .select({ user_id: schema.freeSession.user_id }) .from(schema.freeSession) - .where(eq(schema.freeSession.status, 'queued')) + .where( + and( + eq(schema.freeSession.status, 'queued'), + eq(schema.freeSession.model, model), + ), + ) .orderBy(asc(schema.freeSession.queued_at), asc(schema.freeSession.user_id)) .limit(1) .for('update', { skipLocked: true }) @@ -220,3 +285,12 @@ export async function admitFromQueue(params: { return { admitted: admitted as InternalSessionRow[], skipped: null } }) } + +/** Stable 31-bit hash so model-keyed advisory lock ids don't overflow int4. 
*/ +function hashStringToInt32(s: string): number { + let h = 0 + for (let i = 0; i < s.length; i++) { + h = (h * 31 + s.charCodeAt(i)) | 0 + } + return Math.abs(h) % 0x40000000 +} diff --git a/web/src/server/free-session/types.ts b/web/src/server/free-session/types.ts index 2f56e2c4d3..f46a3ad52d 100644 --- a/web/src/server/free-session/types.ts +++ b/web/src/server/free-session/types.ts @@ -15,6 +15,8 @@ export interface InternalSessionRow { user_id: string status: FreeSessionStatus active_instance_id: string + /** Freebuff model id this row is queued for (or locked to, once active). */ + model: string queued_at: Date admitted_at: Date | null expires_at: Date | null From ba8eaf21add590f2395f6c06b842383299737cc3 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Mon, 20 Apr 2026 14:52:36 -0700 Subject: [PATCH 02/10] Rename /queue to /end-session and drop aliases Co-Authored-By: Claude Opus 4.7 --- cli/src/commands/command-registry.ts | 5 ++--- cli/src/data/slash-commands.ts | 7 +++---- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/cli/src/commands/command-registry.ts b/cli/src/commands/command-registry.ts index 81a779c76e..e1fb363d83 100644 --- a/cli/src/commands/command-registry.ts +++ b/cli/src/commands/command-registry.ts @@ -612,12 +612,11 @@ const ALL_COMMANDS: CommandDefinition[] = [ clearInput(params) }, }), - // /queue (freebuff-only) — end the active session early and re-queue. The + // /end-session (freebuff-only) — end the active session early and re-queue. The // hook flips status from 'active' → 'queued', which unmounts and // mounts , where the user can pick a different model. 
defineCommand({ - name: 'queue', - aliases: ['rejoin', 'switch'], + name: 'end-session', handler: (params) => { params.setMessages((prev) => [ ...prev, diff --git a/cli/src/data/slash-commands.ts b/cli/src/data/slash-commands.ts index 198a383f2a..fd2454087e 100644 --- a/cli/src/data/slash-commands.ts +++ b/cli/src/data/slash-commands.ts @@ -47,7 +47,7 @@ const FREEBUFF_REMOVED_COMMAND_IDS = new Set([ const FREEBUFF_ONLY_COMMAND_IDS = new Set([ 'connect', 'plan', - 'queue', + 'end-session', ]) const ALL_SLASH_COMMANDS: SlashCommand[] = [ @@ -186,10 +186,9 @@ const ALL_SLASH_COMMANDS: SlashCommand[] = [ description: 'Toggle between light and dark mode', }, { - id: 'queue', - label: 'queue', + id: 'end-session', + label: 'end-session', description: 'End your free session and return to the waiting room (lets you switch model)', - aliases: ['rejoin', 'switch'], }, { id: 'logout', From 10cb2709d91a7566157e0365d625bf597604b8c0 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Mon, 20 Apr 2026 14:59:21 -0700 Subject: [PATCH 03/10] Address review: hide /end-session in non-freebuff, recover from model_locked - Add end-session to FREEBUFF_ONLY_COMMANDS so non-freebuff users can't invoke it (would have shown a confusing "returning to waiting room" message with no underlying state to act on). - When the waiting room receives model_locked from a switch attempt that raced with admission, silently revert the local model selection to the active session's model and re-tick. Previously polling halted and the screen had no render branch, leaving the UI blank. 
Co-Authored-By: Claude Opus 4.7 --- cli/src/commands/command-registry.ts | 1 + cli/src/hooks/use-freebuff-session.ts | 11 +++++++++++ 2 files changed, 12 insertions(+) diff --git a/cli/src/commands/command-registry.ts b/cli/src/commands/command-registry.ts index e1fb363d83..28ab22f590 100644 --- a/cli/src/commands/command-registry.ts +++ b/cli/src/commands/command-registry.ts @@ -179,6 +179,7 @@ const FREEBUFF_REMOVED_COMMANDS = new Set([ const FREEBUFF_ONLY_COMMANDS = new Set([ 'connect', 'plan', + 'end-session', ]) const ALL_COMMANDS: CommandDefinition[] = [ diff --git a/cli/src/hooks/use-freebuff-session.ts b/cli/src/hooks/use-freebuff-session.ts index 5c1954c3db..76ed423696 100644 --- a/cli/src/hooks/use-freebuff-session.ts +++ b/cli/src/hooks/use-freebuff-session.ts @@ -317,6 +317,17 @@ export function useFreebuffSession(): UseFreebuffSessionResult { if (cancelled) return hasPosted = true + // Race recovery: user picked a different model in the waiting room at + // the exact moment the server admitted them with the original model. + // Silently revert the local selection and re-tick so the next call + // (a GET) lands the actual active session. Users who really want to + // switch can /end-session deliberately. + if (next.status === 'model_locked') { + useFreebuffModelStore.getState().setSelectedModel(next.currentModel) + schedule(0) + return + } + if (previousStatus === 'queued' && next.status === 'active') { playAdmissionSound() } From 06419bdb30499c268d830d743299cd3915bec265 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Mon, 20 Apr 2026 15:01:57 -0700 Subject: [PATCH 04/10] Fix admission tick tests for per-model admission The admission tick now iterates per registered model, so tests that asserted admitted: 1 received 2 (one per model). Default the test helper to a single model so the existing assertions stay crisp and won't drift as more production models are added. 
Co-Authored-By: Claude Opus 4.7 --- web/src/server/free-session/__tests__/admission.test.ts | 3 +++ 1 file changed, 3 insertions(+) diff --git a/web/src/server/free-session/__tests__/admission.test.ts b/web/src/server/free-session/__tests__/admission.test.ts index a10a297132..72d3e4f90b 100644 --- a/web/src/server/free-session/__tests__/admission.test.ts +++ b/web/src/server/free-session/__tests__/admission.test.ts @@ -28,6 +28,9 @@ function makeAdmissionDeps(overrides: Partial = {}): AdmissionDep sessionLengthMs: 60 * 60 * 1000, graceMs: 30 * 60 * 1000, now: () => NOW, + // Default to a single model so per-tick assertions (admitted: 1) stay + // crisp regardless of how many production models are registered. + models: ['test-model'], ...overrides, } return deps From a1b3b280d50d04c7b6687b3975fc9f4788adf53a Mon Sep 17 00:00:00 2001 From: James Grugett Date: Mon, 20 Apr 2026 15:05:53 -0700 Subject: [PATCH 05/10] Address second review pass on freebuff model selector MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Drop dead x-freebuff-model header on GET — the server only reads it on POST, and tick() always POSTs first so GET-before-POST never happens. - Derive FREEBUFF_MODEL_OVERRIDABLE_AGENT_IDS from the server's FREE_MODE_AGENT_MODELS (agents whose allowlist includes every freebuff model) so adding a new model doesn't require updating two lists. - Extract shouldReleaseSlot() — DELETE-eligibility predicate was inlined in two places. - Probe Fireworks once per admission tick instead of N times (N = number of models). Adds a TODO for when we add a non-Fireworks model. - Tighten model-selector key handler to /^[1-9]$/ so "1abc" isn't treated as 1. - Make FREEBUFF_MODELS a literal tuple so isFreebuffModelId narrows to the actual id union instead of plain string. 
Co-Authored-By: Claude Opus 4.7 --- .../components/freebuff-model-selector.tsx | 8 ++--- cli/src/hooks/use-freebuff-session.ts | 36 ++++++++++--------- cli/src/utils/local-agent-registry.ts | 21 ++++++----- common/src/constants/freebuff-models.ts | 16 ++++++--- .../app/api/v1/freebuff/session/_handlers.ts | 5 +-- web/src/server/free-session/admission.ts | 10 ++++-- 6 files changed, 54 insertions(+), 42 deletions(-) diff --git a/cli/src/components/freebuff-model-selector.tsx b/cli/src/components/freebuff-model-selector.tsx index a3bbb3e040..c02c1d09e1 100644 --- a/cli/src/components/freebuff-model-selector.tsx +++ b/cli/src/components/freebuff-model-selector.tsx @@ -47,10 +47,10 @@ export const FreebuffModelSelector: React.FC = ({ useCallback( (key: KeyEvent) => { if (disabled || pending) return - const digit = parseInt(key.name ?? '', 10) - if (!Number.isFinite(digit) || digit < 1 || digit > FREEBUFF_MODELS.length) { - return - } + const name = key.name ?? '' + if (!/^[1-9]$/.test(name)) return + const digit = Number(name) + if (digit > FREEBUFF_MODELS.length) return const target = FREEBUFF_MODELS[digit - 1] if (target && target.id !== selectedModel) { key.preventDefault?.() diff --git a/cli/src/hooks/use-freebuff-session.ts b/cli/src/hooks/use-freebuff-session.ts index 76ed423696..8f9a2dba4c 100644 --- a/cli/src/hooks/use-freebuff-session.ts +++ b/cli/src/hooks/use-freebuff-session.ts @@ -20,9 +20,7 @@ const POLL_INTERVAL_ERROR_MS = 10_000 * account has rotated the id and respond with `{ status: 'superseded' }`. */ const FREEBUFF_INSTANCE_HEADER = 'x-freebuff-instance-id' -/** Header sent on POST/GET telling the server which model's queue we want. - * POST uses it to (re-)join that model's queue; GET uses it only for the - * rare GET-before-POST edge where there's no row yet. */ +/** Header sent on POST telling the server which model's queue to join. 
*/ const FREEBUFF_MODEL_HEADER = 'x-freebuff-model' /** Play the terminal bell so users get an audible notification on admission. */ @@ -48,7 +46,7 @@ async function callSession( if (method === 'GET' && opts.instanceId) { headers[FREEBUFF_INSTANCE_HEADER] = opts.instanceId } - if ((method === 'POST' || method === 'GET') && opts.model) { + if (method === 'POST' && opts.model) { headers[FREEBUFF_MODEL_HEADER] = opts.model } const resp = await fetch(sessionEndpoint(), { @@ -216,6 +214,21 @@ export function markFreebuffSessionEnded(): void { controller?.apply({ status: 'ended' }) } +/** True when the session row represents a server-side slot the caller is + * holding (queued, active, or in the post-expiry grace window with a live + * instance id). DELETE only matters in those states; otherwise we'd fire a + * spurious request the server has nothing to act on. */ +function shouldReleaseSlot( + current: FreebuffSessionResponse | null, +): boolean { + if (!current) return false + return ( + current.status === 'queued' || + current.status === 'active' || + (current.status === 'ended' && Boolean(current.instanceId)) + ) +} + /** * Best-effort DELETE of the caller's session row. Used by exit paths that * skip React unmount (process.exit on Ctrl+C) so the seat frees up quickly @@ -224,13 +237,7 @@ export function markFreebuffSessionEnded(): void { export async function endFreebuffSessionBestEffort(): Promise { if (!IS_FREEBUFF) return const current = useFreebuffSessionStore.getState().session - if (!current) return - // Only fire DELETE if we actually held a slot. - const heldSlot = - current.status === 'queued' || - current.status === 'active' || - (current.status === 'ended' && Boolean(current.instanceId)) - if (!heldSlot) return + if (!shouldReleaseSlot(current)) return const { token } = getAuthTokenDetails() if (!token) return try { @@ -389,12 +396,7 @@ export function useFreebuffSession(): UseFreebuffSessionResult { // Fire-and-forget DELETE. 
Only release if we actually held a slot so // we don't generate spurious DELETEs (e.g. HMR before POST completes). - if ( - current && - (current.status === 'queued' || - current.status === 'active' || - (current.status === 'ended' && current.instanceId)) - ) { + if (shouldReleaseSlot(current)) { callSession('DELETE', token).catch(() => {}) } setSession(null) diff --git a/cli/src/utils/local-agent-registry.ts b/cli/src/utils/local-agent-registry.ts index af11b13dc6..59206eb848 100644 --- a/cli/src/utils/local-agent-registry.ts +++ b/cli/src/utils/local-agent-registry.ts @@ -7,6 +7,9 @@ import { loadLocalAgents as sdkLoadLocalAgents, loadMCPConfigSync } from '@codeb import type { MCPConfig } from '@codebuff/common/types/mcp' +import { FREE_MODE_AGENT_MODELS } from '@codebuff/common/constants/free-agents' +import { FREEBUFF_MODELS } from '@codebuff/common/constants/freebuff-models' + import { getSelectedFreebuffModel } from '../state/freebuff-model-store' import { getProjectRoot } from '../project-files' import { AGENT_MODE_TO_ID, IS_FREEBUFF, type AgentMode } from './constants' @@ -14,15 +17,15 @@ import { logger } from './logger' import * as bundledAgentsModule from '../agents/bundled-agents.generated' /** Agents whose hardcoded model gets swapped out for the user's currently - * selected freebuff model. Each entry must also be allowlisted under the - * matching id in `FREE_MODE_AGENT_MODELS` (server-side check) for both - * glm-5.1 and minimax-m2.7 — otherwise the chat-completions endpoint will - * reject the request with `free_mode_invalid_agent_model`. */ -const FREEBUFF_MODEL_OVERRIDABLE_AGENT_IDS = new Set([ - 'base2-free', - 'editor-lite', - 'code-reviewer-lite', -]) + * selected freebuff model. Derived from the server's + * `FREE_MODE_AGENT_MODELS` — any agent whose allowlist contains every + * freebuff model is safe to retarget client-side without tripping the + * server's `free_mode_invalid_agent_model` rejection. 
*/ +const FREEBUFF_MODEL_OVERRIDABLE_AGENT_IDS: ReadonlySet = new Set( + Object.entries(FREE_MODE_AGENT_MODELS) + .filter(([, allowed]) => FREEBUFF_MODELS.every((m) => allowed.has(m.id))) + .map(([agentId]) => agentId), +) import type { AgentDefinition } from '@codebuff/common/templates/initial-agents-dir/types/agent-definition' diff --git a/common/src/constants/freebuff-models.ts b/common/src/constants/freebuff-models.ts index 02b2e9d689..6144c03a70 100644 --- a/common/src/constants/freebuff-models.ts +++ b/common/src/constants/freebuff-models.ts @@ -15,7 +15,7 @@ export interface FreebuffModelOption { tagline: string } -export const FREEBUFF_MODELS: readonly FreebuffModelOption[] = [ +export const FREEBUFF_MODELS = [ { id: 'z-ai/glm-5.1', displayName: 'GLM 5.1', @@ -26,16 +26,22 @@ export const FREEBUFF_MODELS: readonly FreebuffModelOption[] = [ displayName: 'MiniMax M2.7', tagline: 'Fast, lighter wait.', }, -] as const +] as const satisfies readonly FreebuffModelOption[] -export const DEFAULT_FREEBUFF_MODEL_ID: string = FREEBUFF_MODELS[0].id +export type FreebuffModelId = (typeof FREEBUFF_MODELS)[number]['id'] -export function isFreebuffModelId(id: string | null | undefined): id is string { +export const DEFAULT_FREEBUFF_MODEL_ID: FreebuffModelId = FREEBUFF_MODELS[0].id + +export function isFreebuffModelId( + id: string | null | undefined, +): id is FreebuffModelId { if (!id) return false return FREEBUFF_MODELS.some((m) => m.id === id) } -export function resolveFreebuffModel(id: string | null | undefined): string { +export function resolveFreebuffModel( + id: string | null | undefined, +): FreebuffModelId { return isFreebuffModelId(id) ? 
id : DEFAULT_FREEBUFF_MODEL_ID } diff --git a/web/src/app/api/v1/freebuff/session/_handlers.ts b/web/src/app/api/v1/freebuff/session/_handlers.ts index ddb2ebcb0d..073e7522f6 100644 --- a/web/src/app/api/v1/freebuff/session/_handlers.ts +++ b/web/src/app/api/v1/freebuff/session/_handlers.ts @@ -39,10 +39,7 @@ function countryBlockedResponse(req: NextRequest): NextResponse | null { /** Header the CLI uses to identify which instance is polling. Used by GET to * detect when another CLI on the same account has rotated the id. */ export const FREEBUFF_INSTANCE_HEADER = 'x-freebuff-instance-id' -/** Header the CLI uses to communicate which freebuff model it wants to be in - * the queue for. Used by both POST (join/switch) and GET (read-only — the - * server doesn't change the model on a GET, but uses the header for the - * rare GET-before-POST case where there's no row yet). */ +/** Header the CLI sends on POST to pick which model's queue to join. */ export const FREEBUFF_MODEL_HEADER = 'x-freebuff-model' export interface FreebuffSessionDeps { diff --git a/web/src/server/free-session/admission.ts b/web/src/server/free-session/admission.ts index bc187ee9cb..9de36d0557 100644 --- a/web/src/server/free-session/admission.ts +++ b/web/src/server/free-session/admission.ts @@ -81,6 +81,12 @@ export async function runAdmissionTick( const models = deps.models ?? FREEBUFF_MODELS.map((m) => m.id) + // Probe upstream health once per tick. Today every model shares a Fireworks + // deployment so a single probe gates them all — TODO: when we add a + // non-Fireworks model, plumb a model/deploymentId into the probe. + const health = await deps.getFireworksHealth() + const sharedHealth = async () => health + // Run per-model admission in parallel — they only contend on independent // advisory locks and a single update each. 
const perModel = await Promise.all( @@ -89,7 +95,7 @@ export async function runAdmissionTick( model, sessionLengthMs: deps.sessionLengthMs, now, - getFireworksHealth: deps.getFireworksHealth, + getFireworksHealth: sharedHealth, }) const depth = await deps.queueDepth({ model }) return { model, admittedCount: admitted.length, depth, skipped } @@ -101,8 +107,6 @@ export async function runAdmissionTick( const queueDepthByModel = Object.fromEntries( perModel.map((r) => [r.model, r.depth]), ) - // Use the most-degraded skipped reason for the top-level result. They all - // come from the same shared probe so they'll usually agree anyway. const skipped = perModel.find((r) => r.skipped)?.skipped ?? null return { From 469e99851090262e95f855c0bf3ff672c014aadb Mon Sep 17 00:00:00 2001 From: James Grugett Date: Mon, 20 Apr 2026 15:14:56 -0700 Subject: [PATCH 06/10] Gate admission on per-deployment Fireworks health MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the fleet-wide worst-of collapse with a per-model map. One probe per tick still covers every deployment (Fireworks returns them in a single response), but each model's admission now uses its own deployment's verdict — a degraded minimax no longer blocks glm. Models absent from FIREWORKS_DEPLOYMENT_MAP (serverless) default to 'healthy'; TODO for when they move to dedicated deployments. 
Co-Authored-By: Claude Opus 4.7 --- .../free-session/__tests__/admission.test.ts | 49 ++++++++++++--- .../__tests__/fireworks-health.test.ts | 24 ++++--- web/src/server/free-session/admission.ts | 32 +++++----- .../server/free-session/fireworks-health.ts | 62 ++++++++++--------- web/src/server/free-session/store.ts | 20 +++--- 5 files changed, 115 insertions(+), 72 deletions(-) diff --git a/web/src/server/free-session/__tests__/admission.test.ts b/web/src/server/free-session/__tests__/admission.test.ts index 72d3e4f90b..43fe11a4cd 100644 --- a/web/src/server/free-session/__tests__/admission.test.ts +++ b/web/src/server/free-session/__tests__/admission.test.ts @@ -3,9 +3,10 @@ import { describe, expect, test } from 'bun:test' import { runAdmissionTick } from '../admission' import type { AdmissionDeps } from '../admission' -import type { FireworksHealth } from '../fireworks-health' +import type { FireworksHealth, FleetHealth } from '../fireworks-health' const NOW = new Date('2026-04-17T12:00:00Z') +const TEST_MODEL = 'test-model' function makeAdmissionDeps(overrides: Partial = {}): AdmissionDeps & { calls: { admit: number } @@ -16,10 +17,9 @@ function makeAdmissionDeps(overrides: Partial = {}): AdmissionDep sweepExpired: async () => 0, queueDepth: async () => 0, activeCount: async () => 0, - getFireworksHealth: async () => 'healthy', - admitFromQueue: async ({ getFireworksHealth }) => { + getFleetHealth: async () => ({}), + admitFromQueue: async ({ health }) => { calls.admit += 1 - const health = await getFireworksHealth() if (health !== 'healthy') { return { admitted: [], skipped: health } } @@ -30,12 +30,16 @@ function makeAdmissionDeps(overrides: Partial = {}): AdmissionDep now: () => NOW, // Default to a single model so per-tick assertions (admitted: 1) stay // crisp regardless of how many production models are registered. 
- models: ['test-model'], + models: [TEST_MODEL], ...overrides, } return deps } +function fleet(health: FireworksHealth, model: string = TEST_MODEL): FleetHealth { + return { [model]: health } +} + describe('runAdmissionTick', () => { test('admits one user per tick when healthy', async () => { const deps = makeAdmissionDeps() @@ -44,18 +48,18 @@ describe('runAdmissionTick', () => { expect(result.skipped).toBeNull() }) - test('skips admission when Fireworks is degraded', async () => { + test('skips admission when the model deployment is degraded', async () => { const deps = makeAdmissionDeps({ - getFireworksHealth: async () => 'degraded' as FireworksHealth, + getFleetHealth: async () => fleet('degraded'), }) const result = await runAdmissionTick(deps) expect(result.admitted).toBe(0) expect(result.skipped).toBe('degraded') }) - test('skips admission when Fireworks is unhealthy', async () => { + test('skips admission when the model deployment is unhealthy', async () => { const deps = makeAdmissionDeps({ - getFireworksHealth: async () => 'unhealthy' as FireworksHealth, + getFleetHealth: async () => fleet('unhealthy'), }) const result = await runAdmissionTick(deps) expect(result.admitted).toBe(0) @@ -69,13 +73,38 @@ describe('runAdmissionTick', () => { swept = 3 return 3 }, - getFireworksHealth: async () => 'unhealthy' as FireworksHealth, + getFleetHealth: async () => fleet('unhealthy'), }) const result = await runAdmissionTick(deps) expect(swept).toBe(3) expect(result.expired).toBe(3) }) + test('admits per-model based on per-deployment health', async () => { + // Two models: 'good' is healthy, 'bad' is degraded. A single tick should + // admit 1 from 'good' and skip 'bad', surfacing the worst skip reason. 
+ const deps = makeAdmissionDeps({ + models: ['good', 'bad'], + getFleetHealth: async () => ({ good: 'healthy', bad: 'degraded' }), + }) + const result = await runAdmissionTick(deps) + expect(result.admitted).toBe(1) + expect(result.skipped).toBe('degraded') + }) + + test('absent fleet entry defaults to healthy (serverless model)', async () => { + // Model isn't in the fleet map (e.g. served via Fireworks serverless). + // Admission should proceed rather than stall waiting for a probe that + // will never include this deployment. + const deps = makeAdmissionDeps({ + models: ['serverless-model'], + getFleetHealth: async () => ({}), + }) + const result = await runAdmissionTick(deps) + expect(result.admitted).toBe(1) + expect(result.skipped).toBeNull() + }) + test('propagates expiry count and admit count together', async () => { const deps = makeAdmissionDeps({ sweepExpired: async () => 2, diff --git a/web/src/server/free-session/__tests__/fireworks-health.test.ts b/web/src/server/free-session/__tests__/fireworks-health.test.ts index 3475769cdc..b05fe8df9c 100644 --- a/web/src/server/free-session/__tests__/fireworks-health.test.ts +++ b/web/src/server/free-session/__tests__/fireworks-health.test.ts @@ -4,7 +4,7 @@ import { KV_BLOCKS_DEGRADED_FRACTION, KV_BLOCKS_UNHEALTHY_FRACTION, PREFILL_QUEUE_P90_DEGRADED_MS, - classify, + classifyOne, } from '../fireworks-health' type PromSample = { name: string; labels: Record; value: number } @@ -57,7 +57,7 @@ function errors(code: string, rate: number): PromSample { describe('fireworks health classifier', () => { test('healthy when queue well under the threshold', () => { const samples: PromSample[] = [kvBlocks(0.5), ...prefillQueueBuckets(150)] - expect(classify(samples, [DEPLOY])).toBe('healthy') + expect(classifyOne(samples, DEPLOY)).toBe('healthy') }) test('degraded when prefill queue p90 exceeds the threshold', () => { @@ -65,7 +65,7 @@ describe('fireworks health classifier', () => { kvBlocks(0.5), 
...prefillQueueBuckets(PREFILL_QUEUE_P90_DEGRADED_MS + 500), ] - expect(classify(samples, [DEPLOY])).toBe('degraded') + expect(classifyOne(samples, DEPLOY)).toBe('degraded') }) test('degraded when KV blocks cross the soft threshold (leading indicator)', () => { @@ -73,7 +73,7 @@ describe('fireworks health classifier', () => { kvBlocks(KV_BLOCKS_DEGRADED_FRACTION + 0.01), ...prefillQueueBuckets(300), ] - expect(classify(samples, [DEPLOY])).toBe('degraded') + expect(classifyOne(samples, DEPLOY)).toBe('degraded') }) test('unhealthy when KV blocks exceed the backstop', () => { @@ -81,7 +81,7 @@ describe('fireworks health classifier', () => { kvBlocks(KV_BLOCKS_UNHEALTHY_FRACTION + 0.005), ...prefillQueueBuckets(300), ] - expect(classify(samples, [DEPLOY])).toBe('unhealthy') + expect(classifyOne(samples, DEPLOY)).toBe('unhealthy') }) test('unhealthy when 5xx error fraction exceeds the threshold', () => { @@ -91,7 +91,7 @@ describe('fireworks health classifier', () => { requests(1), errors('500', 0.2), ] - expect(classify(samples, [DEPLOY])).toBe('unhealthy') + expect(classifyOne(samples, DEPLOY)).toBe('unhealthy') }) test('ignores high error fraction when traffic is too low to be meaningful', () => { @@ -101,14 +101,17 @@ describe('fireworks health classifier', () => { requests(0.05), errors('500', 0.05), ] - expect(classify(samples, [DEPLOY])).toBe('healthy') + expect(classifyOne(samples, DEPLOY)).toBe('healthy') }) test('healthy with no data yet (new deployment, no events)', () => { - expect(classify([], [DEPLOY])).toBe('healthy') + expect(classifyOne([], DEPLOY)).toBe('healthy') }) - test('worst-of across multiple deployments — unhealthy wins over degraded', () => { + test('classifies deployments independently — one bad deployment does not affect another', () => { + // The fleet probe builds the result by classifying each deployment + // separately, so a saturated 'other' deployment leaves DEPLOY's + // (only-degraded) verdict intact. 
const other = 'other123' const samples: PromSample[] = [ kvBlocks(0.5), @@ -119,6 +122,7 @@ describe('fireworks health classifier', () => { value: KV_BLOCKS_UNHEALTHY_FRACTION + 0.005, }, ] - expect(classify(samples, [DEPLOY, other])).toBe('unhealthy') + expect(classifyOne(samples, DEPLOY)).toBe('degraded') + expect(classifyOne(samples, other)).toBe('unhealthy') }) }) diff --git a/web/src/server/free-session/admission.ts b/web/src/server/free-session/admission.ts index 9de36d0557..4ec532daf8 100644 --- a/web/src/server/free-session/admission.ts +++ b/web/src/server/free-session/admission.ts @@ -6,10 +6,10 @@ import { getSessionLengthMs, isWaitingRoomEnabled, } from './config' -import { getFireworksHealth } from './fireworks-health' +import { getFleetHealth } from './fireworks-health' import { activeCount, admitFromQueue, queueDepth, sweepExpired } from './store' -import type { FireworksHealth } from './fireworks-health' +import type { FireworksHealth, FleetHealth } from './fireworks-health' import { logger } from '@/util/logger' @@ -21,9 +21,9 @@ export interface AdmissionDeps { model: string sessionLengthMs: number now: Date - getFireworksHealth: () => Promise + health: FireworksHealth }) => Promise<{ admitted: { user_id: string }[]; skipped: FireworksHealth | null }> - getFireworksHealth: () => Promise + getFleetHealth: () => Promise /** Plain values, not thunks — these never change at runtime. */ sessionLengthMs: number graceMs: number @@ -38,11 +38,13 @@ const defaultDeps: AdmissionDeps = { activeCount, admitFromQueue, // FREEBUFF_DEV_FORCE_ADMIT lets local `dev:freebuff` drive the full - // waiting-room → admitted → ended flow without a real upstream. - getFireworksHealth: + // waiting-room → admitted → ended flow without a real upstream. Returning + // an empty fleet means every model resolves to the absence-default of + // 'healthy' below. + getFleetHealth: process.env.FREEBUFF_DEV_FORCE_ADMIT === 'true' - ? async () => 'healthy' - : getFireworksHealth, + ? 
async () => ({}) + : getFleetHealth, get sessionLengthMs() { return getSessionLengthMs() }, @@ -81,21 +83,23 @@ export async function runAdmissionTick( const models = deps.models ?? FREEBUFF_MODELS.map((m) => m.id) - // Probe upstream health once per tick. Today every model shares a Fireworks - // deployment so a single probe gates them all — TODO: when we add a - // non-Fireworks model, plumb a model/deploymentId into the probe. - const health = await deps.getFireworksHealth() - const sharedHealth = async () => health + // One probe per tick covers every model — the Fireworks metrics endpoint + // returns all deployments in a single response. Models without a dedicated + // deployment (e.g. serverless) aren't in the map; treat their absence as + // 'healthy' so admission continues. TODO: when those models move to their + // own deployments, drop the absence-default and require an explicit entry. + const fleet = await deps.getFleetHealth() // Run per-model admission in parallel — they only contend on independent // advisory locks and a single update each. const perModel = await Promise.all( models.map(async (model) => { + const health = fleet[model] ?? 'healthy' const { admitted, skipped } = await deps.admitFromQueue({ model, sessionLengthMs: deps.sessionLengthMs, now, - getFireworksHealth: sharedHealth, + health, }) const depth = await deps.queueDepth({ model }) return { model, admittedCount: admitted.length, depth, skipped } diff --git a/web/src/server/free-session/fireworks-health.ts b/web/src/server/free-session/fireworks-health.ts index cef6be01c1..15f1bb124c 100644 --- a/web/src/server/free-session/fireworks-health.ts +++ b/web/src/server/free-session/fireworks-health.ts @@ -52,25 +52,35 @@ const HEALTH_CHECK_TIMEOUT_MS = 5_000 * pod hits the endpoint at most ~2.4/min. */ const HEALTH_CACHE_TTL_MS = 25_000 -type CacheEntry = { expiresAt: number; health: FireworksHealth } +/** Map of model id → FireworksHealth. 
Only includes models that have a + * dedicated Fireworks deployment in `FIREWORKS_DEPLOYMENT_MAP`. Models served + * via the Fireworks serverless API (no deployment id) are not present — + * callers should treat their absence as 'healthy' for now. + * TODO: when serverless models move to dedicated deployments, drop the + * absence-means-healthy fallback at the call site. */ +export type FleetHealth = Record + +type CacheEntry = { expiresAt: number; fleet: FleetHealth } let cache: CacheEntry | null = null export function __resetFireworksHealthCacheForTests(): void { cache = null } -export async function getFireworksHealth(): Promise { +export async function getFleetHealth(): Promise { const now = Date.now() - if (cache && cache.expiresAt > now) return cache.health + if (cache && cache.expiresAt > now) return cache.fleet - const health = await probe() - cache = { expiresAt: now + HEALTH_CACHE_TTL_MS, health } - return health + const fleet = await probe() + cache = { expiresAt: now + HEALTH_CACHE_TTL_MS, fleet } + return fleet } -async function probe(): Promise { +async function probe(): Promise { const apiKey = env.FIREWORKS_API_KEY - if (!apiKey) return 'unhealthy' + // Mark every deployment-mapped model unhealthy when we can't authenticate + // the probe. Serverless models (absent from the map) keep their default. 
+ if (!apiKey) return allDeploymentsAt('unhealthy') const controller = new AbortController() const timeout = setTimeout(() => controller.abort(), HEALTH_CHECK_TIMEOUT_MS) @@ -81,18 +91,15 @@ async function probe(): Promise { headers: { Authorization: `Bearer ${apiKey}` }, signal: controller.signal, }) - if (!response.ok) return 'unhealthy' + if (!response.ok) return allDeploymentsAt('unhealthy') body = await response.text() } catch { - return 'unhealthy' + return allDeploymentsAt('unhealthy') } finally { clearTimeout(timeout) } - const deploymentIds = Object.values(FIREWORKS_DEPLOYMENT_MAP).map( - (name) => name.split('/').pop()!, - ) - if (deploymentIds.length === 0) return 'healthy' + if (Object.keys(FIREWORKS_DEPLOYMENT_MAP).length === 0) return {} const { samples, newestTimestampMs } = parsePrometheus(body) @@ -104,27 +111,26 @@ async function probe(): Promise { { ageMs: Date.now() - newestTimestampMs }, '[FireworksHealth] unhealthy: metrics snapshot is stale', ) - return 'unhealthy' + return allDeploymentsAt('unhealthy') } - return classify(samples, deploymentIds) + const fleet: FleetHealth = {} + for (const [modelId, deploymentName] of Object.entries(FIREWORKS_DEPLOYMENT_MAP)) { + const deploymentId = deploymentName.split('/').pop()! + fleet[modelId] = classifyOne(samples, deploymentId) + } + return fleet } -/** Treat the whole fleet as degraded/unhealthy if any single deployment is. 
*/ -export function classify( - samples: PromSample[], - deploymentIds: string[], -): FireworksHealth { - let worst: FireworksHealth = 'healthy' - for (const deploymentId of deploymentIds) { - const h = classifyOne(samples, deploymentId) - if (h === 'unhealthy') return 'unhealthy' - if (h === 'degraded') worst = 'degraded' +function allDeploymentsAt(health: FireworksHealth): FleetHealth { + const out: FleetHealth = {} + for (const modelId of Object.keys(FIREWORKS_DEPLOYMENT_MAP)) { + out[modelId] = health } - return worst + return out } -function classifyOne(samples: PromSample[], deploymentId: string): FireworksHealth { +export function classifyOne(samples: PromSample[], deploymentId: string): FireworksHealth { const kvBlocks = scalarFor( samples, 'generator_kv_blocks_fraction:avg_by_deployment', diff --git a/web/src/server/free-session/store.ts b/web/src/server/free-session/store.ts index 68b86cf2be..93ddf7aa36 100644 --- a/web/src/server/free-session/store.ts +++ b/web/src/server/free-session/store.ts @@ -205,31 +205,31 @@ export async function sweepExpired(now: Date, graceMs: number): Promise /** * Atomically admit one queued user for a specific model, gated by the - * upstream health probe and guarded by an advisory xact lock so only one pod - * admits per tick (per model). + * upstream health for that model's deployment and guarded by an advisory + * xact lock so only one pod admits per tick (per model). * - * Each model has its own queue; this admits the longest-waiting user from the - * given model's queue. + * Each model has its own queue; this admits the longest-waiting user from + * the given model's queue. Health is passed in (resolved by the caller from + * a single fleet probe) rather than fetched here, so a slow probe doesn't + * hold a Postgres connection open. 
* * Return semantics: * - `{ admitted: [row], skipped: null }` — admitted one user * - `{ admitted: [], skipped: null }` — empty queue or another pod held the lock - * - `{ admitted: [], skipped: 'degraded' | 'unhealthy' }` — probe blocked admission + * - `{ admitted: [], skipped: 'degraded' | 'unhealthy' }` — health blocked admission * * Only `healthy` admits; `degraded` and `unhealthy` both pause admission (the * distinction is for observability — degraded means "upstream loaded", - * unhealthy means "upstream unreachable or saturated"). The probe runs before - * the transaction so a slow probe doesn't hold a Postgres connection open. + * unhealthy means "upstream unreachable or saturated"). */ export async function admitFromQueue(params: { model: string sessionLengthMs: number now: Date - getFireworksHealth: () => Promise + health: FireworksHealth }): Promise<{ admitted: InternalSessionRow[]; skipped: FireworksHealth | null }> { - const { model, sessionLengthMs, now, getFireworksHealth } = params + const { model, sessionLengthMs, now, health } = params - const health = await getFireworksHealth() if (health !== 'healthy') { return { admitted: [], skipped: health } } From 930f0f29b3fc17f5e24a4da0d5d133be3a9bbaac Mon Sep 17 00:00:00 2001 From: James Grugett Date: Mon, 20 Apr 2026 15:44:33 -0700 Subject: [PATCH 07/10] Address review nits: docstring, dead prop, stale docs, import order - Update switchFreebuffModel docstring to match the silent auto-revert the tick loop actually does (not the prompt-to-end-first flow it used to describe). - Remove the unused `disabled` prop on FreebuffModelSelector; the only caller never set it, so the component's internal `pending` state is the only disable path. - Refresh docs/freebuff-waiting-room.md for per-model queues: mermaid, schema (with the `model` column from migration 0044), admission loop (per-model advisory locks, getFleetHealth), tunables, POST semantics, model_locked response shape, and the CLI / multi-pod sections. 
- Group endAndRejoinFreebuffSession with the other ../hooks/* imports in command-registry.ts. --- cli/src/commands/command-registry.ts | 6 +- .../components/freebuff-model-selector.tsx | 21 +-- cli/src/hooks/use-freebuff-session.ts | 6 +- docs/freebuff-waiting-room.md | 121 +++++++++++------- 4 files changed, 89 insertions(+), 65 deletions(-) diff --git a/cli/src/commands/command-registry.ts b/cli/src/commands/command-registry.ts index 28ab22f590..5c7b639286 100644 --- a/cli/src/commands/command-registry.ts +++ b/cli/src/commands/command-registry.ts @@ -3,14 +3,14 @@ import { CLAUDE_OAUTH_ENABLED } from '@codebuff/common/constants/claude-oauth' import { safeOpen } from '../utils/open-url' import { handleAdsEnable, handleAdsDisable } from './ads' -import { buildInterviewPrompt, buildPlanPrompt, buildReviewPromptFromArgs } from './prompt-builders' -import { endAndRejoinFreebuffSession } from '../hooks/use-freebuff-session' -import { useThemeStore } from '../hooks/use-theme' import { handleHelpCommand } from './help' import { handleImageCommand } from './image' import { handleInitializationFlowLocally } from './init' +import { buildInterviewPrompt, buildPlanPrompt, buildReviewPromptFromArgs } from './prompt-builders' import { runBashCommand } from './router' import { handleUsageCommand } from './usage' +import { endAndRejoinFreebuffSession } from '../hooks/use-freebuff-session' +import { useThemeStore } from '../hooks/use-theme' import { WEBSITE_URL } from '../login/constants' import { useChatStore } from '../state/chat-store' import { useFeedbackStore } from '../state/feedback-store' diff --git a/cli/src/components/freebuff-model-selector.tsx b/cli/src/components/freebuff-model-selector.tsx index c02c1d09e1..2a5ff1051a 100644 --- a/cli/src/components/freebuff-model-selector.tsx +++ b/cli/src/components/freebuff-model-selector.tsx @@ -11,21 +11,12 @@ import { useTheme } from '../hooks/use-theme' import type { KeyEvent } from '@opentui/core' -interface 
FreebuffModelSelectorProps { - /** Disables interaction while a switch / refresh is mid-flight so the user - * can't queue up a second switch and double-bounce themselves to the back - * of yet another queue. */ - disabled?: boolean -} - /** * Lets the user pick which model's queue they're in. Tapping (or pressing the * row's number key) on a different model triggers a re-POST: the server moves * them to the back of the new model's queue. */ -export const FreebuffModelSelector: React.FC = ({ - disabled = false, -}) => { +export const FreebuffModelSelector: React.FC = () => { const theme = useTheme() const selectedModel = useFreebuffModelStore((s) => s.selectedModel) const [pending, setPending] = useState(null) @@ -33,12 +24,12 @@ export const FreebuffModelSelector: React.FC = ({ const pick = useCallback( (modelId: string) => { - if (disabled || pending) return + if (pending) return if (modelId === selectedModel) return setPending(modelId) switchFreebuffModel(modelId).finally(() => setPending(null)) }, - [disabled, pending, selectedModel], + [pending, selectedModel], ) // Number-key shortcuts (1-9) so keyboard-only users can switch without @@ -46,7 +37,7 @@ export const FreebuffModelSelector: React.FC = ({ useKeyboard( useCallback( (key: KeyEvent) => { - if (disabled || pending) return + if (pending) return const name = key.name ?? '' if (!/^[1-9]$/.test(name)) return const digit = Number(name) @@ -57,7 +48,7 @@ export const FreebuffModelSelector: React.FC = ({ pick(target.id) } }, - [disabled, pending, pick, selectedModel], + [pending, pick, selectedModel], ), ) @@ -79,7 +70,7 @@ export const FreebuffModelSelector: React.FC = ({ const indicator = isSelected ? '●' : '○' const indicatorColor = isSelected ? theme.primary : theme.muted const labelColor = isSelected ? theme.foreground : theme.muted - const interactable = !disabled && !pending && !isSelected + const interactable = !pending && !isSelected return (