Skip to content

Commit 469e998

Browse files
jahooma and claude committed
Gate admission on per-deployment Fireworks health
Replace the fleet-wide worst-of collapse with a per-model map. One probe per tick still covers every deployment (Fireworks returns them in a single response), but each model's admission now uses its own deployment's verdict — a degraded minimax no longer blocks glm. Models absent from FIREWORKS_DEPLOYMENT_MAP (serverless) default to 'healthy'; a TODO marks where to drop that default once they move to dedicated deployments.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
1 parent a1b3b28 commit 469e998

5 files changed

Lines changed: 115 additions & 72 deletions

File tree

web/src/server/free-session/__tests__/admission.test.ts

Lines changed: 39 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -3,9 +3,10 @@ import { describe, expect, test } from 'bun:test'
33
import { runAdmissionTick } from '../admission'
44

55
import type { AdmissionDeps } from '../admission'
6-
import type { FireworksHealth } from '../fireworks-health'
6+
import type { FireworksHealth, FleetHealth } from '../fireworks-health'
77

88
const NOW = new Date('2026-04-17T12:00:00Z')
9+
const TEST_MODEL = 'test-model'
910

1011
function makeAdmissionDeps(overrides: Partial<AdmissionDeps> = {}): AdmissionDeps & {
1112
calls: { admit: number }
@@ -16,10 +17,9 @@ function makeAdmissionDeps(overrides: Partial<AdmissionDeps> = {}): AdmissionDep
1617
sweepExpired: async () => 0,
1718
queueDepth: async () => 0,
1819
activeCount: async () => 0,
19-
getFireworksHealth: async () => 'healthy',
20-
admitFromQueue: async ({ getFireworksHealth }) => {
20+
getFleetHealth: async () => ({}),
21+
admitFromQueue: async ({ health }) => {
2122
calls.admit += 1
22-
const health = await getFireworksHealth()
2323
if (health !== 'healthy') {
2424
return { admitted: [], skipped: health }
2525
}
@@ -30,12 +30,16 @@ function makeAdmissionDeps(overrides: Partial<AdmissionDeps> = {}): AdmissionDep
3030
now: () => NOW,
3131
// Default to a single model so per-tick assertions (admitted: 1) stay
3232
// crisp regardless of how many production models are registered.
33-
models: ['test-model'],
33+
models: [TEST_MODEL],
3434
...overrides,
3535
}
3636
return deps
3737
}
3838

39+
function fleet(health: FireworksHealth, model: string = TEST_MODEL): FleetHealth {
40+
return { [model]: health }
41+
}
42+
3943
describe('runAdmissionTick', () => {
4044
test('admits one user per tick when healthy', async () => {
4145
const deps = makeAdmissionDeps()
@@ -44,18 +48,18 @@ describe('runAdmissionTick', () => {
4448
expect(result.skipped).toBeNull()
4549
})
4650

47-
test('skips admission when Fireworks is degraded', async () => {
51+
test('skips admission when the model deployment is degraded', async () => {
4852
const deps = makeAdmissionDeps({
49-
getFireworksHealth: async () => 'degraded' as FireworksHealth,
53+
getFleetHealth: async () => fleet('degraded'),
5054
})
5155
const result = await runAdmissionTick(deps)
5256
expect(result.admitted).toBe(0)
5357
expect(result.skipped).toBe('degraded')
5458
})
5559

56-
test('skips admission when Fireworks is unhealthy', async () => {
60+
test('skips admission when the model deployment is unhealthy', async () => {
5761
const deps = makeAdmissionDeps({
58-
getFireworksHealth: async () => 'unhealthy' as FireworksHealth,
62+
getFleetHealth: async () => fleet('unhealthy'),
5963
})
6064
const result = await runAdmissionTick(deps)
6165
expect(result.admitted).toBe(0)
@@ -69,13 +73,38 @@ describe('runAdmissionTick', () => {
6973
swept = 3
7074
return 3
7175
},
72-
getFireworksHealth: async () => 'unhealthy' as FireworksHealth,
76+
getFleetHealth: async () => fleet('unhealthy'),
7377
})
7478
const result = await runAdmissionTick(deps)
7579
expect(swept).toBe(3)
7680
expect(result.expired).toBe(3)
7781
})
7882

83+
test('admits per-model based on per-deployment health', async () => {
84+
// Two models: 'good' is healthy, 'bad' is degraded. A single tick should
85+
// admit 1 from 'good' and skip 'bad', surfacing the worst skip reason.
86+
const deps = makeAdmissionDeps({
87+
models: ['good', 'bad'],
88+
getFleetHealth: async () => ({ good: 'healthy', bad: 'degraded' }),
89+
})
90+
const result = await runAdmissionTick(deps)
91+
expect(result.admitted).toBe(1)
92+
expect(result.skipped).toBe('degraded')
93+
})
94+
95+
test('absent fleet entry defaults to healthy (serverless model)', async () => {
96+
// Model isn't in the fleet map (e.g. served via Fireworks serverless).
97+
// Admission should proceed rather than stall waiting for a probe that
98+
// will never include this deployment.
99+
const deps = makeAdmissionDeps({
100+
models: ['serverless-model'],
101+
getFleetHealth: async () => ({}),
102+
})
103+
const result = await runAdmissionTick(deps)
104+
expect(result.admitted).toBe(1)
105+
expect(result.skipped).toBeNull()
106+
})
107+
79108
test('propagates expiry count and admit count together', async () => {
80109
const deps = makeAdmissionDeps({
81110
sweepExpired: async () => 2,

web/src/server/free-session/__tests__/fireworks-health.test.ts

Lines changed: 14 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@ import {
44
KV_BLOCKS_DEGRADED_FRACTION,
55
KV_BLOCKS_UNHEALTHY_FRACTION,
66
PREFILL_QUEUE_P90_DEGRADED_MS,
7-
classify,
7+
classifyOne,
88
} from '../fireworks-health'
99

1010
type PromSample = { name: string; labels: Record<string, string>; value: number }
@@ -57,31 +57,31 @@ function errors(code: string, rate: number): PromSample {
5757
describe('fireworks health classifier', () => {
5858
test('healthy when queue well under the threshold', () => {
5959
const samples: PromSample[] = [kvBlocks(0.5), ...prefillQueueBuckets(150)]
60-
expect(classify(samples, [DEPLOY])).toBe('healthy')
60+
expect(classifyOne(samples, DEPLOY)).toBe('healthy')
6161
})
6262

6363
test('degraded when prefill queue p90 exceeds the threshold', () => {
6464
const samples: PromSample[] = [
6565
kvBlocks(0.5),
6666
...prefillQueueBuckets(PREFILL_QUEUE_P90_DEGRADED_MS + 500),
6767
]
68-
expect(classify(samples, [DEPLOY])).toBe('degraded')
68+
expect(classifyOne(samples, DEPLOY)).toBe('degraded')
6969
})
7070

7171
test('degraded when KV blocks cross the soft threshold (leading indicator)', () => {
7272
const samples: PromSample[] = [
7373
kvBlocks(KV_BLOCKS_DEGRADED_FRACTION + 0.01),
7474
...prefillQueueBuckets(300),
7575
]
76-
expect(classify(samples, [DEPLOY])).toBe('degraded')
76+
expect(classifyOne(samples, DEPLOY)).toBe('degraded')
7777
})
7878

7979
test('unhealthy when KV blocks exceed the backstop', () => {
8080
const samples: PromSample[] = [
8181
kvBlocks(KV_BLOCKS_UNHEALTHY_FRACTION + 0.005),
8282
...prefillQueueBuckets(300),
8383
]
84-
expect(classify(samples, [DEPLOY])).toBe('unhealthy')
84+
expect(classifyOne(samples, DEPLOY)).toBe('unhealthy')
8585
})
8686

8787
test('unhealthy when 5xx error fraction exceeds the threshold', () => {
@@ -91,7 +91,7 @@ describe('fireworks health classifier', () => {
9191
requests(1),
9292
errors('500', 0.2),
9393
]
94-
expect(classify(samples, [DEPLOY])).toBe('unhealthy')
94+
expect(classifyOne(samples, DEPLOY)).toBe('unhealthy')
9595
})
9696

9797
test('ignores high error fraction when traffic is too low to be meaningful', () => {
@@ -101,14 +101,17 @@ describe('fireworks health classifier', () => {
101101
requests(0.05),
102102
errors('500', 0.05),
103103
]
104-
expect(classify(samples, [DEPLOY])).toBe('healthy')
104+
expect(classifyOne(samples, DEPLOY)).toBe('healthy')
105105
})
106106

107107
test('healthy with no data yet (new deployment, no events)', () => {
108-
expect(classify([], [DEPLOY])).toBe('healthy')
108+
expect(classifyOne([], DEPLOY)).toBe('healthy')
109109
})
110110

111-
test('worst-of across multiple deployments — unhealthy wins over degraded', () => {
111+
test('classifies deployments independently — one bad deployment does not affect another', () => {
112+
// The fleet probe builds the result by classifying each deployment
113+
// separately, so a saturated 'other' deployment leaves DEPLOY's
114+
// (only-degraded) verdict intact.
112115
const other = 'other123'
113116
const samples: PromSample[] = [
114117
kvBlocks(0.5),
@@ -119,6 +122,7 @@ describe('fireworks health classifier', () => {
119122
value: KV_BLOCKS_UNHEALTHY_FRACTION + 0.005,
120123
},
121124
]
122-
expect(classify(samples, [DEPLOY, other])).toBe('unhealthy')
125+
expect(classifyOne(samples, DEPLOY)).toBe('degraded')
126+
expect(classifyOne(samples, other)).toBe('unhealthy')
123127
})
124128
})

web/src/server/free-session/admission.ts

Lines changed: 18 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -6,10 +6,10 @@ import {
66
getSessionLengthMs,
77
isWaitingRoomEnabled,
88
} from './config'
9-
import { getFireworksHealth } from './fireworks-health'
9+
import { getFleetHealth } from './fireworks-health'
1010
import { activeCount, admitFromQueue, queueDepth, sweepExpired } from './store'
1111

12-
import type { FireworksHealth } from './fireworks-health'
12+
import type { FireworksHealth, FleetHealth } from './fireworks-health'
1313

1414
import { logger } from '@/util/logger'
1515

@@ -21,9 +21,9 @@ export interface AdmissionDeps {
2121
model: string
2222
sessionLengthMs: number
2323
now: Date
24-
getFireworksHealth: () => Promise<FireworksHealth>
24+
health: FireworksHealth
2525
}) => Promise<{ admitted: { user_id: string }[]; skipped: FireworksHealth | null }>
26-
getFireworksHealth: () => Promise<FireworksHealth>
26+
getFleetHealth: () => Promise<FleetHealth>
2727
/** Plain values, not thunks — these never change at runtime. */
2828
sessionLengthMs: number
2929
graceMs: number
@@ -38,11 +38,13 @@ const defaultDeps: AdmissionDeps = {
3838
activeCount,
3939
admitFromQueue,
4040
// FREEBUFF_DEV_FORCE_ADMIT lets local `dev:freebuff` drive the full
41-
// waiting-room → admitted → ended flow without a real upstream.
42-
getFireworksHealth:
41+
// waiting-room → admitted → ended flow without a real upstream. Returning
42+
// an empty fleet means every model resolves to the absence-default of
43+
// 'healthy' below.
44+
getFleetHealth:
4345
process.env.FREEBUFF_DEV_FORCE_ADMIT === 'true'
44-
? async () => 'healthy'
45-
: getFireworksHealth,
46+
? async () => ({})
47+
: getFleetHealth,
4648
get sessionLengthMs() {
4749
return getSessionLengthMs()
4850
},
@@ -81,21 +83,23 @@ export async function runAdmissionTick(
8183

8284
const models = deps.models ?? FREEBUFF_MODELS.map((m) => m.id)
8385

84-
// Probe upstream health once per tick. Today every model shares a Fireworks
85-
// deployment so a single probe gates them all — TODO: when we add a
86-
// non-Fireworks model, plumb a model/deploymentId into the probe.
87-
const health = await deps.getFireworksHealth()
88-
const sharedHealth = async () => health
86+
// One probe per tick covers every model — the Fireworks metrics endpoint
87+
// returns all deployments in a single response. Models without a dedicated
88+
// deployment (e.g. serverless) aren't in the map; treat their absence as
89+
// 'healthy' so admission continues. TODO: when those models move to their
90+
// own deployments, drop the absence-default and require an explicit entry.
91+
const fleet = await deps.getFleetHealth()
8992

9093
// Run per-model admission in parallel — they only contend on independent
9194
// advisory locks and a single update each.
9295
const perModel = await Promise.all(
9396
models.map(async (model) => {
97+
const health = fleet[model] ?? 'healthy'
9498
const { admitted, skipped } = await deps.admitFromQueue({
9599
model,
96100
sessionLengthMs: deps.sessionLengthMs,
97101
now,
98-
getFireworksHealth: sharedHealth,
102+
health,
99103
})
100104
const depth = await deps.queueDepth({ model })
101105
return { model, admittedCount: admitted.length, depth, skipped }

web/src/server/free-session/fireworks-health.ts

Lines changed: 34 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -52,25 +52,35 @@ const HEALTH_CHECK_TIMEOUT_MS = 5_000
5252
* pod hits the endpoint at most ~2.4/min. */
5353
const HEALTH_CACHE_TTL_MS = 25_000
5454

55-
type CacheEntry = { expiresAt: number; health: FireworksHealth }
55+
/** Map of model id → FireworksHealth. Only includes models that have a
56+
* dedicated Fireworks deployment in `FIREWORKS_DEPLOYMENT_MAP`. Models served
57+
* via the Fireworks serverless API (no deployment id) are not present —
58+
* callers should treat their absence as 'healthy' for now.
59+
* TODO: when serverless models move to dedicated deployments, drop the
60+
* absence-means-healthy fallback at the call site. */
61+
export type FleetHealth = Record<string, FireworksHealth>
62+
63+
type CacheEntry = { expiresAt: number; fleet: FleetHealth }
5664
let cache: CacheEntry | null = null
5765

5866
export function __resetFireworksHealthCacheForTests(): void {
5967
cache = null
6068
}
6169

62-
export async function getFireworksHealth(): Promise<FireworksHealth> {
70+
export async function getFleetHealth(): Promise<FleetHealth> {
6371
const now = Date.now()
64-
if (cache && cache.expiresAt > now) return cache.health
72+
if (cache && cache.expiresAt > now) return cache.fleet
6573

66-
const health = await probe()
67-
cache = { expiresAt: now + HEALTH_CACHE_TTL_MS, health }
68-
return health
74+
const fleet = await probe()
75+
cache = { expiresAt: now + HEALTH_CACHE_TTL_MS, fleet }
76+
return fleet
6977
}
7078

71-
async function probe(): Promise<FireworksHealth> {
79+
async function probe(): Promise<FleetHealth> {
7280
const apiKey = env.FIREWORKS_API_KEY
73-
if (!apiKey) return 'unhealthy'
81+
// Mark every deployment-mapped model unhealthy when we can't authenticate
82+
// the probe. Serverless models (absent from the map) keep their default.
83+
if (!apiKey) return allDeploymentsAt('unhealthy')
7484

7585
const controller = new AbortController()
7686
const timeout = setTimeout(() => controller.abort(), HEALTH_CHECK_TIMEOUT_MS)
@@ -81,18 +91,15 @@ async function probe(): Promise<FireworksHealth> {
8191
headers: { Authorization: `Bearer ${apiKey}` },
8292
signal: controller.signal,
8393
})
84-
if (!response.ok) return 'unhealthy'
94+
if (!response.ok) return allDeploymentsAt('unhealthy')
8595
body = await response.text()
8696
} catch {
87-
return 'unhealthy'
97+
return allDeploymentsAt('unhealthy')
8898
} finally {
8999
clearTimeout(timeout)
90100
}
91101

92-
const deploymentIds = Object.values(FIREWORKS_DEPLOYMENT_MAP).map(
93-
(name) => name.split('/').pop()!,
94-
)
95-
if (deploymentIds.length === 0) return 'healthy'
102+
if (Object.keys(FIREWORKS_DEPLOYMENT_MAP).length === 0) return {}
96103

97104
const { samples, newestTimestampMs } = parsePrometheus(body)
98105

@@ -104,27 +111,26 @@ async function probe(): Promise<FireworksHealth> {
104111
{ ageMs: Date.now() - newestTimestampMs },
105112
'[FireworksHealth] unhealthy: metrics snapshot is stale',
106113
)
107-
return 'unhealthy'
114+
return allDeploymentsAt('unhealthy')
108115
}
109116

110-
return classify(samples, deploymentIds)
117+
const fleet: FleetHealth = {}
118+
for (const [modelId, deploymentName] of Object.entries(FIREWORKS_DEPLOYMENT_MAP)) {
119+
const deploymentId = deploymentName.split('/').pop()!
120+
fleet[modelId] = classifyOne(samples, deploymentId)
121+
}
122+
return fleet
111123
}
112124

113-
/** Treat the whole fleet as degraded/unhealthy if any single deployment is. */
114-
export function classify(
115-
samples: PromSample[],
116-
deploymentIds: string[],
117-
): FireworksHealth {
118-
let worst: FireworksHealth = 'healthy'
119-
for (const deploymentId of deploymentIds) {
120-
const h = classifyOne(samples, deploymentId)
121-
if (h === 'unhealthy') return 'unhealthy'
122-
if (h === 'degraded') worst = 'degraded'
125+
function allDeploymentsAt(health: FireworksHealth): FleetHealth {
126+
const out: FleetHealth = {}
127+
for (const modelId of Object.keys(FIREWORKS_DEPLOYMENT_MAP)) {
128+
out[modelId] = health
123129
}
124-
return worst
130+
return out
125131
}
126132

127-
function classifyOne(samples: PromSample[], deploymentId: string): FireworksHealth {
133+
export function classifyOne(samples: PromSample[], deploymentId: string): FireworksHealth {
128134
const kvBlocks = scalarFor(
129135
samples,
130136
'generator_kv_blocks_fraction:avg_by_deployment',

0 commit comments

Comments
 (0)