Skip to content

Commit fc9a76d

Browse files
[codex] Fallback lite GLM to standard Fireworks (#543)
Co-authored-by: James Grugett <jahooma@gmail.com>
1 parent: 3276d9e · commit: fc9a76d

2 files changed

Lines changed: 159 additions & 8 deletions

File tree

web/src/llm-api/__tests__/fireworks-deployment.test.ts

Lines changed: 114 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -88,6 +88,10 @@ describe('Fireworks deployment routing', () => {
8888
model: 'z-ai/glm-5.1',
8989
messages: [{ role: 'user' as const, content: 'test' }],
9090
}
91+
const liteBody = {
92+
...minimalBody,
93+
codebuff_metadata: { cost_mode: 'lite' },
94+
}
9195

9296
it('uses standard API when custom deployment is disabled', async () => {
9397
const fetchCalls: string[] = []
@@ -298,6 +302,29 @@ describe('Fireworks deployment routing', () => {
298302
expect(body.error.code).toBe('DEPLOYMENT_OUTSIDE_HOURS')
299303
})
300304

305+
it('falls back to the standard Fireworks API in lite mode outside deployment hours', async () => {
306+
const fetchCalls: string[] = []
307+
308+
const mockFetch = mock(async (_url: string | URL | Request, init?: RequestInit) => {
309+
const body = JSON.parse(init?.body as string)
310+
fetchCalls.push(body.model)
311+
return new Response(JSON.stringify({ ok: true }), { status: 200 })
312+
}) as unknown as typeof globalThis.fetch
313+
314+
const response = await createFireworksRequestWithFallback({
315+
body: liteBody as never,
316+
originalModel: 'z-ai/glm-5.1',
317+
fetch: mockFetch,
318+
logger,
319+
useCustomDeployment: true,
320+
sessionId: 'test-user-id',
321+
now: BEFORE_DEPLOYMENT_HOURS,
322+
})
323+
324+
expect(response.status).toBe(200)
325+
expect(fetchCalls).toEqual([STANDARD_MODEL_ID])
326+
})
327+
301328
it('returns non-5xx responses from deployment without fallback (e.g. 429)', async () => {
302329
const fetchCalls: string[] = []
303330

@@ -508,5 +535,92 @@ describe('Fireworks deployment routing', () => {
508535

509536
expect(logger.info).toHaveBeenCalledTimes(2)
510537
})
538+
539+
it('falls back to the standard Fireworks API in lite mode after deployment scaling 503', async () => {
540+
const fetchCalls: string[] = []
541+
542+
const mockFetch = mock(async (_url: string | URL | Request, init?: RequestInit) => {
543+
const body = JSON.parse(init?.body as string)
544+
fetchCalls.push(body.model)
545+
if (fetchCalls.length === 1) {
546+
return new Response(
547+
JSON.stringify({
548+
error: {
549+
message: 'Deployment is currently scaled to zero and is scaling up. Please retry your request in a few minutes.',
550+
code: 'DEPLOYMENT_SCALING_UP',
551+
type: 'error',
552+
},
553+
}),
554+
{ status: 503, statusText: 'Service Unavailable' },
555+
)
556+
}
557+
return new Response(JSON.stringify({ ok: true }), { status: 200 })
558+
}) as unknown as typeof globalThis.fetch
559+
560+
const response = await createFireworksRequestWithFallback({
561+
body: liteBody as never,
562+
originalModel: 'z-ai/glm-5.1',
563+
fetch: mockFetch,
564+
logger,
565+
useCustomDeployment: true,
566+
sessionId: 'test-user-id',
567+
now: IN_DEPLOYMENT_HOURS,
568+
})
569+
570+
expect(response.status).toBe(200)
571+
expect(fetchCalls).toEqual([DEPLOYMENT_MODEL_ID, STANDARD_MODEL_ID])
572+
expect(isDeploymentCoolingDown()).toBe(true)
573+
})
574+
575+
it('falls back to the standard Fireworks API in lite mode during deployment cooldown', async () => {
576+
markDeploymentScalingUp()
577+
578+
const fetchCalls: string[] = []
579+
const mockFetch = mock(async (_url: string | URL | Request, init?: RequestInit) => {
580+
const body = JSON.parse(init?.body as string)
581+
fetchCalls.push(body.model)
582+
return new Response(JSON.stringify({ ok: true }), { status: 200 })
583+
}) as unknown as typeof globalThis.fetch
584+
585+
const response = await createFireworksRequestWithFallback({
586+
body: liteBody as never,
587+
originalModel: 'z-ai/glm-5.1',
588+
fetch: mockFetch,
589+
logger,
590+
useCustomDeployment: true,
591+
sessionId: 'test-user-id',
592+
now: IN_DEPLOYMENT_HOURS,
593+
})
594+
595+
expect(response.status).toBe(200)
596+
expect(fetchCalls).toEqual([STANDARD_MODEL_ID])
597+
})
598+
599+
it('falls back to the standard Fireworks API in lite mode when the deployment request throws', async () => {
600+
const fetchCalls: string[] = []
601+
602+
const mockFetch = mock(async (_url: string | URL | Request, init?: RequestInit) => {
603+
const body = JSON.parse(init?.body as string)
604+
fetchCalls.push(body.model)
605+
if (fetchCalls.length === 1) {
606+
throw new Error('socket hang up')
607+
}
608+
return new Response(JSON.stringify({ ok: true }), { status: 200 })
609+
}) as unknown as typeof globalThis.fetch
610+
611+
const response = await createFireworksRequestWithFallback({
612+
body: liteBody as never,
613+
originalModel: 'z-ai/glm-5.1',
614+
fetch: mockFetch,
615+
logger,
616+
useCustomDeployment: true,
617+
sessionId: 'test-user-id',
618+
now: IN_DEPLOYMENT_HOURS,
619+
})
620+
621+
expect(response.status).toBe(200)
622+
expect(fetchCalls).toEqual([DEPLOYMENT_MODEL_ID, STANDARD_MODEL_ID])
623+
expect(logger.warn).toHaveBeenCalledTimes(1)
624+
})
511625
})
512626
})

web/src/llm-api/fireworks.ts

Lines changed: 45 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -725,8 +725,19 @@ export async function createFireworksRequestWithFallback(params: {
725725
const useCustomDeployment = params.useCustomDeployment ?? FIREWORKS_USE_CUSTOM_DEPLOYMENT
726726
const deploymentModelId = FIREWORKS_DEPLOYMENT_MAP[originalModel]
727727
const hasDeployment = useCustomDeployment && Boolean(deploymentModelId)
728+
const shouldFallbackToStandardApi = body.codebuff_metadata?.cost_mode === 'lite'
729+
730+
const createStandardApiRequest = () =>
731+
createFireworksRequest({ body, originalModel, fetch, sessionId })
728732

729733
if (hasDeployment && !isDeploymentHours(now)) {
734+
if (shouldFallbackToStandardApi) {
735+
logger.info(
736+
{ model: originalModel },
737+
'Falling back to Fireworks standard API outside deployment hours',
738+
)
739+
return createStandardApiRequest()
740+
}
730741
return new Response(
731742
JSON.stringify({
732743
error: {
@@ -740,6 +751,13 @@ export async function createFireworksRequestWithFallback(params: {
740751
}
741752

742753
if (hasDeployment && isDeploymentCoolingDown()) {
754+
if (shouldFallbackToStandardApi) {
755+
logger.info(
756+
{ model: originalModel },
757+
'Falling back to Fireworks standard API during deployment cooldown',
758+
)
759+
return createStandardApiRequest()
760+
}
743761
return new Response(
744762
JSON.stringify({
745763
error: {
@@ -757,13 +775,25 @@ export async function createFireworksRequestWithFallback(params: {
757775
{ model: originalModel, deploymentModel: deploymentModelId },
758776
'Trying Fireworks custom deployment',
759777
)
760-
const response = await createFireworksRequest({
761-
body,
762-
originalModel,
763-
fetch,
764-
modelIdOverride: deploymentModelId,
765-
sessionId,
766-
})
778+
let response: Response
779+
try {
780+
response = await createFireworksRequest({
781+
body,
782+
originalModel,
783+
fetch,
784+
modelIdOverride: deploymentModelId,
785+
sessionId,
786+
})
787+
} catch (error) {
788+
if (shouldFallbackToStandardApi) {
789+
logger.warn(
790+
{ model: originalModel, error: getErrorObject(error) },
791+
'Fireworks custom deployment request failed, falling back to standard API',
792+
)
793+
return createStandardApiRequest()
794+
}
795+
throw error
796+
}
767797

768798
if (response.status >= 500) {
769799
const errorText = await response.text()
@@ -774,6 +804,13 @@ export async function createFireworksRequestWithFallback(params: {
774804
if (errorText.includes('DEPLOYMENT_SCALING_UP')) {
775805
markDeploymentScalingUp()
776806
}
807+
if (shouldFallbackToStandardApi) {
808+
logger.info(
809+
{ model: originalModel, status: response.status },
810+
'Falling back to Fireworks standard API after deployment 5xx',
811+
)
812+
return createStandardApiRequest()
813+
}
777814
return new Response(errorText, {
778815
status: response.status,
779816
statusText: response.statusText,
@@ -783,7 +820,7 @@ export async function createFireworksRequestWithFallback(params: {
783820
return response
784821
}
785822

786-
return createFireworksRequest({ body, originalModel, fetch, sessionId })
823+
return createStandardApiRequest()
787824
}
788825

789826
function creditsToFakeCost(credits: number): number {

0 commit comments

Comments
 (0)