@@ -88,6 +88,10 @@ describe('Fireworks deployment routing', () => {
8888 model : 'z-ai/glm-5.1' ,
8989 messages : [ { role : 'user' as const , content : 'test' } ] ,
9090 }
// Request body for the lite-mode tests: everything from `minimalBody`, plus
// `codebuff_metadata` with `cost_mode` set to 'lite'.
const liteBody = {
  ...minimalBody,
  codebuff_metadata: { cost_mode: 'lite' },
}
9195
9296 it ( 'uses standard API when custom deployment is disabled' , async ( ) => {
9397 const fetchCalls : string [ ] = [ ]
@@ -298,6 +302,29 @@ describe('Fireworks deployment routing', () => {
298302 expect ( body . error . code ) . toBe ( 'DEPLOYMENT_OUTSIDE_HOURS' )
299303 } )
300304
// Lite-mode request made at BEFORE_DEPLOYMENT_HOURS with the custom deployment
// enabled: the test expects a 200 response and exactly one fetch, addressed to
// STANDARD_MODEL_ID.
it('falls back to the standard Fireworks API in lite mode outside deployment hours', async () => {
  // Model ids seen by the stubbed fetch, in call order.
  const requestedModels: string[] = []

  const stubbedFetch = mock(
    async (_url: string | URL | Request, init?: RequestInit) => {
      const { model } = JSON.parse(init?.body as string)
      requestedModels.push(model)
      const okPayload = JSON.stringify({ ok: true })
      return new Response(okPayload, { status: 200 })
    },
  ) as unknown as typeof globalThis.fetch

  const result = await createFireworksRequestWithFallback({
    body: liteBody as never,
    originalModel: 'z-ai/glm-5.1',
    fetch: stubbedFetch,
    logger,
    useCustomDeployment: true,
    sessionId: 'test-user-id',
    now: BEFORE_DEPLOYMENT_HOURS,
  })

  expect(result.status).toBe(200)
  expect(requestedModels).toEqual([STANDARD_MODEL_ID])
})
327+
301328 it ( 'returns non-5xx responses from deployment without fallback (e.g. 429)' , async ( ) => {
302329 const fetchCalls : string [ ] = [ ]
303330
@@ -508,5 +535,92 @@ describe('Fireworks deployment routing', () => {
508535
509536 expect ( logger . info ) . toHaveBeenCalledTimes ( 2 )
510537 } )
538+
// Lite-mode request made at IN_DEPLOYMENT_HOURS where the first fetch answers
// 503 with code DEPLOYMENT_SCALING_UP. The test expects a second fetch to
// STANDARD_MODEL_ID, a final 200 response, and the cooldown flag to be set.
it('falls back to the standard Fireworks API in lite mode after deployment scaling 503', async () => {
  // Model ids seen by the stubbed fetch, in call order.
  const requestedModels: string[] = []

  const stubbedFetch = mock(
    async (_url: string | URL | Request, init?: RequestInit) => {
      const { model } = JSON.parse(init?.body as string)
      // `push` returns the new array length, i.e. the 1-based call number.
      const callNumber = requestedModels.push(model)

      if (callNumber !== 1) {
        return new Response(JSON.stringify({ ok: true }), { status: 200 })
      }

      // Only the first call gets the "scaling up" failure.
      const scalingUpPayload = JSON.stringify({
        error: {
          message: 'Deployment is currently scaled to zero and is scaling up. Please retry your request in a few minutes.',
          code: 'DEPLOYMENT_SCALING_UP',
          type: 'error',
        },
      })
      return new Response(scalingUpPayload, {
        status: 503,
        statusText: 'Service Unavailable',
      })
    },
  ) as unknown as typeof globalThis.fetch

  const result = await createFireworksRequestWithFallback({
    body: liteBody as never,
    originalModel: 'z-ai/glm-5.1',
    fetch: stubbedFetch,
    logger,
    useCustomDeployment: true,
    sessionId: 'test-user-id',
    now: IN_DEPLOYMENT_HOURS,
  })

  expect(result.status).toBe(200)
  expect(requestedModels).toEqual([DEPLOYMENT_MODEL_ID, STANDARD_MODEL_ID])
  expect(isDeploymentCoolingDown()).toBe(true)
})
574+
// Lite-mode request made at IN_DEPLOYMENT_HOURS after `markDeploymentScalingUp()`
// has been called: the test expects a 200 response and a single fetch, addressed
// to STANDARD_MODEL_ID.
it('falls back to the standard Fireworks API in lite mode during deployment cooldown', async () => {
  // Flag the deployment as scaling up before the request is issued.
  markDeploymentScalingUp()

  // Model ids seen by the stubbed fetch, in call order.
  const requestedModels: string[] = []

  const stubbedFetch = mock(
    async (_url: string | URL | Request, init?: RequestInit) => {
      const { model } = JSON.parse(init?.body as string)
      requestedModels.push(model)
      const okPayload = JSON.stringify({ ok: true })
      return new Response(okPayload, { status: 200 })
    },
  ) as unknown as typeof globalThis.fetch

  const result = await createFireworksRequestWithFallback({
    body: liteBody as never,
    originalModel: 'z-ai/glm-5.1',
    fetch: stubbedFetch,
    logger,
    useCustomDeployment: true,
    sessionId: 'test-user-id',
    now: IN_DEPLOYMENT_HOURS,
  })

  expect(result.status).toBe(200)
  expect(requestedModels).toEqual([STANDARD_MODEL_ID])
})
598+
// Lite-mode request made at IN_DEPLOYMENT_HOURS where the first fetch throws.
// The test expects a second fetch to STANDARD_MODEL_ID, a final 200 response,
// and exactly one `logger.warn` call.
it('falls back to the standard Fireworks API in lite mode when the deployment request throws', async () => {
  // Model ids seen by the stubbed fetch, in call order.
  const requestedModels: string[] = []

  const stubbedFetch = mock(
    async (_url: string | URL | Request, init?: RequestInit) => {
      const { model } = JSON.parse(init?.body as string)
      // `push` returns the new array length, i.e. the 1-based call number.
      const callNumber = requestedModels.push(model)

      // Only the first call fails; every later call succeeds.
      if (callNumber === 1) {
        throw new Error('socket hang up')
      }

      return new Response(JSON.stringify({ ok: true }), { status: 200 })
    },
  ) as unknown as typeof globalThis.fetch

  const result = await createFireworksRequestWithFallback({
    body: liteBody as never,
    originalModel: 'z-ai/glm-5.1',
    fetch: stubbedFetch,
    logger,
    useCustomDeployment: true,
    sessionId: 'test-user-id',
    now: IN_DEPLOYMENT_HOURS,
  })

  expect(result.status).toBe(200)
  expect(requestedModels).toEqual([DEPLOYMENT_MODEL_ID, STANDARD_MODEL_ID])
  expect(logger.warn).toHaveBeenCalledTimes(1)
})
511625 } )
512626} )
0 commit comments