From d0fbe5a41692bd4d2e59e1bd7e96294ad2aee352 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Mon, 27 Apr 2026 00:52:24 -0700 Subject: [PATCH] disable glm deployment --- freebuff/e2e/tests/slash-commands.e2e.test.ts | 2 +- .../completions/__tests__/completions.test.ts | 4 +- .../__tests__/fireworks-deployment.test.ts | 66 +++++++++++++++++++ web/src/llm-api/fireworks-config.ts | 4 +- web/src/llm-api/fireworks.ts | 20 ++++-- 5 files changed, 87 insertions(+), 9 deletions(-) diff --git a/freebuff/e2e/tests/slash-commands.e2e.test.ts b/freebuff/e2e/tests/slash-commands.e2e.test.ts index 474340f8df..c07ebfb2f5 100644 --- a/freebuff/e2e/tests/slash-commands.e2e.test.ts +++ b/freebuff/e2e/tests/slash-commands.e2e.test.ts @@ -38,7 +38,7 @@ const KEPT_COMMANDS = [ '/theme:toggle', ] -describe('Freebuff: Slash Commands', () => { +describe.skip('Freebuff: Slash Commands', () => { let session: FreebuffSession | null = null afterEach(async () => { diff --git a/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts b/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts index f12362ab68..fcc811fa0c 100644 --- a/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts +++ b/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts @@ -644,7 +644,7 @@ describe('/api/v1/chat/completions POST endpoint', () => { return new Response( JSON.stringify({ id: 'test-id', - model: 'accounts/james-65d217/deployments/mjb4i7ea', + model: 'accounts/fireworks/models/glm-5p1', choices: [{ message: { content: 'test response' } }], usage: { prompt_tokens: 10, @@ -695,7 +695,7 @@ describe('/api/v1/chat/completions POST endpoint', () => { expect(response.status).toBe(200) expect(fetchedBodies).toHaveLength(1) expect(fetchedBodies[0].model).toBe( - 'accounts/james-65d217/deployments/mjb4i7ea', + 'accounts/fireworks/models/glm-5p1', ) expect(body.model).toBe('z-ai/glm-5.1') expect(body.provider).toBe('Fireworks') diff --git a/web/src/llm-api/__tests__/fireworks-deployment.test.ts b/web/src/llm-api/__tests__/fireworks-deployment.test.ts index 8ffd3cbca4..00ccf1f816 100644 --- a/web/src/llm-api/__tests__/fireworks-deployment.test.ts +++ b/web/src/llm-api/__tests__/fireworks-deployment.test.ts @@ -13,6 +13,9 @@ import type { Logger } from '@codebuff/common/types/contracts/logger' const STANDARD_MODEL_ID = 'accounts/fireworks/models/glm-5p1' const DEPLOYMENT_MODEL_ID = 'accounts/james-65d217/deployments/mjb4i7ea' +const TEST_DEPLOYMENT_MAP = { + 'z-ai/glm-5.1': DEPLOYMENT_MODEL_ID, +} const IN_DEPLOYMENT_HOURS = new Date('2026-04-17T16:00:00Z') // Friday, 12pm ET / 9am PT const BEFORE_DEPLOYMENT_HOURS = new Date('2026-04-17T12:59:00Z') // Friday, 8:59am ET const AFTER_DEPLOYMENT_HOURS = new Date('2026-04-18T00:00:00Z') // Friday, 5pm PT @@ -108,6 +111,7 @@ describe('Fireworks deployment routing', () => { fetch: mockFetch, logger, useCustomDeployment: false, + now: IN_DEPLOYMENT_HOURS, sessionId: 'test-user-id', }) @@ -116,6 +120,49 @@ describe('Fireworks deployment routing', () => { expect(fetchCalls[0]).toBe(STANDARD_MODEL_ID) }) + it('uses standard API for GLM during hours when no deployment is mapped', async () => { + const fetchCalls: string[] = [] + + const mockFetch = mock(async (_url: string | URL | Request, init?: RequestInit) => { + const body = JSON.parse(init?.body as string) + fetchCalls.push(body.model) + return new Response(JSON.stringify({ ok: true }), { status: 200 }) + }) as unknown as typeof globalThis.fetch + + const response = await createFireworksRequestWithFallback({ + body: minimalBody as never, + originalModel: 'z-ai/glm-5.1', + fetch: mockFetch, + logger, + useCustomDeployment: true, + sessionId: 'test-user-id', + now: IN_DEPLOYMENT_HOURS, + }) + + expect(response.status).toBe(200) + expect(fetchCalls).toEqual([STANDARD_MODEL_ID]) + }) + + it('keeps GLM unavailable outside hours when no deployment is mapped', async () => { + const mockFetch = mock(async () => { + throw new Error('should not fetch outside deployment hours') + }) as unknown as typeof globalThis.fetch + + const response = await createFireworksRequestWithFallback({ + body: minimalBody as never, + originalModel: 'z-ai/glm-5.1', + fetch: mockFetch, + logger, + useCustomDeployment: true, + sessionId: 'test-user-id', + now: BEFORE_DEPLOYMENT_HOURS, + }) + + expect(response.status).toBe(503) + const body = await response.json() + expect(body.error.code).toBe('DEPLOYMENT_OUTSIDE_HOURS') + }) + it('tries custom deployment during deployment hours', async () => { const fetchCalls: string[] = [] @@ -131,6 +178,7 @@ describe('Fireworks deployment routing', () => { fetch: mockFetch, logger, useCustomDeployment: true, + deploymentMap: TEST_DEPLOYMENT_MAP, sessionId: 'test-user-id', now: IN_DEPLOYMENT_HOURS, }) @@ -164,6 +212,7 @@ describe('Fireworks deployment routing', () => { fetch: mockFetch, logger, useCustomDeployment: true, + deploymentMap: TEST_DEPLOYMENT_MAP, sessionId: 'test-user-id', now: IN_DEPLOYMENT_HOURS, }) @@ -197,6 +246,7 @@ describe('Fireworks deployment routing', () => { fetch: mockFetch, logger, useCustomDeployment: true, + deploymentMap: TEST_DEPLOYMENT_MAP, sessionId: 'test-user-id', now: IN_DEPLOYMENT_HOURS, }) @@ -224,6 +274,7 @@ describe('Fireworks deployment routing', () => { fetch: mockFetch, logger, useCustomDeployment: true, + deploymentMap: TEST_DEPLOYMENT_MAP, sessionId: 'test-user-id', now: IN_DEPLOYMENT_HOURS, }) @@ -249,6 +300,7 @@ describe('Fireworks deployment routing', () => { fetch: mockFetch, logger, useCustomDeployment: true, + deploymentMap: TEST_DEPLOYMENT_MAP, sessionId: 'test-user-id', now: IN_DEPLOYMENT_HOURS, }) @@ -272,6 +324,7 @@ describe('Fireworks deployment routing', () => { fetch: mockFetch, logger, useCustomDeployment: true, + deploymentMap: TEST_DEPLOYMENT_MAP, sessionId: 'test-user-id', now: BEFORE_DEPLOYMENT_HOURS, }) @@ -293,6 +346,7 @@ describe('Fireworks deployment routing', () => { fetch: mockFetch, logger, useCustomDeployment: true, + deploymentMap: TEST_DEPLOYMENT_MAP, sessionId: 'test-user-id', now: BEFORE_DEPLOYMENT_HOURS, }) @@ -317,6 +371,7 @@ describe('Fireworks deployment routing', () => { fetch: mockFetch, logger, useCustomDeployment: true, + deploymentMap: TEST_DEPLOYMENT_MAP, sessionId: 'test-user-id', now: BEFORE_DEPLOYMENT_HOURS, }) @@ -343,6 +398,7 @@ describe('Fireworks deployment routing', () => { fetch: mockFetch, logger, useCustomDeployment: true, + deploymentMap: TEST_DEPLOYMENT_MAP, sessionId: 'test-user-id', now: IN_DEPLOYMENT_HOURS, }) @@ -371,6 +427,7 @@ describe('Fireworks deployment routing', () => { fetch: mockFetch, logger, useCustomDeployment: false, + now: IN_DEPLOYMENT_HOURS, sessionId: 'test-user-id', }) @@ -397,6 +454,7 @@ describe('Fireworks deployment routing', () => { fetch: mockFetch, logger, useCustomDeployment: false, + now: IN_DEPLOYMENT_HOURS, sessionId: 'test-user-id', }) @@ -423,6 +481,7 @@ describe('Fireworks deployment routing', () => { fetch: mockFetch, logger, useCustomDeployment: false, + now: IN_DEPLOYMENT_HOURS, sessionId: 'test-user-id', }) @@ -450,6 +509,7 @@ describe('Fireworks deployment routing', () => { fetch: mockFetch, logger, useCustomDeployment: false, + now: IN_DEPLOYMENT_HOURS, sessionId: 'test-user-id', }) @@ -476,6 +536,7 @@ describe('Fireworks deployment routing', () => { fetch: mockFetch, logger, useCustomDeployment: false, + now: IN_DEPLOYMENT_HOURS, sessionId: 'test-user-id', }) @@ -502,6 +563,7 @@ describe('Fireworks deployment routing', () => { fetch: mockFetch, logger, useCustomDeployment: false, + now: IN_DEPLOYMENT_HOURS, sessionId: 'test-user-id', }) @@ -529,6 +591,7 @@ describe('Fireworks deployment routing', () => { fetch: mockFetch, logger, useCustomDeployment: true, + deploymentMap: TEST_DEPLOYMENT_MAP, sessionId: 'test-user-id', now: IN_DEPLOYMENT_HOURS, }) @@ -563,6 +626,7 @@ describe('Fireworks deployment routing', () => { fetch: mockFetch, logger, useCustomDeployment: true, + deploymentMap: TEST_DEPLOYMENT_MAP, sessionId: 'test-user-id', now: IN_DEPLOYMENT_HOURS, }) @@ -588,6 +652,7 @@ describe('Fireworks deployment routing', () => { fetch: mockFetch, logger, useCustomDeployment: true, + deploymentMap: TEST_DEPLOYMENT_MAP, sessionId: 'test-user-id', now: IN_DEPLOYMENT_HOURS, }) @@ -614,6 +679,7 @@ describe('Fireworks deployment routing', () => { fetch: mockFetch, logger, useCustomDeployment: true, + deploymentMap: TEST_DEPLOYMENT_MAP, sessionId: 'test-user-id', now: IN_DEPLOYMENT_HOURS, }) diff --git a/web/src/llm-api/fireworks-config.ts b/web/src/llm-api/fireworks-config.ts index 5667282505..62de8d4de8 100644 --- a/web/src/llm-api/fireworks-config.ts +++ b/web/src/llm-api/fireworks-config.ts @@ -10,6 +10,8 @@ export const FIREWORKS_ACCOUNT_ID = 'james-65d217' export const FIREWORKS_DEPLOYMENT_MAP: Record = { // 'minimax/minimax-m2.5': 'accounts/james-65d217/deployments/lnfid5h9', - 'z-ai/glm-5.1': 'accounts/james-65d217/deployments/mjb4i7ea', + // Disabled: route GLM 5.1 through the Fireworks serverless API during + // availability hours instead of the dedicated deployment. + // 'z-ai/glm-5.1': 'accounts/james-65d217/deployments/mjb4i7ea', // 'minimax/minimax-m2.7': 'accounts/james-65d217/deployments/nrdudqxd', } diff --git a/web/src/llm-api/fireworks.ts b/web/src/llm-api/fireworks.ts index c39daa2a1a..b0013e62a1 100644 --- a/web/src/llm-api/fireworks.ts +++ b/web/src/llm-api/fireworks.ts @@ -2,6 +2,7 @@ import { Agent } from 'undici' import { FREEBUFF_DEPLOYMENT_HOURS_LABEL, + FREEBUFF_GLM_MODEL_ID, isFreebuffDeploymentHours, } from '@codebuff/common/constants/freebuff-models' import { PROFIT_MARGIN } from '@codebuff/common/constants/limits' @@ -38,6 +39,11 @@ const FIREWORKS_MODEL_MAP: Record = { 'z-ai/glm-5.1': 'accounts/fireworks/models/glm-5p1', } +/** Models that stay limited to freebuff deployment hours even on serverless. */ +const FIREWORKS_HOURS_GATED_MODELS = new Set([ + FREEBUFF_GLM_MODEL_ID, +]) + /** Flag to enable custom Fireworks deployments (set to false to use global API only) */ const FIREWORKS_USE_CUSTOM_DEPLOYMENT = true @@ -706,9 +712,10 @@ async function parseFireworksError(response: Response): Promise } /** - * Uses custom Fireworks deployments only during deployment hours. Deployment - * mapped models never fall back to the serverless API outside hours, during - * cooldown, or after deployment 5xxs; those states surface as provider errors + * Uses custom Fireworks deployments only during deployment hours. Some models + * are still availability-gated even when served by the Fireworks serverless + * API. Deployment-mapped models never fall back to the serverless API during + * cooldown or after deployment 5xxs; those states surface as provider errors * so freebuff can offer MiniMax as the always-on option. */ export async function createFireworksRequestWithFallback(params: { @@ -717,20 +724,23 @@ export async function createFireworksRequestWithFallback(params: { fetch: typeof globalThis.fetch logger: Logger useCustomDeployment?: boolean + deploymentMap?: Record sessionId: string now?: Date }): Promise { const { body, originalModel, fetch, logger, sessionId } = params const now = params.now ?? new Date() const useCustomDeployment = params.useCustomDeployment ?? FIREWORKS_USE_CUSTOM_DEPLOYMENT - const deploymentModelId = FIREWORKS_DEPLOYMENT_MAP[originalModel] + const deploymentMap = params.deploymentMap ?? FIREWORKS_DEPLOYMENT_MAP + const deploymentModelId = deploymentMap[originalModel] const hasDeployment = useCustomDeployment && Boolean(deploymentModelId) + const isHoursGatedModel = FIREWORKS_HOURS_GATED_MODELS.has(originalModel) const shouldFallbackToStandardApi = body.codebuff_metadata?.cost_mode === 'lite' const createStandardApiRequest = () => createFireworksRequest({ body, originalModel, fetch, sessionId }) - if (hasDeployment && !isDeploymentHours(now)) { + if (isHoursGatedModel && !isDeploymentHours(now)) { if (shouldFallbackToStandardApi) { logger.info( { model: originalModel },