From d53ece6b9d8da282c76089d52ee202e2de608214 Mon Sep 17 00:00:00 2001 From: Roo Code Date: Tue, 16 Sep 2025 06:33:58 +0000 Subject: [PATCH 1/2] fix: apply tiered pricing for Gemini models via Vertex AI - Modified calculateCost method to handle models where cacheReadsPrice is only defined in tiers - Added comprehensive tests for Vertex AI tiered pricing calculation - Fixes issue where local cost calculation always showed highest tier rates Fixes #8017 --- .../__tests__/vertex-tiered-pricing.spec.ts | 154 ++++++++++++++++++ src/api/providers/gemini.ts | 15 +- 2 files changed, 165 insertions(+), 4 deletions(-) create mode 100644 src/api/providers/__tests__/vertex-tiered-pricing.spec.ts diff --git a/src/api/providers/__tests__/vertex-tiered-pricing.spec.ts b/src/api/providers/__tests__/vertex-tiered-pricing.spec.ts new file mode 100644 index 00000000000..827b6c67350 --- /dev/null +++ b/src/api/providers/__tests__/vertex-tiered-pricing.spec.ts @@ -0,0 +1,154 @@ +// npx vitest run src/api/providers/__tests__/vertex-tiered-pricing.spec.ts + +import { type ModelInfo, vertexModels } from "@roo-code/types" +import { VertexHandler } from "../vertex" + +describe("VertexHandler Tiered Pricing", () => { + let handler: VertexHandler + + beforeEach(() => { + handler = new VertexHandler({ + apiModelId: "gemini-2.5-pro", + vertexProjectId: "test-project", + vertexRegion: "us-central1", + }) + }) + + describe("calculateCost with tiered pricing", () => { + it("should apply lower tier pricing for tokens under 200K", () => { + const modelInfo = handler.getModel() + const inputTokens = 100_000 // Under 200K threshold + const outputTokens = 50_000 + + // According to the tiers in vertex.ts for gemini-2.5-pro: + // First tier (up to 200K): input $1.25/M, output $10/M + const expectedInputCost = (inputTokens / 1_000_000) * 1.25 + const expectedOutputCost = (outputTokens / 1_000_000) * 10 + const expectedTotalCost = expectedInputCost + expectedOutputCost + + const cost = handler.calculateCost({ + info: modelInfo.info, + inputTokens, + outputTokens, + }) + + expect(cost).toBeCloseTo(expectedTotalCost, 6) + // Verify it's using tier 1 pricing, not the default higher pricing + expect(cost).toBeLessThan((inputTokens / 1_000_000) * 2.5 + (outputTokens / 1_000_000) * 15) + }) + + it("should apply higher tier pricing for tokens over 200K", () => { + const modelInfo = handler.getModel() + const inputTokens = 300_000 // Over 200K threshold + const outputTokens = 100_000 + + // According to the tiers in vertex.ts for gemini-2.5-pro: + // Second tier (over 200K): input $2.5/M, output $15/M + const expectedInputCost = (inputTokens / 1_000_000) * 2.5 + const expectedOutputCost = (outputTokens / 1_000_000) * 15 + const expectedTotalCost = expectedInputCost + expectedOutputCost + + const cost = handler.calculateCost({ + info: modelInfo.info, + inputTokens, + outputTokens, + }) + + expect(cost).toBeCloseTo(expectedTotalCost, 6) + }) + + it("should apply cache read pricing based on tier", () => { + const modelInfo = handler.getModel() + const inputTokens = 150_000 // Under 200K - should use tier 1 + const outputTokens = 50_000 + const cacheReadTokens = 100_000 + + // First tier cache reads: $0.31/M + const uncachedInputTokens = inputTokens - cacheReadTokens + const expectedInputCost = (uncachedInputTokens / 1_000_000) * 1.25 + const expectedOutputCost = (outputTokens / 1_000_000) * 10 + const expectedCacheReadCost = (cacheReadTokens / 1_000_000) * 0.31 + const expectedTotalCost = expectedInputCost + expectedOutputCost + expectedCacheReadCost + + const cost = handler.calculateCost({ + info: modelInfo.info, + inputTokens, + outputTokens, + cacheReadTokens, + }) + + expect(cost).toBeCloseTo(expectedTotalCost, 6) + }) + + it("should apply cache read pricing for higher tier", () => { + const modelInfo = handler.getModel() + const inputTokens = 400_000 // Over 200K - should use tier 2 + const outputTokens = 100_000 + const cacheReadTokens = 200_000 + + // Second tier cache reads: $0.625/M + const uncachedInputTokens = inputTokens - cacheReadTokens + const expectedInputCost = (uncachedInputTokens / 1_000_000) * 2.5 + const expectedOutputCost = (outputTokens / 1_000_000) * 15 + const expectedCacheReadCost = (cacheReadTokens / 1_000_000) * 0.625 + const expectedTotalCost = expectedInputCost + expectedOutputCost + expectedCacheReadCost + + const cost = handler.calculateCost({ + info: modelInfo.info, + inputTokens, + outputTokens, + cacheReadTokens, + }) + + expect(cost).toBeCloseTo(expectedTotalCost, 6) + }) + + it("should return model info with tiers property", () => { + const modelInfo = handler.getModel() + + // Verify the model info has tiers defined + expect(modelInfo.info.tiers).toBeDefined() + expect(modelInfo.info.tiers).toHaveLength(2) + + // Verify tier 1 (up to 200K) + expect(modelInfo.info.tiers![0].contextWindow).toBe(200_000) + expect(modelInfo.info.tiers![0].inputPrice).toBe(1.25) + expect(modelInfo.info.tiers![0].outputPrice).toBe(10) + expect(modelInfo.info.tiers![0].cacheReadsPrice).toBe(0.31) + + // Verify tier 2 (over 200K) + expect(modelInfo.info.tiers![1].contextWindow).toBe(Infinity) + expect(modelInfo.info.tiers![1].inputPrice).toBe(2.5) + expect(modelInfo.info.tiers![1].outputPrice).toBe(15) + expect(modelInfo.info.tiers![1].cacheReadsPrice).toBe(0.625) + }) + }) + + describe("models without tiered pricing", () => { + it("should use flat pricing for models without tiers", () => { + const handlerFlat = new VertexHandler({ + apiModelId: "gemini-2.5-flash", + vertexProjectId: "test-project", + vertexRegion: "us-central1", + }) + + const modelInfo = handlerFlat.getModel() + const inputTokens = 100_000 + const outputTokens = 50_000 + + // gemini-2.5-flash has flat pricing: input $0.3/M, output $2.5/M + const expectedInputCost = (inputTokens / 1_000_000) * 0.3 + const expectedOutputCost = (outputTokens / 1_000_000) * 2.5 + const expectedTotalCost = expectedInputCost + expectedOutputCost + + const cost = handlerFlat.calculateCost({ + info: modelInfo.info, + inputTokens, + outputTokens, + }) + + expect(cost).toBeCloseTo(expectedTotalCost, 6) + expect(modelInfo.info.tiers).toBeUndefined() + }) + }) +}) diff --git a/src/api/providers/gemini.ts b/src/api/providers/gemini.ts index 775d763a05f..573adda879e 100644 --- a/src/api/providers/gemini.ts +++ b/src/api/providers/gemini.ts @@ -286,10 +286,7 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl outputTokens: number cacheReadTokens?: number }) { - if (!info.inputPrice || !info.outputPrice || !info.cacheReadsPrice) { - return undefined - } - + // For models with tiered pricing, prices might only be defined in tiers let inputPrice = info.inputPrice let outputPrice = info.outputPrice let cacheReadsPrice = info.cacheReadsPrice @@ -306,6 +303,16 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl } } + // Check if we have the required prices after considering tiers + if (!inputPrice || !outputPrice) { + return undefined + } + + // cacheReadsPrice is optional - if not defined, treat as 0 + if (!cacheReadsPrice) { + cacheReadsPrice = 0 + } + // Subtract the cached input tokens from the total input tokens. const uncachedInputTokens = inputTokens - cacheReadTokens From d99d7305d2882ab56c4092274660baa69a0a63fe Mon Sep 17 00:00:00 2001 From: Daniel <57051444+daniel-lxs@users.noreply.github.com> Date: Wed, 17 Sep 2025 18:17:36 -0500 Subject: [PATCH 2/2] Delete src/api/providers/__tests__/vertex-tiered-pricing.spec.ts --- .../__tests__/vertex-tiered-pricing.spec.ts | 154 ------------------ 1 file changed, 154 deletions(-) delete mode 100644 src/api/providers/__tests__/vertex-tiered-pricing.spec.ts diff --git a/src/api/providers/__tests__/vertex-tiered-pricing.spec.ts b/src/api/providers/__tests__/vertex-tiered-pricing.spec.ts deleted file mode 100644 index 827b6c67350..00000000000 --- a/src/api/providers/__tests__/vertex-tiered-pricing.spec.ts +++ /dev/null @@ -1,154 +0,0 @@ -// npx vitest run src/api/providers/__tests__/vertex-tiered-pricing.spec.ts - -import { type ModelInfo, vertexModels } from "@roo-code/types" -import { VertexHandler } from "../vertex" - -describe("VertexHandler Tiered Pricing", () => { - let handler: VertexHandler - - beforeEach(() => { - handler = new VertexHandler({ - apiModelId: "gemini-2.5-pro", - vertexProjectId: "test-project", - vertexRegion: "us-central1", - }) - }) - - describe("calculateCost with tiered pricing", () => { - it("should apply lower tier pricing for tokens under 200K", () => { - const modelInfo = handler.getModel() - const inputTokens = 100_000 // Under 200K threshold - const outputTokens = 50_000 - - // According to the tiers in vertex.ts for gemini-2.5-pro: - // First tier (up to 200K): input $1.25/M, output $10/M - const expectedInputCost = (inputTokens / 1_000_000) * 1.25 - const expectedOutputCost = (outputTokens / 1_000_000) * 10 - const expectedTotalCost = expectedInputCost + expectedOutputCost - - const cost = handler.calculateCost({ - info: modelInfo.info, - inputTokens, - outputTokens, - }) - - expect(cost).toBeCloseTo(expectedTotalCost, 6) - // Verify it's using tier 1 pricing, not the default higher pricing - expect(cost).toBeLessThan((inputTokens / 1_000_000) * 2.5 + (outputTokens / 1_000_000) * 15) - }) - - it("should apply higher tier pricing for tokens over 200K", () => { - const modelInfo = handler.getModel() - const inputTokens = 300_000 // Over 200K threshold - const outputTokens = 100_000 - - // According to the tiers in vertex.ts for gemini-2.5-pro: - // Second tier (over 200K): input $2.5/M, output $15/M - const expectedInputCost = (inputTokens / 1_000_000) * 2.5 - const expectedOutputCost = (outputTokens / 1_000_000) * 15 - const expectedTotalCost = expectedInputCost + expectedOutputCost - - const cost = handler.calculateCost({ - info: modelInfo.info, - inputTokens, - outputTokens, - }) - - expect(cost).toBeCloseTo(expectedTotalCost, 6) - }) - - it("should apply cache read pricing based on tier", () => { - const modelInfo = handler.getModel() - const inputTokens = 150_000 // Under 200K - should use tier 1 - const outputTokens = 50_000 - const cacheReadTokens = 100_000 - - // First tier cache reads: $0.31/M - const uncachedInputTokens = inputTokens - cacheReadTokens - const expectedInputCost = (uncachedInputTokens / 1_000_000) * 1.25 - const expectedOutputCost = (outputTokens / 1_000_000) * 10 - const expectedCacheReadCost = (cacheReadTokens / 1_000_000) * 0.31 - const expectedTotalCost = expectedInputCost + expectedOutputCost + expectedCacheReadCost - - const cost = handler.calculateCost({ - info: modelInfo.info, - inputTokens, - outputTokens, - cacheReadTokens, - }) - - expect(cost).toBeCloseTo(expectedTotalCost, 6) - }) - - it("should apply cache read pricing for higher tier", () => { - const modelInfo = handler.getModel() - const inputTokens = 400_000 // Over 200K - should use tier 2 - const outputTokens = 100_000 - const cacheReadTokens = 200_000 - - // Second tier cache reads: $0.625/M - const uncachedInputTokens = inputTokens - cacheReadTokens - const expectedInputCost = (uncachedInputTokens / 1_000_000) * 2.5 - const expectedOutputCost = (outputTokens / 1_000_000) * 15 - const expectedCacheReadCost = (cacheReadTokens / 1_000_000) * 0.625 - const expectedTotalCost = expectedInputCost + expectedOutputCost + expectedCacheReadCost - - const cost = handler.calculateCost({ - info: modelInfo.info, - inputTokens, - outputTokens, - cacheReadTokens, - }) - - expect(cost).toBeCloseTo(expectedTotalCost, 6) - }) - - it("should return model info with tiers property", () => { - const modelInfo = handler.getModel() - - // Verify the model info has tiers defined - expect(modelInfo.info.tiers).toBeDefined() - expect(modelInfo.info.tiers).toHaveLength(2) - - // Verify tier 1 (up to 200K) - expect(modelInfo.info.tiers![0].contextWindow).toBe(200_000) - expect(modelInfo.info.tiers![0].inputPrice).toBe(1.25) - expect(modelInfo.info.tiers![0].outputPrice).toBe(10) - expect(modelInfo.info.tiers![0].cacheReadsPrice).toBe(0.31) - - // Verify tier 2 (over 200K) - expect(modelInfo.info.tiers![1].contextWindow).toBe(Infinity) - expect(modelInfo.info.tiers![1].inputPrice).toBe(2.5) - expect(modelInfo.info.tiers![1].outputPrice).toBe(15) - expect(modelInfo.info.tiers![1].cacheReadsPrice).toBe(0.625) - }) - }) - - describe("models without tiered pricing", () => { - it("should use flat pricing for models without tiers", () => { - const handlerFlat = new VertexHandler({ - apiModelId: "gemini-2.5-flash", - vertexProjectId: "test-project", - vertexRegion: "us-central1", - }) - - const modelInfo = handlerFlat.getModel() - const inputTokens = 100_000 - const outputTokens = 50_000 - - // gemini-2.5-flash has flat pricing: input $0.3/M, output $2.5/M - const expectedInputCost = (inputTokens / 1_000_000) * 0.3 - const expectedOutputCost = (outputTokens / 1_000_000) * 2.5 - const expectedTotalCost = expectedInputCost + expectedOutputCost - - const cost = handlerFlat.calculateCost({ - info: modelInfo.info, - inputTokens, - outputTokens, - }) - - expect(cost).toBeCloseTo(expectedTotalCost, 6) - expect(modelInfo.info.tiers).toBeUndefined() - }) - }) -})