From d53ece6b9d8da282c76089d52ee202e2de608214 Mon Sep 17 00:00:00 2001
From: Roo Code <roomote@roocode.com>
Date: Tue, 16 Sep 2025 06:33:58 +0000
Subject: [PATCH 1/2] fix: apply tiered pricing for Gemini models via Vertex AI

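- Modified calculateCost method to check for required prices only after tiers are
  considered, so models whose prices (e.g. cacheReadsPrice) are defined only in
  tiers are no longer rejected; a missing cacheReadsPrice is now treated as 0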
- Added comprehensive tests for Vertex AI tiered pricing calculation
- Fixes issue where local cost calculation always showed highest tier rates

Fixes #8017
---
 .../__tests__/vertex-tiered-pricing.spec.ts   | 154 ++++++++++++++++++
 src/api/providers/gemini.ts                   |  15 +-
 2 files changed, 165 insertions(+), 4 deletions(-)
 create mode 100644 src/api/providers/__tests__/vertex-tiered-pricing.spec.ts

diff --git a/src/api/providers/__tests__/vertex-tiered-pricing.spec.ts b/src/api/providers/__tests__/vertex-tiered-pricing.spec.ts
new file mode 100644
index 00000000000..827b6c67350
--- /dev/null
+++ b/src/api/providers/__tests__/vertex-tiered-pricing.spec.ts
@@ -0,0 +1,154 @@
+// npx vitest run src/api/providers/__tests__/vertex-tiered-pricing.spec.ts
+
+import { type ModelInfo, vertexModels } from "@roo-code/types"
+import { VertexHandler } from "../vertex"
+
+describe("VertexHandler Tiered Pricing", () => {
+	let handler: VertexHandler
+
+	beforeEach(() => {
+		handler = new VertexHandler({
+			apiModelId: "gemini-2.5-pro",
+			vertexProjectId: "test-project",
+			vertexRegion: "us-central1",
+		})
+	})
+
+	describe("calculateCost with tiered pricing", () => {
+		it("should apply lower tier pricing for tokens under 200K", () => {
+			const modelInfo = handler.getModel()
+			const inputTokens = 100_000 // Under 200K threshold
+			const outputTokens = 50_000
+
+			// According to the tiers in vertex.ts for gemini-2.5-pro:
+			// First tier (up to 200K): input $1.25/M, output $10/M
+			const expectedInputCost = (inputTokens / 1_000_000) * 1.25
+			const expectedOutputCost = (outputTokens / 1_000_000) * 10
+			const expectedTotalCost = expectedInputCost + expectedOutputCost
+
+			const cost = handler.calculateCost({
+				info: modelInfo.info,
+				inputTokens,
+				outputTokens,
+			})
+
+			expect(cost).toBeCloseTo(expectedTotalCost, 6)
+			// Verify it's using tier 1 pricing, not the default higher pricing
+			expect(cost).toBeLessThan((inputTokens / 1_000_000) * 2.5 + (outputTokens / 1_000_000) * 15)
+		})
+
+		it("should apply higher tier pricing for tokens over 200K", () => {
+			const modelInfo = handler.getModel()
+			const inputTokens = 300_000 // Over 200K threshold
+			const outputTokens = 100_000
+
+			// According to the tiers in vertex.ts for gemini-2.5-pro:
+			// Second tier (over 200K): input $2.5/M, output $15/M
+			const expectedInputCost = (inputTokens / 1_000_000) * 2.5
+			const expectedOutputCost = (outputTokens / 1_000_000) * 15
+			const expectedTotalCost = expectedInputCost + expectedOutputCost
+
+			const cost = handler.calculateCost({
+				info: modelInfo.info,
+				inputTokens,
+				outputTokens,
+			})
+
+			expect(cost).toBeCloseTo(expectedTotalCost, 6)
+		})
+
+		it("should apply cache read pricing based on tier", () => {
+			const modelInfo = handler.getModel()
+			const inputTokens = 150_000 // Under 200K - should use tier 1
+			const outputTokens = 50_000
+			const cacheReadTokens = 100_000
+
+			// First tier cache reads: $0.31/M
+			const uncachedInputTokens = inputTokens - cacheReadTokens
+			const expectedInputCost = (uncachedInputTokens / 1_000_000) * 1.25
+			const expectedOutputCost = (outputTokens / 1_000_000) * 10
+			const expectedCacheReadCost = (cacheReadTokens / 1_000_000) * 0.31
+			const expectedTotalCost = expectedInputCost + expectedOutputCost + expectedCacheReadCost
+
+			const cost = handler.calculateCost({
+				info: modelInfo.info,
+				inputTokens,
+				outputTokens,
+				cacheReadTokens,
+			})
+
+			expect(cost).toBeCloseTo(expectedTotalCost, 6)
+		})
+
+		it("should apply cache read pricing for higher tier", () => {
+			const modelInfo = handler.getModel()
+			const inputTokens = 400_000 // Over 200K - should use tier 2
+			const outputTokens = 100_000
+			const cacheReadTokens = 200_000
+
+			// Second tier cache reads: $0.625/M
+			const uncachedInputTokens = inputTokens - cacheReadTokens
+			const expectedInputCost = (uncachedInputTokens / 1_000_000) * 2.5
+			const expectedOutputCost = (outputTokens / 1_000_000) * 15
+			const expectedCacheReadCost = (cacheReadTokens / 1_000_000) * 0.625
+			const expectedTotalCost = expectedInputCost + expectedOutputCost + expectedCacheReadCost
+
+			const cost = handler.calculateCost({
+				info: modelInfo.info,
+				inputTokens,
+				outputTokens,
+				cacheReadTokens,
+			})
+
+			expect(cost).toBeCloseTo(expectedTotalCost, 6)
+		})
+
+		it("should return model info with tiers property", () => {
+			const modelInfo = handler.getModel()
+
+			// Verify the model info has tiers defined
+			expect(modelInfo.info.tiers).toBeDefined()
+			expect(modelInfo.info.tiers).toHaveLength(2)
+
+			// Verify tier 1 (up to 200K)
+			expect(modelInfo.info.tiers![0].contextWindow).toBe(200_000)
+			expect(modelInfo.info.tiers![0].inputPrice).toBe(1.25)
+			expect(modelInfo.info.tiers![0].outputPrice).toBe(10)
+			expect(modelInfo.info.tiers![0].cacheReadsPrice).toBe(0.31)
+
+			// Verify tier 2 (over 200K)
+			expect(modelInfo.info.tiers![1].contextWindow).toBe(Infinity)
+			expect(modelInfo.info.tiers![1].inputPrice).toBe(2.5)
+			expect(modelInfo.info.tiers![1].outputPrice).toBe(15)
+			expect(modelInfo.info.tiers![1].cacheReadsPrice).toBe(0.625)
+		})
+	})
+
+	describe("models without tiered pricing", () => {
+		it("should use flat pricing for models without tiers", () => {
+			const handlerFlat = new VertexHandler({
+				apiModelId: "gemini-2.5-flash",
+				vertexProjectId: "test-project",
+				vertexRegion: "us-central1",
+			})
+
+			const modelInfo = handlerFlat.getModel()
+			const inputTokens = 100_000
+			const outputTokens = 50_000
+
+			// gemini-2.5-flash has flat pricing: input $0.3/M, output $2.5/M
+			const expectedInputCost = (inputTokens / 1_000_000) * 0.3
+			const expectedOutputCost = (outputTokens / 1_000_000) * 2.5
+			const expectedTotalCost = expectedInputCost + expectedOutputCost
+
+			const cost = handlerFlat.calculateCost({
+				info: modelInfo.info,
+				inputTokens,
+				outputTokens,
+			})
+
+			expect(cost).toBeCloseTo(expectedTotalCost, 6)
+			expect(modelInfo.info.tiers).toBeUndefined()
+		})
+	})
+})
diff --git a/src/api/providers/gemini.ts b/src/api/providers/gemini.ts
index 775d763a05f..573adda879e 100644
--- a/src/api/providers/gemini.ts
+++ b/src/api/providers/gemini.ts
@@ -286,10 +286,7 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
 		outputTokens: number
 		cacheReadTokens?: number
 	}) {
-		if (!info.inputPrice || !info.outputPrice || !info.cacheReadsPrice) {
-			return undefined
-		}
-
+		// For models with tiered pricing, prices might only be defined in tiers
 		let inputPrice = info.inputPrice
 		let outputPrice = info.outputPrice
 		let cacheReadsPrice = info.cacheReadsPrice
@@ -306,6 +303,16 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
 			}
 		}
 
+		// Check if we have the required prices after considering tiers
+		if (!inputPrice || !outputPrice) {
+			return undefined
+		}
+
+		// cacheReadsPrice is optional - if not defined, treat as 0
+		if (!cacheReadsPrice) {
+			cacheReadsPrice = 0
+		}
+
 		// Subtract the cached input tokens from the total input tokens.
 		const uncachedInputTokens = inputTokens - cacheReadTokens
 

From d99d7305d2882ab56c4092274660baa69a0a63fe Mon Sep 17 00:00:00 2001
From: Daniel <57051444+daniel-lxs@users.noreply.github.com>
Date: Wed, 17 Sep 2025 18:17:36 -0500
Subject: [PATCH 2/2] Delete
 src/api/providers/__tests__/vertex-tiered-pricing.spec.ts

---
 .../__tests__/vertex-tiered-pricing.spec.ts   | 154 ------------------
 1 file changed, 154 deletions(-)
 delete mode 100644 src/api/providers/__tests__/vertex-tiered-pricing.spec.ts

diff --git a/src/api/providers/__tests__/vertex-tiered-pricing.spec.ts b/src/api/providers/__tests__/vertex-tiered-pricing.spec.ts
deleted file mode 100644
index 827b6c67350..00000000000
--- a/src/api/providers/__tests__/vertex-tiered-pricing.spec.ts
+++ /dev/null
@@ -1,154 +0,0 @@
-// npx vitest run src/api/providers/__tests__/vertex-tiered-pricing.spec.ts
-
-import { type ModelInfo, vertexModels } from "@roo-code/types"
-import { VertexHandler } from "../vertex"
-
-describe("VertexHandler Tiered Pricing", () => {
-	let handler: VertexHandler
-
-	beforeEach(() => {
-		handler = new VertexHandler({
-			apiModelId: "gemini-2.5-pro",
-			vertexProjectId: "test-project",
-			vertexRegion: "us-central1",
-		})
-	})
-
-	describe("calculateCost with tiered pricing", () => {
-		it("should apply lower tier pricing for tokens under 200K", () => {
-			const modelInfo = handler.getModel()
-			const inputTokens = 100_000 // Under 200K threshold
-			const outputTokens = 50_000
-
-			// According to the tiers in vertex.ts for gemini-2.5-pro:
-			// First tier (up to 200K): input $1.25/M, output $10/M
-			const expectedInputCost = (inputTokens / 1_000_000) * 1.25
-			const expectedOutputCost = (outputTokens / 1_000_000) * 10
-			const expectedTotalCost = expectedInputCost + expectedOutputCost
-
-			const cost = handler.calculateCost({
-				info: modelInfo.info,
-				inputTokens,
-				outputTokens,
-			})
-
-			expect(cost).toBeCloseTo(expectedTotalCost, 6)
-			// Verify it's using tier 1 pricing, not the default higher pricing
-			expect(cost).toBeLessThan((inputTokens / 1_000_000) * 2.5 + (outputTokens / 1_000_000) * 15)
-		})
-
-		it("should apply higher tier pricing for tokens over 200K", () => {
-			const modelInfo = handler.getModel()
-			const inputTokens = 300_000 // Over 200K threshold
-			const outputTokens = 100_000
-
-			// According to the tiers in vertex.ts for gemini-2.5-pro:
-			// Second tier (over 200K): input $2.5/M, output $15/M
-			const expectedInputCost = (inputTokens / 1_000_000) * 2.5
-			const expectedOutputCost = (outputTokens / 1_000_000) * 15
-			const expectedTotalCost = expectedInputCost + expectedOutputCost
-
-			const cost = handler.calculateCost({
-				info: modelInfo.info,
-				inputTokens,
-				outputTokens,
-			})
-
-			expect(cost).toBeCloseTo(expectedTotalCost, 6)
-		})
-
-		it("should apply cache read pricing based on tier", () => {
-			const modelInfo = handler.getModel()
-			const inputTokens = 150_000 // Under 200K - should use tier 1
-			const outputTokens = 50_000
-			const cacheReadTokens = 100_000
-
-			// First tier cache reads: $0.31/M
-			const uncachedInputTokens = inputTokens - cacheReadTokens
-			const expectedInputCost = (uncachedInputTokens / 1_000_000) * 1.25
-			const expectedOutputCost = (outputTokens / 1_000_000) * 10
-			const expectedCacheReadCost = (cacheReadTokens / 1_000_000) * 0.31
-			const expectedTotalCost = expectedInputCost + expectedOutputCost + expectedCacheReadCost
-
-			const cost = handler.calculateCost({
-				info: modelInfo.info,
-				inputTokens,
-				outputTokens,
-				cacheReadTokens,
-			})
-
-			expect(cost).toBeCloseTo(expectedTotalCost, 6)
-		})
-
-		it("should apply cache read pricing for higher tier", () => {
-			const modelInfo = handler.getModel()
-			const inputTokens = 400_000 // Over 200K - should use tier 2
-			const outputTokens = 100_000
-			const cacheReadTokens = 200_000
-
-			// Second tier cache reads: $0.625/M
-			const uncachedInputTokens = inputTokens - cacheReadTokens
-			const expectedInputCost = (uncachedInputTokens / 1_000_000) * 2.5
-			const expectedOutputCost = (outputTokens / 1_000_000) * 15
-			const expectedCacheReadCost = (cacheReadTokens / 1_000_000) * 0.625
-			const expectedTotalCost = expectedInputCost + expectedOutputCost + expectedCacheReadCost
-
-			const cost = handler.calculateCost({
-				info: modelInfo.info,
-				inputTokens,
-				outputTokens,
-				cacheReadTokens,
-			})
-
-			expect(cost).toBeCloseTo(expectedTotalCost, 6)
-		})
-
-		it("should return model info with tiers property", () => {
-			const modelInfo = handler.getModel()
-
-			// Verify the model info has tiers defined
-			expect(modelInfo.info.tiers).toBeDefined()
-			expect(modelInfo.info.tiers).toHaveLength(2)
-
-			// Verify tier 1 (up to 200K)
-			expect(modelInfo.info.tiers![0].contextWindow).toBe(200_000)
-			expect(modelInfo.info.tiers![0].inputPrice).toBe(1.25)
-			expect(modelInfo.info.tiers![0].outputPrice).toBe(10)
-			expect(modelInfo.info.tiers![0].cacheReadsPrice).toBe(0.31)
-
-			// Verify tier 2 (over 200K)
-			expect(modelInfo.info.tiers![1].contextWindow).toBe(Infinity)
-			expect(modelInfo.info.tiers![1].inputPrice).toBe(2.5)
-			expect(modelInfo.info.tiers![1].outputPrice).toBe(15)
-			expect(modelInfo.info.tiers![1].cacheReadsPrice).toBe(0.625)
-		})
-	})
-
-	describe("models without tiered pricing", () => {
-		it("should use flat pricing for models without tiers", () => {
-			const handlerFlat = new VertexHandler({
-				apiModelId: "gemini-2.5-flash",
-				vertexProjectId: "test-project",
-				vertexRegion: "us-central1",
-			})
-
-			const modelInfo = handlerFlat.getModel()
-			const inputTokens = 100_000
-			const outputTokens = 50_000
-
-			// gemini-2.5-flash has flat pricing: input $0.3/M, output $2.5/M
-			const expectedInputCost = (inputTokens / 1_000_000) * 0.3
-			const expectedOutputCost = (outputTokens / 1_000_000) * 2.5
-			const expectedTotalCost = expectedInputCost + expectedOutputCost
-
-			const cost = handlerFlat.calculateCost({
-				info: modelInfo.info,
-				inputTokens,
-				outputTokens,
-			})
-
-			expect(cost).toBeCloseTo(expectedTotalCost, 6)
-			expect(modelInfo.info.tiers).toBeUndefined()
-		})
-	})
-})