diff --git a/packages/types/src/providers/gemini.ts b/packages/types/src/providers/gemini.ts
index aae428d90c0..5eeaef1097e 100644
--- a/packages/types/src/providers/gemini.ts
+++ b/packages/types/src/providers/gemini.ts
@@ -3,138 +3,12 @@ import type { ModelInfo } from "../model.js"
 // https://ai.google.dev/gemini-api/docs/models/gemini
 export type GeminiModelId = keyof typeof geminiModels
 
-export const geminiDefaultModelId: GeminiModelId = "gemini-2.0-flash-001"
+export const geminiDefaultModelId: GeminiModelId = "gemini-2.5-pro"
 
 export const geminiModels = {
-	// Latest models (pointing to the most recent stable versions)
-	"gemini-flash-latest": {
-		maxTokens: 65_536,
-		contextWindow: 1_048_576,
-		supportsImages: true,
-		supportsPromptCache: true,
-		inputPrice: 0.3,
-		outputPrice: 2.5,
-		cacheReadsPrice: 0.075,
-		cacheWritesPrice: 1.0,
-		maxThinkingTokens: 24_576,
-		supportsReasoningBudget: true,
-	},
-	"gemini-flash-lite-latest": {
-		maxTokens: 65_536,
-		contextWindow: 1_048_576,
-		supportsImages: true,
-		supportsPromptCache: true,
-		inputPrice: 0.1,
-		outputPrice: 0.4,
-		cacheReadsPrice: 0.025,
-		cacheWritesPrice: 1.0,
-		supportsReasoningBudget: true,
-		maxThinkingTokens: 24_576,
-	},
-
-	// 2.5 Flash models (09-2025 versions - most recent)
-	"gemini-2.5-flash-preview-09-2025": {
-		maxTokens: 65_536,
-		contextWindow: 1_048_576,
-		supportsImages: true,
-		supportsPromptCache: true,
-		inputPrice: 0.3,
-		outputPrice: 2.5,
-		cacheReadsPrice: 0.075,
-		cacheWritesPrice: 1.0,
-		maxThinkingTokens: 24_576,
-		supportsReasoningBudget: true,
-	},
-	"gemini-2.5-flash-lite-preview-09-2025": {
-		maxTokens: 65_536,
-		contextWindow: 1_048_576,
-		supportsImages: true,
-		supportsPromptCache: true,
-		inputPrice: 0.1,
-		outputPrice: 0.4,
-		cacheReadsPrice: 0.025,
-		cacheWritesPrice: 1.0,
-		supportsReasoningBudget: true,
-		maxThinkingTokens: 24_576,
-	},
-
-	// 2.5 Flash models (06-17 version)
-	"gemini-2.5-flash-lite-preview-06-17": {
-		maxTokens: 64_000,
-		contextWindow: 1_048_576,
-		supportsImages: true,
-		supportsPromptCache: true,
-		inputPrice: 0.1,
-		outputPrice: 0.4,
-		cacheReadsPrice: 0.025,
-		cacheWritesPrice: 1.0,
-		supportsReasoningBudget: true,
-		maxThinkingTokens: 24_576,
-	},
-
-	// 2.5 Flash models (05-20 versions)
-	"gemini-2.5-flash-preview-05-20:thinking": {
-		maxTokens: 65_535,
-		contextWindow: 1_048_576,
-		supportsImages: true,
-		supportsPromptCache: true,
-		inputPrice: 0.15,
-		outputPrice: 3.5,
-		cacheReadsPrice: 0.0375,
-		cacheWritesPrice: 1.0,
-		maxThinkingTokens: 24_576,
-		supportsReasoningBudget: true,
-		requiredReasoningBudget: true,
-	},
-	"gemini-2.5-flash-preview-05-20": {
-		maxTokens: 65_535,
-		contextWindow: 1_048_576,
-		supportsImages: true,
-		supportsPromptCache: true,
-		inputPrice: 0.15,
-		outputPrice: 0.6,
-		cacheReadsPrice: 0.0375,
-		cacheWritesPrice: 1.0,
-	},
-
-	// 2.5 Flash models (04-17 versions)
-	"gemini-2.5-flash-preview-04-17:thinking": {
-		maxTokens: 65_535,
-		contextWindow: 1_048_576,
-		supportsImages: true,
-		supportsPromptCache: false,
-		inputPrice: 0.15,
-		outputPrice: 3.5,
-		maxThinkingTokens: 24_576,
-		supportsReasoningBudget: true,
-		requiredReasoningBudget: true,
-	},
-	"gemini-2.5-flash-preview-04-17": {
-		maxTokens: 65_535,
-		contextWindow: 1_048_576,
-		supportsImages: true,
-		supportsPromptCache: false,
-		inputPrice: 0.15,
-		outputPrice: 0.6,
-	},
-
-	// 2.5 Flash stable
-	"gemini-2.5-flash": {
-		maxTokens: 64_000,
-		contextWindow: 1_048_576,
-		supportsImages: true,
-		supportsPromptCache: true,
-		inputPrice: 0.3,
-		outputPrice: 2.5,
-		cacheReadsPrice: 0.075,
-		cacheWritesPrice: 1.0,
-		maxThinkingTokens: 24_576,
-		supportsReasoningBudget: true,
-	},
-	// 2.5 Pro models
-	"gemini-2.5-pro-preview-06-05": {
-		maxTokens: 65_535,
+	"gemini-2.5-pro": {
+		maxTokens: 64_000,
 		contextWindow: 1_048_576,
 		supportsImages: true,
 		supportsPromptCache: true,
@@ -144,6 +18,7 @@ export const geminiModels = {
 		cacheWritesPrice: 4.5,
 		maxThinkingTokens: 32_768,
 		supportsReasoningBudget: true,
+		requiredReasoningBudget: true,
 		tiers: [
 			{
 				contextWindow: 200_000,
@@ -159,7 +34,7 @@ export const geminiModels = {
 			},
 		],
 	},
-	"gemini-2.5-pro-preview-05-06": {
+	"gemini-2.5-pro-preview-06-05": {
 		maxTokens: 65_535,
 		contextWindow: 1_048_576,
 		supportsImages: true,
@@ -168,6 +43,8 @@ export const geminiModels = {
 		outputPrice: 15,
 		cacheReadsPrice: 0.625,
 		cacheWritesPrice: 4.5,
+		maxThinkingTokens: 32_768,
+		supportsReasoningBudget: true,
 		tiers: [
 			{
 				contextWindow: 200_000,
@@ -183,7 +60,7 @@ export const geminiModels = {
 			},
 		],
 	},
-	"gemini-2.5-pro-preview-03-25": {
+	"gemini-2.5-pro-preview-05-06": {
 		maxTokens: 65_535,
 		contextWindow: 1_048_576,
 		supportsImages: true,
@@ -192,8 +69,6 @@ export const geminiModels = {
 		outputPrice: 15,
 		cacheReadsPrice: 0.625,
 		cacheWritesPrice: 4.5,
-		maxThinkingTokens: 32_768,
-		supportsReasoningBudget: true,
 		tiers: [
 			{
 				contextWindow: 200_000,
@@ -209,18 +84,10 @@ export const geminiModels = {
 			},
 		],
 	},
-	"gemini-2.5-pro-exp-03-25": {
+	"gemini-2.5-pro-preview-03-25": {
 		maxTokens: 65_535,
 		contextWindow: 1_048_576,
 		supportsImages: true,
-		supportsPromptCache: false,
-		inputPrice: 0,
-		outputPrice: 0,
-	},
-	"gemini-2.5-pro": {
-		maxTokens: 64_000,
-		contextWindow: 1_048_576,
-		supportsImages: true,
 		supportsPromptCache: true,
 		inputPrice: 2.5, // This is the pricing for prompts above 200k tokens.
 		outputPrice: 15,
@@ -228,7 +95,6 @@ export const geminiModels = {
 		cacheWritesPrice: 4.5,
 		maxThinkingTokens: 32_768,
 		supportsReasoningBudget: true,
-		requiredReasoningBudget: true,
 		tiers: [
 			{
 				contextWindow: 200_000,
@@ -245,41 +111,47 @@ export const geminiModels = {
 		],
 	},
 
-	// 2.0 Flash models
-	"gemini-2.0-flash-lite-preview-02-05": {
-		maxTokens: 8192,
+	// 2.5 Flash models
+	"gemini-flash-latest": {
+		maxTokens: 65_536,
 		contextWindow: 1_048_576,
 		supportsImages: true,
-		supportsPromptCache: false,
-		inputPrice: 0,
-		outputPrice: 0,
+		supportsPromptCache: true,
+		inputPrice: 0.3,
+		outputPrice: 2.5,
+		cacheReadsPrice: 0.075,
+		cacheWritesPrice: 1.0,
+		maxThinkingTokens: 24_576,
+		supportsReasoningBudget: true,
 	},
-	"gemini-2.0-flash-thinking-exp-01-21": {
+	"gemini-2.5-flash-preview-09-2025": {
 		maxTokens: 65_536,
 		contextWindow: 1_048_576,
 		supportsImages: true,
-		supportsPromptCache: false,
-		inputPrice: 0,
-		outputPrice: 0,
-	},
-	"gemini-2.0-flash-thinking-exp-1219": {
-		maxTokens: 8192,
-		contextWindow: 32_767,
-		supportsImages: true,
-		supportsPromptCache: false,
-		inputPrice: 0,
-		outputPrice: 0,
+		supportsPromptCache: true,
+		inputPrice: 0.3,
+		outputPrice: 2.5,
+		cacheReadsPrice: 0.075,
+		cacheWritesPrice: 1.0,
+		maxThinkingTokens: 24_576,
+		supportsReasoningBudget: true,
 	},
-	"gemini-2.0-flash-exp": {
-		maxTokens: 8192,
+	"gemini-2.5-flash": {
+		maxTokens: 64_000,
 		contextWindow: 1_048_576,
 		supportsImages: true,
-		supportsPromptCache: false,
-		inputPrice: 0,
-		outputPrice: 0,
+		supportsPromptCache: true,
+		inputPrice: 0.3,
+		outputPrice: 2.5,
+		cacheReadsPrice: 0.075,
+		cacheWritesPrice: 1.0,
+		maxThinkingTokens: 24_576,
+		supportsReasoningBudget: true,
 	},
-	"gemini-2.0-flash-001": {
-		maxTokens: 8192,
+
+	// 2.5 Flash Lite models
+	"gemini-flash-lite-latest": {
+		maxTokens: 65_536,
 		contextWindow: 1_048_576,
 		supportsImages: true,
 		supportsPromptCache: true,
@@ -287,85 +159,19 @@ export const geminiModels = {
 		outputPrice: 0.4,
 		cacheReadsPrice: 0.025,
 		cacheWritesPrice: 1.0,
+		supportsReasoningBudget: true,
+		maxThinkingTokens: 24_576,
 	},
-
-	// 2.0 Pro models
-	"gemini-2.0-pro-exp-02-05": {
-		maxTokens: 8192,
-		contextWindow: 2_097_152,
-		supportsImages: true,
-		supportsPromptCache: false,
-		inputPrice: 0,
-		outputPrice: 0,
-	},
-
-	// 1.5 Flash models
-	"gemini-1.5-flash-002": {
-		maxTokens: 8192,
+	"gemini-2.5-flash-lite-preview-09-2025": {
+		maxTokens: 65_536,
 		contextWindow: 1_048_576,
 		supportsImages: true,
 		supportsPromptCache: true,
-		inputPrice: 0.15, // This is the pricing for prompts above 128k tokens.
-		outputPrice: 0.6,
-		cacheReadsPrice: 0.0375,
+		inputPrice: 0.1,
+		outputPrice: 0.4,
+		cacheReadsPrice: 0.025,
 		cacheWritesPrice: 1.0,
-		tiers: [
-			{
-				contextWindow: 128_000,
-				inputPrice: 0.075,
-				outputPrice: 0.3,
-				cacheReadsPrice: 0.01875,
-			},
-			{
-				contextWindow: Infinity,
-				inputPrice: 0.15,
-				outputPrice: 0.6,
-				cacheReadsPrice: 0.0375,
-			},
-		],
-	},
-	"gemini-1.5-flash-exp-0827": {
-		maxTokens: 8192,
-		contextWindow: 1_048_576,
-		supportsImages: true,
-		supportsPromptCache: false,
-		inputPrice: 0,
-		outputPrice: 0,
-	},
-	"gemini-1.5-flash-8b-exp-0827": {
-		maxTokens: 8192,
-		contextWindow: 1_048_576,
-		supportsImages: true,
-		supportsPromptCache: false,
-		inputPrice: 0,
-		outputPrice: 0,
-	},
-
-	// 1.5 Pro models
-	"gemini-1.5-pro-002": {
-		maxTokens: 8192,
-		contextWindow: 2_097_152,
-		supportsImages: true,
-		supportsPromptCache: false,
-		inputPrice: 0,
-		outputPrice: 0,
-	},
-	"gemini-1.5-pro-exp-0827": {
-		maxTokens: 8192,
-		contextWindow: 2_097_152,
-		supportsImages: true,
-		supportsPromptCache: false,
-		inputPrice: 0,
-		outputPrice: 0,
-	},
-
-	// Experimental models
-	"gemini-exp-1206": {
-		maxTokens: 8192,
-		contextWindow: 2_097_152,
-		supportsImages: true,
-		supportsPromptCache: false,
-		inputPrice: 0,
-		outputPrice: 0,
+		supportsReasoningBudget: true,
+		maxThinkingTokens: 24_576,
 	},
 } as const satisfies Record<string, ModelInfo>
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index e803a1a72f7..5f82147ecb8 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -627,8 +627,8 @@ importers:
         specifier: ^3.922.0
         version: 3.922.0
       '@google/genai':
-        specifier: ^1.0.0
-        version: 1.3.0(@modelcontextprotocol/sdk@1.12.0)
+        specifier: ^1.29.1
+        version: 1.29.1(@modelcontextprotocol/sdk@1.12.0)
       '@lmstudio/sdk':
        specifier: ^1.1.1
        version: 1.2.0
@@ -1914,11 +1914,14 @@ packages:
   '@floating-ui/utils@0.2.9':
     resolution: {integrity: sha512-MDWhGtE+eHw5JW7lq4qhc5yRLS11ERl1c7Z6Xd0a58DozHES6EnNNwUWbMiG4J9Cgj053Bhk8zvlhFYKVhULwg==}
 
-  '@google/genai@1.3.0':
-    resolution: {integrity: sha512-rrMzAELX4P902FUpuWy/W3NcQ7L3q/qtCzfCmGVqIce8yWpptTF9hkKsw744tvZpwqhuzD0URibcJA95wd8QFA==}
+  '@google/genai@1.29.1':
+    resolution: {integrity: sha512-Buywpq0A6xf9cOdhiWCi5KUiDBbZkjCH5xbl+xxNQRItoYQgd31p0OKyn5cUnT0YNzC/pAmszqXoOc7kncqfFQ==}
     engines: {node: '>=20.0.0'}
     peerDependencies:
-      '@modelcontextprotocol/sdk': ^1.11.0
+      '@modelcontextprotocol/sdk': ^1.20.1
+    peerDependenciesMeta:
+      '@modelcontextprotocol/sdk':
+        optional: true
 
   '@hookform/resolvers@5.1.1':
     resolution: {integrity: sha512-J/NVING3LMAEvexJkyTLjruSm7aOFx7QX21pzkiJfMoNG0wl5aFEjLTl7ay7IQb9EWY6AkrBy7tHL2Alijpdcg==}
@@ -6208,10 +6211,18 @@ packages:
     resolution: {integrity: sha512-LDODD4TMYx7XXdpwxAVRAIAuB0bzv0s+ywFonY46k126qzQHT9ygyoa9tncmOiQmmDrik65UYsEkv3lbfqQ3yQ==}
     engines: {node: '>=14'}
 
+  gaxios@7.1.3:
+    resolution: {integrity: sha512-YGGyuEdVIjqxkxVH1pUTMY/XtmmsApXrCVv5EU25iX6inEPbV+VakJfLealkBtJN69AQmh1eGOdCl9Sm1UP6XQ==}
+    engines: {node: '>=18'}
+
   gcp-metadata@6.1.1:
     resolution: {integrity: sha512-a4tiq7E0/5fTjxPAaH4jpjkSv/uCaU2p5KC6HVGrvl0cDjA8iBZv4vv1gyzlmK0ZUKqwpOyQMKzZQe3lTit77A==}
     engines: {node: '>=14'}
 
+  gcp-metadata@8.1.2:
+    resolution: {integrity: sha512-zV/5HKTfCeKWnxG0Dmrw51hEWFGfcF2xiXqcA3+J90WDuP0SvoiSO5ORvcBsifmx/FoIjgQN3oNOGaQ5PhLFkg==}
+    engines: {node: '>=18'}
+
   gel@2.1.0:
     resolution: {integrity: sha512-HCeRqInCt6BjbMmeghJ6BKeYwOj7WJT5Db6IWWAA3IMUUa7or7zJfTUEkUWCxiOtoXnwnm96sFK9Fr47Yh2hOA==}
     engines: {node: '>= 18.0.0'}
@@ -6328,6 +6339,10 @@ packages:
     resolution: {integrity: sha512-jhIXaOzy1sb8IyocaruWSn1TjmnBVs8Ayhcy83rmxNJ8q2uWKCAj3CnJY+KpGSXCueAPc0i05kVvVKtP1t9S3g==}
     engines: {node: '>=10'}
 
+  google-auth-library@10.5.0:
+    resolution: {integrity: sha512-7ABviyMOlX5hIVD60YOfHw4/CxOfBhyduaYB+wbFWCWoni4N7SLcV46hrVRktuBbZjFC9ONyqamZITN7q3n32w==}
+    engines: {node: '>=18'}
+
   google-auth-library@9.15.1:
     resolution: {integrity: sha512-Jb6Z0+nvECVz+2lzSMt9u98UsoakXxA2HGHMCxh+so3n90XgYWkq5dur19JAJV7ONiJY22yBTyJB1TSkvPq9Ng==}
     engines: {node: '>=14'}
@@ -6336,6 +6351,10 @@ packages:
     resolution: {integrity: sha512-NEgUnEcBiP5HrPzufUkBzJOD/Sxsco3rLNo1F1TNf7ieU8ryUzBhqba8r756CjLX7rn3fHl6iLEwPYuqpoKgQQ==}
     engines: {node: '>=14'}
 
+  google-logging-utils@1.1.3:
+    resolution: {integrity: sha512-eAmLkjDjAFCVXg7A1unxHsLf961m6y17QFqXqAXGj/gVkKFrEICfStRfwUlGNfeCEjNRa32JEWOUTlYXPyyKvA==}
+    engines: {node: '>=14'}
+
   gopd@1.2.0:
     resolution: {integrity: sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==}
     engines: {node: '>= 0.4'}
@@ -6354,6 +6373,10 @@ packages:
     resolution: {integrity: sha512-pCcEwRi+TKpMlxAQObHDQ56KawURgyAf6jtIY046fJ5tIv3zDe/LEIubckAO8fj6JnAxLdmWkUfNyulQ2iKdEw==}
     engines: {node: '>=14.0.0'}
 
+  gtoken@8.0.0:
+    resolution: {integrity: sha512-+CqsMbHPiSTdtSO14O51eMNlrp9N79gmeqmXeouJOhfucAedHw9noVe/n5uJk3tbKE6a+6ZCQg3RPhVhHByAIw==}
+    engines: {node: '>=18'}
+
   hachure-fill@0.5.2:
     resolution: {integrity: sha512-3GKBOn+m2LX9iq+JC1064cSFprJY4jL1jCXTcpnfER5HYE2l/4EfWSGzkPa/ZDBmYI0ZOEj5VHV/eKnPGkHuOg==}
@@ -8750,6 +8773,10 @@ packages:
     deprecated: Rimraf versions prior to v4 are no longer supported
     hasBin: true
 
+  rimraf@5.0.10:
+    resolution: {integrity: sha512-l0OE8wL34P4nJH/H2ffoaniAokM2qSmrtXHmlpvYr5AVVX8msAyW0l8NVJFDxlSK4u3Uh/f41cQheDVdnYijwQ==}
+    hasBin: true
+
   rimraf@6.0.1:
     resolution: {integrity: sha512-9dkvaxAsk/xNXSJzMgFqqMCuFgt2+KsOFek3TMLfo8NCPfWpBmqwyNn5Y+NX56QUYfCtsyhF3ayiboEoUmJk/A==}
     engines: {node: 20 || >=22}
@@ -11470,16 +11497,14 @@ snapshots:
   '@floating-ui/utils@0.2.9': {}
 
-  '@google/genai@1.3.0(@modelcontextprotocol/sdk@1.12.0)':
+  '@google/genai@1.29.1(@modelcontextprotocol/sdk@1.12.0)':
     dependencies:
+      google-auth-library: 10.5.0
+      ws: 8.18.3
+    optionalDependencies:
       '@modelcontextprotocol/sdk': 1.12.0
-      google-auth-library: 9.15.1
-      ws: 8.18.2
-      zod: 3.25.76
-      zod-to-json-schema: 3.24.5(zod@3.25.76)
     transitivePeerDependencies:
       - bufferutil
-      - encoding
      - supports-color
      - utf-8-validate
@@ -14041,7 +14066,7 @@ snapshots:
       sirv: 3.0.1
       tinyglobby: 0.2.14
       tinyrainbow: 2.0.0
-      vitest: 3.2.4(@types/debug@4.1.12)(@types/node@20.17.50)(@vitest/ui@3.2.4)(jiti@2.4.2)(jsdom@26.1.0)(lightningcss@1.30.1)(tsx@4.19.4)(yaml@2.8.0)
+      vitest: 3.2.4(@types/debug@4.1.12)(@types/node@24.2.1)(@vitest/ui@3.2.4)(jiti@2.4.2)(jsdom@26.1.0)(lightningcss@1.30.1)(tsx@4.19.4)(yaml@2.8.0)
 
   '@vitest/utils@3.2.4':
     dependencies:
@@ -15110,8 +15135,7 @@ snapshots:
       d3: 7.9.0
       lodash-es: 4.17.21
 
-  data-uri-to-buffer@4.0.1:
-    optional: true
+  data-uri-to-buffer@4.0.1: {}
 
   data-uri-to-buffer@6.0.2: {}
@@ -15991,7 +16015,6 @@ snapshots:
     dependencies:
       node-domexception: 1.0.0
       web-streams-polyfill: 3.3.3
-    optional: true
 
   fflate@0.4.8: {}
@@ -16085,7 +16108,6 @@ snapshots:
   formdata-polyfill@4.0.10:
     dependencies:
       fetch-blob: 3.2.0
-    optional: true
 
   forwarded@0.2.0: {}
@@ -16170,6 +16192,15 @@ snapshots:
       - encoding
       - supports-color
 
+  gaxios@7.1.3:
+    dependencies:
+      extend: 3.0.2
+      https-proxy-agent: 7.0.6
+      node-fetch: 3.3.2
+      rimraf: 5.0.10
+    transitivePeerDependencies:
+      - supports-color
+
   gcp-metadata@6.1.1:
     dependencies:
       gaxios: 6.7.1
@@ -16179,6 +16210,14 @@ snapshots:
       - encoding
       - supports-color
 
+  gcp-metadata@8.1.2:
+    dependencies:
+      gaxios: 7.1.3
+      google-logging-utils: 1.1.3
+      json-bigint: 1.0.0
+    transitivePeerDependencies:
+      - supports-color
+
   gel@2.1.0:
     dependencies:
       '@petamoriken/float16': 3.9.3
@@ -16316,6 +16355,18 @@ snapshots:
       merge2: 1.4.1
       slash: 3.0.0
 
+  google-auth-library@10.5.0:
+    dependencies:
+      base64-js: 1.5.1
+      ecdsa-sig-formatter: 1.0.11
+      gaxios: 7.1.3
+      gcp-metadata: 8.1.2
+      google-logging-utils: 1.1.3
+      gtoken: 8.0.0
+      jws: 4.0.0
+    transitivePeerDependencies:
+      - supports-color
+
   google-auth-library@9.15.1:
     dependencies:
       base64-js: 1.5.1
@@ -16330,6 +16381,8 @@ snapshots:
 
   google-logging-utils@0.0.2: {}
 
+  google-logging-utils@1.1.3: {}
+
   gopd@1.2.0: {}
 
   graceful-fs@4.2.11: {}
@@ -16351,6 +16404,13 @@ snapshots:
       - encoding
       - supports-color
 
+  gtoken@8.0.0:
+    dependencies:
+      gaxios: 7.1.3
+      jws: 4.0.0
+    transitivePeerDependencies:
+      - supports-color
+
   hachure-fill@0.5.2: {}
 
   harmony-reflect@1.6.2: {}
@@ -18141,7 +18201,6 @@ snapshots:
       data-uri-to-buffer: 4.0.1
       fetch-blob: 3.2.0
       formdata-polyfill: 4.0.10
-    optional: true
 
   node-ipc@12.0.0:
     dependencies:
@@ -19211,6 +19270,10 @@ snapshots:
     dependencies:
       glob: 7.2.3
 
+  rimraf@5.0.10:
+    dependencies:
+      glob: 10.4.5
+
   rimraf@6.0.1:
     dependencies:
       glob: 11.0.3
@@ -20788,8 +20851,7 @@ snapshots:
 
   web-namespaces@2.0.1: {}
 
-  web-streams-polyfill@3.3.3:
-    optional: true
+  web-streams-polyfill@3.3.3: {}
 
   web-streams-polyfill@4.0.0-beta.3: {}
@@ -20933,8 +20995,7 @@ snapshots:
 
   ws@8.18.2: {}
 
-  ws@8.18.3:
-    optional: true
+  ws@8.18.3: {}
 
   xml-name-validator@5.0.0: {}
diff --git a/src/api/providers/__tests__/gemini.spec.ts b/src/api/providers/__tests__/gemini.spec.ts
index 812c1ae1a64..5aa968f2a8b 100644
--- a/src/api/providers/__tests__/gemini.spec.ts
+++ b/src/api/providers/__tests__/gemini.spec.ts
@@ -7,7 +7,7 @@ import { type ModelInfo, geminiDefaultModelId } from "@roo-code/types"
 import { t } from "i18next"
 
 import { GeminiHandler } from "../gemini"
 
-const GEMINI_20_FLASH_THINKING_NAME = "gemini-2.0-flash-thinking-exp-1219"
+const GEMINI_MODEL_NAME = geminiDefaultModelId
 
 describe("GeminiHandler", () => {
 	let handler: GeminiHandler
@@ -20,7 +20,7 @@ describe("GeminiHandler", () => {
 
 		handler = new GeminiHandler({
 			apiKey: "test-key",
-			apiModelId: GEMINI_20_FLASH_THINKING_NAME,
+			apiModelId: GEMINI_MODEL_NAME,
 			geminiApiKey: "test-key",
 		})
 
@@ -37,7 +37,7 @@ describe("GeminiHandler", () => {
 	describe("constructor", () => {
 		it("should initialize with provided config", () => {
 			expect(handler["options"].geminiApiKey).toBe("test-key")
-			expect(handler["options"].apiModelId).toBe(GEMINI_20_FLASH_THINKING_NAME)
+			expect(handler["options"].apiModelId).toBe(GEMINI_MODEL_NAME)
 		})
 	})
 
@@ -76,14 +76,14 @@ describe("GeminiHandler", () => {
 			expect(chunks.length).toBe(3)
 			expect(chunks[0]).toEqual({ type: "text", text: "Hello" })
 			expect(chunks[1]).toEqual({ type: "text", text: " world!" })
-			expect(chunks[2]).toEqual({ type: "usage", inputTokens: 10, outputTokens: 5 })
+			expect(chunks[2]).toMatchObject({ type: "usage", inputTokens: 10, outputTokens: 5 })
 
 			// Verify the call to generateContentStream
 			expect(handler["client"].models.generateContentStream).toHaveBeenCalledWith(
 				expect.objectContaining({
-					model: GEMINI_20_FLASH_THINKING_NAME,
+					model: GEMINI_MODEL_NAME,
 					config: expect.objectContaining({
-						temperature: 0,
+						temperature: 1,
 						systemInstruction: systemPrompt,
 					}),
 				}),
@@ -116,11 +116,11 @@ describe("GeminiHandler", () => {
 
 			// Verify the call to generateContent
 			expect(handler["client"].models.generateContent).toHaveBeenCalledWith({
-				model: GEMINI_20_FLASH_THINKING_NAME,
+				model: GEMINI_MODEL_NAME,
 				contents: [{ role: "user", parts: [{ text: "Test prompt" }] }],
 				config: {
 					httpOptions: undefined,
-					temperature: 0,
+					temperature: 1,
 				},
 			})
 		})
@@ -148,10 +148,8 @@ describe("GeminiHandler", () => {
 	describe("getModel", () => {
 		it("should return correct model info", () => {
 			const modelInfo = handler.getModel()
-			expect(modelInfo.id).toBe(GEMINI_20_FLASH_THINKING_NAME)
+			expect(modelInfo.id).toBe(GEMINI_MODEL_NAME)
 			expect(modelInfo.info).toBeDefined()
-			expect(modelInfo.info.maxTokens).toBe(8192)
-			expect(modelInfo.info.contextWindow).toBe(32_767)
 		})
 
 		it("should return default model if invalid model specified", () => {
diff --git a/src/api/providers/__tests__/vertex.spec.ts b/src/api/providers/__tests__/vertex.spec.ts
index d147e79ba8c..1420b05c7a0 100644
--- a/src/api/providers/__tests__/vertex.spec.ts
+++ b/src/api/providers/__tests__/vertex.spec.ts
@@ -95,7 +95,7 @@ describe("VertexHandler", () => {
 				model: expect.any(String),
 				contents: [{ role: "user", parts: [{ text: "Test prompt" }] }],
 				config: expect.objectContaining({
-					temperature: 0,
+					temperature: 1,
 				}),
 			}),
 		)
diff --git a/src/api/providers/gemini.ts b/src/api/providers/gemini.ts
index 573adda879e..bcf489e573a 100644
--- a/src/api/providers/gemini.ts
+++ b/src/api/providers/gemini.ts
@@ -29,6 +29,8 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
 	protected options: ApiHandlerOptions
 
 	private client: GoogleGenAI
+	private lastThoughtSignature?: string
+	private lastResponseId?: string
 
 	constructor({ isVertex, ...options }: GeminiHandlerOptions) {
 		super()
@@ -66,8 +68,33 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
 		metadata?: ApiHandlerCreateMessageMetadata,
 	): ApiStream {
 		const { id: model, info, reasoning: thinkingConfig, maxTokens } = this.getModel()
+		// Reset per-request metadata that we persist into apiConversationHistory.
+		this.lastThoughtSignature = undefined
+		this.lastResponseId = undefined
+
+		// Only forward encrypted reasoning continuations (thoughtSignature) when we are
+		// using effort-based reasoning (thinkingLevel). Budget-only configs should NOT
+		// send thoughtSignature parts back to Gemini.
+		const includeThoughtSignatures = Boolean(thinkingConfig?.thinkingLevel)
+
+		// The message list can include provider-specific meta entries such as
+		// `{ type: "reasoning", ... }` that are intended only for providers like
+		// openai-native. Gemini should never see those; they are not valid
+		// Anthropic.MessageParam values and will cause failures (e.g. missing
+		// `content` for the converter). Filter them out here.
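+		// (Illustrative only, shape hypothetical: an entry like
+		// { type: "reasoning", encrypted_content: "gAAAA..." } is dropped here
+		// rather than forwarded to Gemini.)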
+		type ReasoningMetaLike = { type?: string }
+
+		const geminiMessages = messages.filter((message): message is Anthropic.Messages.MessageParam => {
+			const meta = message as ReasoningMetaLike
+			if (meta.type === "reasoning") {
+				return false
+			}
+			return true
+		})
 
-		const contents = messages.map(convertAnthropicMessageToGemini)
+		const contents = geminiMessages.map((message) =>
+			convertAnthropicMessageToGemini(message, { includeThoughtSignatures }),
+		)
 
 		const tools: GenerateContentConfig["tools"] = []
 
 		if (this.options.enableUrlContext) {
@@ -78,12 +105,22 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
 			tools.push({ googleSearch: {} })
 		}
 
+		// Determine temperature respecting model capabilities and defaults:
+		// - If supportsTemperature is explicitly false, ignore user overrides
+		//   and pin to the model's defaultTemperature (or omit if undefined).
+		// - Otherwise, allow the user setting to override, falling back to model default,
+		//   then to 1 for Gemini provider default.
+		const supportsTemperature = info.supportsTemperature !== false
+		const temperatureConfig: number | undefined = supportsTemperature
+			? (this.options.modelTemperature ?? info.defaultTemperature ?? 1)
+			: info.defaultTemperature
+
 		const config: GenerateContentConfig = {
 			systemInstruction,
 			httpOptions: this.options.googleGeminiBaseUrl ? { baseUrl: this.options.googleGeminiBaseUrl } : undefined,
 			thinkingConfig,
 			maxOutputTokens: this.options.modelMaxTokens ?? maxTokens ?? undefined,
-			temperature: this.options.modelTemperature ?? 0,
+			temperature: temperatureConfig,
 			...(tools.length > 0 ? { tools } : {}),
 		}
 
@@ -94,8 +131,13 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
 			let lastUsageMetadata: GenerateContentResponseUsageMetadata | undefined
 			let pendingGroundingMetadata: GroundingMetadata | undefined
+			let finalResponse: { responseId?: string } | undefined
 
 			for await (const chunk of result) {
+				// Track the final structured response (per SDK pattern: candidate.finishReason)
+				if (chunk.candidates && chunk.candidates[0]?.finishReason) {
+					finalResponse = chunk as { responseId?: string }
+				}
 				// Process candidates and their parts to separate thoughts from content
 				if (chunk.candidates && chunk.candidates.length > 0) {
 					const candidate = chunk.candidates[0]
@@ -105,7 +147,20 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
 					}
 
 					if (candidate.content && candidate.content.parts) {
-						for (const part of candidate.content.parts) {
+						for (const part of candidate.content.parts as Array<{
+							thought?: boolean
+							text?: string
+							thoughtSignature?: string
+						}>) {
+							// Capture thought signatures so they can be persisted into API history.
+							const thoughtSignature = part.thoughtSignature
+							// Only persist encrypted reasoning when an effort-based thinking level is set
+							// (i.e. thinkingConfig.thinkingLevel is present). Budget-based configs that only
+							// set thinkingBudget should NOT trigger encrypted continuation.
+							if (thinkingConfig?.thinkingLevel && thoughtSignature) {
+								this.lastThoughtSignature = thoughtSignature
+							}
 							if (part.thought) {
 								// This is a thinking/reasoning part
 								if (part.text) {
@@ -131,6 +186,12 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
 				}
 			}
 
+			if (finalResponse?.responseId) {
+				// Capture responseId so Task.addToApiConversationHistory can store it
+				// alongside the assistant message in api_history.json.
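+				// (Illustrative: Task.addToApiConversationHistory reads this back via
+				// getResponseId() and stores it on the persisted assistant message.)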
+				this.lastResponseId = finalResponse.responseId
+			}
+
 			if (pendingGroundingMetadata) {
 				const sources = this.extractGroundingSources(pendingGroundingMetadata)
 				if (sources.length > 0) {
@@ -150,7 +211,13 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
 					outputTokens,
 					cacheReadTokens,
 					reasoningTokens,
-					totalCost: this.calculateCost({ info, inputTokens, outputTokens, cacheReadTokens }),
+					totalCost: this.calculateCost({
+						info,
+						inputTokens,
+						outputTokens,
+						cacheReadTokens,
+						reasoningTokens,
+					}),
 				}
 			}
 		} catch (error) {
@@ -166,7 +233,14 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
 		const modelId = this.options.apiModelId
 		let id = modelId && modelId in geminiModels ? (modelId as GeminiModelId) : geminiDefaultModelId
 		let info: ModelInfo = geminiModels[id]
-		const params = getModelParams({ format: "gemini", modelId: id, model: info, settings: this.options })
+
+		const params = getModelParams({
+			format: "gemini",
+			modelId: id,
+			model: info,
+			settings: this.options,
+			defaultTemperature: info.defaultTemperature ?? 1,
+		})
 
 		// The `:thinking` suffix indicates that the model is a "Hybrid"
 		// reasoning model and that reasoning is required to be enabled.
@@ -211,7 +285,7 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
 	async completePrompt(prompt: string): Promise<string> {
 		try {
-			const { id: model } = this.getModel()
+			const { id: model, info } = this.getModel()
 
 			const tools: GenerateContentConfig["tools"] = []
 			if (this.options.enableUrlContext) {
@@ -220,19 +294,27 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
 			if (this.options.enableGrounding) {
 				tools.push({ googleSearch: {} })
 			}
+
+			const supportsTemperature = info.supportsTemperature !== false
+			const temperatureConfig: number | undefined = supportsTemperature
+				? (this.options.modelTemperature ?? info.defaultTemperature ?? 1)
+				: info.defaultTemperature
+
 			const promptConfig: GenerateContentConfig = {
 				httpOptions: this.options.googleGeminiBaseUrl
 					? { baseUrl: this.options.googleGeminiBaseUrl }
 					: undefined,
-				temperature: this.options.modelTemperature ?? 0,
+				temperature: temperatureConfig,
 				...(tools.length > 0 ? { tools } : {}),
 			}
 
-			const result = await this.client.models.generateContent({
+			const request = {
 				model,
 				contents: [{ role: "user", parts: [{ text: prompt }] }],
 				config: promptConfig,
-			})
+			}
+
+			const result = await this.client.models.generateContent(request)
 
 			let text = result.text ?? ""
 
@@ -258,10 +340,13 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
 		try {
 			const { id: model } = this.getModel()
 
-			const response = await this.client.models.countTokens({
+			const countTokensRequest = {
 				model,
-				contents: convertAnthropicContentToGemini(content),
-			})
+				// Token counting does not need encrypted continuation; always drop thoughtSignature.
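+				// (Illustrative: a { type: "thoughtSignature", ... } block in the content
+				// therefore contributes zero tokens to this estimate.)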
+				contents: convertAnthropicContentToGemini(content, { includeThoughtSignatures: false }),
+			}
+
+			const response = await this.client.models.countTokens(countTokensRequest)
 
 			if (response.totalTokens === undefined) {
 				console.warn("Gemini token counting returned undefined, using fallback")
@@ -275,16 +360,26 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
 		}
 	}
 
+	public getThoughtSignature(): string | undefined {
+		return this.lastThoughtSignature
+	}
+
+	public getResponseId(): string | undefined {
+		return this.lastResponseId
+	}
+
 	public calculateCost({
 		info,
 		inputTokens,
 		outputTokens,
 		cacheReadTokens = 0,
+		reasoningTokens = 0,
 	}: {
 		info: ModelInfo
 		inputTokens: number
 		outputTokens: number
 		cacheReadTokens?: number
+		reasoningTokens?: number
 	}) {
 		// For models with tiered pricing, prices might only be defined in tiers
 		let inputPrice = info.inputPrice
@@ -316,23 +411,24 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
 		// Subtract the cached input tokens from the total input tokens.
 		const uncachedInputTokens = inputTokens - cacheReadTokens
 
+		// Bill both completion and reasoning ("thoughts") tokens as output.
+		const billedOutputTokens = outputTokens + reasoningTokens
+
 		let cacheReadCost = cacheReadTokens > 0 ? cacheReadsPrice * (cacheReadTokens / 1_000_000) : 0
 
 		const inputTokensCost = inputPrice * (uncachedInputTokens / 1_000_000)
-		const outputTokensCost = outputPrice * (outputTokens / 1_000_000)
+		const outputTokensCost = outputPrice * (billedOutputTokens / 1_000_000)
 		const totalCost = inputTokensCost + outputTokensCost + cacheReadCost
 
 		const trace: Record<string, { price: number; tokens: number; cost: number }> = {
 			input: { price: inputPrice, tokens: uncachedInputTokens, cost: inputTokensCost },
-			output: { price: outputPrice, tokens: outputTokens, cost: outputTokensCost },
+			output: { price: outputPrice, tokens: billedOutputTokens, cost: outputTokensCost },
 		}
 
 		if (cacheReadTokens > 0) {
 			trace.cacheRead = { price: cacheReadsPrice, tokens: cacheReadTokens, cost: cacheReadCost }
 		}
 
-		// console.log(`[GeminiHandler] calculateCost -> ${totalCost}`, trace)
-
 		return totalCost
 	}
 }
diff --git a/src/api/transform/gemini-format.ts b/src/api/transform/gemini-format.ts
index ee22cff32a4..58310e70a88 100644
--- a/src/api/transform/gemini-format.ts
+++ b/src/api/transform/gemini-format.ts
@@ -1,12 +1,44 @@
 import { Anthropic } from "@anthropic-ai/sdk"
 
 import { Content, Part } from "@google/genai"
 
-export function convertAnthropicContentToGemini(content: string | Anthropic.ContentBlockParam[]): Part[] {
+type ThoughtSignatureContentBlock = {
+	type: "thoughtSignature"
+	thoughtSignature?: string
+}
+
+type ExtendedContentBlockParam = Anthropic.ContentBlockParam | ThoughtSignatureContentBlock
+type ExtendedAnthropicContent = string | ExtendedContentBlockParam[]
+
+function isThoughtSignatureContentBlock(block: ExtendedContentBlockParam): block is ThoughtSignatureContentBlock {
+	return block.type === "thoughtSignature"
+}
+
+export function convertAnthropicContentToGemini(
+	content: ExtendedAnthropicContent,
+	options?: { includeThoughtSignatures?: boolean },
+): Part[] {
+	const includeThoughtSignatures = options?.includeThoughtSignatures ?? true
+
 	if (typeof content === "string") {
 		return [{ text: content }]
 	}
 
 	return content.flatMap((block): Part | Part[] => {
+		// Handle thoughtSignature blocks first so that the main switch can continue
+		// to operate on the standard Anthropic content union. This preserves strong
+		// typing for known block types while still allowing provider-specific
+		// extensions when needed.
+		if (isThoughtSignatureContentBlock(block)) {
+			if (includeThoughtSignatures && typeof block.thoughtSignature === "string") {
+				// The Google GenAI SDK currently exposes thoughtSignature as an
+				// extension field on Part; model it structurally without widening
+				// the upstream type.
+				return { thoughtSignature: block.thoughtSignature } as Part
+			}
+			// Explicitly omit thoughtSignature when not including it.
+			return []
+		}
+
 		switch (block.type) {
 			case "text":
 				return { text: block.text }
@@ -70,9 +102,12 @@ export function convertAnthropicContentToGemini(content: string | Anthropic.Cont
 	})
 }
 
-export function convertAnthropicMessageToGemini(message: Anthropic.Messages.MessageParam): Content {
+export function convertAnthropicMessageToGemini(
+	message: Anthropic.Messages.MessageParam,
+	options?: { includeThoughtSignatures?: boolean },
+): Content {
 	return {
 		role: message.role === "assistant" ? "model" : "user",
-		parts: convertAnthropicContentToGemini(message.content),
+		parts: convertAnthropicContentToGemini(message.content, options),
 	}
 }
diff --git a/src/api/transform/model-params.ts b/src/api/transform/model-params.ts
index b305431e139..5e9d9d844e7 100644
--- a/src/api/transform/model-params.ts
+++ b/src/api/transform/model-params.ts
@@ -129,16 +129,15 @@ export function getModelParams({
 		temperature = 1.0
 	} else if (shouldUseReasoningEffort({ model, settings })) {
 		// "Traditional" reasoning models use the `reasoningEffort` parameter.
-		const effort = (customReasoningEffort ?? model.reasoningEffort) as any
-		// Do not propagate "disable" into model params; treat as omission
+		const effort = (customReasoningEffort ?? model.reasoningEffort) as
+			| ReasoningEffortExtended
+			| "disable"
+			| undefined
+		// Capability and settings checks are handled by shouldUseReasoningEffort.
+		// Here we simply propagate the resolved effort into the params, while
+		// still treating "disable" as an omission.
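+		// (Illustrative: effort "medium" maps straight through to reasoningEffort = "medium";
+		// "disable" or an unset effort leaves reasoningEffort undefined.)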
 		if (effort && effort !== "disable") {
-			if (model.supportsReasoningEffort === true) {
-				// Boolean capability: accept extended efforts; UI still exposes low/medium/high by default
-				reasoningEffort = effort as ReasoningEffortExtended
-			} else {
-				// Array capability: honor exactly what's defined by the model
-				reasoningEffort = effort as ReasoningEffortExtended
-			}
+			reasoningEffort = effort as ReasoningEffortExtended
 		}
 	}
diff --git a/src/api/transform/reasoning.ts b/src/api/transform/reasoning.ts
index 9f3ef6e7730..d44db731c2c 100644
--- a/src/api/transform/reasoning.ts
+++ b/src/api/transform/reasoning.ts
@@ -21,7 +21,9 @@ export type AnthropicReasoningParams = BetaThinkingConfigParam
 
 export type OpenAiReasoningParams = { reasoning_effort: OpenAI.Chat.ChatCompletionCreateParams["reasoning_effort"] }
 
-export type GeminiReasoningParams = GenerateContentConfig["thinkingConfig"]
+export type GeminiReasoningParams = GenerateContentConfig["thinkingConfig"] & {
+	thinkingLevel?: "low" | "high"
+}
 
 export type GetModelReasoningOptions = {
 	model: ModelInfo
@@ -98,14 +100,43 @@ export const getOpenAiReasoning = ({
 	if (reasoningEffort === "disable" || !reasoningEffort) return undefined
 
 	// Include "none" | "minimal" | "low" | "medium" | "high" literally
-	return { reasoning_effort: reasoningEffort as any }
+	return {
+		reasoning_effort: reasoningEffort as OpenAI.Chat.ChatCompletionCreateParams["reasoning_effort"],
+	}
 }
 
 export const getGeminiReasoning = ({
 	model,
 	reasoningBudget,
+	reasoningEffort,
 	settings,
-}: GetModelReasoningOptions): GeminiReasoningParams | undefined =>
-	shouldUseReasoningBudget({ model, settings })
-		? { thinkingBudget: reasoningBudget!, includeThoughts: true }
-		: undefined
+}: GetModelReasoningOptions): GeminiReasoningParams | undefined => {
+	// Budget-based (2.5) models: use thinkingBudget, not thinkingLevel.
+	if (shouldUseReasoningBudget({ model, settings })) {
+		return { thinkingBudget: reasoningBudget!, includeThoughts: true }
+	}
+
+	// If reasoning effort shouldn't be used (toggle off, unsupported capability, etc.),
+	// do not send a thinkingConfig at all.
+	if (!shouldUseReasoningEffort({ model, settings })) {
+		return undefined
+	}
+
+	// Effort-based models on Google GenAI: only support explicit low/high levels.
+	const selectedEffort = (settings.reasoningEffort ?? model.reasoningEffort) as
+		| ReasoningEffortExtended
+		| "disable"
+		| undefined
+
+	// Respect "off" / unset semantics.
+	if (!selectedEffort || selectedEffort === "disable") {
+		return undefined
+	}
+
+	// Only map "low" and "high" to thinkingLevel; ignore other values.
+	if (selectedEffort !== "low" && selectedEffort !== "high") {
+		return undefined
+	}
+
+	return { thinkingLevel: selectedEffort, includeThoughts: true }
+}
diff --git a/src/core/task/Task.ts b/src/core/task/Task.ts
index ac1d67335a0..09041d83478 100644
--- a/src/core/task/Task.ts
+++ b/src/core/task/Task.ts
@@ -613,16 +613,18 @@ export class Task extends EventEmitter implements TaskLike {
 	}
 
 	private async addToApiConversationHistory(message: Anthropic.MessageParam) {
-		// Capture the encrypted_content from the provider (e.g., OpenAI Responses API) if present.
+		// Capture the encrypted_content / thought signatures from the provider (e.g., OpenAI Responses API, Google GenAI) if present.
 		// We only persist data reported by the current response body.
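+		// (Illustrative, values hypothetical: after this method runs, the stored entry
+		// may look like { role: "assistant", content: [{ type: "text", text: "..." },
+		// { type: "thoughtSignature", thoughtSignature: "CpgB..." }], ts: 1712345678901 }.)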
 		const handler = this.api as ApiHandler & {
 			getResponseId?: () => string | undefined
 			getEncryptedContent?: () => { encrypted_content: string; id?: string } | undefined
+			getThoughtSignature?: () => string | undefined
 		}
 
 		if (message.role === "assistant") {
 			const responseId = handler.getResponseId?.()
 			const reasoningData = handler.getEncryptedContent?.()
+			const thoughtSignature = handler.getThoughtSignature?.()
 
 			// Start from the original assistant message
 			const messageWithTs: any = {
@@ -654,6 +656,26 @@ export class Task extends EventEmitter implements TaskLike {
 				}
 			}
 
+			// If we have a thought signature, append it as a dedicated content block
+			// so it can be round-tripped in api_history.json and re-sent on subsequent calls.
+			if (thoughtSignature) {
+				const thoughtSignatureBlock = {
+					type: "thoughtSignature",
+					thoughtSignature,
+				}
+
+				if (typeof messageWithTs.content === "string") {
+					messageWithTs.content = [
+						{ type: "text", text: messageWithTs.content } satisfies Anthropic.Messages.TextBlockParam,
+						thoughtSignatureBlock,
+					]
+				} else if (Array.isArray(messageWithTs.content)) {
+					messageWithTs.content = [...messageWithTs.content, thoughtSignatureBlock]
+				} else if (!messageWithTs.content) {
+					messageWithTs.content = [thoughtSignatureBlock]
+				}
+			}
+
 			this.apiConversationHistory.push(messageWithTs)
 		} else {
 			const messageWithTs = { ...message, ts: Date.now() }
diff --git a/src/package.json b/src/package.json
index ba5370f84ae..13992d899c6 100644
--- a/src/package.json
+++ b/src/package.json
@@ -460,7 +460,7 @@
 		"@anthropic-ai/vertex-sdk": "^0.7.0",
 		"@aws-sdk/client-bedrock-runtime": "^3.922.0",
 		"@aws-sdk/credential-providers": "^3.922.0",
-		"@google/genai": "^1.0.0",
+		"@google/genai": "^1.29.1",
 		"@lmstudio/sdk": "^1.1.1",
 		"@mistralai/mistralai": "^1.9.18",
 		"@modelcontextprotocol/sdk": "1.12.0",
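
Worked example (not part of the patch): the billing rule that calculateCost now applies can be sanity-checked with a small standalone sketch. It assumes the flat above-200k-context prices for gemini-2.5-pro shown in this diff (input 2.5, output 15, cache reads 0.625 per 1M tokens) and skips the tier-selection logic; estimateGeminiCost and all token counts below are hypothetical.

	// Sketch only: mirrors calculateCost's formula, not the full tiered pricing.
	const MILLION = 1_000_000

	// Per-1M-token prices for gemini-2.5-pro above 200k context, as in this diff.
	const INPUT_PRICE = 2.5
	const OUTPUT_PRICE = 15
	const CACHE_READS_PRICE = 0.625

	function estimateGeminiCost(
		inputTokens: number,
		outputTokens: number,
		reasoningTokens = 0,
		cacheReadTokens = 0,
	): number {
		// Cached input tokens are billed at the cache-read rate, not the input rate.
		const uncachedInputTokens = inputTokens - cacheReadTokens
		// Reasoning ("thoughts") tokens are billed as output tokens.
		const billedOutputTokens = outputTokens + reasoningTokens
		return (
			INPUT_PRICE * (uncachedInputTokens / MILLION) +
			OUTPUT_PRICE * (billedOutputTokens / MILLION) +
			CACHE_READS_PRICE * (cacheReadTokens / MILLION)
		)
	}

	// e.g. 250k input (50k cached) + 2k output + 1k thoughts:
	// 2.5 * 0.2 + 15 * 0.003 + 0.625 * 0.05 = 0.5 + 0.045 + 0.03125 = ~$0.576
	console.log(estimateGeminiCost(250_000, 2_000, 1_000, 50_000))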