From d4d4d52e80a0b3e7f1ac7b774ee0d3c4bd8721d0 Mon Sep 17 00:00:00 2001 From: Und3rf10w Date: Mon, 28 Jul 2025 10:08:41 -0400 Subject: [PATCH 1/4] feat(providers/google): Add codeExecution provider tool support --- .changeset/bright-needles-talk.md | 5 + .../15-google-generative-ai.mdx | 20 +++ .../01-ai-sdk-providers/16-google-vertex.mdx | 21 +++ .../google-vertex-code-execution.ts | 18 +++ .../google-vertex-code-execution.ts | 71 +++++++++ ...oogle-generative-ai-language-model.test.ts | 141 +++++++++++++++++- .../google-generative-ai-language-model.ts | 75 +++++++++- packages/google/src/google-prepare-tools.ts | 12 ++ packages/google/src/google-provider.test.ts | 17 ++- packages/google/src/google-tools.ts | 12 ++ packages/google/src/tool/code-execution.ts | 36 +++++ 11 files changed, 418 insertions(+), 10 deletions(-) create mode 100644 .changeset/bright-needles-talk.md create mode 100644 examples/ai-core/src/generate-text/google-vertex-code-execution.ts create mode 100644 examples/ai-core/src/stream-text/google-vertex-code-execution.ts create mode 100644 packages/google/src/tool/code-execution.ts diff --git a/.changeset/bright-needles-talk.md b/.changeset/bright-needles-talk.md new file mode 100644 index 000000000000..4926023d92df --- /dev/null +++ b/.changeset/bright-needles-talk.md @@ -0,0 +1,5 @@ +--- +'@ai-sdk/google': minor +--- + +Add code execution provider defined tool diff --git a/content/providers/01-ai-sdk-providers/15-google-generative-ai.mdx b/content/providers/01-ai-sdk-providers/15-google-generative-ai.mdx index 0bfb1e2e1c85..e3cc109d417d 100644 --- a/content/providers/01-ai-sdk-providers/15-google-generative-ai.mdx +++ b/content/providers/01-ai-sdk-providers/15-google-generative-ai.mdx @@ -340,6 +340,26 @@ const { text: meatLasangaRecipe } = await generateText({ }); ``` +### Code Execution + +With [Code Execution](https://ai.google.dev/gemini-api/docs/code-execution), certain models can generate and execute Python code to perform calculations, 
solve problems, or provide more accurate information. + +You can enable code execution by adding the `code_execution` tool to your request. + +```ts +import { google } from '@ai-sdk/google'; +import { googleTools } from '@ai-sdk/google/internal'; +import { generateText } from 'ai'; + +const { text, toolCalls, toolResults } = await generateText({ + model: google('gemini-2.5-pro'), + tools: { code_execution: google.tools.codeExecution({}) }, + prompt: 'Use python to calculate the 20th fibonacci number.', +}); +``` + +The response will contain the tool calls and results from the code execution. + ### Google Search With [search grounding](https://ai.google.dev/gemini-api/docs/google-search), diff --git a/content/providers/01-ai-sdk-providers/16-google-vertex.mdx b/content/providers/01-ai-sdk-providers/16-google-vertex.mdx index 8f2c0398a70a..e33759cff214 100644 --- a/content/providers/01-ai-sdk-providers/16-google-vertex.mdx +++ b/content/providers/01-ai-sdk-providers/16-google-vertex.mdx @@ -326,6 +326,27 @@ const { text } = await generateText({ Google Vertex language models can also be used in the `streamText` function (see [AI SDK Core](/docs/ai-sdk-core)). +#### Code Execution + +With [Code Execution](https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/code-execution), certain Gemini models on Vertex AI can generate and execute Python code. This allows the model to perform calculations, data manipulation, and other programmatic tasks to enhance its responses. + +You can enable code execution by adding the `code_execution` tool to your request. + +```ts +import { vertex } from '@ai-sdk/google-vertex'; +import { googleTools } from '@ai-sdk/google/internal'; +import { generateText } from 'ai'; + +const result = await generateText({ + model: vertex('gemini-2.5-pro'), + tools: { code_execution: googleTools.codeExecution({}) }, + prompt: + 'Use python to calculate the 20th fibonacci number. 
Then find the nearest palindrome to it.', +}); +``` + +The response will contain `tool-call` and `tool-result` parts for the executed code. + #### Reasoning (Thinking Tokens) Google Vertex AI, through its support for Gemini models, can also emit "thinking" tokens, representing the model's reasoning process. The AI SDK exposes these as reasoning information. diff --git a/examples/ai-core/src/generate-text/google-vertex-code-execution.ts b/examples/ai-core/src/generate-text/google-vertex-code-execution.ts new file mode 100644 index 000000000000..69d6d2068c7d --- /dev/null +++ b/examples/ai-core/src/generate-text/google-vertex-code-execution.ts @@ -0,0 +1,18 @@ +import { vertex } from '@ai-sdk/google-vertex'; +import { googleTools } from '@ai-sdk/google/internal'; +import { generateText } from 'ai'; +import 'dotenv/config'; + +async function main() { + const result = await generateText({ + model: vertex('gemini-2.5-pro'), + tools: { code_execution: googleTools.codeExecution({}) }, + maxOutputTokens: 2048, + prompt: + 'Use python to calculate 20th fibonacci number. 
Then find the nearest palindrome to it.', + }); + + console.log(JSON.stringify(result, null, 2)); +} + +main().catch(console.error); diff --git a/examples/ai-core/src/stream-text/google-vertex-code-execution.ts b/examples/ai-core/src/stream-text/google-vertex-code-execution.ts new file mode 100644 index 000000000000..0d2c3b3337ec --- /dev/null +++ b/examples/ai-core/src/stream-text/google-vertex-code-execution.ts @@ -0,0 +1,71 @@ +import { vertex } from '@ai-sdk/google-vertex'; +import { googleTools } from '@ai-sdk/google/internal'; +import { ModelMessage, streamText, ToolCallPart, ToolResultPart } from 'ai'; +import 'dotenv/config'; +import * as process from 'process'; + +const messages: ModelMessage[] = []; +async function main() { + let toolResponseAvailable = false; + + const result = streamText({ + model: vertex('gemini-2.5-pro'), + tools: { code_execution: googleTools.codeExecution({}) }, + maxOutputTokens: 10000, + prompt: + 'Calculate 20th fibonacci number. Then find the nearest palindrome to it.', + }); + + let fullResponse = ''; + const toolCalls: ToolCallPart[] = []; + const toolResponses: ToolResultPart[] = []; + + for await (const delta of result.fullStream) { + switch (delta.type) { + case 'text-delta': { + fullResponse += delta.text; + process.stdout.write(delta.text); + break; + } + + case 'tool-call': { + toolCalls.push(delta); + + process.stdout.write( + `\nTool call: '${delta.toolName}' ${JSON.stringify(delta.input)}`, + ); + break; + } + + case 'tool-result': { + const transformedDelta: ToolResultPart = { + ...delta, + output: { type: 'json', value: delta.output }, + }; + toolResponses.push(transformedDelta); + + process.stdout.write( + `\nTool response: '${delta.toolName}' ${JSON.stringify( + delta.output, + )}`, + ); + break; + } + } + } + process.stdout.write('\n\n'); + + messages.push({ + role: 'assistant', + content: [{ type: 'text', text: fullResponse }, ...toolCalls], + }); + + if (toolResponses.length > 0) { + messages.push({ role: 
'tool', content: toolResponses }); + } + + toolResponseAvailable = toolCalls.length > 0; + console.log('Messages:', messages[0].content); +} + +main().catch(console.error); diff --git a/packages/google/src/google-generative-ai-language-model.test.ts b/packages/google/src/google-generative-ai-language-model.test.ts index 9abd1ce1208f..488dcc6c1650 100644 --- a/packages/google/src/google-generative-ai-language-model.test.ts +++ b/packages/google/src/google-generative-ai-language-model.test.ts @@ -1,4 +1,7 @@ -import { LanguageModelV2Prompt } from '@ai-sdk/provider'; +import { + LanguageModelV2Prompt, + LanguageModelV2ProviderDefinedTool, +} from '@ai-sdk/provider'; import { convertReadableStreamToArray, createTestServer, @@ -985,6 +988,67 @@ describe('doGenerate', () => { }); }); + it('should handle code execution tool calls', async () => { + server.urls[TEST_URL_GEMINI_2_0_PRO].response = { + type: 'json-value', + body: { + candidates: [ + { + content: { + parts: [ + { + executableCode: { + language: 'PYTHON', + code: 'print(1+1)', + }, + }, + { + codeExecutionResult: { + outcome: 'OUTCOME_OK', + output: '2', + }, + }, + ], + role: 'model', + }, + finishReason: 'STOP', + }, + ], + }, + }; + + const model = provider.languageModel('gemini-2.0-pro'); + const { content } = await model.doGenerate({ + tools: [ + provider.tools.codeExecution({}) as LanguageModelV2ProviderDefinedTool, + ], + prompt: TEST_PROMPT, + }); + + const requestBody = await server.calls[0].requestBodyJson; + expect(requestBody.tools).toEqual({ codeExecution: {} }); + + expect(content).toEqual([ + { + type: 'tool-call', + toolCallId: 'test-id', + toolName: 'code_execution', + input: '{"language":"PYTHON","code":"print(1+1)"}', + providerExecuted: true, + }, + { + type: 'tool-result', + toolCallId: 'test-id', + toolName: 'code_execution', + result: { + outcome: 'OUTCOME_OK', + output: '2', + }, + providerExecuted: true, + }, + ]); + }); + describe('search tool selection', () => { const provider = 
createGoogleGenerativeAI({ apiKey: 'test-api-key', @@ -1774,6 +1838,81 @@ describe('doStream', () => { ]); }); + it('should stream code execution tool calls and results', async () => { + server.urls[TEST_URL_GEMINI_2_0_PRO].response = { + type: 'stream-chunks', + chunks: [ + `data: ${JSON.stringify({ + candidates: [ + { + content: { + parts: [ + { + executableCode: { + language: 'PYTHON', + code: 'print("hello")', + }, + }, + ], + }, + }, + ], + })}\n\n`, + `data: ${JSON.stringify({ + candidates: [ + { + content: { + parts: [ + { + codeExecutionResult: { + outcome: 'OUTCOME_OK', + output: 'hello\n', + }, + }, + ], + }, + finishReason: 'STOP', + }, + ], + })}\n\n`, + ], + }; + + const model = provider.languageModel('gemini-2.0-pro'); + const { stream } = await model.doStream({ + tools: [ + provider.tools.codeExecution({}) as LanguageModelV2ProviderDefinedTool, + ], + prompt: TEST_PROMPT, + }); + + const events = await convertReadableStreamToArray(stream); + + const toolEvents = events.filter( + e => e.type === 'tool-call' || e.type === 'tool-result', + ); + + expect(toolEvents).toEqual([ + { + type: 'tool-call', + toolCallId: 'test-id', + toolName: 'code_execution', + input: '{"language":"PYTHON","code":"print(\\"hello\\")"}', + providerExecuted: true, + }, + { + type: 'tool-result', + toolCallId: 'test-id', + toolName: 'code_execution', + result: { + outcome: 'OUTCOME_OK', + output: 'hello\n', + }, + providerExecuted: true, + }, + ]); + }); + describe('search tool selection', () => { const provider = createGoogleGenerativeAI({ apiKey: 'test-api-key', diff --git a/packages/google/src/google-generative-ai-language-model.ts b/packages/google/src/google-generative-ai-language-model.ts index 108e159ebbe8..ed49b3b5eeeb 100644 --- a/packages/google/src/google-generative-ai-language-model.ts +++ b/packages/google/src/google-generative-ai-language-model.ts @@ -216,7 +216,34 @@ export class GoogleGenerativeAILanguageModel implements LanguageModelV2 { // Build content array 
from all parts for (const part of parts) { - if ('text' in part && part.text != null && part.text.length > 0) { + if ('executableCode' in part && part.executableCode?.code) { + content.push({ + type: 'tool-call', + toolCallId: this.config.generateId(), // Generate a unique ID + toolName: 'code_execution', + input: JSON.stringify(part.executableCode), + providerExecuted: true, + }); + } else if ('codeExecutionResult' in part && part.codeExecutionResult) { + content.push({ + type: 'tool-result', + // This is a limitation of the non-streaming API format. + // We find the last code execution tool call that hasn't received a result yet. + toolCallId: ( + [...content] + .reverse() + .find( + c => c.type === 'tool-call' && c.toolName === 'code_execution', + ) as any + )?.toolCallId, + toolName: 'code_execution', + result: { + outcome: part.codeExecutionResult.outcome, + output: part.codeExecutionResult.output, + }, + providerExecuted: true, + }); + } else if ('text' in part && part.text != null && part.text.length > 0) { if (part.thought === true) { content.push({ type: 'reasoning', text: part.text }); } else { @@ -319,6 +346,7 @@ export class GoogleGenerativeAILanguageModel implements LanguageModelV2 { // Track emitted sources to prevent duplicates const emittedSourceUrls = new Set(); + const pendingCodeExecutionToolCallIds: string[] = []; return { stream: response.pipeThrough( @@ -385,7 +413,38 @@ export class GoogleGenerativeAILanguageModel implements LanguageModelV2 { // Process text parts individually to handle reasoning parts const parts = content.parts ?? 
[]; for (const part of parts) { - if ( + if ('executableCode' in part && part.executableCode?.code) { + const toolCallId = generateId(); + pendingCodeExecutionToolCallIds.push(toolCallId); + + controller.enqueue({ + type: 'tool-call', + toolCallId, + toolName: 'code_execution', + input: JSON.stringify(part.executableCode), + providerExecuted: true, + }); + + hasToolCalls = true; + } else if ( + 'codeExecutionResult' in part && + part.codeExecutionResult + ) { + const toolCallId = pendingCodeExecutionToolCallIds.shift(); + + if (toolCallId) { + controller.enqueue({ + type: 'tool-result', + toolCallId, + toolName: 'code_execution', + result: { + outcome: part.codeExecutionResult.outcome, + output: part.codeExecutionResult.output, + }, + providerExecuted: true, + }); + } + } else if ( 'text' in part && part.text != null && part.text.length > 0 @@ -625,6 +684,18 @@ const contentSchema = z.object({ }), }), z.object({ + executableCode: z + .object({ + language: z.string(), + code: z.string(), + }) + .nullish(), + codeExecutionResult: z + .object({ + outcome: z.string(), + output: z.string(), + }) + .nullish(), text: z.string().nullish(), thought: z.boolean().nullish(), }), diff --git a/packages/google/src/google-prepare-tools.ts b/packages/google/src/google-prepare-tools.ts index a60d452399d9..2e9663216101 100644 --- a/packages/google/src/google-prepare-tools.ts +++ b/packages/google/src/google-prepare-tools.ts @@ -103,6 +103,18 @@ export function prepareTools({ }); } break; + case 'google.code_execution': + if (isGemini2) { + googleTools.codeExecution = {}; + } else { + toolWarnings.push({ + type: 'unsupported-tool', + tool, + details: + 'The code execution tools is not supported with other Gemini models than Gemini 2.', + }); + } + break; default: toolWarnings.push({ type: 'unsupported-tool', tool }); break; diff --git a/packages/google/src/google-provider.test.ts b/packages/google/src/google-provider.test.ts index d359e615fdaf..3209cbfe7a40 100644 --- 
a/packages/google/src/google-provider.test.ts +++ b/packages/google/src/google-provider.test.ts @@ -4,13 +4,16 @@ import { GoogleGenerativeAILanguageModel } from './google-generative-ai-language import { GoogleGenerativeAIEmbeddingModel } from './google-generative-ai-embedding-model'; import { GoogleGenerativeAIImageModel } from './google-generative-ai-image-model'; -// Mock the imported modules -vi.mock('@ai-sdk/provider-utils', () => ({ - loadApiKey: vi.fn().mockImplementation(({ apiKey }) => apiKey), - generateId: vi.fn().mockReturnValue('mock-id'), - withoutTrailingSlash: vi.fn().mockImplementation(url => url), - createProviderDefinedToolFactory: vi.fn(), -})); +// Mock the imported modules using a partial mock to preserve original exports +vi.mock('@ai-sdk/provider-utils', async importOriginal => { + const mod = await importOriginal(); + return { + ...mod, + loadApiKey: vi.fn().mockImplementation(({ apiKey }) => apiKey), + generateId: vi.fn().mockReturnValue('mock-id'), + withoutTrailingSlash: vi.fn().mockImplementation(url => url), + }; +}); vi.mock('./google-generative-ai-language-model', () => ({ GoogleGenerativeAILanguageModel: vi.fn(), diff --git a/packages/google/src/google-tools.ts b/packages/google/src/google-tools.ts index e9ad562b86f2..5e5211751fa1 100644 --- a/packages/google/src/google-tools.ts +++ b/packages/google/src/google-tools.ts @@ -1,3 +1,4 @@ +import { codeExecution } from './tool/code-execution'; import { googleSearch } from './tool/google-search'; import { urlContext } from './tool/url-context'; @@ -13,4 +14,15 @@ export const googleTools = { * Must have name "url_context". */ urlContext, + /** + * A tool that enables the model to generate and run Python code. + * Must have name "code_execution". + * + * @note Ensure the selected model supports Code Execution. + * Multi-tool usage with the code execution tool is typically compatible with Gemini >=2 models. 
+ * + * @see https://ai.google.dev/gemini-api/docs/code-execution (Google AI) + * @see https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/code-execution-api (Vertex AI) + */ + codeExecution, }; diff --git a/packages/google/src/tool/code-execution.ts b/packages/google/src/tool/code-execution.ts new file mode 100644 index 000000000000..90fca2e10cd0 --- /dev/null +++ b/packages/google/src/tool/code-execution.ts @@ -0,0 +1,36 @@ +import { createProviderDefinedToolFactoryWithOutputSchema } from '@ai-sdk/provider-utils'; +import { z } from 'zod/v4'; + +/** + * A tool that enables the model to generate and run Python code. + * + * @note Ensure the selected model supports Code Execution. + * Multi-tool usage with the code execution tool is typically compatible with Gemini >=2 models. + * + * @see https://ai.google.dev/gemini-api/docs/code-execution (Google AI) + * @see https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/code-execution-api (Vertex AI) + */ +export const codeExecution = createProviderDefinedToolFactoryWithOutputSchema< + { + language: string; + code: string; + }, + { + outcome: string; + output: string; + }, + {} +>({ + id: 'google.code_execution', + name: 'code_execution', + inputSchema: z.object({ + language: z.string().describe('The programming language of the code.'), + code: z.string().describe('The code to be executed.'), + }), + outputSchema: z.object({ + outcome: z + .string() + .describe('The outcome of the execution (e.g., "OUTCOME_OK").'), + output: z.string().describe('The output from the code execution.'), + }), +}); From dc9967cba34dd05af5288b127ec2e1c014451ef3 Mon Sep 17 00:00:00 2001 From: Und3rf10w Date: Mon, 28 Jul 2025 10:13:42 -0400 Subject: [PATCH 2/4] fix(changeset): Fixed changeset --- .changeset/bright-needles-talk.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.changeset/bright-needles-talk.md b/.changeset/bright-needles-talk.md index 4926023d92df..adb52e826f2f 100644 --- 
a/.changeset/bright-needles-talk.md +++ b/.changeset/bright-needles-talk.md @@ -1,5 +1,5 @@ --- -'@ai-sdk/google': minor +'@ai-sdk/google': patch --- Add code execution provider defined tool From 1dbfa7a6fcfe39f543796af359f83e6c5b8913d6 Mon Sep 17 00:00:00 2001 From: Und3rf10w Date: Mon, 28 Jul 2025 19:30:31 -0400 Subject: [PATCH 3/4] fix(examples/google): Fix typecheck error on code execution example --- .../ai-core/src/stream-text/google-vertex-code-execution.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/ai-core/src/stream-text/google-vertex-code-execution.ts b/examples/ai-core/src/stream-text/google-vertex-code-execution.ts index 0d2c3b3337ec..672dc81c9fe2 100644 --- a/examples/ai-core/src/stream-text/google-vertex-code-execution.ts +++ b/examples/ai-core/src/stream-text/google-vertex-code-execution.ts @@ -40,7 +40,7 @@ async function main() { case 'tool-result': { const transformedDelta: ToolResultPart = { ...delta, - output: { type: 'json', value: delta.output }, + output: { type: 'json', value: delta.output as any }, }; toolResponses.push(transformedDelta); From 21f63664aff61fe405690f798df64a9700ec1818 Mon Sep 17 00:00:00 2001 From: Und3rf10w Date: Tue, 29 Jul 2025 12:34:24 -0400 Subject: [PATCH 4/4] fix(providers/google): Track codeExecution tool calls in variable --- .../google-generative-ai-language-model.ts | 38 +++++++++---------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/packages/google/src/google-generative-ai-language-model.ts b/packages/google/src/google-generative-ai-language-model.ts index ed49b3b5eeeb..851c829741eb 100644 --- a/packages/google/src/google-generative-ai-language-model.ts +++ b/packages/google/src/google-generative-ai-language-model.ts @@ -205,21 +205,22 @@ export class GoogleGenerativeAILanguageModel implements LanguageModelV2 { const content: Array = []; // map ordered parts to content: - const parts = - candidate.content == null || - typeof candidate.content !== 'object' || - 
!('parts' in candidate.content) - ? [] - : (candidate.content.parts ?? []); + const parts = candidate.content?.parts ?? []; const usageMetadata = response.usageMetadata; + // Associates a code execution result with its preceding call. + let lastCodeExecutionToolCallId: string | undefined; + // Build content array from all parts for (const part of parts) { if ('executableCode' in part && part.executableCode?.code) { + const toolCallId = this.config.generateId(); + lastCodeExecutionToolCallId = toolCallId; + content.push({ type: 'tool-call', - toolCallId: this.config.generateId(), // Generate a unique ID + toolCallId, toolName: 'code_execution', input: JSON.stringify(part.executableCode), providerExecuted: true, @@ -227,15 +228,8 @@ export class GoogleGenerativeAILanguageModel implements LanguageModelV2 { } else if ('codeExecutionResult' in part && part.codeExecutionResult) { content.push({ type: 'tool-result', - // This is a limitation of the non-streaming API format. - // We find the last code execution tool call that hasn't received a result yet. - toolCallId: ( - [...content] - .reverse() - .find( - c => c.type === 'tool-call' && c.toolName === 'code_execution', - ) as any - )?.toolCallId, + // Assumes a result directly follows its corresponding call part. + toolCallId: lastCodeExecutionToolCallId!, toolName: 'code_execution', result: { outcome: part.codeExecutionResult.outcome, @@ -243,6 +237,8 @@ export class GoogleGenerativeAILanguageModel implements LanguageModelV2 { }, providerExecuted: true, }); + // Clear the ID after use to avoid accidental reuse. 
+ lastCodeExecutionToolCallId = undefined; } else if ('text' in part && part.text != null && part.text.length > 0) { if (part.thought === true) { content.push({ type: 'reasoning', text: part.text }); @@ -346,7 +342,8 @@ export class GoogleGenerativeAILanguageModel implements LanguageModelV2 { // Track emitted sources to prevent duplicates const emittedSourceUrls = new Set(); - const pendingCodeExecutionToolCallIds: string[] = []; + // Associates a code execution result with its preceding call. + let lastCodeExecutionToolCallId: string | undefined; return { stream: response.pipeThrough( @@ -415,7 +412,7 @@ export class GoogleGenerativeAILanguageModel implements LanguageModelV2 { for (const part of parts) { if ('executableCode' in part && part.executableCode?.code) { const toolCallId = generateId(); - pendingCodeExecutionToolCallIds.push(toolCallId); + lastCodeExecutionToolCallId = toolCallId; controller.enqueue({ type: 'tool-call', @@ -430,7 +427,8 @@ export class GoogleGenerativeAILanguageModel implements LanguageModelV2 { 'codeExecutionResult' in part && part.codeExecutionResult ) { - const toolCallId = pendingCodeExecutionToolCallIds.shift(); + // Assumes a result directly follows its corresponding call part. + const toolCallId = lastCodeExecutionToolCallId; if (toolCallId) { controller.enqueue({ @@ -443,6 +441,8 @@ export class GoogleGenerativeAILanguageModel implements LanguageModelV2 { }, providerExecuted: true, }); + // Clear the ID after use. + lastCodeExecutionToolCallId = undefined; } } else if ( 'text' in part &&