diff --git a/.changeset/bright-needles-talk.md b/.changeset/bright-needles-talk.md
new file mode 100644
index 000000000000..adb52e826f2f
--- /dev/null
+++ b/.changeset/bright-needles-talk.md
@@ -0,0 +1,5 @@
+---
+'@ai-sdk/google': patch
+---
+
+Add code execution provider-defined tool
diff --git a/content/providers/01-ai-sdk-providers/15-google-generative-ai.mdx b/content/providers/01-ai-sdk-providers/15-google-generative-ai.mdx
index 0bfb1e2e1c85..e3cc109d417d 100644
--- a/content/providers/01-ai-sdk-providers/15-google-generative-ai.mdx
+++ b/content/providers/01-ai-sdk-providers/15-google-generative-ai.mdx
@@ -340,6 +340,25 @@ const { text: meatLasangaRecipe } = await generateText({
 });
 ```
 
+### Code Execution
+
+With [Code Execution](https://ai.google.dev/gemini-api/docs/code-execution), certain models can generate and execute Python code to perform calculations, solve problems, or provide more accurate information.
+
+You can enable code execution by adding the `code_execution` tool to your request.
+
+```ts
+import { google } from '@ai-sdk/google';
+import { generateText } from 'ai';
+
+const { text, toolCalls, toolResults } = await generateText({
+  model: google('gemini-2.5-pro'),
+  tools: { code_execution: google.tools.codeExecution({}) },
+  prompt: 'Use python to calculate the 20th fibonacci number.',
+});
+```
+
+The response will contain the tool calls and results from the code execution.
+ ### Google Search With [search grounding](https://ai.google.dev/gemini-api/docs/google-search), diff --git a/content/providers/01-ai-sdk-providers/16-google-vertex.mdx b/content/providers/01-ai-sdk-providers/16-google-vertex.mdx index 8f2c0398a70a..e33759cff214 100644 --- a/content/providers/01-ai-sdk-providers/16-google-vertex.mdx +++ b/content/providers/01-ai-sdk-providers/16-google-vertex.mdx @@ -326,6 +326,27 @@ const { text } = await generateText({ Google Vertex language models can also be used in the `streamText` function (see [AI SDK Core](/docs/ai-sdk-core)). +#### Code Execution + +With [Code Execution](https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/code-execution), certain Gemini models on Vertex AI can generate and execute Python code. This allows the model to perform calculations, data manipulation, and other programmatic tasks to enhance its responses. + +You can enable code execution by adding the `code_execution` tool to your request. + +```ts +import { vertex } from '@ai-sdk/google-vertex'; +import { googleTools } from '@ai-sdk/google/internal'; +import { generateText } from 'ai'; + +const result = await generateText({ + model: vertex('gemini-2.5-pro'), + tools: { code_execution: googleTools.codeExecution({}) }, + prompt: + 'Use python to calculate 20th fibonacci number. Then find the nearest palindrome to it.', +}); +``` + +The response will contain `tool-call` and `tool-result` parts for the executed code. + #### Reasoning (Thinking Tokens) Google Vertex AI, through its support for Gemini models, can also emit "thinking" tokens, representing the model's reasoning process. The AI SDK exposes these as reasoning information. 
diff --git a/examples/ai-core/src/generate-text/google-vertex-code-execution.ts b/examples/ai-core/src/generate-text/google-vertex-code-execution.ts new file mode 100644 index 000000000000..69d6d2068c7d --- /dev/null +++ b/examples/ai-core/src/generate-text/google-vertex-code-execution.ts @@ -0,0 +1,18 @@ +import { vertex } from '@ai-sdk/google-vertex'; +import { googleTools } from '@ai-sdk/google/internal'; +import { generateText } from 'ai'; +import 'dotenv/config'; + +async function main() { + const result = await generateText({ + model: vertex('gemini-2.5-pro'), + tools: { code_execution: googleTools.codeExecution({}) }, + maxOutputTokens: 2048, + prompt: + 'Use python to calculate 20th fibonacci number. Then find the nearest palindrome to it.', + }); + + console.log(JSON.stringify(result, null, 2)); +} + +main().catch(console.error); diff --git a/examples/ai-core/src/stream-text/google-vertex-code-execution.ts b/examples/ai-core/src/stream-text/google-vertex-code-execution.ts new file mode 100644 index 000000000000..672dc81c9fe2 --- /dev/null +++ b/examples/ai-core/src/stream-text/google-vertex-code-execution.ts @@ -0,0 +1,71 @@ +import { vertex } from '@ai-sdk/google-vertex'; +import { googleTools } from '@ai-sdk/google/internal'; +import { ModelMessage, streamText, ToolCallPart, ToolResultPart } from 'ai'; +import 'dotenv/config'; +import * as process from 'process'; + +const messages: ModelMessage[] = []; +async function main() { + let toolResponseAvailable = false; + + const result = streamText({ + model: vertex('gemini-2.5-pro'), + tools: { code_execution: googleTools.codeExecution({}) }, + maxOutputTokens: 10000, + prompt: + 'Calculate 20th fibonacci number. 
Then find the nearest palindrome to it.', + }); + + let fullResponse = ''; + const toolCalls: ToolCallPart[] = []; + const toolResponses: ToolResultPart[] = []; + + for await (const delta of result.fullStream) { + switch (delta.type) { + case 'text-delta': { + fullResponse += delta.text; + process.stdout.write(delta.text); + break; + } + + case 'tool-call': { + toolCalls.push(delta); + + process.stdout.write( + `\nTool call: '${delta.toolName}' ${JSON.stringify(delta.input)}`, + ); + break; + } + + case 'tool-result': { + const transformedDelta: ToolResultPart = { + ...delta, + output: { type: 'json', value: delta.output as any }, + }; + toolResponses.push(transformedDelta); + + process.stdout.write( + `\nTool response: '${delta.toolName}' ${JSON.stringify( + delta.output, + )}`, + ); + break; + } + } + } + process.stdout.write('\n\n'); + + messages.push({ + role: 'assistant', + content: [{ type: 'text', text: fullResponse }, ...toolCalls], + }); + + if (toolResponses.length > 0) { + messages.push({ role: 'tool', content: toolResponses }); + } + + toolResponseAvailable = toolCalls.length > 0; + console.log('Messages:', messages[0].content); +} + +main().catch(console.error); diff --git a/packages/google/src/google-generative-ai-language-model.test.ts b/packages/google/src/google-generative-ai-language-model.test.ts index 9abd1ce1208f..488dcc6c1650 100644 --- a/packages/google/src/google-generative-ai-language-model.test.ts +++ b/packages/google/src/google-generative-ai-language-model.test.ts @@ -1,4 +1,7 @@ -import { LanguageModelV2Prompt } from '@ai-sdk/provider'; +import { + LanguageModelV2Prompt, + LanguageModelV2ProviderDefinedTool, +} from '@ai-sdk/provider'; import { convertReadableStreamToArray, createTestServer, @@ -985,6 +988,67 @@ describe('doGenerate', () => { }); }); + it('should handle code execution tool calls', async () => { + server.urls[TEST_URL_GEMINI_2_0_PRO].response = { + type: 'json-value', + body: { + candidates: [ + { + content: { + parts: [ 
+ { + executableCode: { + language: 'PYTHON', + code: 'print(1+1)', + }, + }, + { + codeExecutionResult: { + outcome: 'OUTCOME_OK', + output: '2', + }, + }, + ], + role: 'model', + }, + finishReason: 'STOP', + }, + ], + }, + }; + + const model = provider.languageModel('gemini-2.0-pro'); + const { content } = await model.doGenerate({ + tools: [ + provider.tools.codeExecution({}) as LanguageModelV2ProviderDefinedTool, + ], + prompt: TEST_PROMPT, + }); + + const requestBody = await server.calls[0].requestBodyJson; + expect(requestBody.tools).toEqual({ codeExecution: {} }); + + expect(content).toEqual([ + { + type: 'tool-call', + toolCallId: 'test-id', + toolName: 'code_execution', + input: '{"language":"PYTHON","code":"print(1+1)"}', + providerExecuted: true, + }, + { + type: 'tool-result', + toolCallId: 'test-id', + toolName: 'code_execution', + result: { + outcome: 'OUTCOME_OK', + output: '2', + }, + providerExecuted: true, + }, + ]); + }); + describe('search tool selection', () => { const provider = createGoogleGenerativeAI({ apiKey: 'test-api-key', @@ -1774,6 +1838,81 @@ describe('doStream', () => { ]); }); + it('should stream code execution tool calls and results', async () => { + server.urls[TEST_URL_GEMINI_2_0_PRO].response = { + type: 'stream-chunks', + chunks: [ + `data: ${JSON.stringify({ + candidates: [ + { + content: { + parts: [ + { + executableCode: { + language: 'PYTHON', + code: 'print("hello")', + }, + }, + ], + }, + }, + ], + })}\n\n`, + `data: ${JSON.stringify({ + candidates: [ + { + content: { + parts: [ + { + codeExecutionResult: { + outcome: 'OUTCOME_OK', + output: 'hello\n', + }, + }, + ], + }, + finishReason: 'STOP', + }, + ], + })}\n\n`, + ], + }; + + const model = provider.languageModel('gemini-2.0-pro'); + const { stream } = await model.doStream({ + tools: [ + provider.tools.codeExecution({}) as LanguageModelV2ProviderDefinedTool, + ], + prompt: TEST_PROMPT, + }); + + const events = await convertReadableStreamToArray(stream); + + const 
toolEvents = events.filter( + e => e.type === 'tool-call' || e.type === 'tool-result', + ); + + expect(toolEvents).toEqual([ + { + type: 'tool-call', + toolCallId: 'test-id', + toolName: 'code_execution', + input: '{"language":"PYTHON","code":"print(\\"hello\\")"}', + providerExecuted: true, + }, + { + type: 'tool-result', + toolCallId: 'test-id', + toolName: 'code_execution', + result: { + outcome: 'OUTCOME_OK', + output: 'hello\n', + }, + providerExecuted: true, + }, + ]); + }); + describe('search tool selection', () => { const provider = createGoogleGenerativeAI({ apiKey: 'test-api-key', diff --git a/packages/google/src/google-generative-ai-language-model.ts b/packages/google/src/google-generative-ai-language-model.ts index 108e159ebbe8..851c829741eb 100644 --- a/packages/google/src/google-generative-ai-language-model.ts +++ b/packages/google/src/google-generative-ai-language-model.ts @@ -205,18 +205,41 @@ export class GoogleGenerativeAILanguageModel implements LanguageModelV2 { const content: Array = []; // map ordered parts to content: - const parts = - candidate.content == null || - typeof candidate.content !== 'object' || - !('parts' in candidate.content) - ? [] - : (candidate.content.parts ?? []); + const parts = candidate.content?.parts ?? []; const usageMetadata = response.usageMetadata; + // Associates a code execution result with its preceding call. 
+ let lastCodeExecutionToolCallId: string | undefined; + // Build content array from all parts for (const part of parts) { - if ('text' in part && part.text != null && part.text.length > 0) { + if ('executableCode' in part && part.executableCode?.code) { + const toolCallId = this.config.generateId(); + lastCodeExecutionToolCallId = toolCallId; + + content.push({ + type: 'tool-call', + toolCallId, + toolName: 'code_execution', + input: JSON.stringify(part.executableCode), + providerExecuted: true, + }); + } else if ('codeExecutionResult' in part && part.codeExecutionResult) { + content.push({ + type: 'tool-result', + // Assumes a result directly follows its corresponding call part. + toolCallId: lastCodeExecutionToolCallId!, + toolName: 'code_execution', + result: { + outcome: part.codeExecutionResult.outcome, + output: part.codeExecutionResult.output, + }, + providerExecuted: true, + }); + // Clear the ID after use to avoid accidental reuse. + lastCodeExecutionToolCallId = undefined; + } else if ('text' in part && part.text != null && part.text.length > 0) { if (part.thought === true) { content.push({ type: 'reasoning', text: part.text }); } else { @@ -319,6 +342,8 @@ export class GoogleGenerativeAILanguageModel implements LanguageModelV2 { // Track emitted sources to prevent duplicates const emittedSourceUrls = new Set(); + // Associates a code execution result with its preceding call. + let lastCodeExecutionToolCallId: string | undefined; return { stream: response.pipeThrough( @@ -385,7 +410,41 @@ export class GoogleGenerativeAILanguageModel implements LanguageModelV2 { // Process text parts individually to handle reasoning parts const parts = content.parts ?? 
[]; for (const part of parts) { - if ( + if ('executableCode' in part && part.executableCode?.code) { + const toolCallId = generateId(); + lastCodeExecutionToolCallId = toolCallId; + + controller.enqueue({ + type: 'tool-call', + toolCallId, + toolName: 'code_execution', + input: JSON.stringify(part.executableCode), + providerExecuted: true, + }); + + hasToolCalls = true; + } else if ( + 'codeExecutionResult' in part && + part.codeExecutionResult + ) { + // Assumes a result directly follows its corresponding call part. + const toolCallId = lastCodeExecutionToolCallId; + + if (toolCallId) { + controller.enqueue({ + type: 'tool-result', + toolCallId, + toolName: 'code_execution', + result: { + outcome: part.codeExecutionResult.outcome, + output: part.codeExecutionResult.output, + }, + providerExecuted: true, + }); + // Clear the ID after use. + lastCodeExecutionToolCallId = undefined; + } + } else if ( 'text' in part && part.text != null && part.text.length > 0 @@ -625,6 +684,18 @@ const contentSchema = z.object({ }), }), z.object({ + executableCode: z + .object({ + language: z.string(), + code: z.string(), + }) + .nullish(), + codeExecutionResult: z + .object({ + outcome: z.string(), + output: z.string(), + }) + .nullish(), text: z.string().nullish(), thought: z.boolean().nullish(), }), diff --git a/packages/google/src/google-prepare-tools.ts b/packages/google/src/google-prepare-tools.ts index a60d452399d9..2e9663216101 100644 --- a/packages/google/src/google-prepare-tools.ts +++ b/packages/google/src/google-prepare-tools.ts @@ -103,6 +103,18 @@ export function prepareTools({ }); } break; + case 'google.code_execution': + if (isGemini2) { + googleTools.codeExecution = {}; + } else { + toolWarnings.push({ + type: 'unsupported-tool', + tool, + details: + 'The code execution tools is not supported with other Gemini models than Gemini 2.', + }); + } + break; default: toolWarnings.push({ type: 'unsupported-tool', tool }); break; diff --git 
a/packages/google/src/google-provider.test.ts b/packages/google/src/google-provider.test.ts index d359e615fdaf..3209cbfe7a40 100644 --- a/packages/google/src/google-provider.test.ts +++ b/packages/google/src/google-provider.test.ts @@ -4,13 +4,16 @@ import { GoogleGenerativeAILanguageModel } from './google-generative-ai-language import { GoogleGenerativeAIEmbeddingModel } from './google-generative-ai-embedding-model'; import { GoogleGenerativeAIImageModel } from './google-generative-ai-image-model'; -// Mock the imported modules -vi.mock('@ai-sdk/provider-utils', () => ({ - loadApiKey: vi.fn().mockImplementation(({ apiKey }) => apiKey), - generateId: vi.fn().mockReturnValue('mock-id'), - withoutTrailingSlash: vi.fn().mockImplementation(url => url), - createProviderDefinedToolFactory: vi.fn(), -})); +// Mock the imported modules using a partial mock to preserve original exports +vi.mock('@ai-sdk/provider-utils', async importOriginal => { + const mod = await importOriginal(); + return { + ...mod, + loadApiKey: vi.fn().mockImplementation(({ apiKey }) => apiKey), + generateId: vi.fn().mockReturnValue('mock-id'), + withoutTrailingSlash: vi.fn().mockImplementation(url => url), + }; +}); vi.mock('./google-generative-ai-language-model', () => ({ GoogleGenerativeAILanguageModel: vi.fn(), diff --git a/packages/google/src/google-tools.ts b/packages/google/src/google-tools.ts index e9ad562b86f2..5e5211751fa1 100644 --- a/packages/google/src/google-tools.ts +++ b/packages/google/src/google-tools.ts @@ -1,3 +1,4 @@ +import { codeExecution } from './tool/code-execution'; import { googleSearch } from './tool/google-search'; import { urlContext } from './tool/url-context'; @@ -13,4 +14,15 @@ export const googleTools = { * Must have name "url_context". */ urlContext, + /** + * A tool that enables the model to generate and run Python code. + * Must have name "code_execution". + * + * @note Ensure the selected model supports Code Execution. 
+ * Multi-tool usage with the code execution tool is typically compatible with Gemini >=2 models. + * + * @see https://ai.google.dev/gemini-api/docs/code-execution (Google AI) + * @see https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/code-execution-api (Vertex AI) + */ + codeExecution, }; diff --git a/packages/google/src/tool/code-execution.ts b/packages/google/src/tool/code-execution.ts new file mode 100644 index 000000000000..90fca2e10cd0 --- /dev/null +++ b/packages/google/src/tool/code-execution.ts @@ -0,0 +1,36 @@ +import { createProviderDefinedToolFactoryWithOutputSchema } from '@ai-sdk/provider-utils'; +import { z } from 'zod/v4'; + +/** + * A tool that enables the model to generate and run Python code. + * + * @note Ensure the selected model supports Code Execution. + * Multi-tool usage with the code execution tool is typically compatible with Gemini >=2 models. + * + * @see https://ai.google.dev/gemini-api/docs/code-execution (Google AI) + * @see https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/code-execution-api (Vertex AI) + */ +export const codeExecution = createProviderDefinedToolFactoryWithOutputSchema< + { + language: string; + code: string; + }, + { + outcome: string; + output: string; + }, + {} +>({ + id: 'google.code_execution', + name: 'code_execution', + inputSchema: z.object({ + language: z.string().describe('The programming language of the code.'), + code: z.string().describe('The code to be executed.'), + }), + outputSchema: z.object({ + outcome: z + .string() + .describe('The outcome of the execution (e.g., "OUTCOME_OK").'), + output: z.string().describe('The output from the code execution.'), + }), +});