diff --git a/.changeset/thin-eagles-serve.md b/.changeset/thin-eagles-serve.md new file mode 100644 index 000000000000..510725348c37 --- /dev/null +++ b/.changeset/thin-eagles-serve.md @@ -0,0 +1,7 @@ +--- +'@ai-sdk/google-vertex': patch +'@example/ai-core': patch +'@ai-sdk/google': patch +--- + +Added Reasoning and Code Execution support to google providers diff --git a/content/providers/01-ai-sdk-providers/15-google-generative-ai.mdx b/content/providers/01-ai-sdk-providers/15-google-generative-ai.mdx index 92078eb6f2e1..18677de7c9c2 100644 --- a/content/providers/01-ai-sdk-providers/15-google-generative-ai.mdx +++ b/content/providers/01-ai-sdk-providers/15-google-generative-ai.mdx @@ -143,6 +143,14 @@ The following optional provider options are available for Google Generative AI m - `BLOCK_ONLY_HIGH` - `BLOCK_NONE` +- **useSearchGrounding** _boolean_ + + Optional. When enabled, the model will [use Google search to ground the response](https://ai.google.dev/gemini-api/docs/grounding). + +- **useCodeExecution** _boolean_ + + Optional. When enabled, the model will make use of a code execution tool that [enables the model to generate and run Python code](https://ai.google.dev/gemini-api/docs/code-execution). + - **responseModalities** _string[]_ The modalities to use for the response. The following modalities are supported: `TEXT`, `IMAGE`. When not defined or empty, the model defaults to returning only text. @@ -396,6 +404,190 @@ const { sources } = await generateText({ }); ``` +### Code Execution + +With [Code Execution](https://ai.google.dev/gemini-api/docs/code-execution), certain models can generate and execute Python code to perform calculations, solve problems, or provide more accurate information. 
+ +To enable this feature, set `useCodeExecution: true` in the `providerOptions` for the Google provider: + +```ts highlight="6-10" +import { google } from '@ai-sdk/google'; +import { generateText } from 'ai'; + +async function main() { + const result = await generateText({ + model: google('gemini-2.5-flash-preview-04-17'), + providerOptions: { + google: { + useCodeExecution: true, + }, + }, + prompt: + 'Calculate the 20th Fibonacci number. Then find the nearest palindrome to it.', + }); + + // Process result.content which may include file and text parts + // (see example below) + console.log('Final aggregated text:', result.text); +} + +main(); +``` + +When Code Execution is enabled, the model's response will surface the generated code and its execution results as distinct parts within the output: + +- **Generated Python Code**: This is represented as a `file` content part (or stream part). + - `type`: `'file'` + - `mediaType`: `'text/x-python'` + - `data`: A base64-encoded string of the Python code that the model generated and executed. +- **Code Execution Result**: This is represented as a `text` content part (or stream part). + - `type`: `'text'` + - `text`: A formatted string detailing the execution `outcome` (e.g., "OUTCOME_OK") and the `output` from the code. The format is typically: `Execution Result (Outcome: ):\n`. + +#### `generateText` with Code Execution + +When using `generateText`, the `result.content` array will contain these `file` (for executable code) and `text` (for execution results) parts interspersed with other text parts generated by the model. 
+ +Here's how you can process these parts: + +```ts +import { google } from '@ai-sdk/google'; +import { generateText } from 'ai'; +import 'dotenv/config'; + +async function main() { + const result = await generateText({ + model: google('gemini-2.5-flash-preview-04-17'), + providerOptions: { + google: { + useCodeExecution: true, + }, + }, + maxOutputTokens: 2048, + prompt: + 'Calculate 20th fibonacci number. Then find the nearest palindrome to it.', + }); + + console.log('Processing content parts:'); + for (const part of result.content) { + switch (part.type) { + case 'file': { + // This is the executableCode part + process.stdout.write( + '\x1b[33m' + // Yellow color for "file" + part.type + + '\x1b[34m: ' + // Blue color for mediaType + part.mediaType + // Should be 'text/x-python' + '\x1b[0m', // Reset color + ); + console.log(); // Newline + // Data is base64 encoded Python code + console.log('Code:\n', atob(part.data as string)); + break; + } + case 'text': { + // This can be a regular text part or a codeExecutionResult + process.stdout.write( + '\x1b[34m' + part.type + '\x1b[0m', // Blue color for "text" + ); + console.log(); // Newline + console.log(part.text); // Contains model's text or formatted execution result + break; + } + } + } + + process.stdout.write('\n\n--- Full Response Details ---\n'); + console.log('Aggregated Text:', result.text); + console.log('Warnings:', result.warnings); + console.log('Token usage:', result.usage); + console.log('Finish reason:', result.finishReason); +} + +main().catch(console.error); +``` + +#### Streaming Code Execution Details (`streamText`) + +When using `streamText` with `useCodeExecution: true`, the generated Python code and its execution results are streamed as distinct part types: + +- **Generated Python Code**: Arrives as a stream part where `delta.type === 'file'`. + - `delta.mediaType` will be `'text/x-python'`. + - `delta.data` will be the base64-encoded Python code string. 
+- **Code Execution Result**: Arrives as a stream part where `delta.type === 'text'`. + - `delta.text` will contain the formatted string with the execution outcome and output (e.g., `Execution Result (Outcome: OUTCOME_OK):\nOutput...`). + +Here's an example of how you might process the stream: + +```ts +import { google } from '@ai-sdk/google'; +import { streamText } from 'ai'; +import 'dotenv/config'; + +async function main() { + const result = streamText({ + model: google('gemini-2.5-flash-preview-04-17'), + providerOptions: { + google: { + useCodeExecution: true, + }, + }, + maxOutputTokens: 10000, + prompt: + 'Calculate 20th fibonacci number. Then find the nearest palindrome to it.', + }); + + let fullResponse = ''; + console.log('Streaming content parts:'); + + for await (const delta of result.fullStream) { + switch (delta.type) { + case 'file': { + // This is the executableCode part + process.stdout.write( + '\x1b[33m' + // Yellow color for "file" + delta.type + + '\x1b[34m: ' + // Blue color for mediaType + delta.mediaType + // Should be 'text/x-python' + '\x1b[0m', // Reset color + ); + console.log(); // Newline + // Data is base64 encoded Python code + console.log('Code:\n', atob(delta.data as string)); + break; + } + case 'text': { + // This can be a regular text part or a codeExecutionResult + process.stdout.write( + '\x1b[34m' + delta.type + '\x1b[0m', // Blue color for "text" + ); + console.log(); // Newline + console.log(delta.text); // Contains model's text or formatted execution result + fullResponse += delta.text; + break; + } + // Other stream part types like 'reasoning', 'tool-call-delta', 'tool-call', + // 'stream-start', 'finish', 'error' can be handled here if needed. 
+ } + } + + process.stdout.write('\n\n--- Full Response Details ---\n'); + console.log('Aggregated Text from Stream:', fullResponse); + console.log('Warnings:', await result.warnings); + console.log('Token usage:', await result.usage); + console.log('Finish reason:', await result.finishReason); +} + +main().catch(console.error); +``` + + + Code Execution capabilities and specific model support are subject to Google's + offerings. Always refer to the [official Google AI + documentation](https://ai.google.dev/gemini-api/docs/code-execution) for the + most current information on compatible models and features. + + ### Image Outputs The model `gemini-2.0-flash-exp` supports image generation. Images are exposed as files in the response. diff --git a/content/providers/01-ai-sdk-providers/16-google-vertex.mdx b/content/providers/01-ai-sdk-providers/16-google-vertex.mdx index c83f2c1ac648..e094121ff48f 100644 --- a/content/providers/01-ai-sdk-providers/16-google-vertex.mdx +++ b/content/providers/01-ai-sdk-providers/16-google-vertex.mdx @@ -304,6 +304,10 @@ The following optional provider options are available for Google Vertex models: Optional. When enabled, the model will [use Google search to ground the response](https://cloud.google.com/vertex-ai/generative-ai/docs/grounding/overview). +- **useCodeExecution** _boolean_ + + Optional. When enabled, the model will make use of a code execution tool that enables the model to [generate and run Python code](https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/code-execution). + - **audioTimestamp** _boolean_ Optional. Enables timestamp understanding for audio files. Defaults to false. @@ -446,6 +450,190 @@ Example response excerpt: threshold](https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/ground-gemini#dynamic-retrieval). 
+#### Code Execution + +With [Code Execution](https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/code-execution), certain Gemini models on Vertex AI can generate and execute Python code. This allows the model to perform calculations, data manipulation, and other programmatic tasks to enhance its responses. + +To enable this feature, set `useCodeExecution: true` in the `providerOptions` for the Google provider when making a call: + +```ts highlight="6-10" +import { vertex } from '@ai-sdk/google-vertex'; +import { generateText } from 'ai'; + +async function main() { + const result = await generateText({ + model: vertex('gemini-2.5-pro-preview-05-06'), + providerOptions: { + google: { + // Note: providerOptions are nested under 'google' key for Vertex + useCodeExecution: true, + }, + }, + prompt: 'What is the result of 7 factorial divided by 3 factorial?', + }); + + // Process result.content which may include file and text parts + // (see example below) + console.log('Final aggregated text:', result.text); +} + +main(); +``` + +When Code Execution is active, the model's response will surface the generated code and its execution results as distinct parts within the output: + +- **Generated Python Code**: This is represented as a `file` content part (or stream part). + - `type`: `'file'` + - `mediaType`: `'text/x-python'` + - `data`: A base64-encoded string of the Python code that the model generated and executed. +- **Code Execution Result**: This is represented as a `text` content part (or stream part). + - `type`: `'text'` + - `text`: A formatted string detailing the execution `outcome` (e.g., "OUTCOME_OK") and the `output` from the code. The format is typically: `Execution Result (Outcome: ):\n`. 
+ +##### `generateText` with Code Execution + +When using `generateText`, the `result.content` array will contain these `file` (for executable code) and `text` (for execution results) parts, potentially interspersed with other text parts generated by the model. + +Here's how you can process these parts: + +```ts +import { vertex } from '@ai-sdk/google-vertex'; +import { generateText } from 'ai'; +import 'dotenv/config'; + +async function main() { + const result = await generateText({ + model: vertex('gemini-2.5-pro-preview-05-06'), + providerOptions: { + google: { + useCodeExecution: true, + }, + }, + maxOutputTokens: 2048, + prompt: + 'Calculate 20th fibonacci number. Then find the nearest palindrome to it.', + }); + + console.log('Processing content parts:'); + for (const part of result.content) { + switch (part.type) { + case 'file': { + // This is the executableCode part + process.stdout.write( + '\x1b[33m' + // Yellow color for "file" + part.type + + '\x1b[34m: ' + // Blue color for mediaType + part.mediaType + // Should be 'text/x-python' + '\x1b[0m', // Reset color + ); + console.log(); // Newline + // Data is base64 encoded Python code + console.log('Code:\n', atob(part.data as string)); + break; + } + case 'text': { + // This can be a regular text part or a codeExecutionResult + process.stdout.write( + '\x1b[34m' + part.type + '\x1b[0m', // Blue color for "text" + ); + console.log(); // Newline + console.log(part.text); // Contains model's text or formatted execution result + break; + } + } + } + + process.stdout.write('\n\n--- Full Response Details ---\n'); + console.log('Aggregated Text:', result.text); + console.log('Warnings:', result.warnings); + console.log('Token usage:', result.usage); + console.log('Finish reason:', result.finishReason); +} + +main().catch(console.error); +``` + +##### Streaming Code Execution Details (`streamText`) + +When using `streamText` with a Vertex model that has `useCodeExecution: true` enabled in `providerOptions`, the 
generated Python code and its execution results are streamed as distinct part types: + +- **Generated Python Code**: Arrives as a stream part where `delta.type === 'file'`. + - `delta.mediaType` will be `'text/x-python'`. + - `delta.data` will be the base64-encoded Python code string. +- **Code Execution Result**: Arrives as a stream part where `delta.type === 'text'`. + - `delta.text` will contain the formatted string with the execution outcome and output (e.g., `Execution Result (Outcome: OUTCOME_OK):\nOutput...`). + +Here's an example of how you might process the stream: + +```ts +import { vertex } from '@ai-sdk/google-vertex'; +import { streamText } from 'ai'; +import 'dotenv/config'; + +async function main() { + const result = streamText({ + model: vertex('gemini-2.5-pro-preview-05-06'), + providerOptions: { + google: { + useCodeExecution: true, + }, + }, + maxOutputTokens: 10000, + prompt: + 'Calculate 20th fibonacci number. Then find the nearest palindrome to it.', + }); + + let fullResponse = ''; + console.log('Streaming content parts:'); + + for await (const delta of result.fullStream) { + switch (delta.type) { + case 'file': { + // This is the executableCode part + process.stdout.write( + '\x1b[33m' + // Yellow color for "file" + delta.type + + '\x1b[34m: ' + // Blue color for mediaType + delta.mediaType + // Should be 'text/x-python' + '\x1b[0m', // Reset color + ); + console.log(); // Newline + // Data is base64 encoded Python code + console.log('Code:\n', atob(delta.data as string)); + break; + } + case 'text': { + // This can be a regular text part or a codeExecutionResult + process.stdout.write( + '\x1b[34m' + delta.type + '\x1b[0m', // Blue color for "text" + ); + console.log(); // Newline + console.log(delta.text); // Contains model's text or formatted execution result + fullResponse += delta.text; + break; + } + // Other stream part types like 'reasoning', 'tool-call-delta', 'tool-call', + // 'stream-start', 'finish', 'error' can be handled here 
if needed. + } + } + + process.stdout.write('\n\n--- Full Response Details ---\n'); + console.log('Aggregated Text from Stream:', fullResponse); + console.log('Warnings:', await result.warnings); + console.log('Token usage:', await result.usage); + console.log('Finish reason:', await result.finishReason); +} + +main().catch(console.error); +``` + + + Code Execution capabilities and specific model support on Vertex AI are + subject to Google Cloud's offerings. Always refer to the [official Vertex AI + documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/code-execution) + for the most current information on compatible models and features. + + ### Sources When you use [Search Grounding](#search-grounding), the model will include sources in the response. diff --git a/examples/ai-core/src/generate-text/google-code-execution.ts b/examples/ai-core/src/generate-text/google-code-execution.ts new file mode 100644 index 000000000000..1575c1468258 --- /dev/null +++ b/examples/ai-core/src/generate-text/google-code-execution.ts @@ -0,0 +1,53 @@ +import { google } from '@ai-sdk/google'; +import { generateText } from 'ai'; +import 'dotenv/config'; + +async function main() { + const result = await generateText({ + model: google('gemini-2.5-flash-preview-04-17'), + providerOptions: { + google: { + useCodeExecution: true, + }, + }, + maxOutputTokens: 2048, + prompt: + 'Calculate 20th fibonacci number. 
Then find the nearest palindrome to it.', + }); + + for (const part of result.content) { + switch (part.type) { + case 'file': { + if (part.type === 'file') { + process.stdout.write( + '\x1b[33m' + + part.type + + '\x1b[34m: ' + + part.file.mediaType + + '\x1b[0m', + ); + console.log(); + console.log(atob(part.file.base64)); + } + } + case 'text': { + if (part.type === 'text') { + process.stdout.write('\x1b[34m' + part.type + '\x1b[0m'); + console.log(); + console.log(part.text); + } + } + } + } + + process.stdout.write('\n\n'); + + console.log(); + console.log('Warnings:', await result.warnings); + + console.log(); + console.log('Token usage:', await result.usage); + console.log('Finish reason:', await result.finishReason); +} + +main().catch(console.error); diff --git a/examples/ai-core/src/generate-text/google-reasoning.ts b/examples/ai-core/src/generate-text/google-reasoning.ts index f813c67f875b..86538355615a 100644 --- a/examples/ai-core/src/generate-text/google-reasoning.ts +++ b/examples/ai-core/src/generate-text/google-reasoning.ts @@ -4,7 +4,15 @@ import 'dotenv/config'; async function main() { const result = await generateText({ - model: google('gemini-2.5-pro-exp-03-25'), + model: google('gemini-2.5-pro-preview-03-25'), + providerOptions: { + google: { + thinkingConfig: { + thinkingBudget: 1024, + }, + }, + }, + maxOutputTokens: 2048, prompt: 'How many "r"s are in the word "strawberry"?', }); diff --git a/examples/ai-core/src/generate-text/google-vertex-code-execution.ts b/examples/ai-core/src/generate-text/google-vertex-code-execution.ts new file mode 100644 index 000000000000..8759517a2319 --- /dev/null +++ b/examples/ai-core/src/generate-text/google-vertex-code-execution.ts @@ -0,0 +1,53 @@ +import { vertex } from '@ai-sdk/google-vertex'; +import { generateText } from 'ai'; +import 'dotenv/config'; + +async function main() { + const result = await generateText({ + model: vertex('gemini-2.5-pro-preview-05-06'), + providerOptions: { + google: { + 
useCodeExecution: true, + }, + }, + maxOutputTokens: 2048, + prompt: + 'Calculate 20th fibonacci number. Then find the nearest palindrome to it.', + }); + + for (const part of result.content) { + switch (part.type) { + case 'file': { + if (part.type === 'file') { + process.stdout.write( + '\x1b[33m' + + part.type + + '\x1b[34m: ' + + part.file.mediaType + + '\x1b[0m', + ); + console.log(); + console.log(atob(part.file.base64)); + } + } + case 'text': { + if (part.type === 'text') { + process.stdout.write('\x1b[34m' + part.type + '\x1b[0m'); + console.log(); + console.log(part.text); + } + } + } + } + + process.stdout.write('\n\n'); + + console.log(); + console.log('Warnings:', await result.warnings); + + console.log(); + console.log('Token usage:', await result.usage); + console.log('Finish reason:', await result.finishReason); +} + +main().catch(console.error); diff --git a/examples/ai-core/src/generate-text/google-vertex-reasoning.ts b/examples/ai-core/src/generate-text/google-vertex-reasoning.ts new file mode 100644 index 000000000000..0d0c025fb74c --- /dev/null +++ b/examples/ai-core/src/generate-text/google-vertex-reasoning.ts @@ -0,0 +1,27 @@ +import { vertex } from '@ai-sdk/google-vertex'; +import { generateText } from 'ai'; +import 'dotenv/config'; + +async function main() { + const result = await generateText({ + model: vertex('gemini-2.5-flash-preview-04-17'), + providerOptions: { + google: { + thinkingConfig: { + thinkingBudget: 1024, + includeThoughts: true, + }, + }, + }, + maxOutputTokens: 2048, + prompt: 'How many "r"s are in the word "strawberry"?', + }); + + process.stdout.write('\x1b[34m' + result.reasoningText + '\x1b[0m'); + console.log(result.text); + console.log(); + console.log('Token usage:', result.usage); + console.log('Finish reason:', result.finishReason); +} + +main().catch(console.error); diff --git a/examples/ai-core/src/stream-text/google-code-execution.ts b/examples/ai-core/src/stream-text/google-code-execution.ts new file mode 
100644 index 000000000000..ad7067542528 --- /dev/null +++ b/examples/ai-core/src/stream-text/google-code-execution.ts @@ -0,0 +1,54 @@ +import { google } from '@ai-sdk/google'; +import { streamText } from 'ai'; +import 'dotenv/config'; + +async function main() { + const result = streamText({ + model: google('gemini-2.5-flash-preview-04-17'), + maxOutputTokens: 10000, + providerOptions: { + google: { + useCodeExecution: true, + }, + }, + prompt: + 'Calculate 20th fibonacci number. Then find the nearest palindrome to it.', + }); + + let fullResponse = ''; + + for await (const delta of result.fullStream) { + switch (delta.type) { + case 'file': { + if (delta.type === 'file') { + process.stdout.write( + '\x1b[33m' + + delta.type + + '\x1b[34m: ' + + delta.file.mediaType + + '\x1b[0m', + ); + console.log(); + console.log(atob(delta.file.base64 as string)); + } + } + case 'text': { + if (delta.type === 'text') { + process.stdout.write('\x1b[34m' + delta.type + '\x1b[0m'); + console.log(); + console.log(delta.text); + fullResponse += delta.text; + } + break; + } + } + } + console.log(); + console.log('Warnings:', await result.warnings); + + console.log(); + console.log('Token usage:', await result.usage); + console.log('Finish reason:', await result.finishReason); +} + +main().catch(console.log); diff --git a/examples/ai-core/src/stream-text/google-reasoning-code-execution-and-grounding.ts b/examples/ai-core/src/stream-text/google-reasoning-code-execution-and-grounding.ts new file mode 100644 index 000000000000..4cb18e68a4d0 --- /dev/null +++ b/examples/ai-core/src/stream-text/google-reasoning-code-execution-and-grounding.ts @@ -0,0 +1,141 @@ +import { google, GoogleGenerativeAIProviderMetadata } from '@ai-sdk/google'; +import { GoogleGenerativeAIProviderOptions } from '@ai-sdk/google'; +import { streamText } from 'ai'; +import 'dotenv/config'; + +async function main() { + const result = streamText({ + model: google('gemini-2.5-flash-preview-04-17'), + maxOutputTokens: 
10000, + providerOptions: { + google: { + // Only GoogleGenerativeAI Provider supports both grounding and code execution + useCodeExecution: true, + useSearchGrounding: true, + // Flash Preview supports thinking + thinkingConfig: { + thinkingBudget: 2048, + }, + } as GoogleGenerativeAIProviderOptions, + }, + temperature: 0, // Use temp 0 for this to make the model make better use of search grounding + onError(error) { + console.error(error); + }, + prompt: + 'Calculate 20th fibonacci number. Then find the nearest palindrome to it. Also, provide the current XMR to USD rate.', + }); + + for await (const delta of result.fullStream) { + switch (delta.type) { + case 'file': { + if (delta.type === 'file') { + console.log( + '\x1b[33m' + + delta.type + + '\x1b[34m: ' + + delta.file.mediaType + + '\x1b[0m', + ); + console.log(atob(delta.file.base64 as string)); + console.log('\x1b[31m' + delta.type + '\x1b[0m'); + } + break; + } + case 'text': { + if (delta.type === 'text') { + console.log(delta.text); + } + break; + } + case 'source': { + if (delta.type === 'source' && delta.sourceType === 'url') { + console.log('ID:', delta.id); + console.log('Title:', delta.title); + console.log('URL:', delta.url); + console.log(); + } + break; + } + case 'reasoning': { + if (delta.type === 'reasoning') { + console.log('\x1b[34m' + delta.type); + console.log(delta.text); + console.log('\x1b[0m'); + } + break; + } + case 'tool-call': { + if (delta.type === 'tool-call') { + console.log( + 'TOOL CALL: ', + delta.toolName, + '(', + delta.toolCallId, + ')', + ); + console.log('Args: ', delta.args); + console.log('\x1b[0m'); + } + break; + } + case 'tool-result': { + if (delta.type === 'tool-result') { + console.log(); + console.log( + 'TOOL RESULT: ', + delta.toolName, + '(', + delta.toolCallId, + ')', + ); + console.log(delta.result); + console.log('\x1b[0m'); + } + break; + } + case 'error': { + if (delta.type === 'error' && delta.error != null) { + console.log(delta.error); + } + break; + 
} + } + } + console.log(); + + // Show sources + const providerMetadata = await result.providerMetadata; + if ( + providerMetadata != null && + typeof providerMetadata === 'object' && + 'google' in providerMetadata && + providerMetadata.google != null + ) { + const metadata = + providerMetadata.google as unknown as GoogleGenerativeAIProviderMetadata; + if (metadata != null) { + console.log('\x1b[35m' + 'sources' + '\x1b[0m'); + if (metadata?.groundingMetadata?.webSearchQueries) { + console.log('\x1b[36m' + 'Web Queries:' + '\x1b[0m'); + for (const query of metadata?.groundingMetadata?.webSearchQueries) { + console.log(query); + } + } + if (metadata.groundingMetadata?.searchEntryPoint != null) { + console.log('\x1b[36m' + 'Search Entry Point: ' + '\x1b[0m'); + console.log( + JSON.stringify(metadata.groundingMetadata?.searchEntryPoint, null, 2), + ); + } + } + } + console.log(); + console.log('Warnings:', await result.warnings); + + console.log(); + console.log('Token usage:', await result.usage); + console.log('Finish reason:', await result.finishReason); +} + +main().catch(console.error); diff --git a/examples/ai-core/src/stream-text/google-vertex-code-execution.ts b/examples/ai-core/src/stream-text/google-vertex-code-execution.ts new file mode 100644 index 000000000000..68b8b5222ab6 --- /dev/null +++ b/examples/ai-core/src/stream-text/google-vertex-code-execution.ts @@ -0,0 +1,54 @@ +import { vertex } from '@ai-sdk/google-vertex'; +import { streamText } from 'ai'; +import 'dotenv/config'; + +async function main() { + const result = streamText({ + model: vertex('gemini-2.5-flash-preview-04-17'), + providerOptions: { + google: { + useCodeExecution: true, + }, + }, + maxOutputTokens: 10000, + prompt: + 'Calculate 20th fibonacci number. 
Then find the nearest palindrome to it.', + }); + + let fullResponse = ''; + + for await (const delta of result.fullStream) { + switch (delta.type) { + case 'file': { + if (delta.type === 'file') { + process.stdout.write( + '\x1b[33m' + + delta.type + + '\x1b[34m: ' + + delta.file.mediaType + + '\x1b[0m', + ); + console.log(); + console.log(atob(delta.file.base64 as string)); + } + } + case 'text': { + if (delta.type === 'text') { + process.stdout.write('\x1b[34m' + delta.type + '\x1b[0m'); + console.log(); + console.log(delta.text); + fullResponse += delta.text; + } + break; + } + } + } + console.log(); + console.log('Warnings:', await result.warnings); + + console.log(); + console.log('Token usage:', await result.usage); + console.log('Finish reason:', await result.finishReason); +} + +main().catch(console.log); diff --git a/examples/ai-core/src/stream-text/google-vertex-reasoning.ts b/examples/ai-core/src/stream-text/google-vertex-reasoning.ts new file mode 100644 index 000000000000..aa9e445c1681 --- /dev/null +++ b/examples/ai-core/src/stream-text/google-vertex-reasoning.ts @@ -0,0 +1,35 @@ +import { vertex } from '@ai-sdk/google-vertex'; +import { streamText } from 'ai'; +import 'dotenv/config'; + +async function main() { + const result = streamText({ + model: vertex('gemini-2.5-flash-preview-04-17'), + prompt: 'Tell me the history of the San Francisco Mission-style burrito.', + providerOptions: { + google: { + thinkingConfig: { + thinkingBudget: 1024, + includeThoughts: true, + }, + }, + }, + }); + + for await (const part of result.fullStream) { + if (part.type === 'reasoning') { + process.stdout.write('\x1b[34m' + part.text + '\x1b[0m'); + } else if (part.type === 'text') { + process.stdout.write(part.text); + } + } + + console.log(); + console.log('Warnings:', await result.warnings); + + console.log(); + console.log('Token usage:', await result.usage); + console.log('Finish reason:', await result.finishReason); +} + +main().catch(console.error); diff --git 
a/packages/google/src/google-generative-ai-language-model.ts b/packages/google/src/google-generative-ai-language-model.ts index 3a7bf1ea3810..8a0b13be5968 100644 --- a/packages/google/src/google-generative-ai-language-model.ts +++ b/packages/google/src/google-generative-ai-language-model.ts @@ -91,6 +91,19 @@ export class GoogleGenerativeAILanguageModel implements LanguageModelV2 { schema: googleGenerativeAIProviderOptions, }); + if ( + googleOptions?.thinkingConfig?.includeThoughts === true && + !this.config.provider.startsWith('google.vertex.') + ) { + warnings.push({ + type: 'other', + message: + "The 'includeThoughts' option is only supported with the Google Vertex provider " + + 'and might not be supported or could behave unexpectedly with the current Google provider ' + + `(${this.config.provider}).`, + }); + } + const { contents, systemInstruction } = convertToGoogleGenerativeAIMessages(prompt); @@ -104,8 +117,12 @@ export class GoogleGenerativeAILanguageModel implements LanguageModelV2 { useSearchGrounding: googleOptions?.useSearchGrounding ?? false, dynamicRetrievalConfig: googleOptions?.dynamicRetrievalConfig, modelId: this.modelId, + useCodeExecution: googleOptions?.useCodeExecution ?? false, + provider: this.provider, }); + warnings.push(...toolWarnings); + return { args: { generationConfig: { @@ -189,8 +206,46 @@ export class GoogleGenerativeAILanguageModel implements LanguageModelV2 { : (candidate.content.parts ?? 
[]); for (const part of parts) { - if ('text' in part && part.text.length > 0) { + // Text parts + if ('text' in part && !part.thought && part.text.length > 0) { content.push({ type: 'text', text: part.text }); + // Reasoning parts + } else if ( + 'text' in part && + (part as any).thought === true && + !('executableCode' in part) && + !('codeExecutionResult' in part) && + part.text != null && + part.text.length > 0 + ) { + content.push({ type: 'reasoning', text: part.text }); + // code exectuion: Executable code + } else if ( + 'executableCode' in part && + part.executableCode != null && + part.executableCode.code.length > 0 + ) { + /** + * NOTE: The vertex api just returns a string, but the ai-sdk expects either a base64 string or uint8Arry. + * So we just convert it to base64 + */ + content.push({ + type: 'file', + mediaType: 'text/x-python', + data: Buffer.from(part.executableCode.code, 'utf-8').toString( + 'base64', + ), + }); + // code execution: Execution result + } else if ( + 'codeExecutionResult' in part && + part.codeExecutionResult != null + ) { + content.push({ + type: 'text' as const, + text: `Execution Result (Outcome: ${part.codeExecutionResult.outcome}):\n${part.codeExecutionResult.output}`, + }); + // function calls } else if ('functionCall' in part) { content.push({ type: 'tool-call' as const, @@ -199,6 +254,7 @@ export class GoogleGenerativeAILanguageModel implements LanguageModelV2 { toolName: part.functionCall.name, args: JSON.stringify(part.functionCall.args), }); + // inline data } else if ('inlineData' in part) { content.push({ type: 'file' as const, @@ -325,9 +381,33 @@ export class GoogleGenerativeAILanguageModel implements LanguageModelV2 { // Process tool call's parts before determining finishReason to ensure hasToolCalls is properly set if (content != null) { - const deltaText = getTextFromParts(content.parts); - if (deltaText != null) { - controller.enqueue(deltaText); + const reasoningContent = getReasoningParts(content.parts); + if ( 
+ reasoningContent?.type === 'reasoning' && + reasoningContent.text.length > 0 + ) { + controller.enqueue({ + type: 'reasoning', + text: reasoningContent.text, + }); + } + + // Process code execution parts prior to text parts + const executableCodeFilePart = + getExecutableCodeFilePartFromStreamParts(content.parts); + if (executableCodeFilePart != null) { + controller.enqueue(executableCodeFilePart); + } + + const codeExecutionResultTextParts = + getCodeExecutionResultStreamParts(content.parts); + for (const textPart of codeExecutionResultTextParts) { + controller.enqueue(textPart); + } + + const textContent = getTextFromParts(content.parts); // getTextFromParts is now refactored + if (textContent?.text && textContent.text.length > 0) { + controller.enqueue({ type: 'text', text: textContent.text }); } const inlineDataParts = getInlineDataParts(content.parts); @@ -437,9 +517,13 @@ function getToolCallsFromParts({ } function getTextFromParts(parts: z.infer['parts']) { - const textParts = parts?.filter(part => 'text' in part) as Array< - GoogleGenerativeAIContentPart & { text: string } - >; + // Only include plain text parts (not thoughts, not executable code, not code execution results) + const textParts = parts?.filter( + part => + 'text' in part && + part.text != null && + !(part as { thought?: boolean }).thought, + ) as Array; return textParts == null || textParts.length === 0 ? 
undefined @@ -449,6 +533,49 @@ function getTextFromParts(parts: z.infer['parts']) { }; } +function getExecutableCodeFilePartFromStreamParts( + parts: z.infer['parts'], +): LanguageModelV2StreamPart | undefined { + const part = parts?.find( + p => 'executableCode' in p && p.executableCode != null, + ); + if ( + part && + 'executableCode' in part && + part.executableCode && + part.executableCode.code.length > 0 + ) { + return { + type: 'file', + mediaType: 'text/x-python', + data: Buffer.from(part.executableCode.code, 'utf-8').toString('base64'), + }; + } + return undefined; +} + +function getCodeExecutionResultStreamParts( + parts: z.infer['parts'], +): LanguageModelV2StreamPart[] { + const resultParts: LanguageModelV2StreamPart[] = []; + parts?.forEach(part => { + if ( + 'codeExecutionResult' in part && + part.codeExecutionResult != null && + part.codeExecutionResult.output != null + ) { + // Note: the output string may be empty even when an outcome is present + const outputText = part.codeExecutionResult.output; + // Only create a part if there's an outcome, even if output is empty. 
+ if (part.codeExecutionResult != null) { + const formattedText = `Execution Result (Outcome: ${part.codeExecutionResult.outcome}):\n${outputText}`; + resultParts.push({ type: 'text', text: formattedText }); + } + } + }); + return resultParts; +} + function getInlineDataParts(parts: z.infer['parts']) { return parts?.filter( ( @@ -459,6 +586,31 @@ function getInlineDataParts(parts: z.infer['parts']) { ); } +function getReasoningParts( + parts: z.infer['parts'], +): LanguageModelV2Content | undefined { + const reasoningContentParts: string[] = []; + parts?.forEach(part => { + if ( + 'text' in part && + (part as { thought?: boolean }).thought === true && + part.text != null && + part.text.length > 0 + ) { + reasoningContentParts.push(part.text); + } + }); + if (reasoningContentParts.length === 0) { + return undefined; + } + + // Join reasoning segments + return { + type: 'reasoning', + text: reasoningContentParts.join(''), + }; +} + function extractSources({ groundingMetadata, generateId, @@ -483,6 +635,24 @@ function extractSources({ })); } +const executableCodePartSchema = z.object({ + executableCode: z + .object({ + language: z.string(), + code: z.string(), + }) + .nullish(), +}); + +const codeExecutionResultPartSchema = z.object({ + codeExecutionResult: z + .object({ + outcome: z.string(), + output: z.string(), + }) + .nullish(), +}); + const contentSchema = z.object({ role: z.string(), parts: z @@ -490,6 +660,7 @@ const contentSchema = z.object({ z.union([ z.object({ text: z.string(), + thought: z.boolean().nullish(), }), z.object({ functionCall: z.object({ @@ -503,6 +674,8 @@ const contentSchema = z.object({ data: z.string(), }), }), + executableCodePartSchema, + codeExecutionResultPartSchema, ]), ) .nullish(), diff --git a/packages/google/src/google-generative-ai-options.ts b/packages/google/src/google-generative-ai-options.ts index 3715b4fe4231..4a8cd8841834 100644 --- a/packages/google/src/google-generative-ai-options.ts +++ 
b/packages/google/src/google-generative-ai-options.ts @@ -51,6 +51,15 @@ export const googleGenerativeAIProviderOptions = z.object({ thinkingConfig: z .object({ thinkingBudget: z.number().optional(), + /** + * Optional. Set to true to include thinking process information in the response. + * This is primarily for use with Google Vertex AI, as behavior with other + * Google Generative AI endpoints might vary or not be fully supported. + * + * @see https://ai.google.dev/gemini-api/docs/thinking (for general concept) + * @see https://cloud.google.com/vertex-ai/generative-ai/docs/thinking (Vertex specific) + */ + includeThoughts: z.boolean().optional(), }) .optional(), @@ -130,6 +139,17 @@ Optional. Specifies the dynamic retrieval configuration. @see https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/ground-with-google-search#dynamic-retrieval */ dynamicRetrievalConfig: dynamicRetrievalConfig.optional(), + /** +Optional. When enabled, the model will make use of a code execution tool that +enables the model to generate and run Python code. + +@note Ensure the selected model supports Code Execution. +Multi-tool usage with the code execution tool is typically compatible with Flash experimental models. 
+ +@see https://ai.google.dev/gemini-api/docs/code-execution (Google AI) +@see https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/code-execution-api (Vertex AI) + */ + useCodeExecution: z.boolean().optional(), }); export type GoogleGenerativeAIProviderOptions = z.infer< diff --git a/packages/google/src/google-prepare-tools.ts b/packages/google/src/google-prepare-tools.ts index d7434cb17ac4..c907f79cc9f0 100644 --- a/packages/google/src/google-prepare-tools.ts +++ b/packages/google/src/google-prepare-tools.ts @@ -15,12 +15,16 @@ export function prepareTools({ useSearchGrounding, dynamicRetrievalConfig, modelId, + useCodeExecution, + provider, }: { tools: LanguageModelV2CallOptions['tools']; toolChoice?: LanguageModelV2CallOptions['toolChoice']; useSearchGrounding: boolean; dynamicRetrievalConfig: DynamicRetrievalConfig | undefined; modelId: GoogleGenerativeAIModelId; + useCodeExecution: boolean; + provider: string; }): { tools: | undefined @@ -36,7 +40,12 @@ export function prepareTools({ | Record | { dynamicRetrievalConfig: DynamicRetrievalConfig }; } - | { googleSearch: Record }; + | { + googleSearch: Record; + codeExecution: Record; + } + | { googleSearch: Record } + | { codeExecution: Record }; toolConfig: | undefined | { @@ -56,6 +65,52 @@ export function prepareTools({ const supportsDynamicRetrieval = modelId.includes('gemini-1.5-flash') && !modelId.includes('-8b'); + if ((useSearchGrounding || useCodeExecution) && tools) { + throw new UnsupportedFunctionalityError({ + functionality: + 'Provider-defined tools (useSearchGrounding or useCodeExecution) ' + + 'cannot be used in combination with user-defined tools. 
' + 'Please disable either the provider tools or your custom tools.', + }); + } + + // Using both provider-defined tools at once is only supported on the + // Google Generative AI provider with Gemini 2 models; otherwise throw. + if (useSearchGrounding && useCodeExecution) { + if (provider !== 'google.generative-ai') { + throw new UnsupportedFunctionalityError({ + functionality: + 'useSearchGrounding and useCodeExecution can only be enabled simultaneously with the Google Generative AI provider.', + }); + } + if (!isGemini2) { + throw new UnsupportedFunctionalityError({ + functionality: + 'useSearchGrounding cannot be used with useCodeExecution in Gemini <2 models.', + }); + } + return { + tools: { codeExecution: {}, googleSearch: {} }, + toolConfig: undefined, + toolWarnings, + }; + } + + if (useCodeExecution) { + // Model compatibility check for code execution: + // only Gemini 2 (or compatible) models support it. + if (!isGemini2) { + // TODO(review): confirm the exact set of models that support code execution + throw new UnsupportedFunctionalityError({ + functionality: `Code Execution is not supported for model ${modelId}. It requires a Gemini 2 or compatible model.`, + }); + } + return { + tools: { codeExecution: {} }, + toolConfig: undefined, + toolWarnings, + }; + } + if (useSearchGrounding) { return { tools: isGemini2