(gemini live 3.1): fix tool responses #5413

Changes from all commits
```diff
@@ -58,7 +58,6 @@
     {
         "gemini-3.1-flash-live-preview",
         "gemini-2.5-flash-native-audio-preview-12-2025",
         "gemini-2.5-flash-native-audio-preview-09-2025",
     }
 )
```
```diff
@@ -275,7 +274,19 @@ def __init__(
         ):
             server_turn_detection = False
         modalities = modalities if is_given(modalities) else [types.Modality.AUDIO]
+        use_vertexai = (
+            vertexai
+            if is_given(vertexai)
+            else os.environ.get("GOOGLE_GENAI_USE_VERTEXAI", "0").lower() in ["true", "1"]
+        )
+        if not is_given(model):
+            model = (
+                "gemini-live-2.5-flash-native-audio"
+                if use_vertexai
+                else "gemini-2.5-flash-native-audio-preview-12-2025"
+            )

+        mutable = "3.1" not in model
         super().__init__(
             capabilities=llm.RealtimeCapabilities(
                 message_truncation=False,
```
```diff
@@ -284,30 +295,20 @@ def __init__(
                 auto_tool_reply_generation=True,
                 audio_output=types.Modality.AUDIO in modalities,
                 manual_function_calls=False,
-                mutable_instructions=True,
+                mutable_chat_context=mutable,
+                mutable_instructions=mutable,
                 mutable_tools=False,
                 per_response_tool_choice=False,
             )
         )

-        if not is_given(model):
-            if vertexai:
-                model = "gemini-live-2.5-flash-native-audio"
-            else:
-                model = "gemini-2.5-flash-native-audio-preview-12-2025"
-
         gemini_api_key = api_key if is_given(api_key) else os.environ.get("GOOGLE_API_KEY")
         gcp_project = project if is_given(project) else os.environ.get("GOOGLE_CLOUD_PROJECT")
         gcp_location: str | None = (
             location
             if is_given(location)
             else os.environ.get("GOOGLE_CLOUD_LOCATION") or "us-central1"
         )
-        use_vertexai = (
-            vertexai
-            if is_given(vertexai)
-            else os.environ.get("GOOGLE_GENAI_USE_VERTEXAI", "0").lower() in ["true", "1"]
-        )

         if use_vertexai:
             if not gcp_project:
```
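The ordering is the point of these two hunks: `use_vertexai` and the default model are now resolved before `super().__init__()`, so the capability flags can be derived from the final model name. A standalone sketch of that resolution logic, mirroring the diff but simplified to plain `None` checks in place of livekit's `is_given` sentinel:

```python
from __future__ import annotations

import os


def resolve_model_and_mutability(
    model: str | None, vertexai: bool | None
) -> tuple[str, bool, bool]:
    # Mirrors the reordered __init__ logic from this diff (simplified).
    use_vertexai = (
        vertexai
        if vertexai is not None
        else os.environ.get("GOOGLE_GENAI_USE_VERTEXAI", "0").lower() in ["true", "1"]
    )
    if model is None:
        model = (
            "gemini-live-2.5-flash-native-audio"
            if use_vertexai
            else "gemini-2.5-flash-native-audio-preview-12-2025"
        )
    # 3.1 live models don't accept mid-session instruction/context updates,
    # so both mutable_* capability flags are keyed off the model name.
    mutable = "3.1" not in model
    return model, use_vertexai, mutable
```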
```diff
@@ -335,6 +336,12 @@ def __init__(
         # Validate model/API compatibility for known models
         _validate_model_api_match(model, use_vertexai)

+        if "3.1" in model:
+            logger.warning(
+                f"'{model}' has limited mid-session update support. instructions, chat "
+                "context, and tool updates will not be applied until the next session."
+            )
+
         self._opts = _RealtimeOptions(
             model=model,
             api_key=gemini_api_key,
```
```diff
@@ -549,12 +556,6 @@ def update_options(
             self._mark_restart_needed()

     async def update_instructions(self, instructions: str) -> None:
-        if self._opts.model == "gemini-3.1-flash-live-preview":
-            logger.warning(
-                "update_instructions is not compatible with 'gemini-3.1-flash-live-preview' and will be ignored."
-            )
-            self._opts.instructions = instructions
-            return
         if not is_given(self._opts.instructions) or self._opts.instructions != instructions:
             self._opts.instructions = instructions
```
```diff
@@ -564,6 +565,9 @@ async def update_instructions(self, instructions: str) -> None:
                 self._mark_restart_needed()
                 return

+        if not self._realtime_model.capabilities.mutable_instructions:
+            return
+
         # Active session exists — send mid-session system instruction update (no reconnect needed)
         logger.debug("Updating instructions mid-session")
         self._send_client_event(
```
```diff
@@ -581,17 +585,6 @@ async def update_instructions(self, instructions: str) -> None:
         )

     async def update_chat_ctx(self, chat_ctx: llm.ChatContext) -> None:
-        if self._opts.model == "gemini-3.1-flash-live-preview":
-            logger.warning(
-                "update_chat_ctx is not compatible with 'gemini-3.1-flash-live-preview' and will be ignored."
-            )
-            self._chat_ctx = chat_ctx.copy(
-                exclude_handoff=True,
-                exclude_instructions=True,
-                exclude_empty_message=True,
-                exclude_config_update=True,
-            )
-            return
         # Check for system/developer messages that will be dropped
         system_msg_count = sum(
             1 for msg in chat_ctx.messages() if msg.role in ("system", "developer")
```
```diff
@@ -627,18 +620,20 @@ async def update_chat_ctx(self, chat_ctx: llm.ChatContext) -> None:
                 append_ctx.items.append(item)

         if append_ctx.items:
-            turns_dict, _ = append_ctx.copy(exclude_function_call=True).to_provider_format(
-                format="google", inject_dummy_user_message=False
-            )
-            # we are not generating, and do not need to inject
-            turns = [types.Content.model_validate(turn) for turn in turns_dict]
             tool_results = get_tool_results_for_realtime(
                 append_ctx,
                 vertexai=self._opts.vertexai,
                 tool_response_scheduling=self._opts.tool_response_scheduling,
             )
-            if turns:
-                self._send_client_event(types.LiveClientContent(turns=turns, turn_complete=False))
+            if self._realtime_model.capabilities.mutable_chat_context:
+                turns_dict, _ = append_ctx.copy(exclude_function_call=True).to_provider_format(
+                    format="google", inject_dummy_user_message=False
+                )
+                turns = [types.Content.model_validate(turn) for turn in turns_dict]
+                if turns:
+                    self._send_client_event(
+                        types.LiveClientContent(turns=turns, turn_complete=False)
+                    )
+            if tool_results:
+                self._send_client_event(tool_results)
```

Review thread on the new `mutable_chat_context` check:

Member: should we log a warning if it's not mutable?
Member (author): i added a warning here in the init, could be kinda noisy in
Member: got it, that makes sense
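This hunk is the fix named in the PR title: tool results are now computed and forwarded unconditionally, and only the plain conversation turns are gated behind the capability flag, whereas the removed 3.1 early-return in `update_chat_ctx` dropped function-call responses entirely. A toy sketch of the fixed ordering, using stand-in types rather than the real livekit/google-genai objects:

```python
from dataclasses import dataclass


@dataclass
class Capabilities:
    mutable_chat_context: bool


def flush_context(caps: Capabilities, turns: list[str], tool_results: list[str]) -> list[str]:
    # Conversation turns are gated per model; tool responses always go out.
    sent: list[str] = []
    if caps.mutable_chat_context:
        sent.extend(turns)
    sent.extend(tool_results)
    return sent


# On a 3.1 live model (immutable context) only the tool results are sent:
assert flush_context(Capabilities(False), ["hello"], ["tool_ok"]) == ["tool_ok"]
```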
```diff
@@ -709,15 +704,13 @@ def generate_reply(
     ) -> asyncio.Future[llm.GenerationCreatedEvent]:
         if is_given(tools):
             logger.warning("per-response tools is not supported by Google Realtime API, ignoring")
-        if self._opts.model == "gemini-3.1-flash-live-preview":
+        if not self._realtime_model.capabilities.mutable_chat_context:
             logger.warning(
-                "generate_reply is not compatible with 'gemini-3.1-flash-live-preview' and will be ignored."
+                f"generate_reply is not compatible with '{self._opts.model}' and will be ignored."
             )
             fut = asyncio.Future[llm.GenerationCreatedEvent]()
             fut.set_exception(
-                llm.RealtimeError(
-                    "generate_reply is not compatible with 'gemini-3.1-flash-live-preview'"
-                )
+                llm.RealtimeError(f"generate_reply is not compatible with '{self._opts.model}'")
             )
             return fut
         if self._pending_generation_fut and not self._pending_generation_fut.done():
```
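Caller-visible behavior: on any model without a mutable chat context, `generate_reply` logs and returns a future that fails with `llm.RealtimeError`, with the actual model id interpolated instead of a hardcoded name. A hedged caller-side sketch (how the session object is obtained is assumed, not shown):

```python
from __future__ import annotations

from livekit.agents import llm  # RealtimeError / GenerationCreatedEvent, per the diff


async def try_generate_reply(session) -> llm.GenerationCreatedEvent | None:
    # Awaiting the returned future raises the RealtimeError set above when
    # the model (e.g. a 3.1 live preview) can't honor manual replies.
    try:
        return await session.generate_reply()
    except llm.RealtimeError as err:
        print(f"skipping manual reply: {err}")
        return None
```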
```diff
@@ -857,12 +850,13 @@ async def _main_task(self) -> None:
                     exclude_empty_message=True,
                     exclude_config_update=True,
                 ).to_provider_format(format="google", inject_dummy_user_message=False)
-                if turns_dict:
-                    turns = [types.Content.model_validate(turn) for turn in turns_dict]
+                turns = [types.Content.model_validate(turn) for turn in turns_dict]
+                if turns:
                     await session.send_client_content(
                         turns=turns,  # type: ignore
                         turn_complete=False,
                     )

                 # queue up existing chat context
                 send_task = asyncio.create_task(
                     self._send_task(session), name="gemini-realtime-send"
```
```diff
@@ -1064,6 +1058,9 @@ def _build_connect_config(self) -> types.LiveConnectConfig:
         tools_config = create_tools_config(self._tools, tool_behavior=self._opts.tool_behavior)
         conf = types.LiveConnectConfig(
             response_modalities=self._opts.response_modalities,
+            history_config=types.HistoryConfig(initial_history_in_client_content=True)
+            if not self._realtime_model.capabilities.mutable_chat_context
+            else None,
             generation_config=types.GenerationConfig(
                 candidate_count=self._opts.candidate_count,
                 temperature=temp,
```
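For immutable-context models the connect config now opts into `initial_history_in_client_content`, which, going by the field name, delivers the existing history once as client content at session start instead of patching it in later. An equivalent spelled-out form of the conditional expression (a sketch; `types` is `google.genai.types` as in the diff):

```python
from __future__ import annotations

from google.genai import types


def build_history_config(mutable_chat_context: bool) -> types.HistoryConfig | None:
    # Immutable-context models (3.1 live) get their initial history pushed
    # as client content at connect time; mutable ones keep the default.
    if mutable_chat_context:
        return None
    return types.HistoryConfig(initial_history_in_client_content=True)
```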