From 26ffeacdc245aed41e6ba9989354ece8ccf75eaf Mon Sep 17 00:00:00 2001
From: Yufeng He <40085740+he-yufeng@users.noreply.github.com>
Date: Mon, 30 Mar 2026 21:35:42 +0800
Subject: [PATCH] fix: filter Gemini thinking parts from user-facing message
 chain

Gemini 3 models return thinking parts (part.thought=True) alongside the
actual response text.  _process_content_parts was including these thinking
parts in the message chain sent to the user, effectively leaking internal
reasoning into the output.  On platforms that split long messages (e.g.
aiocqhttp with realtime segmenting), this caused duplicate or triple
replies since the thinking text often mirrors the actual response.

Intermediate streaming chunks already handled this correctly via chunk.text,
which skips thinking parts, but the non-streaming path and the final-chunk
processing in streaming both went through _process_content_parts.

Also switch the Gemini 3 model name matching from an exhaustive list to
prefix matching (gemini-3- / gemini-3.) so new variants like gemini-3.1
get proper thinkingLevel config without code changes.

Fixes #7183
---
 .../core/provider/sources/gemini_source.py    | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/astrbot/core/provider/sources/gemini_source.py b/astrbot/core/provider/sources/gemini_source.py
index 388ab5c91f..489ca358b8 100644
--- a/astrbot/core/provider/sources/gemini_source.py
+++ b/astrbot/core/provider/sources/gemini_source.py
@@ -241,15 +241,10 @@ async def _prepare_query_config(
                 thinking_config = types.ThinkingConfig(
                     thinking_budget=thinking_budget,
                 )
-        elif model_name in [
-            "gemini-3-pro",
-            "gemini-3-pro-preview",
-            "gemini-3-flash",
-            "gemini-3-flash-preview",
-            "gemini-3-flash-lite",
-            "gemini-3-flash-lite-preview",
-        ]:
-            # The thinkingLevel parameter, recommended for Gemini 3 models and onwards
+        elif any(model_name.startswith(p) for p in ("gemini-3-", "gemini-3.")):
+            # The thinkingLevel parameter, recommended for Gemini 3 models and onwards.
+            # Use prefix match so new variants (e.g. gemini-3.1) are
+            # covered without needing to keep an exhaustive list up to date.
             # Gemini 2.5 series models don't support thinkingLevel; use thinkingBudget instead.
             thinking_level = self.provider_config.get("gm_thinking_config", {}).get(
                 "level", "HIGH"
@@ -517,7 +512,11 @@ def _process_content_parts(
         ):
             chain.append(Comp.Plain("这是图片"))
         for part in result_parts:
-            if part.text:
+            # Skip thinking parts — their text is already captured via
+            # _extract_reasoning_content above.  Including them here would
+            # leak the model's internal reasoning into the user-facing message,
+            # which also causes duplicate/triple replies on some platforms.
+            if part.text and not part.thought:
                 chain.append(Comp.Plain(part.text))
 
             if (