From 722afaa25a311b1f7a2ce9dfc3f3dc200f64d090 Mon Sep 17 00:00:00 2001
From: Quan Cao <quan.cao@eastagile.com>
Date: Tue, 2 Dec 2025 18:42:43 +0700
Subject: [PATCH] feat: save Gemini thought summaries during stage 3 analysis

Captures and stores AI model reasoning thoughts alongside analysis
results to improve transparency and debugging capabilities.
---
 src/processing_pipeline/stage_3.py        | 23 ++++++++++++++++++++---
 src/processing_pipeline/supabase_utils.py |  2 ++
 2 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/src/processing_pipeline/stage_3.py b/src/processing_pipeline/stage_3.py
index 88276ba..b7c8a84 100644
--- a/src/processing_pipeline/stage_3.py
+++ b/src/processing_pipeline/stage_3.py
@@ -80,6 +80,7 @@ def update_snippet_in_supabase(
     snippet_id,
     gemini_response,
     grounding_metadata,
+    thought_summaries,
     analyzed_by,
     status,
     error_message,
@@ -99,6 +100,7 @@ def update_snippet_in_supabase(
         context=gemini_response["context"],
         political_leaning=gemini_response["political_leaning"],
         grounding_metadata=grounding_metadata,
+        thought_summaries=thought_summaries,
         analyzed_by=analyzed_by,
         status=status,
         error_message=error_message,
@@ -215,6 +217,7 @@ def process_snippet(supabase_client, snippet, local_file, gemini_key, skip_revie
             snippet_id=snippet["id"],
             gemini_response=analyzing_response["response"],
             grounding_metadata=analyzing_response["grounding_metadata"],
+            thought_summaries=analyzing_response["thought_summaries"],
             analyzed_by=analyzing_response["analyzed_by"],
             status=status,
             error_message=None,
@@ -359,12 +362,15 @@ def run(
 
         try:
             # Step 1: Analyze with Google Search
-            analysis_text, grounding_metadata = cls.__analyze_with_search(
+            analysis_result = cls.__analyze_with_search(
                 client,
                 model_name,
                 user_prompt,
                 uploaded_audio_file,
             )
+            analysis_text = analysis_result["text"]
+            grounding_metadata = analysis_result["grounding_metadata"]
+            thought_summaries = analysis_result["thought_summaries"]
 
             # Try to validate with Pydantic model first
             validated_output = cls.__validate_with_pydantic(analysis_text)
@@ -373,12 +379,14 @@ def run(
                 return {
                     "response": validated_output,
                     "grounding_metadata": grounding_metadata,
+                    "thought_summaries": thought_summaries,
                 }
 
             # Step 2: Structure with response_schema (if validation failed)
             return {
                 "response": cls.__structure_with_schema(client, analysis_text),
                 "grounding_metadata": grounding_metadata,
+                "thought_summaries": thought_summaries,
             }
         finally:
             client.files.delete(name=uploaded_audio_file.name)
@@ -407,11 +415,16 @@ def __analyze_with_search(
                 system_instruction=cls.SYSTEM_INSTRUCTION,
                 max_output_tokens=16384,
                 tools=[Tool(google_search=GoogleSearch())],
-                thinking_config=ThinkingConfig(thinking_budget=4096),
+                thinking_config=ThinkingConfig(thinking_budget=4096, include_thoughts=True),
                 safety_settings=get_safety_settings(),
             ),
         )
 
+        # A blocked or empty response may lack candidates, content, or parts; treat that as "no thoughts".
+        candidate = response.candidates[0] if response.candidates else None
+        parts = (candidate.content.parts if candidate and candidate.content else None) or []
+        thoughts = "".join(part.text for part in parts if part.thought and part.text)
+
         grounding_metadata = (
             response.candidates[0].grounding_metadata.model_dump_json(indent=2) if response.candidates else None
         )
@@ -425,7 +438,11 @@ def __analyze_with_search(
             print(f"Response finish reason: {finish_reason}")
             raise ValueError("No response from Gemini in step 1.")
 
-        return response.text, grounding_metadata
+        return {
+            "text": response.text,
+            "grounding_metadata": grounding_metadata,
+            "thought_summaries": thoughts,
+        }
 
     @classmethod
     def __validate_with_pydantic(cls, response_text: str):
diff --git a/src/processing_pipeline/supabase_utils.py b/src/processing_pipeline/supabase_utils.py
index 199e183..634c983 100644
--- a/src/processing_pipeline/supabase_utils.py
+++ b/src/processing_pipeline/supabase_utils.py
@@ -198,6 +198,7 @@ def update_snippet(
         context,
         political_leaning,
         grounding_metadata,
+        thought_summaries,
         analyzed_by,
         status,
         error_message
@@ -219,6 +220,7 @@ def update_snippet(
                     "context": context,
                     "political_leaning": political_leaning,
                     "grounding_metadata": grounding_metadata,
+                    "thought_summaries": thought_summaries,
                     "analyzed_by": analyzed_by,
                     "previous_analysis": None,
                     "status": status,