PublicDataWorks · quancao-ea · Dec 2, 2025 · Dec 2, 2025 · gemini-code-assist · Dec 2, 2025
diff --git a/src/processing_pipeline/stage_3.py b/src/processing_pipeline/stage_3.py
@@ -80,6 +80,7 @@ def update_snippet_in_supabase(
     snippet_id,
     gemini_response,
     grounding_metadata,
+    thought_summaries,
     analyzed_by,
     status,
     error_message,
@@ -99,6 +100,7 @@ def update_snippet_in_supabase(
         context=gemini_response["context"],
         political_leaning=gemini_response["political_leaning"],
         grounding_metadata=grounding_metadata,
+        thought_summaries=thought_summaries,
         analyzed_by=analyzed_by,
         status=status,
         error_message=error_message,
@@ -215,6 +217,7 @@ def process_snippet(supabase_client, snippet, local_file, gemini_key, skip_revie
             snippet_id=snippet["id"],
             gemini_response=analyzing_response["response"],
             grounding_metadata=analyzing_response["grounding_metadata"],
+            thought_summaries=analyzing_response["thought_summaries"],
             analyzed_by=analyzing_response["analyzed_by"],
             status=status,
             error_message=None,
@@ -359,12 +362,15 @@ def run(
 
         try:
             # Step 1: Analyze with Google Search
-            analysis_text, grounding_metadata = cls.__analyze_with_search(
+            analysis_result = cls.__analyze_with_search(
                 client,
                 model_name,
                 user_prompt,
                 uploaded_audio_file,
             )
+            analysis_text = analysis_result["text"]
+            grounding_metadata = analysis_result["grounding_metadata"]
+            thought_summaries = analysis_result["thought_summaries"]
-            analysis_text = analysis_result["text"]
-            grounding_metadata = analysis_result["grounding_metadata"]
-            thought_summaries = analysis_result["thought_summaries"]
+            analysis_text = analysis_result.get("text")
+            grounding_metadata = analysis_result.get("grounding_metadata")
+            thought_summaries = analysis_result.get("thought_summaries")
-            analysis_text = analysis_result["text"]
-            grounding_metadata = analysis_result["grounding_metadata"]
-            thought_summaries = analysis_result["thought_summaries"]
+            analysis_text = analysis_result.get("text")
+            grounding_metadata = analysis_result.get("grounding_metadata")
+            thought_summaries = analysis_result.get("thought_summaries")
 
             # Try to validate with Pydantic model first
             validated_output = cls.__validate_with_pydantic(analysis_text)
@@ -373,12 +379,14 @@ def run(
                 return {
                     "response": validated_output,
                     "grounding_metadata": grounding_metadata,
+                    "thought_summaries": thought_summaries,
                 }
 
             # Step 2: Structure with response_schema (if validation failed)
             return {
                 "response": cls.__structure_with_schema(client, analysis_text),
                 "grounding_metadata": grounding_metadata,
+                "thought_summaries": thought_summaries,
             }
         finally:
             client.files.delete(name=uploaded_audio_file.name)
@@ -407,11 +415,16 @@ def __analyze_with_search(
                 system_instruction=cls.SYSTEM_INSTRUCTION,
                 max_output_tokens=16384,
                 tools=[Tool(google_search=GoogleSearch())],
-                thinking_config=ThinkingConfig(thinking_budget=4096),
+                thinking_config=ThinkingConfig(thinking_budget=4096, include_thoughts=True),
                 safety_settings=get_safety_settings(),
             ),
         )
 
+        thoughts = ""
+        for part in response.candidates[0].content.parts:
+            if part.thought and part.text:
+                thoughts += part.text
-        thoughts = ""
-        for part in response.candidates[0].content.parts:
-            if part.thought and part.text:
-                thoughts += part.text
+        thought_summaries = ""
+        if response.candidates:
+            for part in response.candidates[0].content.parts:
+                if part.thought and part.text:
+                    thought_summaries += part.text
-        thoughts = ""
-        for part in response.candidates[0].content.parts:
-            if part.thought and part.text:
-                thoughts += part.text
+        thought_summaries = ""
+        if response.candidates:
+            for part in response.candidates[0].content.parts:
+                if part.thought and part.text:
+                    thought_summaries += part.text
+
         grounding_metadata = (
             response.candidates[0].grounding_metadata.model_dump_json(indent=2) if response.candidates else None
         )
-        thoughts = ""
-        for part in response.candidates[0].content.parts:
-            if part.thought and part.text:
-                thoughts += part.text
-
-        grounding_metadata = (
-            response.candidates[0].grounding_metadata.model_dump_json(indent=2) if response.candidates else None
-        )
+        thoughts = ""
+        if response.candidates and response.candidates[0].content:
+            for part in response.candidates[0].content.parts:
+                if part.thought and part.text:
+                    thoughts += part.text
+
+        grounding_metadata = (
+            response.candidates[0].grounding_metadata.model_dump_json(indent=2) if response.candidates else None
+        )
-        thoughts = ""
-        for part in response.candidates[0].content.parts:
-            if part.thought and part.text:
-                thoughts += part.text
-
-        grounding_metadata = (
-            response.candidates[0].grounding_metadata.model_dump_json(indent=2) if response.candidates else None
-        )
+        thoughts = ""
+        if response.candidates and response.candidates[0].content:
+            for part in response.candidates[0].content.parts:
+                if part.thought and part.text:
+                    thoughts += part.text
+
+        grounding_metadata = (
+            response.candidates[0].grounding_metadata.model_dump_json(indent=2) if response.candidates else None
+        )
@@ -425,7 +438,11 @@ def __analyze_with_search(
             print(f"Response finish reason: {finish_reason}")
             raise ValueError("No response from Gemini in step 1.")
 
-        return response.text, grounding_metadata
+        return {
+            "text": response.text,
+            "grounding_metadata": grounding_metadata,
+            "thought_summaries": thoughts,
-            "thought_summaries": thoughts,
+            "thought_summaries": thought_summaries,
-            "thought_summaries": thoughts,
+            "thought_summaries": thought_summaries,
+        }
 
     @classmethod
     def __validate_with_pydantic(cls, response_text: str):

diff --git a/src/processing_pipeline/supabase_utils.py b/src/processing_pipeline/supabase_utils.py
@@ -198,6 +198,7 @@ def update_snippet(
         context,
         political_leaning,
         grounding_metadata,
+        thought_summaries,
         analyzed_by,
         status,
         error_message
@@ -219,6 +220,7 @@ def update_snippet(
                     "context": context,
                     "political_leaning": political_leaning,
                     "grounding_metadata": grounding_metadata,
+                    "thought_summaries": thought_summaries,
                     "analyzed_by": analyzed_by,
                     "previous_analysis": None,
                     "status": status,