Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 20 additions & 3 deletions src/processing_pipeline/stage_3.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ def update_snippet_in_supabase(
snippet_id,
gemini_response,
grounding_metadata,
thought_summaries,
analyzed_by,
status,
error_message,
Expand All @@ -99,6 +100,7 @@ def update_snippet_in_supabase(
context=gemini_response["context"],
political_leaning=gemini_response["political_leaning"],
grounding_metadata=grounding_metadata,
thought_summaries=thought_summaries,
analyzed_by=analyzed_by,
status=status,
error_message=error_message,
Expand Down Expand Up @@ -215,6 +217,7 @@ def process_snippet(supabase_client, snippet, local_file, gemini_key, skip_revie
snippet_id=snippet["id"],
gemini_response=analyzing_response["response"],
grounding_metadata=analyzing_response["grounding_metadata"],
thought_summaries=analyzing_response["thought_summaries"],
analyzed_by=analyzing_response["analyzed_by"],
status=status,
error_message=None,
Expand Down Expand Up @@ -359,12 +362,15 @@ def run(

try:
# Step 1: Analyze with Google Search
analysis_text, grounding_metadata = cls.__analyze_with_search(
analysis_result = cls.__analyze_with_search(
client,
model_name,
user_prompt,
uploaded_audio_file,
)
analysis_text = analysis_result["text"]
grounding_metadata = analysis_result["grounding_metadata"]
thought_summaries = analysis_result["thought_summaries"]
Comment on lines +371 to +373
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

While the current implementation of __analyze_with_search ensures these keys exist, it's safer to use the .get() method for dictionary access. This makes the code more robust against future changes where a key might be missing, preventing potential KeyError exceptions.

Suggested change
analysis_text = analysis_result["text"]
grounding_metadata = analysis_result["grounding_metadata"]
thought_summaries = analysis_result["thought_summaries"]
analysis_text = analysis_result.get("text")
grounding_metadata = analysis_result.get("grounding_metadata")
thought_summaries = analysis_result.get("thought_summaries")


# Try to validate with Pydantic model first
validated_output = cls.__validate_with_pydantic(analysis_text)
Expand All @@ -373,12 +379,14 @@ def run(
return {
"response": validated_output,
"grounding_metadata": grounding_metadata,
"thought_summaries": thought_summaries,
}

# Step 2: Structure with response_schema (if validation failed)
return {
"response": cls.__structure_with_schema(client, analysis_text),
"grounding_metadata": grounding_metadata,
"thought_summaries": thought_summaries,
}
finally:
client.files.delete(name=uploaded_audio_file.name)
Expand Down Expand Up @@ -407,11 +415,16 @@ def __analyze_with_search(
system_instruction=cls.SYSTEM_INSTRUCTION,
max_output_tokens=16384,
tools=[Tool(google_search=GoogleSearch())],
thinking_config=ThinkingConfig(thinking_budget=4096),
thinking_config=ThinkingConfig(thinking_budget=4096, include_thoughts=True),
safety_settings=get_safety_settings(),
),
)

thoughts = ""
for part in response.candidates[0].content.parts:
if part.thought and part.text:
thoughts += part.text
Comment on lines +423 to +426
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

critical

This block has a critical bug. The code accesses response.candidates[0] without checking whether response.candidates is empty. If the API returns a response with no candidates, this will raise an IndexError (or a TypeError if candidates is None). Please add a guard to ensure response.candidates is not empty before accessing it.

Additionally, for consistency, I'm renaming the thoughts variable to thought_summaries to match the key used in the return dictionary.

Suggested change
thoughts = ""
for part in response.candidates[0].content.parts:
if part.thought and part.text:
thoughts += part.text
thought_summaries = ""
if response.candidates:
for part in response.candidates[0].content.parts:
if part.thought and part.text:
thought_summaries += part.text


grounding_metadata = (
response.candidates[0].grounding_metadata.model_dump_json(indent=2) if response.candidates else None
)
Comment on lines +423 to 430
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Add defensive check before accessing response.candidates[0].

The thought extraction accesses response.candidates[0].content.parts directly without checking if candidates exist. This could raise an IndexError if the response has no candidates, which would occur before reaching the error handling at line 432. The grounding_metadata extraction (line 429) already has a defensive if response.candidates check.

         thoughts = ""
-        for part in response.candidates[0].content.parts:
-            if part.thought and part.text:
-                thoughts += part.text
+        if response.candidates and response.candidates[0].content:
+            for part in response.candidates[0].content.parts:
+                if part.thought and part.text:
+                    thoughts += part.text

         grounding_metadata = (
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
thoughts = ""
for part in response.candidates[0].content.parts:
if part.thought and part.text:
thoughts += part.text
grounding_metadata = (
response.candidates[0].grounding_metadata.model_dump_json(indent=2) if response.candidates else None
)
thoughts = ""
if response.candidates and response.candidates[0].content:
for part in response.candidates[0].content.parts:
if part.thought and part.text:
thoughts += part.text
grounding_metadata = (
response.candidates[0].grounding_metadata.model_dump_json(indent=2) if response.candidates else None
)
🤖 Prompt for AI Agents
In src/processing_pipeline/stage_3.py around lines 423 to 430, the code accesses
response.candidates[0] without verifying that response.candidates is non-empty;
modify the block so you first check if response.candidates (and the first
candidate's content and parts) exist before iterating—if not, leave thoughts as
an empty string (or None) and proceed; keep the existing grounding_metadata
conditional as-is and ensure no IndexError can occur by guarding all direct
accesses to response.candidates[0].

Expand All @@ -425,7 +438,11 @@ def __analyze_with_search(
print(f"Response finish reason: {finish_reason}")
raise ValueError("No response from Gemini in step 1.")

return response.text, grounding_metadata
return {
"text": response.text,
"grounding_metadata": grounding_metadata,
"thought_summaries": thoughts,
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

To complete the variable rename for consistency as suggested in the previous comment, this should now use thought_summaries.

Suggested change
"thought_summaries": thoughts,
"thought_summaries": thought_summaries,

}

@classmethod
def __validate_with_pydantic(cls, response_text: str):
Expand Down
2 changes: 2 additions & 0 deletions src/processing_pipeline/supabase_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,7 @@ def update_snippet(
context,
political_leaning,
grounding_metadata,
thought_summaries,
analyzed_by,
status,
error_message
Expand All @@ -219,6 +220,7 @@ def update_snippet(
"context": context,
"political_leaning": political_leaning,
"grounding_metadata": grounding_metadata,
"thought_summaries": thought_summaries,
"analyzed_by": analyzed_by,
"previous_analysis": None,
"status": status,
Expand Down