Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions src/processing_pipeline/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,11 @@
class GeminiModel(StrEnum):
GEMINI_1_5_PRO = "gemini-1.5-pro-002"
GEMINI_1_5_FLASH = "gemini-1.5-flash"

GEMINI_2_5_FLASH = "gemini-2.5-flash"
GEMINI_2_5_PRO = "gemini-2.5-pro"
GEMINI_2_5_FLASH_PREVIEW_09_2025 = "gemini-2.5-flash-preview-09-2025"

GEMINI_FLASH_LATEST = "gemini-flash-latest"
GEMINI_FLASH_LITE_LATEST = "gemini-flash-lite-latest"

Expand Down
4 changes: 2 additions & 2 deletions src/processing_pipeline/stage_1/flows.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@ def regenerate_timestamped_transcript(stage_1_llm_response_ids):
print("No flagged snippets found during the initial detection phase.")
else:
# Timestamped transcription
transcriptor = GeminiModel.GEMINI_FLASH_LATEST
transcriptor = GeminiModel.GEMINI_2_5_FLASH_PREVIEW_09_2025
timestamped_transcription = transcribe_audio_file_with_timestamp_with_gemini(
gemini_client=gemini_client,
audio_file=local_file,
Expand All @@ -247,7 +247,7 @@ def regenerate_timestamped_transcript(stage_1_llm_response_ids):
timestamped_transcription=timestamped_transcription["timestamped_transcription"],
metadata=metadata,
prompt_version=detection_prompt_version,
model_name=GeminiModel.GEMINI_FLASH_LATEST,
model_name=GeminiModel.GEMINI_2_5_FLASH_PREVIEW_09_2025,
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

To improve maintainability and avoid repeating the model name, consider reusing the `transcriptor` variable defined on line 233 of `flows.py`. That way, if the model needs to change in the future, it only has to be updated in one place within this block.

Suggested change
model_name=GeminiModel.GEMINI_2_5_FLASH_PREVIEW_09_2025,
model_name=transcriptor,

)
print(f"Detection result:\n{json.dumps(detection_result, indent=2)}\n")
update_stage_1_llm_response_detection_result(supabase_client, id, detection_result)
Expand Down
12 changes: 6 additions & 6 deletions src/processing_pipeline/stage_1/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ def initial_transcription_with_gemini(
response = Stage1PreprocessTranscriptionExecutor.run(
gemini_client=gemini_client,
audio_file=audio_file,
model_name=GeminiModel.GEMINI_FLASH_LATEST,
model_name=GeminiModel.GEMINI_2_5_FLASH_PREVIEW_09_2025,
prompt_version=prompt_version,
)
return response["transcription"]
Expand All @@ -155,7 +155,7 @@ def initial_disinformation_detection_with_gemini(

response = Stage1PreprocessDetectionExecutor.run(
gemini_client=gemini_client,
model_name=GeminiModel.GEMINI_FLASH_LATEST,
model_name=GeminiModel.GEMINI_2_5_FLASH_PREVIEW_09_2025,
transcription=initial_transcription,
metadata=metadata,
prompt_version=prompt_version,
Expand All @@ -168,7 +168,7 @@ def transcribe_audio_file_with_timestamp_with_gemini(
gemini_client: genai.Client | None,
audio_file: str,
prompt_version: dict,
model_name=GeminiModel.GEMINI_FLASH_LATEST,
model_name=GeminiModel.GEMINI_2_5_FLASH_PREVIEW_09_2025,
):
print(f"Transcribing the audio file {audio_file} using {model_name}")
if not gemini_client:
Expand All @@ -191,7 +191,7 @@ def disinformation_detection_with_gemini(
timestamped_transcription: str,
metadata: dict,
prompt_version: dict,
model_name=GeminiModel.GEMINI_FLASH_LATEST,
model_name=GeminiModel.GEMINI_2_5_FLASH_PREVIEW_09_2025,
):
print(f"Processing the timestamped transcription with {model_name}")
if not gemini_client:
Expand Down Expand Up @@ -293,7 +293,7 @@ def process_audio_file(
)
else:
# Timestamped transcription
transcriptor = GeminiModel.GEMINI_FLASH_LATEST
transcriptor = GeminiModel.GEMINI_2_5_FLASH_PREVIEW_09_2025
timestamped_transcription = transcribe_audio_file_with_timestamp_with_gemini(
gemini_client=gemini_client,
audio_file=local_file,
Expand All @@ -307,7 +307,7 @@ def process_audio_file(
timestamped_transcription=timestamped_transcription["timestamped_transcription"],
metadata=metadata,
prompt_version=detection_prompt_version,
model_name=GeminiModel.GEMINI_FLASH_LATEST,
model_name=GeminiModel.GEMINI_2_5_FLASH_PREVIEW_09_2025,
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

To improve maintainability and avoid repeating the model name, consider reusing the `transcriptor` variable defined on line 296 of `tasks.py`. That way, if the model needs to change in the future, it only has to be updated in one place within this block.

Suggested change
model_name=GeminiModel.GEMINI_2_5_FLASH_PREVIEW_09_2025,
model_name=transcriptor,

)
print(f"Main detection result:\n{json.dumps(detection_result, indent=2, ensure_ascii=False)}\n")

Expand Down
2 changes: 1 addition & 1 deletion src/processing_pipeline/stage_3/executors.py
Original file line number Diff line number Diff line change
Expand Up @@ -296,7 +296,7 @@ def __structure_with_schema(
user_prompt = f"Please structure the following analysis text into the required JSON format:\n\n{analysis_text}"

response = client.models.generate_content(
model=GeminiModel.GEMINI_FLASH_LATEST,
model=GeminiModel.GEMINI_2_5_FLASH_PREVIEW_09_2025,
contents=[user_prompt],
config=GenerateContentConfig(
response_mime_type="application/json",
Expand Down
2 changes: 1 addition & 1 deletion src/processing_pipeline/stage_3/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ def __get_metadata(snippet):
@optional_task(log_prints=True)
def analyze_snippet(gemini_key, audio_file, metadata, prompt_version: dict):
main_model = GeminiModel.GEMINI_2_5_PRO
fallback_model = GeminiModel.GEMINI_FLASH_LATEST
fallback_model = GeminiModel.GEMINI_2_5_FLASH_PREVIEW_09_2025

try:
print(f"Attempting analysis with {main_model}")
Expand Down