From 9d30c5d1207717c3f843ddce1ce392f932eb2beb Mon Sep 17 00:00:00 2001 From: Roy Date: Mon, 2 Jun 2025 10:08:05 +0000 Subject: [PATCH 1/2] Enhance transcription configuration and error handling - Added ENABLE_ENGLISH_TRANSCRIPTION_WITH_LITELLM environment variable to control English transcription behavior. - Updated transcribe_conversation_chunk function to conditionally use RunPod transcription based on non-English language and configuration. - Improved error handling for RunPod job status checks, including logging for unexpected responses and exceptions. - Reverted whisper prompts to the old prompts --- echo/server/dembrane/config.py | 4 ++ echo/server/dembrane/transcribe.py | 37 +++++++++++++++---- .../default_whisper_prompt.de.jinja | 2 +- .../default_whisper_prompt.en.jinja | 2 +- .../default_whisper_prompt.es.jinja | 2 +- .../default_whisper_prompt.fr.jinja | 2 +- .../default_whisper_prompt.nl.jinja | 2 +- 7 files changed, 39 insertions(+), 12 deletions(-) diff --git a/echo/server/dembrane/config.py b/echo/server/dembrane/config.py index 48b997b0..b4a22206 100644 --- a/echo/server/dembrane/config.py +++ b/echo/server/dembrane/config.py @@ -162,6 +162,10 @@ DISABLE_CORS = os.environ.get("DISABLE_CORS", "false").lower() in ["true", "1"] logger.debug(f"DISABLE_CORS: {DISABLE_CORS}") +ENABLE_ENGLISH_TRANSCRIPTION_WITH_LITELLM = os.environ.get("ENABLE_ENGLISH_TRANSCRIPTION_WITH_LITELLM", "false").lower() in ["true", "1"] +assert ENABLE_ENGLISH_TRANSCRIPTION_WITH_LITELLM, "ENABLE_ENGLISH_TRANSCRIPTION_WITH_LITELLM environment variable is not set" +logger.debug(f"ENABLE_ENGLISH_TRANSCRIPTION_WITH_LITELLM: {ENABLE_ENGLISH_TRANSCRIPTION_WITH_LITELLM}") + ENABLE_RUNPOD_WHISPER_TRANSCRIPTION = os.environ.get( "ENABLE_RUNPOD_WHISPER_TRANSCRIPTION", "false" ).lower() in ["true", "1"] diff --git a/echo/server/dembrane/transcribe.py b/echo/server/dembrane/transcribe.py index 6088a748..85943bee 100644 --- a/echo/server/dembrane/transcribe.py +++ b/echo/server/dembrane/transcribe.py 
@@ -19,6 +19,7 @@ ENABLE_RUNPOD_WHISPER_TRANSCRIPTION, ENABLE_LITELLM_WHISPER_TRANSCRIPTION, RUNPOD_WHISPER_MAX_REQUEST_THRESHOLD, + ENABLE_ENGLISH_TRANSCRIPTION_WITH_LITELLM, ) # from dembrane.openai import client @@ -201,7 +202,8 @@ def transcribe_conversation_chunk(conversation_chunk_id: str) -> str | None: logger.debug(f"whisper_prompt: {whisper_prompt}") - if ENABLE_RUNPOD_WHISPER_TRANSCRIPTION: + if ENABLE_RUNPOD_WHISPER_TRANSCRIPTION and not (language == "en" and ENABLE_ENGLISH_TRANSCRIPTION_WITH_LITELLM): + logger.debug("Using RunPod for transcription") try: directus_response = directus.get_items( "conversation_chunk", @@ -224,12 +226,32 @@ def transcribe_conversation_chunk(conversation_chunk_id: str) -> str | None: } if runpod_job_status_link: - response = requests.get(runpod_job_status_link, headers=headers) - job_status = response.json()['status'] - logger.debug(f"job_status: {job_status}") - if job_status == "IN_PROGRESS": - logger.info(f"RunPod job {runpod_job_status_link} is in progress") - return None + try: + response = requests.get(runpod_job_status_link, headers=headers) + response.raise_for_status() # Raise an exception for bad status codes + + response_data = response.json() + logger.debug(f"RunPod status response: {response_data}") + + job_status = response_data.get('status') + if job_status is None: + logger.warning(f"No 'status' field in RunPod response for {runpod_job_status_link}: {response_data}") + # If no status field, assume job is not in progress and continue + else: + logger.debug(f"job_status: {job_status}") + if job_status == "IN_PROGRESS": + logger.info(f"RunPod job {runpod_job_status_link} is in progress") + return None + + except requests.RequestException as e: + logger.error(f"Failed to get RunPod job status from {runpod_job_status_link}: {e}") + # Continue with processing if status check fails + except ValueError as e: + logger.error(f"Invalid JSON response from RunPod status endpoint {runpod_job_status_link}: {e}") + # 
Continue with processing if JSON parsing fails + except Exception as e: + logger.error(f"Unexpected error checking RunPod job status {runpod_job_status_link}: {e}") + # Continue with processing if any other error occurs if runpod_request_count < RUNPOD_WHISPER_MAX_REQUEST_THRESHOLD: if source == "PORTAL_AUDIO": @@ -260,6 +282,7 @@ def transcribe_conversation_chunk(conversation_chunk_id: str) -> str | None: return None elif ENABLE_LITELLM_WHISPER_TRANSCRIPTION: + logger.debug("Using LITELLM for transcription") transcript = transcribe_audio_litellm( chunk["path"], language=language, whisper_prompt=whisper_prompt ) diff --git a/echo/server/prompt_templates/default_whisper_prompt.de.jinja b/echo/server/prompt_templates/default_whisper_prompt.de.jinja index 1cb52926..dc01ee87 100644 --- a/echo/server/prompt_templates/default_whisper_prompt.de.jinja +++ b/echo/server/prompt_templates/default_whisper_prompt.de.jinja @@ -1 +1 @@ -Hier is een transcriptie van een technische presentatie. De spreker gebruikt natuurlijk Nederlands met enkele Engelse technical terms zoals gebruikelijk. De presentator zegt: "In een vrij gemakkelijk network is het mogelijk om de requirements voor een change te evalueren. We gebruiken modern tooling voor ons system management. Het is quite possible dat een enkele wijziging impact heeft, maar we blijven altijd in het Nederlands praten over onze processes en workflows. De change management procedure helpt ons om disruptions te voorkomen en stability te behouden in ons environment. \ No newline at end of file +Hallo, lasst uns beginnen. Zuerst ein paar Einführungen und dann können wir mit dem Thema des Tages beginnen. 
\ No newline at end of file diff --git a/echo/server/prompt_templates/default_whisper_prompt.en.jinja b/echo/server/prompt_templates/default_whisper_prompt.en.jinja index 12b94957..a078f04c 100644 --- a/echo/server/prompt_templates/default_whisper_prompt.en.jinja +++ b/echo/server/prompt_templates/default_whisper_prompt.en.jinja @@ -1 +1 @@ -Here is a transcript from a business presentation about technology and change management. The speaker maintains English throughout while occasionally using technical terms. The presenter explains: "In our network environment, it's essential to carefully plan any changes. Our system administrators use modern tools for network management. The change management process helps us prevent disruptions and maintain system stability. We've implemented quality assurance procedures and risk assessment protocols. Even when discussing complex technical concepts, we continue speaking in English. \ No newline at end of file +Hi, lets get started. First we'll have a round of introductions and then we can get into the topic for today. \ No newline at end of file diff --git a/echo/server/prompt_templates/default_whisper_prompt.es.jinja b/echo/server/prompt_templates/default_whisper_prompt.es.jinja index fe09a0a5..d372cbdc 100644 --- a/echo/server/prompt_templates/default_whisper_prompt.es.jinja +++ b/echo/server/prompt_templates/default_whisper_prompt.es.jinja @@ -1 +1 @@ -Aquí hay una transcripción de una presentación técnica. El orador usa naturalmente español con algunos términos técnicos en inglés como es habitual. El presentador explica: "En nuestro network environment, es esencial planificar cuidadosamente cualquier change. Nuestros administradores usan tools modernos para el management. El process de gestión nos ayuda a prevenir disruptions pero siempre seguimos hablando en español sobre nuestros procedures y workflows. Es quite importante mantener la stability de nuestro sistema. \ No newline at end of file +Hola, comencemos. 
Primero, un round de introducción y luego podremos empezar con el tema de hoy. \ No newline at end of file diff --git a/echo/server/prompt_templates/default_whisper_prompt.fr.jinja b/echo/server/prompt_templates/default_whisper_prompt.fr.jinja index c8f7ab03..78283cb4 100644 --- a/echo/server/prompt_templates/default_whisper_prompt.fr.jinja +++ b/echo/server/prompt_templates/default_whisper_prompt.fr.jinja @@ -1 +1 @@ -Voici une transcription d'une présentation technique. L'orateur utilise naturellement le français avec quelques termes anglais techniques comme c'est habituel. Le présentateur explique : "Dans notre network environment, il est essentiel de planifier soigneusement tout changement. Nos administrateurs utilisent des tools modernes pour le management. Le process de gestion nous aide à prévenir les disruptions mais nous continuons toujours à parler en français de nos procédures et workflows. C'est quite important de maintenir la stability de notre système. \ No newline at end of file +Bonjour, commençons. D'abord un tour de table et ensuite nous pourrons aborder le sujet du jour. \ No newline at end of file diff --git a/echo/server/prompt_templates/default_whisper_prompt.nl.jinja b/echo/server/prompt_templates/default_whisper_prompt.nl.jinja index 1cb52926..5aa43822 100644 --- a/echo/server/prompt_templates/default_whisper_prompt.nl.jinja +++ b/echo/server/prompt_templates/default_whisper_prompt.nl.jinja @@ -1 +1 @@ -Hier is een transcriptie van een technische presentatie. De spreker gebruikt natuurlijk Nederlands met enkele Engelse technical terms zoals gebruikelijk. De presentator zegt: "In een vrij gemakkelijk network is het mogelijk om de requirements voor een change te evalueren. We gebruiken modern tooling voor ons system management. Het is quite possible dat een enkele wijziging impact heeft, maar we blijven altijd in het Nederlands praten over onze processes en workflows. 
De change management procedure helpt ons om disruptions te voorkomen en stability te behouden in ons environment. \ No newline at end of file +Hallo, laten we beginnen. Eerst even een introductieronde en dan kunnen we aan de slag met de thema van vandaag. \ No newline at end of file From 9af3ec91107f10ea04ad98994e5064333a7e2eda Mon Sep 17 00:00:00 2001 From: Roy Date: Mon, 2 Jun 2025 10:25:39 +0000 Subject: [PATCH 2/2] Refactor transcription configuration and improve logging - Made ENABLE_ENGLISH_TRANSCRIPTION_WITH_LITELLM optional, defaulting to false, and updated related logging. - Enhanced formatting in the queue_transcribe_audio_runpod function for better readability. --- echo/server/dembrane/config.py | 8 ++-- echo/server/dembrane/transcribe.py | 59 +++++++++++++++++++----------- 2 files changed, 42 insertions(+), 25 deletions(-) diff --git a/echo/server/dembrane/config.py b/echo/server/dembrane/config.py index b4a22206..6e279054 100644 --- a/echo/server/dembrane/config.py +++ b/echo/server/dembrane/config.py @@ -162,9 +162,11 @@ DISABLE_CORS = os.environ.get("DISABLE_CORS", "false").lower() in ["true", "1"] logger.debug(f"DISABLE_CORS: {DISABLE_CORS}") -ENABLE_ENGLISH_TRANSCRIPTION_WITH_LITELLM = os.environ.get("ENABLE_ENGLISH_TRANSCRIPTION_WITH_LITELLM", "false").lower() in ["true", "1"] -assert ENABLE_ENGLISH_TRANSCRIPTION_WITH_LITELLM, "ENABLE_ENGLISH_TRANSCRIPTION_WITH_LITELLM environment variable is not set" -logger.debug(f"ENABLE_ENGLISH_TRANSCRIPTION_WITH_LITELLM: {ENABLE_ENGLISH_TRANSCRIPTION_WITH_LITELLM}") +ENABLE_ENGLISH_TRANSCRIPTION_WITH_LITELLM = os.environ.get( + "ENABLE_ENGLISH_TRANSCRIPTION_WITH_LITELLM", "false" +).lower() in ["true", "1"] +# ENABLE_ENGLISH_TRANSCRIPTION_WITH_LITELLM is optional and defaults to false +logger.debug("ENABLE_ENGLISH_TRANSCRIPTION_WITH_LITELLM: %s", ENABLE_ENGLISH_TRANSCRIPTION_WITH_LITELLM) ENABLE_RUNPOD_WHISPER_TRANSCRIPTION = os.environ.get( "ENABLE_RUNPOD_WHISPER_TRANSCRIPTION", "false" diff --git 
a/echo/server/dembrane/transcribe.py b/echo/server/dembrane/transcribe.py index 85943bee..ee8e42a1 100644 --- a/echo/server/dembrane/transcribe.py +++ b/echo/server/dembrane/transcribe.py @@ -34,7 +34,10 @@ class TranscriptionError(Exception): def queue_transcribe_audio_runpod( - audio_file_uri: str, language: Optional[str], whisper_prompt: Optional[str], is_priority: bool = False + audio_file_uri: str, + language: Optional[str], + whisper_prompt: Optional[str], + is_priority: bool = False, ) -> str: """Transcribe audio using RunPod""" logger = logging.getLogger("transcribe.transcribe_audio_runpod") @@ -66,9 +69,7 @@ def queue_transcribe_audio_runpod( return job_id except Exception as e: logger.error(f"Failed to queue transcription job for RunPod: {e}") - raise TranscriptionError( - f"Failed to queue transcription job for RunPod: {e}" - ) from e + raise TranscriptionError(f"Failed to queue transcription job for RunPod: {e}") from e except Exception as e: logger.error(f"Failed to get signed url for {audio_file_uri}: {e}") raise TranscriptionError(f"Failed to get signed url for {audio_file_uri}: {e}") from e @@ -191,8 +192,7 @@ def transcribe_conversation_chunk(conversation_chunk_id: str) -> str | None: if conversation["project_id"]["default_conversation_transcript_prompt"]: prompt_parts.append( - ' ' + conversation["project_id"]["default_conversation_transcript_prompt"] - + "." + " " + conversation["project_id"]["default_conversation_transcript_prompt"] + "." 
) # if previous_chunk_transcript: @@ -202,23 +202,29 @@ def transcribe_conversation_chunk(conversation_chunk_id: str) -> str | None: logger.debug(f"whisper_prompt: {whisper_prompt}") - if ENABLE_RUNPOD_WHISPER_TRANSCRIPTION and not (language == "en" and ENABLE_ENGLISH_TRANSCRIPTION_WITH_LITELLM): + if ENABLE_RUNPOD_WHISPER_TRANSCRIPTION and not ( + language == "en" and ENABLE_ENGLISH_TRANSCRIPTION_WITH_LITELLM + ): logger.debug("Using RunPod for transcription") try: directus_response = directus.get_items( "conversation_chunk", { - "query": {"filter": {"id": {"_eq": conversation_chunk_id}}, - "fields": ["source","runpod_job_status_link","runpod_request_count"]}, + "query": { + "filter": {"id": {"_eq": conversation_chunk_id}}, + "fields": ["source", "runpod_job_status_link", "runpod_request_count"], + }, }, ) except Exception as e: logger.error(f"Failed to get conversation chunk for {conversation_chunk_id}: {e}") - raise ValueError(f"Failed to get conversation chunk for {conversation_chunk_id}: {e}") from e + raise ValueError( + f"Failed to get conversation chunk for {conversation_chunk_id}: {e}" + ) from e - runpod_request_count = (directus_response[0]["runpod_request_count"]) - source = (directus_response[0]["source"]) - runpod_job_status_link = (directus_response[0]["runpod_job_status_link"]) + runpod_request_count = directus_response[0]["runpod_request_count"] + source = directus_response[0]["source"] + runpod_job_status_link = directus_response[0]["runpod_job_status_link"] headers = { "Content-Type": "application/json", @@ -227,30 +233,36 @@ def transcribe_conversation_chunk(conversation_chunk_id: str) -> str | None: if runpod_job_status_link: try: - response = requests.get(runpod_job_status_link, headers=headers) + response = requests.get(runpod_job_status_link, headers=headers, timeout=30) response.raise_for_status() # Raise an exception for bad status codes - + response_data = response.json() logger.debug(f"RunPod status response: {response_data}") - - 
job_status = response_data.get('status') + + job_status = response_data.get("status") if job_status is None: - logger.warning(f"No 'status' field in RunPod response for {runpod_job_status_link}: {response_data}") + logger.warning( + f"No 'status' field in RunPod response for {runpod_job_status_link}: {response_data}" + ) # If no status field, assume job is not in progress and continue else: logger.debug(f"job_status: {job_status}") if job_status == "IN_PROGRESS": logger.info(f"RunPod job {runpod_job_status_link} is in progress") return None - + except requests.RequestException as e: logger.error(f"Failed to get RunPod job status from {runpod_job_status_link}: {e}") # Continue with processing if status check fails except ValueError as e: - logger.error(f"Invalid JSON response from RunPod status endpoint {runpod_job_status_link}: {e}") + logger.error( + f"Invalid JSON response from RunPod status endpoint {runpod_job_status_link}: {e}" + ) # Continue with processing if JSON parsing fails except Exception as e: - logger.error(f"Unexpected error checking RunPod job status {runpod_job_status_link}: {e}") + logger.error( + f"Unexpected error checking RunPod job status {runpod_job_status_link}: {e}" + ) # Continue with processing if any other error occurs if runpod_request_count < RUNPOD_WHISPER_MAX_REQUEST_THRESHOLD: @@ -259,7 +271,10 @@ def transcribe_conversation_chunk(conversation_chunk_id: str) -> str | None: else: is_priority = False job_id = queue_transcribe_audio_runpod( - chunk["path"], language=language, whisper_prompt=whisper_prompt, is_priority=is_priority + chunk["path"], + language=language, + whisper_prompt=whisper_prompt, + is_priority=is_priority, ) # Update job_id on directus directus.update_item(