diff --git a/echo/server/dembrane/config.py b/echo/server/dembrane/config.py
index 99f3db5a..112fda7f 100644
--- a/echo/server/dembrane/config.py
+++ b/echo/server/dembrane/config.py
@@ -142,6 +142,24 @@ logger.debug(f"DISABLE_CORS: {DISABLE_CORS}")
 
 
 
+# Whisper transcription via LiteLLM. `or` (not a .get() default) is used so a
+# variable that is set but empty, e.g. `LITELLM_WHISPER_MODEL=` in an env file,
+# still falls back to its default instead of failing the asserts below.
+LITELLM_WHISPER_URL = os.environ.get("LITELLM_WHISPER_URL")
+LITELLM_WHISPER_API_KEY = os.environ.get("LITELLM_WHISPER_API_KEY") or OPENAI_API_KEY
+LITELLM_WHISPER_API_VERSION = os.environ.get("LITELLM_WHISPER_API_VERSION") or "2024-06-01"
+LITELLM_WHISPER_MODEL = os.environ.get("LITELLM_WHISPER_MODEL") or "whisper-1"
+assert LITELLM_WHISPER_API_KEY, "LITELLM_WHISPER_API_KEY environment variable is not set"
+logger.debug("LITELLM_WHISPER_API_KEY: set")
+assert LITELLM_WHISPER_API_VERSION, "LITELLM_WHISPER_API_VERSION environment variable is not set"
+logger.debug("LITELLM_WHISPER_API_VERSION: set")
+assert LITELLM_WHISPER_MODEL, "LITELLM_WHISPER_MODEL environment variable is not set"
+logger.debug("LITELLM_WHISPER_MODEL: set")
+# The endpoint URL is only mandatory when a non-default model is configured.
+if LITELLM_WHISPER_MODEL != "whisper-1":
+    assert LITELLM_WHISPER_URL, "LITELLM_WHISPER_URL environment variable is not set"
+    logger.debug("LITELLM_WHISPER_URL: set")
+
 # *****************LIGHTRAG CONFIGURATIONS*****************
 # ---------------Secrets---------------
 
diff --git a/echo/server/dembrane/transcribe.py b/echo/server/dembrane/transcribe.py
index 714d0ad8..2bdd2072 100644
--- a/echo/server/dembrane/transcribe.py
+++ b/echo/server/dembrane/transcribe.py
@@ -1,8 +1,20 @@
+import io
+import os
 import logging
+import mimetypes
 from typing import Optional
 
-from dembrane.s3 import get_stream_from_s3, get_sanitized_s3_key
-from dembrane.openai import client
+from litellm import transcription
+
+from dembrane.s3 import get_stream_from_s3
+from dembrane.config import (
+    LITELLM_WHISPER_URL,
+    LITELLM_WHISPER_MODEL,
+    LITELLM_WHISPER_API_KEY,
+    LITELLM_WHISPER_API_VERSION,
+)
+
+# from dembrane.openai import client
 from dembrane.directus import directus
 
 logger = logging.getLogger("transcribe")
@@
-12,121 +24,62 @@ class TranscriptionError(Exception):
     pass
 
 
-def transcribe_audio(
-    audio_file_path: str, language: Optional[str], whisper_prompt: Optional[str]
-) -> str:
-    return transcribe_audio_openai(audio_file_path, language, whisper_prompt)
-
-
-# def transcribe_audio_aiconl(
-#     audio_file_path: str,
-#     language: Optional[str],  # noqa
-#     whisper_prompt: Optional[str],  # noqa
-# ) -> str:
-#     import requests
-
-#     API_BASE_URL = "https://whisper.ai-hackathon.haven.vng.cloud"
-#     API_KEY = "JOUW_VEILIGE_API_SLEUTEL"
-
-#     try:
-#         with open(audio_file_path, "rb") as f:
-#             headers = {"accept": "application/json", "access_token": API_KEY}
-#             files = {"file": f}
-
-#             response = requests.post(f"{API_BASE_URL}/transcribe", headers=headers, files=files)
-#             response.raise_for_status()
-
-#             result = response.json()
-#             transcription = result.get("text", "")
-
-#             if not transcription:
-#                 logger.info("Transcription is empty!")
-
-#             return transcription
-#     except FileNotFoundError as exc:
-#         logger.error(f"File not found: {audio_file_path}")
-#         raise FileNotFoundError from exc
-#     except requests.RequestException as exc:
-#         logger.error(f"Failed to transcribe audio: {exc}")
-#         raise TranscriptionError(f"Failed to transcribe audio: {exc}") from exc
-#     except Exception as exc:
-#         logger.error(f"Unexpected error: {exc}")
-#         raise TranscriptionError(f"Unexpected error: {exc}") from exc
-
-
-def transcribe_audio_openai(
+def transcribe_audio_litellm(
     audio_file_uri: str, language: Optional[str], whisper_prompt: Optional[str]
 ) -> str:
-    logger = logging.getLogger("transcribe.transcribe_audio_openai")
+    """Transcribe an audio object stored in S3 with Whisper via LiteLLM.
+
+    Args:
+        audio_file_uri: Location of the audio, as accepted by get_stream_from_s3.
+        language: Language hint. None, "" and "multi" are sent as "no hint".
+        whisper_prompt: Optional prompt text. An empty prompt is sent as None.
+
+    Returns:
+        The "text" field of the transcription response.
+
+    Raises:
+        TranscriptionError: If the audio cannot be read from S3 or the
+            transcription request fails.
+    """
+    logger = logging.getLogger("transcribe.transcribe_audio_litellm")
     try:
         audio_stream = get_stream_from_s3(audio_file_uri)
+        # Close the S3 stream as soon as the bytes are buffered.
+        with audio_stream:
+            audio_bytes = audio_stream.read()
+        filename = os.path.basename(audio_file_uri)
+        mime_type, _ = mimetypes.guess_type(filename)
+        file_upload = (filename, io.BytesIO(audio_bytes), mime_type)
     except Exception as exc:
         logger.error(f"Failed to get audio stream from S3 for {audio_file_uri}: {exc}")
         raise TranscriptionError(f"Failed to get audio stream from S3: {exc}") from exc
-
-    with audio_stream as f:
-        logger.info(f"Transcribing audio from {audio_file_uri}")
-
-        options = {
-            "model": "whisper-1",
-            "file": (get_sanitized_s3_key(audio_file_uri), f.read()),
-            "response_format": "text",
-            "language": language if language not in [None, "multi", ""] else None,
-            "prompt": whisper_prompt if whisper_prompt else None,
-        }
-
-        try:
-            transcription = client.audio.transcriptions.create(**options)  # type: ignore
-        except Exception as exc:
-            logger.error(f"Failed to transcribe audio: {exc}")
-            raise TranscriptionError(f"Failed to transcribe audio: {exc}") from exc
-
-        if transcription is None or transcription == "":
-            logger.info("Transcription is empty!")
-
-        return str(transcription)
-
-
-# def transcribe_audio_azure_whisper(
-#     audio_file_path: str, language: Optional[str], whisper_prompt: Optional[str]
-# ) -> str:
-#     base_url = "https://whisper-asr-service.westeurope.azurecontainer.io/v1"
-#     endpoint = f"{base_url}/asr"
-
-#     try:
-#         with open(audio_file_path, "rb") as audio_file:
-#             files = {"audio_file": audio_file}
-#             params = {
-#                 "output": "json",
-#                 "task": "transcribe",
-#                 "language": language if language not in [None, "multi", ""] else None,
-#                 "word_timestamps": "false",
-#                 "encode": "true",
-#             }
-
-#             response = requests.post(endpoint, files=files, data=params)
-#             response.raise_for_status()
-
-#             result = response.json()
-#             transcription = result.get("text", "")
-
-#             if not transcription:
-#                 logger.info("Transcription is empty!")
-
-#             return transcription
-
-#     except FileNotFoundError as exc:
-#         logger.error(f"File not found: {audio_file_path}")
-#         raise FileNotFoundError from exc
-#     except requests.RequestException as exc:
-#         logger.error(f"Failed to transcribe audio: {exc}")
-#         raise TranscriptionError(f"Failed to transcribe audio: {exc}") from exc
-#     except Exception as exc:
-#         logger.error(f"Unexpected error: {exc}")
-#         raise TranscriptionError(f"Unexpected error: {exc}") from exc
-
+
+    # "multi" and "" are placeholders, not language codes: send no hint for
+    # them, as the OpenAI-client call this function replaces did.
+    language_hint = language if language not in [None, "multi", ""] else None
+    prompt = whisper_prompt if whisper_prompt else None
+
+    try:
+        response = transcription(
+            model=LITELLM_WHISPER_MODEL,
+            file=file_upload,
+            api_key=LITELLM_WHISPER_API_KEY,
+            api_base=LITELLM_WHISPER_URL,
+            api_version=LITELLM_WHISPER_API_VERSION,
+            language=language_hint,
+            prompt=prompt,
+        )
+        text = response["text"]
+    except Exception as e:
+        logger.error(f"LiteLLM transcription failed: {e}")
+        raise TranscriptionError(f"LiteLLM transcription failed: {e}") from e
+
+    if not text:
+        logger.info("Transcription is empty!")
+    return text
+
 
 DEFAULT_WHISPER_PROMPTS = {
     "en": "Hi, lets get started. First we'll have a round of introductions and then we can get into the topic for today.",
@@ -240,3 +193,48 @@ def transcribe_conversation_chunk(conversation_chunk_id: str) -> str:
 
     logger.info(f"Processed chunk for transcription: {conversation_chunk_id}")
     return conversation_chunk_id
+
+
+def transcribe_audio(
+    audio_file_path: str, language: Optional[str], whisper_prompt: Optional[str]
+) -> str:
+    """Transcribe audio by delegating to transcribe_audio_litellm."""
+    return transcribe_audio_litellm(audio_file_path, language, whisper_prompt)
+
+
+
+# def transcribe_audio_aiconl(
+#     audio_file_path: str,
+#     language: Optional[str],  # noqa
+#     whisper_prompt: Optional[str],  # noqa
+# ) -> str:
+#     import requests
+
+#     API_BASE_URL = "https://whisper.ai-hackathon.haven.vng.cloud"
+#     API_KEY = "JOUW_VEILIGE_API_SLEUTEL"
+
+#     try:
+#         with open(audio_file_path, "rb") as f:
+#             headers = {"accept": "application/json", "access_token": API_KEY}
+#             files = {"file": f}
+
+#             response = requests.post(f"{API_BASE_URL}/transcribe", headers=headers, files=files)
+#             response.raise_for_status()
+
+#             result = response.json()
+#             transcription = result.get("text", "")
+
+#             if not transcription:
+#                 logger.info("Transcription is empty!")
+
+#             return transcription
+
+#     except FileNotFoundError as exc:
+#         logger.error(f"File not found: {audio_file_path}")
+#         raise FileNotFoundError from exc
+#     except requests.RequestException as exc:
+#         logger.error(f"Failed to transcribe
audio: {exc}") +# raise TranscriptionError(f"Failed to transcribe audio: {exc}") from exc +# except Exception as exc: +# logger.error(f"Unexpected error: {exc}") +# raise TranscriptionError(f"Unexpected error: {exc}") from exc \ No newline at end of file