Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions echo/server/dembrane/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,20 @@
logger.debug(f"DISABLE_CORS: {DISABLE_CORS}")


# ---------------Whisper transcription (via LiteLLM)---------------
# Optional endpoint override; only enforced below for non-default models.
LITELLM_WHISPER_URL = os.environ.get("LITELLM_WHISPER_URL")
# Falls back to the general OpenAI key when no dedicated Whisper key is given.
LITELLM_WHISPER_API_KEY = os.environ.get("LITELLM_WHISPER_API_KEY", OPENAI_API_KEY)
LITELLM_WHISPER_API_VERSION = os.environ.get("LITELLM_WHISPER_API_VERSION", "2024-06-01")
LITELLM_WHISPER_MODEL = os.environ.get("LITELLM_WHISPER_MODEL", "whisper-1")

# The defaults above only apply when a variable is absent; a variable that is
# explicitly set to an empty string still trips these checks.
assert LITELLM_WHISPER_API_KEY, "LITELLM_WHISPER_API_KEY environment variable is not set"
logger.debug("LITELLM_WHISPER_API_KEY: set")
assert LITELLM_WHISPER_API_VERSION, "LITELLM_WHISPER_API_VERSION environment variable is not set"
logger.debug("LITELLM_WHISPER_API_VERSION: set")
assert LITELLM_WHISPER_MODEL, "LITELLM_WHISPER_MODEL environment variable is not set"
logger.debug("LITELLM_WHISPER_MODEL: set")

# A custom model needs an explicit endpoint; the default "whisper-1" does not
# (presumably it is served from the provider's standard endpoint - confirm).
if LITELLM_WHISPER_MODEL != "whisper-1":
    assert LITELLM_WHISPER_URL, "LITELLM_WHISPER_URL environment variable is not set"

# Report the real state so the log never claims "set" for a missing URL.
if LITELLM_WHISPER_URL:
    logger.debug("LITELLM_WHISPER_URL: set")
else:
    logger.debug("LITELLM_WHISPER_URL: not set")

# *****************LIGHTRAG CONFIGURATIONS*****************

# ---------------Secrets---------------
Expand Down
189 changes: 81 additions & 108 deletions echo/server/dembrane/transcribe.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,20 @@
import io
import os
import logging
import mimetypes
from typing import Optional

from dembrane.s3 import get_stream_from_s3, get_sanitized_s3_key
from dembrane.openai import client
from litellm import transcription

from dembrane.s3 import get_stream_from_s3
from dembrane.config import (
LITELLM_WHISPER_URL,
LITELLM_WHISPER_MODEL,
LITELLM_WHISPER_API_KEY,
LITELLM_WHISPER_API_VERSION,
)

# from dembrane.openai import client
from dembrane.directus import directus

logger = logging.getLogger("transcribe")
Expand All @@ -12,121 +24,38 @@ class TranscriptionError(Exception):
pass


def transcribe_audio(
    audio_file_path: str,
    language: Optional[str],
    whisper_prompt: Optional[str],
) -> str:
    """Transcribe one audio file and return the resulting text.

    This is a thin entry point: all three arguments are forwarded,
    unchanged and in order, to ``transcribe_audio_openai``, and whatever
    that function returns (or raises) is passed straight back.
    """
    transcript = transcribe_audio_openai(audio_file_path, language, whisper_prompt)
    return transcript


# def transcribe_audio_aiconl(
# audio_file_path: str,
# language: Optional[str], # noqa
# whisper_prompt: Optional[str], # noqa
# ) -> str:
# import requests

# API_BASE_URL = "https://whisper.ai-hackathon.haven.vng.cloud"
# API_KEY = "JOUW_VEILIGE_API_SLEUTEL"

# try:
# with open(audio_file_path, "rb") as f:
# headers = {"accept": "application/json", "access_token": API_KEY}
# files = {"file": f}

# response = requests.post(f"{API_BASE_URL}/transcribe", headers=headers, files=files)
# response.raise_for_status()

# result = response.json()
# transcription = result.get("text", "")

# if not transcription:
# logger.info("Transcription is empty!")

# return transcription

# except FileNotFoundError as exc:
# logger.error(f"File not found: {audio_file_path}")
# raise FileNotFoundError from exc
# except requests.RequestException as exc:
# logger.error(f"Failed to transcribe audio: {exc}")
# raise TranscriptionError(f"Failed to transcribe audio: {exc}") from exc
# except Exception as exc:
# logger.error(f"Unexpected error: {exc}")
# raise TranscriptionError(f"Unexpected error: {exc}") from exc


def transcribe_audio_litellm(
    audio_file_uri: str, language: Optional[str], whisper_prompt: Optional[str]
) -> str:
    """Transcribe an audio object stored in S3 through LiteLLM.

    The audio is downloaded fully into memory and sent to the model
    configured by ``LITELLM_WHISPER_MODEL`` (with the matching URL, API key
    and API version from the config module).

    Args:
        audio_file_uri: Location of the audio, as understood by
            ``get_stream_from_s3``. Its basename is used as the upload
            filename and to guess the MIME type.
        language: Language hint for the model. ``None``, ``""`` and the
            placeholder ``"multi"`` all mean "no hint".
        whisper_prompt: Optional prompt text; empty values are sent as
            ``None``.

    Returns:
        The ``"text"`` field of the transcription response.

    Raises:
        TranscriptionError: If the audio cannot be read from S3 or the
            transcription call fails.
    """
    logger = logging.getLogger("transcribe.transcribe_audio_litellm")

    try:
        # Close the S3 stream as soon as the payload is in memory.
        with get_stream_from_s3(audio_file_uri) as audio_stream:
            audio_bytes = audio_stream.read()
        filename = os.path.basename(audio_file_uri)
        mime_type, _ = mimetypes.guess_type(filename)
        file_upload = (filename, io.BytesIO(audio_bytes), mime_type)
    except Exception as exc:
        logger.error(f"Failed to get audio stream from S3 for {audio_file_uri}: {exc}")
        raise TranscriptionError(f"Failed to get audio stream from S3: {exc}") from exc

    # "multi" is an application-level placeholder, not a language code the
    # model understands, so it must not be forwarded as a language hint.
    effective_language = language if language not in (None, "multi", "") else None
    effective_prompt = whisper_prompt if whisper_prompt else None

    try:
        response = transcription(
            model=LITELLM_WHISPER_MODEL,
            file=file_upload,
            api_key=LITELLM_WHISPER_API_KEY,
            api_base=LITELLM_WHISPER_URL,
            api_version=LITELLM_WHISPER_API_VERSION,
            language=effective_language,
            prompt=effective_prompt,
        )
        return response["text"]
    except Exception as e:
        logger.error(f"LiteLLM transcription failed: {e}")
        raise TranscriptionError(f"LiteLLM transcription failed: {e}") from e


# Backward-compatible alias so code that still refers to the previous
# implementation name keeps working.
transcribe_audio_openai = transcribe_audio_litellm


DEFAULT_WHISPER_PROMPTS = {
"en": "Hi, lets get started. First we'll have a round of introductions and then we can get into the topic for today.",
Expand Down Expand Up @@ -240,3 +169,47 @@ def transcribe_conversation_chunk(conversation_chunk_id: str) -> str:

logger.info(f"Processed chunk for transcription: {conversation_chunk_id}")
return conversation_chunk_id


def transcribe_audio(
    audio_file_path: str,
    language: Optional[str],
    whisper_prompt: Optional[str],
) -> str:
    """Transcribe one audio file and return the resulting text.

    This is a thin entry point: all three arguments are forwarded,
    unchanged and in order, to ``transcribe_audio_litellm``, and whatever
    that function returns (or raises) is passed straight back.
    """
    transcript = transcribe_audio_litellm(audio_file_path, language, whisper_prompt)
    return transcript



# def transcribe_audio_aiconl(
# audio_file_path: str,
# language: Optional[str], # noqa
# whisper_prompt: Optional[str], # noqa
# ) -> str:
# import requests

# API_BASE_URL = "https://whisper.ai-hackathon.haven.vng.cloud"
# API_KEY = "JOUW_VEILIGE_API_SLEUTEL"

# try:
# with open(audio_file_path, "rb") as f:
# headers = {"accept": "application/json", "access_token": API_KEY}
# files = {"file": f}

# response = requests.post(f"{API_BASE_URL}/transcribe", headers=headers, files=files)
# response.raise_for_status()

# result = response.json()
# transcription = result.get("text", "")

# if not transcription:
# logger.info("Transcription is empty!")

# return transcription

# except FileNotFoundError as exc:
# logger.error(f"File not found: {audio_file_path}")
# raise FileNotFoundError from exc
# except requests.RequestException as exc:
# logger.error(f"Failed to transcribe audio: {exc}")
# raise TranscriptionError(f"Failed to transcribe audio: {exc}") from exc
# except Exception as exc:
# logger.error(f"Unexpected error: {exc}")
# raise TranscriptionError(f"Unexpected error: {exc}") from exc