diff --git a/wavefront/client/src/config/voice-providers.ts b/wavefront/client/src/config/voice-providers.ts
index 252f0e87..de6ee1d1 100644
--- a/wavefront/client/src/config/voice-providers.ts
+++ b/wavefront/client/src/config/voice-providers.ts
@@ -220,7 +220,7 @@ export const VOICE_PROVIDERS_CONFIG: VoiceProvidersConfig = {
     },
   },
   stt: {
-    providers: ['deepgram', 'sarvam'] as const,
+    providers: ['deepgram', 'sarvam', 'elevenlabs'] as const,
     configs: {
       deepgram: {
         name: 'Deepgram',
@@ -325,6 +325,33 @@ export const VOICE_PROVIDERS_CONFIG: VoiceProvidersConfig = {
           },
         },
       },
+      elevenlabs: {
+        name: 'ElevenLabs',
+        badge: {
+          bg: 'bg-purple-100',
+          text: 'text-purple-800',
+        },
+        parameters: {
+          model: {
+            type: 'string',
+            default: 'scribe_v2_realtime',
+            description: 'ElevenLabs STT model',
+            options: ['scribe_v2_realtime'],
+          },
+          language: {
+            type: 'string',
+            default: '',
+            description: 'Language code (ISO-639-1, e.g., en, hi)',
+            placeholder: 'en',
+          },
+          sample_rate: {
+            type: 'number',
+            default: 8000,
+            description: 'Audio sample rate in Hz',
+            placeholder: '8000',
+          },
+        },
+      },
     },
   },
 };
diff --git a/wavefront/client/src/pages/apps/[appId]/voice-agents/stt-configs/CreateSttConfigDialog.tsx b/wavefront/client/src/pages/apps/[appId]/voice-agents/stt-configs/CreateSttConfigDialog.tsx
index 2d3a76d4..dd9e48c9 100644
--- a/wavefront/client/src/pages/apps/[appId]/voice-agents/stt-configs/CreateSttConfigDialog.tsx
+++ b/wavefront/client/src/pages/apps/[appId]/voice-agents/stt-configs/CreateSttConfigDialog.tsx
@@ -22,6 +22,7 @@ import { Input } from '@app/components/ui/input';
 import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from '@app/components/ui/select';
 import { Textarea } from '@app/components/ui/textarea';
 import { VOICE_PROVIDERS_CONFIG, getProviderConfig } from '@app/config/voice-providers';
+import { SttProvider } from '@app/types/stt-config';
 import { extractErrorMessage } from '@app/lib/utils';
 import { useNotifyStore } from '@app/store';
 import { zodResolver } from '@hookform/resolvers/zod';
@@ -32,7 +33,7 @@ import { z } from 'zod';
 const createSttConfigSchema = z.object({
   display_name: z.string().min(1, 'Display name is required').max(100, 'Display name must be 100 characters or less'),
   description: z.string().max(500, 'Description must be 500 characters or less').optional(),
-  provider: z.enum(['deepgram', 'sarvam'] as [string, ...string[]]),
+  provider: z.enum(['deepgram', 'sarvam', 'elevenlabs'] as [string, ...string[]]),
   api_key: z.string().min(1, 'API key is required'),
 });
 
@@ -76,7 +77,7 @@ const CreateSttConfigDialog: React.FC = ({ isOpen, o
       await floConsoleService.sttConfigService.createSttConfig({
         display_name: data.display_name.trim(),
         description: data.description?.trim() || null,
-        provider: data.provider as 'deepgram',
+        provider: data.provider as SttProvider,
         api_key: data.api_key.trim(),
       });
       notifySuccess('STT configuration created successfully');
diff --git a/wavefront/client/src/pages/apps/[appId]/voice-agents/stt-configs/EditSttConfigDialog.tsx b/wavefront/client/src/pages/apps/[appId]/voice-agents/stt-configs/EditSttConfigDialog.tsx
index 4a4c7eb5..8213a558 100644
--- a/wavefront/client/src/pages/apps/[appId]/voice-agents/stt-configs/EditSttConfigDialog.tsx
+++ b/wavefront/client/src/pages/apps/[appId]/voice-agents/stt-configs/EditSttConfigDialog.tsx
@@ -33,7 +33,7 @@ import { z } from 'zod';
 const updateSttConfigSchema = z.object({
   display_name: z.string().min(1, 'Display name is required').max(100, 'Display name must be 100 characters or less'),
   description: z.string().max(500, 'Description must be 500 characters or less').optional(),
-  provider: z.enum(['deepgram', 'sarvam'] as [string, ...string[]]),
+  provider: z.enum(['deepgram', 'sarvam', 'elevenlabs'] as [string, ...string[]]),
   api_key: z.string().optional(),
 });
 
diff --git a/wavefront/client/src/types/stt-config.ts b/wavefront/client/src/types/stt-config.ts
index 13935869..4f197840 100644
--- a/wavefront/client/src/types/stt-config.ts
+++ b/wavefront/client/src/types/stt-config.ts
@@ -1,6 +1,6 @@
 import { IApiResponse } from '@app/lib/axios';
 
-export type SttProvider = 'deepgram' | 'sarvam';
+export type SttProvider = 'deepgram' | 'sarvam' | 'elevenlabs';
 
 export interface SttConfig {
   id: string;
@@ -60,3 +60,10 @@ export interface SarvamSttParameters {
   vad_signals?: boolean;
   high_vad_sensitivity?: boolean;
 }
+
+// ElevenLabs STT specific parameters
+export interface ElevenLabsSttParameters {
+  model?: string; // default: 'scribe_v2_realtime'
+  language?: string; // ISO-639-1 code e.g. 'en', 'hi'
+  sample_rate?: number; // default: 8000
+}
diff --git a/wavefront/server/apps/call_processing/call_processing/services/stt_service.py b/wavefront/server/apps/call_processing/call_processing/services/stt_service.py
index 6a81ee73..e1d77623 100644
--- a/wavefront/server/apps/call_processing/call_processing/services/stt_service.py
+++ b/wavefront/server/apps/call_processing/call_processing/services/stt_service.py
@@ -1,7 +1,7 @@
 """
 STT (Speech-to-Text) service factory
 
-Supports multiple providers: Deepgram, AssemblyAI, Whisper, Google, Azure
+Supports multiple providers: Deepgram, Sarvam, ElevenLabs
 """
 
 from typing import Dict, Any
@@ -10,6 +10,7 @@
 # Pipecat STT services
 from pipecat.services.deepgram.stt import DeepgramSTTService
 from pipecat.services.sarvam.stt import SarvamSTTService
+from pipecat.services.elevenlabs.stt import ElevenLabsRealtimeSTTService
 
 # Pipecat language enum
 from pipecat.transcriptions.language import Language
@@ -57,6 +58,8 @@ def create_stt_service(stt_config: Dict[str, Any]):
             return STTServiceFactory._create_deepgram_stt(api_key, parameters)
         elif provider == 'sarvam':
             return STTServiceFactory._create_sarvam_stt(api_key, parameters)
+        elif provider == 'elevenlabs':
+            return STTServiceFactory._create_elevenlabs_stt(api_key, parameters)
         elif provider == 'assemblyai':
             return STTServiceFactory._create_assemblyai_stt(api_key, parameters)
         elif provider == 'whisper':
@@ -162,6 +165,69 @@ def _create_sarvam_stt(api_key: str, parameters: Dict[str, Any]):
             params=input_params,
         )
 
+    # Mapping of short language codes to ElevenLabs ISO-639-3 language codes
+    ELEVENLABS_LANGUAGE_MAP = {
+        'en': 'eng',
+        'hi': 'hin',
+        'ta': 'tam',
+        'te': 'tel',
+        'kn': 'kan',
+        'ml': 'mal',
+        'gu': 'guj',
+        'bn': 'ben',
+        'mr': 'mar',
+        'pa': 'pan',
+        'or': 'ori',
+    }
+
+    @staticmethod
+    def _create_elevenlabs_stt(api_key: str, parameters: Dict[str, Any]):
+        """
+        Create ElevenLabs Realtime STT service (WebSocket streaming, scribe_v2_realtime)
+
+        Args:
+            api_key: ElevenLabs API key
+            parameters: Optional 'language', 'model' and 'sample_rate' entries.
+                Missing, None or empty values fall back to auto-detect,
+                'scribe_v2_realtime' and 8000 respectively.
+        """
+        language_map = STTServiceFactory.ELEVENLABS_LANGUAGE_MAP
+        params_dict = {}
+
+        # Normalise before the lookup so 'EN', ' en ' and 'en-IN' all resolve to 'eng'
+        lang_code = parameters.get('language')
+        normalized = str(lang_code or '').strip().lower()
+        if normalized:
+            primary = normalized.replace('_', '-').split('-')[0]
+            if primary in language_map:
+                params_dict['language_code'] = language_map[primary]
+            elif primary in language_map.values():
+                # Already one of the ISO-639-3 codes this map produces
+                params_dict['language_code'] = primary
+            else:
+                logger.warning(
+                    f"Unknown ElevenLabs language '{lang_code}', skipping (auto-detect will be used)"
+                )
+
+        # `or` instead of a .get() default: a stored None/'' must not override the default
+        model = parameters.get('model') or 'scribe_v2_realtime'
+        sample_rate = parameters.get('sample_rate') or 8000
+
+        input_params = (
+            ElevenLabsRealtimeSTTService.InputParams(**params_dict)
+            if params_dict
+            else None
+        )
+
+        logger.info(f'ElevenLabs STT config: model={model}, sample_rate={sample_rate}')
+
+        return ElevenLabsRealtimeSTTService(
+            api_key=api_key,
+            model=model,
+            sample_rate=sample_rate,
+            params=input_params,
+        )
+
     @staticmethod
     def _create_assemblyai_stt(api_key: str, parameters: Dict[str, Any]):
         """Create AssemblyAI STT service"""
diff --git a/wavefront/server/modules/voice_agents_module/voice_agents_module/models/stt_schemas.py b/wavefront/server/modules/voice_agents_module/voice_agents_module/models/stt_schemas.py
index 298931f6..ae3fb05d 100644
--- a/wavefront/server/modules/voice_agents_module/voice_agents_module/models/stt_schemas.py
+++ b/wavefront/server/modules/voice_agents_module/voice_agents_module/models/stt_schemas.py
@@ -15,6 +15,7 @@ class SttProvider(str, Enum):
     GOOGLE = 'google'
     AZURE = 'azure'
     SARVAM = 'sarvam'
+    ELEVENLABS = 'elevenlabs'
 
 
 class CreateSttConfigPayload(BaseModel):