Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 91 additions & 2 deletions wavefront/client/src/config/voice-providers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ export interface VoiceProvidersConfig {
*/
export const VOICE_PROVIDERS_CONFIG: VoiceProvidersConfig = {
tts: {
providers: ['elevenlabs', 'deepgram', 'cartesia'] as const,
providers: ['elevenlabs', 'deepgram', 'cartesia', 'sarvam'] as const,
configs: {
elevenlabs: {
name: 'ElevenLabs',
Expand Down Expand Up @@ -159,10 +159,68 @@ export const VOICE_PROVIDERS_CONFIG: VoiceProvidersConfig = {
},
},
},
sarvam: {
name: 'Sarvam',
badge: {
bg: 'bg-orange-100',
text: 'text-orange-800',
},
parameters: {
model: {
type: 'string',
default: 'bulbul:v2',
options: ['bulbul:v2', 'bulbul:v3'],
description: 'Sarvam TTS model',
},
language: {
type: 'string',
default: '',
description: 'Language code',
placeholder: 'hi',
},
pitch: {
type: 'number',
default: 0.0,
min: -0.75,
max: 0.75,
step: 0.05,
description: 'Voice pitch (-0.75 to 0.75)',
},
pace: {
type: 'number',
default: 1.0,
min: 0.3,
max: 3.0,
step: 0.1,
description: 'Speech pace (0.3-3.0)',
},
loudness: {
type: 'number',
default: 1.0,
min: 0.1,
max: 3.0,
step: 0.1,
description: 'Volume (0.1-3.0)',
},
enable_preprocessing: {
type: 'boolean',
default: false,
description: 'Enable text preprocessing',
},
temperature: {
type: 'number',
default: 0.6,
min: 0.01,
max: 1.0,
step: 0.05,
description: 'Randomness for bulbul v3 (0.01-1.0)',
},
},
},
},
},
stt: {
providers: ['deepgram'] as const,
providers: ['deepgram', 'sarvam'] as const,
configs: {
deepgram: {
name: 'Deepgram',
Expand Down Expand Up @@ -236,6 +294,37 @@ export const VOICE_PROVIDERS_CONFIG: VoiceProvidersConfig = {
},
},
},
sarvam: {
name: 'Sarvam',
badge: {
bg: 'bg-orange-100',
text: 'text-orange-800',
},
parameters: {
model: {
type: 'string',
default: 'saarika:v2.5',
options: ['saarika:v2.5', 'saaras:v2'],
description: 'Sarvam STT model',
},
language: {
type: 'string',
default: '',
description: 'Language code',
placeholder: 'hi',
},
vad_signals: {
type: 'boolean',
default: true,
description: 'Enable VAD signals',
},
high_vad_sensitivity: {
type: 'boolean',
default: false,
description: 'High VAD sensitivity',
},
},
},
},
},
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ import { z } from 'zod';
const createSttConfigSchema = z.object({
display_name: z.string().min(1, 'Display name is required').max(100, 'Display name must be 100 characters or less'),
description: z.string().max(500, 'Description must be 500 characters or less').optional(),
provider: z.enum(['deepgram'] as [string, ...string[]]),
provider: z.enum(['deepgram', 'sarvam'] as [string, ...string[]]),
api_key: z.string().min(1, 'API key is required'),
});

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ import { z } from 'zod';
const updateSttConfigSchema = z.object({
display_name: z.string().min(1, 'Display name is required').max(100, 'Display name must be 100 characters or less'),
description: z.string().max(500, 'Description must be 500 characters or less').optional(),
provider: z.enum(['deepgram'] as [string, ...string[]]),
provider: z.enum(['deepgram', 'sarvam'] as [string, ...string[]]),
api_key: z.string().optional(),
});

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ import { z } from 'zod';
const createTtsConfigSchema = z.object({
display_name: z.string().min(1, 'Display name is required').max(100, 'Display name must be 100 characters or less'),
description: z.string().max(500, 'Description must be 500 characters or less').optional(),
provider: z.enum(['elevenlabs', 'deepgram', 'cartesia'] as [string, ...string[]]),
provider: z.enum(['elevenlabs', 'deepgram', 'cartesia', 'sarvam'] as [string, ...string[]]),
api_key: z.string().min(1, 'API key is required'),
});

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ import { z } from 'zod';
const updateTtsConfigSchema = z.object({
display_name: z.string().min(1, 'Display name is required').max(100, 'Display name must be 100 characters or less'),
description: z.string().max(500, 'Description must be 500 characters or less').optional(),
provider: z.enum(['elevenlabs', 'deepgram', 'cartesia'] as [string, ...string[]]),
provider: z.enum(['elevenlabs', 'deepgram', 'cartesia', 'sarvam'] as [string, ...string[]]),
api_key: z.string().optional(),
});

Expand Down
10 changes: 9 additions & 1 deletion wavefront/client/src/types/stt-config.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { IApiResponse } from '@app/lib/axios';

export type SttProvider = 'deepgram';
export type SttProvider = 'deepgram' | 'sarvam';

export interface SttConfig {
id: string;
Expand Down Expand Up @@ -52,3 +52,11 @@ export interface DeepgramSttParameters {
profanity_filter?: boolean;
vad_events?: boolean;
}

/**
 * Provider-specific parameters accepted for a Sarvam speech-to-text config.
 * Every field is optional.
 */
export interface SarvamSttParameters {
  /** Sarvam STT model identifier, e.g. 'saarika:v2.5' (the default) or 'saaras:v2'. */
  model?: string;
  /** Language code, e.g. 'hi'. An empty value means no language is specified. */
  language?: string;
  /** Enable VAD signals. */
  vad_signals?: boolean;
  /** Use high VAD sensitivity. */
  high_vad_sensitivity?: boolean;
}
13 changes: 12 additions & 1 deletion wavefront/client/src/types/tts-config.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { IApiResponse } from '@app/lib/axios';

export type TtsProvider = 'elevenlabs' | 'deepgram' | 'cartesia';
export type TtsProvider = 'elevenlabs' | 'deepgram' | 'cartesia' | 'sarvam';

export interface TtsConfig {
id: string;
Expand Down Expand Up @@ -62,3 +62,14 @@ export interface CartesiaParameters {
language?: string; // Language enum
speed?: number;
}

/**
 * Provider-specific parameters accepted for a Sarvam text-to-speech config.
 * Every field is optional.
 */
export interface SarvamTtsParameters {
  /** Sarvam TTS model identifier, e.g. 'bulbul:v2' (the default) or 'bulbul:v3'. */
  model?: string;
  /** Language code, e.g. 'hi'. An empty value means no language is specified. */
  language?: string;
  /** Voice pitch, from -0.75 to 0.75. */
  pitch?: number;
  /** Speech pace, from 0.3 to 3.0. */
  pace?: number;
  /** Volume, from 0.1 to 3.0. */
  loudness?: number;
  /** Enable text preprocessing. */
  enable_preprocessing?: boolean;
  /** Randomness, from 0.01 to 1.0 (described in the UI config as a bulbul v3 setting). */
  temperature?: number;
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@

# Pipecat STT services
from pipecat.services.deepgram.stt import DeepgramSTTService
from pipecat.services.sarvam.stt import SarvamSTTService

# Pipecat language enum
from pipecat.transcriptions.language import Language

# Deepgram options
from deepgram import LiveOptions
Expand Down Expand Up @@ -51,6 +55,8 @@ def create_stt_service(stt_config: Dict[str, Any]):

if provider == 'deepgram':
return STTServiceFactory._create_deepgram_stt(api_key, parameters)
elif provider == 'sarvam':
return STTServiceFactory._create_sarvam_stt(api_key, parameters)
elif provider == 'assemblyai':
return STTServiceFactory._create_assemblyai_stt(api_key, parameters)
elif provider == 'whisper':
Expand Down Expand Up @@ -106,6 +112,56 @@ def _create_deepgram_stt(api_key: str, parameters: Dict[str, Any]):

return DeepgramSTTService(api_key=api_key, live_options=live_options)

# Mapping of short (two-letter, lowercase) language codes to the pipecat
# Language enum members used for Sarvam. Lookups must be normalized to this
# form before calling .get().
SARVAM_LANGUAGE_MAP = {
    'bn': Language.BN_IN,
    'en': Language.EN_IN,
    'gu': Language.GU_IN,
    'hi': Language.HI_IN,
    'kn': Language.KN_IN,
    'ml': Language.ML_IN,
    'mr': Language.MR_IN,
    'or': Language.OR_IN,
    'pa': Language.PA_IN,
    'ta': Language.TA_IN,
    'te': Language.TE_IN,
}

@staticmethod
def _create_sarvam_stt(api_key: str, parameters: Dict[str, Any]):
    """Create Sarvam STT service.

    Args:
        api_key: Sarvam API key passed straight to the service.
        parameters: Provider parameters. Recognized keys are ``model``
            (default ``'saarika:v2.5'``), ``sample_rate`` (default ``8000``),
            ``language``, ``vad_signals`` and ``high_vad_sensitivity``.
            Keys that are missing, or present with a ``None`` value, are left
            out so the service's own defaults apply.

    Returns:
        A configured ``SarvamSTTService`` instance.
    """
    params_dict = {}

    # Map language code to pipecat Language enum. The code is normalized so
    # that 'HI', 'hi-IN' and 'hi_in' all resolve like 'hi'; previously only
    # the exact lowercase short code matched and anything else was dropped.
    raw_language = parameters.get('language')
    if raw_language:
        lang_code = str(raw_language).strip().lower().replace('_', '-')
        if lang_code.endswith('-in'):
            lang_code = lang_code[: -len('-in')]
        lang_enum = STTServiceFactory.SARVAM_LANGUAGE_MAP.get(lang_code)
        if lang_enum:
            params_dict['language'] = lang_enum
        else:
            logger.warning(f"Unknown Sarvam language '{raw_language}', skipping")

    # Forward only explicitly set flags; a stored None is treated as "not set"
    # rather than being handed to InputParams.
    for flag in ('vad_signals', 'high_vad_sensitivity'):
        flag_value = parameters.get(flag)
        if flag_value is not None:
            params_dict[flag] = flag_value

    # `or` (not a .get default) so that an empty/None stored value also falls
    # back to the default instead of reaching the service.
    model = parameters.get('model') or 'saarika:v2.5'
    sample_rate = parameters.get('sample_rate') or 8000

    input_params = (
        SarvamSTTService.InputParams(**params_dict) if params_dict else None
    )

    logger.info(f'Sarvam STT config: model={model}, sample_rate={sample_rate}')

    return SarvamSTTService(
        api_key=api_key,
        model=model,
        sample_rate=sample_rate,
        params=input_params,
    )

@staticmethod
def _create_assemblyai_stt(api_key: str, parameters: Dict[str, Any]):
"""Create AssemblyAI STT service"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from pipecat.services.elevenlabs.tts import ElevenLabsTTSService
from pipecat.services.deepgram.tts import DeepgramTTSService
from pipecat.services.cartesia.tts import CartesiaTTSService
from pipecat.services.sarvam.tts import SarvamTTSService

# Language for params
from pipecat.transcriptions.language import Language
Expand Down Expand Up @@ -62,6 +63,8 @@ def create_tts_service(tts_config: Dict[str, Any]):
return TTSServiceFactory._create_deepgram_tts(api_key, voice_id, parameters)
elif provider == 'cartesia':
return TTSServiceFactory._create_cartesia_tts(api_key, voice_id, parameters)
elif provider == 'sarvam':
return TTSServiceFactory._create_sarvam_tts(api_key, voice_id, parameters)
else:
raise ValueError(f'Unsupported TTS provider: {provider}')

Expand Down Expand Up @@ -162,3 +165,55 @@ def _create_cartesia_tts(api_key: str, voice_id: str, parameters: Dict[str, Any]
return CartesiaTTSService(
api_key=api_key, voice_id=voice_id, model=model, params=input_params
)

# Mapping of short (two-letter, lowercase) language codes to the pipecat
# Language enum members used for Sarvam. Lookups must be normalized to this
# form before calling .get().
SARVAM_LANGUAGE_MAP = {
    'bn': Language.BN_IN,
    'en': Language.EN_IN,
    'gu': Language.GU_IN,
    'hi': Language.HI_IN,
    'kn': Language.KN_IN,
    'ml': Language.ML_IN,
    'mr': Language.MR_IN,
    'or': Language.OR_IN,
    'pa': Language.PA_IN,
    'ta': Language.TA_IN,
    'te': Language.TE_IN,
}

@staticmethod
def _create_sarvam_tts(api_key: str, voice_id: str, parameters: Dict[str, Any]):
    """Create Sarvam TTS service (WebSocket-based streaming).

    Args:
        api_key: Sarvam API key passed straight to the service.
        voice_id: Voice identifier passed straight to the service.
        parameters: Provider parameters. Recognized keys are ``model``
            (default ``'bulbul:v2'``), ``language``, ``pitch``, ``pace``,
            ``loudness``, ``enable_preprocessing`` and ``temperature``.
            Keys that are missing, or present with a ``None`` value, are left
            out so the service's own defaults apply.

    Returns:
        A configured ``SarvamTTSService`` instance.
    """
    # `or` (not a .get default) so that an empty/None stored value also falls
    # back to the default instead of reaching the service.
    model = parameters.get('model') or 'bulbul:v2'

    # Build InputParams from the parameters dict
    params_dict = {}

    # The language code is normalized so that 'HI', 'hi-IN' and 'hi_in' all
    # resolve like 'hi'; previously only the exact lowercase short code
    # matched and anything else was dropped with a warning.
    raw_language = parameters.get('language')
    if raw_language:
        lang_code = str(raw_language).strip().lower().replace('_', '-')
        if lang_code.endswith('-in'):
            lang_code = lang_code[: -len('-in')]
        lang_enum = TTSServiceFactory.SARVAM_LANGUAGE_MAP.get(lang_code)
        if lang_enum:
            params_dict['language'] = lang_enum
        else:
            logger.warning(f"Unknown Sarvam language '{raw_language}', skipping")

    # Forward only explicitly set tuning values; a stored None is treated as
    # "not set" rather than being handed to InputParams.
    for key in ('pitch', 'pace', 'loudness', 'enable_preprocessing', 'temperature'):
        value = parameters.get(key)
        if value is not None:
            params_dict[key] = value

    input_params = (
        SarvamTTSService.InputParams(**params_dict) if params_dict else None
    )

    logger.info(f'Sarvam TTS config: voice={voice_id}, model={model}')

    return SarvamTTSService(
        api_key=api_key, voice_id=voice_id, model=model, params=input_params
    )
2 changes: 1 addition & 1 deletion wavefront/server/apps/call_processing/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ dependencies = [
"redis>=5.0.0",
"tenacity>=8.0.0",
# Pipecat and voice processing
"pipecat-ai[websocket,cartesia,google,silero,deepgram,groq,runner,azure,local-smart-turn-v3]==0.0.100",
"pipecat-ai[websocket,cartesia,google,silero,deepgram,groq,runner,azure,local-smart-turn-v3,sarvam]==0.0.100",
# Twilio
"twilio>=8.0.0",
]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ class SttProvider(str, Enum):
WHISPER = 'whisper'
GOOGLE = 'google'
AZURE = 'azure'
SARVAM = 'sarvam'


class CreateSttConfigPayload(BaseModel):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ class TtsProvider(str, Enum):
AZURE = 'azure'
GOOGLE = 'google'
AWS = 'aws'
SARVAM = 'sarvam'


class CreateTtsConfigPayload(BaseModel):
Expand Down
Loading