From e08f5182af7ff6c39e6b082884e4cc4c699d9e85 Mon Sep 17 00:00:00 2001 From: yunyaozhou Date: Tue, 14 Apr 2026 19:15:10 +0800 Subject: [PATCH] fix(voice): create ElevenLabs tools before agent setup --- hub/src/web/routes/voice.ts | 130 ++++++++++++++++++++++++++++++++++-- shared/src/voice.ts | 38 +++++++++-- web/src/api/voice.ts | 87 +++++++++++++++++++++++- 3 files changed, 244 insertions(+), 11 deletions(-) diff --git a/hub/src/web/routes/voice.ts b/hub/src/web/routes/voice.ts index 1a55f8363..c741ee4e5 100644 --- a/hub/src/web/routes/voice.ts +++ b/hub/src/web/routes/voice.ts @@ -4,7 +4,9 @@ import type { WebAppEnv } from '../middleware/auth' import { ELEVENLABS_API_BASE, VOICE_AGENT_NAME, - buildVoiceAgentConfig + buildVoiceAgentConfig, + buildVoiceToolRequests, + type VoiceToolConfig } from '@hapi/protocol/voice' const tokenRequestSchema = z.object({ @@ -20,6 +22,14 @@ interface ElevenLabsAgent { name: string } +interface ElevenLabsTool { + id: string + tool_config?: { + name?: string + type?: string + } +} + /** * Find an existing "Hapi Voice Assistant" agent */ @@ -50,7 +60,7 @@ async function findHapiAgent(apiKey: string): Promise { /** * Create a new "Hapi Voice Assistant" agent */ -async function createHapiAgent(apiKey: string): Promise { +async function createHapiAgent(apiKey: string, toolIds: string[]): Promise { try { const response = await fetch(`${ELEVENLABS_API_BASE}/convai/agents/create`, { method: 'POST', @@ -59,7 +69,7 @@ async function createHapiAgent(apiKey: string): Promise { 'Content-Type': 'application/json', 'Accept': 'application/json' }, - body: JSON.stringify(buildVoiceAgentConfig()) + body: JSON.stringify(buildVoiceAgentConfig(toolIds)) }) if (!response.ok) { @@ -79,6 +89,106 @@ async function createHapiAgent(apiKey: string): Promise { } } +async function updateHapiAgent(apiKey: string, agentId: string, toolIds: string[]): Promise { + try { + const response = await fetch(`${ELEVENLABS_API_BASE}/convai/agents/${agentId}`, { + method: 'PATCH', + headers: { + 'xi-api-key': apiKey, + 'Content-Type': 'application/json', + 'Accept': 'application/json' + }, + body: JSON.stringify(buildVoiceAgentConfig(toolIds)) + }) + + if (!response.ok) { + const errorData = await response.json().catch(() => ({})) as { detail?: { message?: string } | string } + const errorMessage = typeof errorData.detail === 'string' + ? errorData.detail + : (errorData.detail as { message?: string })?.message || `API error: ${response.status}` + console.error('[Voice] Failed to update agent:', errorMessage) + return false + } + + return true + } catch (error) { + console.error('[Voice] Error updating agent:', error) + return false + } +} + +async function listTools(apiKey: string): Promise { + const response = await fetch(`${ELEVENLABS_API_BASE}/convai/tools`, { + method: 'GET', + headers: { + 'xi-api-key': apiKey, + 'Accept': 'application/json' + } + }) + + if (!response.ok) { + const errorData = await response.json().catch(() => ({})) as { detail?: { message?: string } | string } + const errorMessage = typeof errorData.detail === 'string' + ? errorData.detail + : (errorData.detail as { message?: string })?.message || `API error: ${response.status}` + throw new Error(errorMessage) + } + + const data = await response.json() as { tools?: ElevenLabsTool[] } + return data.tools || [] +} + +async function createTool(apiKey: string, toolConfig: VoiceToolConfig): Promise { + const response = await fetch(`${ELEVENLABS_API_BASE}/convai/tools`, { + method: 'POST', + headers: { + 'xi-api-key': apiKey, + 'Content-Type': 'application/json', + 'Accept': 'application/json' + }, + body: JSON.stringify({ tool_config: toolConfig }) + }) + + if (!response.ok) { + const errorData = await response.json().catch(() => ({})) as { detail?: { message?: string } | string } + const errorMessage = typeof errorData.detail === 'string' + ? errorData.detail + : (errorData.detail as { message?: string })?.message || `API error: ${response.status}` + throw new Error(errorMessage) + } + + const data = await response.json() as { id?: string } + if (!data.id) { + throw new Error('No tool id in ElevenLabs response') + } + + return data.id +} + +async function ensureHapiToolIds(apiKey: string): Promise { + const existingTools = await listTools(apiKey) + const toolIdByName = new Map( + existingTools + .filter((tool) => tool.tool_config?.type === 'client' && typeof tool.tool_config?.name === 'string') + .map((tool) => [tool.tool_config!.name!, tool.id]) + ) + + const toolIds: string[] = [] + for (const request of buildVoiceToolRequests()) { + const existingId = toolIdByName.get(request.tool_config.name) + if (existingId) { + toolIds.push(existingId) + continue + } + + const createdId = await createTool(apiKey, request.tool_config) + toolIds.push(createdId) + toolIdByName.set(request.tool_config.name, createdId) + } + + return toolIds +} + /** * Get or create agent ID - finds existing or creates new "Hapi Voice Assistant" agent */ @@ -90,16 +200,28 @@ async function getOrCreateAgentId(apiKey: string): Promise { return cached } + let toolIds: string[] + try { + toolIds = await ensureHapiToolIds(apiKey) + } catch (error) { + console.error('[Voice] Failed to ensure Hapi tools:', error) + return null + } + // Try to find existing agent console.log('[Voice] No agent ID configured, searching for existing agent...') let agentId = await findHapiAgent(apiKey) if (agentId) { console.log('[Voice] Found existing agent:', agentId) + const updated = await updateHapiAgent(apiKey, agentId, toolIds) + if (!updated) { + return null + } } else { // Create new agent console.log('[Voice] No existing agent found, creating new one...') - agentId = await createHapiAgent(apiKey) + agentId = await createHapiAgent(apiKey, toolIds) if (agentId) { console.log('[Voice] Created new agent:', agentId) } diff --git a/shared/src/voice.ts b/shared/src/voice.ts index 6751f0eba..d2b30cfcb 100644 --- a/shared/src/voice.ts +++ b/shared/src/voice.ts @@ -140,7 +140,24 @@ For builds, tests, or large file operations: export const VOICE_FIRST_MESSAGE = "Hey! Hapi here." -export const VOICE_TOOLS = [ +export interface VoiceToolConfig { + type: 'client' + name: string + description: string + expects_response: boolean + response_timeout_secs: number + parameters: { + type: 'object' + required: string[] + properties: Record + } +} + +export const VOICE_TOOLS: VoiceToolConfig[] = [ { type: 'client' as const, name: 'messageCodingAgent', @@ -170,7 +187,8 @@ export const VOICE_TOOLS = [ properties: { decision: { type: 'string', - description: "The user's decision: must be either 'allow' or 'deny'" + description: "The user's decision: must be either 'allow' or 'deny'", + enum: ['allow', 'deny'] } } } @@ -188,7 +206,7 @@ export interface VoiceAgentConfig { llm: string temperature: number max_tokens: number - tools: typeof VOICE_TOOLS + tool_ids: string[] } } turn: { @@ -213,11 +231,21 @@ export interface VoiceAgentConfig { } } +export interface VoiceToolCreateRequest { + tool_config: VoiceToolConfig +} + +export function buildVoiceToolRequests(): VoiceToolCreateRequest[] { + return VOICE_TOOLS.map(tool => ({ + tool_config: tool + })) +} + /** * Build the agent configuration for Hapi Voice Assistant. * Used by both server-side auto-creation and client-side configuration. */ -export function buildVoiceAgentConfig(): VoiceAgentConfig { +export function buildVoiceAgentConfig(toolIds: string[] = []): VoiceAgentConfig { return { name: VOICE_AGENT_NAME, conversation_config: { @@ -229,7 +257,7 @@ export function buildVoiceAgentConfig(): VoiceAgentConfig { llm: 'gemini-2.5-flash', temperature: 0.7, max_tokens: 1024, - tools: VOICE_TOOLS + tool_ids: toolIds } }, turn: { diff --git a/web/src/api/voice.ts b/web/src/api/voice.ts index 66cee443f..e84924584 100644 --- a/web/src/api/voice.ts +++ b/web/src/api/voice.ts @@ -13,7 +13,9 @@ import type { ApiClient } from './client' import { ELEVENLABS_API_BASE, VOICE_AGENT_NAME, - buildVoiceAgentConfig + buildVoiceAgentConfig, + buildVoiceToolRequests, + type VoiceToolConfig } from '@hapi/protocol/voice' export interface VoiceTokenResponse { @@ -55,6 +57,14 @@ export interface ElevenLabsAgent { name: string } +interface ElevenLabsTool { + id: string + tool_config?: { + name?: string + type?: string + } +} + export interface FindAgentResult { success: boolean agentId?: string @@ -68,6 +78,78 @@ export interface CreateAgentResult { created?: boolean } +async function listTools(apiKey: string): Promise { + const response = await fetch(`${ELEVENLABS_API_BASE}/convai/tools`, { + method: 'GET', + headers: { + 'xi-api-key': apiKey, + 'Accept': 'application/json' + } + }) + + if (!response.ok) { + const errorData = await response.json().catch(() => ({})) as { detail?: { message?: string } | string } + const errorMessage = typeof errorData.detail === 'string' + ? errorData.detail + : errorData.detail?.message || `API error: ${response.status}` + throw new Error(errorMessage) + } + + const data = await response.json() as { tools?: ElevenLabsTool[] } + return data.tools || [] +} + +async function createTool(apiKey: string, toolConfig: VoiceToolConfig): Promise { + const response = await fetch(`${ELEVENLABS_API_BASE}/convai/tools`, { + method: 'POST', + headers: { + 'xi-api-key': apiKey, + 'Content-Type': 'application/json', + 'Accept': 'application/json' + }, + body: JSON.stringify({ tool_config: toolConfig }) + }) + + if (!response.ok) { + const errorData = await response.json().catch(() => ({})) as { detail?: { message?: string } | string } + const errorMessage = typeof errorData.detail === 'string' + ? errorData.detail + : errorData.detail?.message || `API error: ${response.status}` + throw new Error(errorMessage) + } + + const data = await response.json() as { id?: string } + if (!data.id) { + throw new Error('Failed to get tool ID from response') + } + + return data.id +} + +async function ensureHapiToolIds(apiKey: string): Promise { + const existingTools = await listTools(apiKey) + const toolIdByName = new Map( + existingTools + .filter((tool) => tool.tool_config?.type === 'client' && typeof tool.tool_config?.name === 'string') + .map((tool) => [tool.tool_config!.name!, tool.id]) + ) + + const toolIds: string[] = [] + for (const request of buildVoiceToolRequests()) { + const existingId = toolIdByName.get(request.tool_config.name) + if (existingId) { + toolIds.push(existingId) + continue + } + + const createdId = await createTool(apiKey, request.tool_config) + toolIds.push(createdId) + toolIdByName.set(request.tool_config.name, createdId) + } + + return toolIds +} + /** * Find an existing "Hapi Voice Assistant" agent using the provided API key. */ @@ -112,7 +194,8 @@ export async function createOrUpdateHapiAgent(apiKey: string): Promise