Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
1c7b33e
wip
longcw Dec 19, 2025
8eba4e9
add logs
longcw Dec 19, 2025
de63ced
add sms-console
longcw Dec 22, 2025
fedadd3
Merge remote-tracking branch 'origin/main' into longc/text-mode
longcw Dec 22, 2025
e0f387b
fix types
longcw Dec 22, 2025
4e2b715
fix agent handoff
longcw Dec 22, 2025
d8bf768
fix wrapped entrypoint
longcw Dec 22, 2025
5d05117
Merge remote-tracking branch 'origin/main' into longc/text-mode
longcw Jan 5, 2026
a092a42
save session after the text handler
longcw Jan 5, 2026
d6c7065
add multiple responses
longcw Jan 6, 2026
4f22115
add e2e encryption
longcw Jan 8, 2026
30f9711
add get_init_kwargs
longcw Jan 9, 2026
6084764
Merge remote-tracking branch 'origin/main' into longc/text-mode
longcw Jan 13, 2026
fcc3b92
serialize old_agent for AgentTask
longcw Jan 13, 2026
03ac592
add Agent.configure
longcw Jan 16, 2026
7bdae50
Merge remote-tracking branch 'origin/main' into longc/text-mode
longcw Jan 19, 2026
1eff439
hand text message request from lk server (#4553)
longcw Jan 20, 2026
12c6b88
durable scheduler WIP
theomonnom Jan 21, 2026
685e7b8
contextvars already handled
theomonnom Jan 21, 2026
8495cea
export session state as db and compute delta for sync (#4604)
longcw Jan 28, 2026
52f1eb2
durable functions integration (#4647)
longcw Jan 30, 2026
4db5018
update sms cli
longcw Jan 30, 2026
cb731e4
Merge remote-tracking branch 'origin/main' into longc/text-mode
longcw Feb 2, 2026
214f5f7
clean and update session store versions
longcw Feb 2, 2026
09a2026
fix types
longcw Feb 2, 2026
29f13e4
save init kwargs as blob
longcw Feb 2, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
133 changes: 133 additions & 0 deletions examples/voice_agents/sms_agent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
import logging
from typing import Any, override

from dotenv import load_dotenv

from livekit.agents import (
Agent,
AgentServer,
AgentSession,
JobContext,
RunContext,
TextMessageContext,
cli,
)
from livekit.agents.beta.workflows import GetEmailTask
from livekit.agents.llm import ToolFlag, function_tool
from livekit.durable import EffectCall
from livekit.plugins import silero
from livekit.plugins.turn_detector.multilingual import MultilingualModel

# uncomment to enable Krisp background voice/noise cancellation
# from livekit.plugins import noise_cancellation

# Module-level logger used by MyAgent and the text handler below.
# NOTE(review): the name "basic-agent" looks carried over from another example —
# confirm whether it should identify this SMS example instead.
logger = logging.getLogger("basic-agent")

# Load environment variables via python-dotenv before the server and sessions are created.
load_dotenv()


class MyAgent(Agent):
    """Example agent ("Kelly") that can be driven by a voice session or by text messages.

    Args:
        text_mode: True when the agent is created for the text handler. In that mode
            ``on_enter`` does not generate a greeting and the email task receives an
            extra, text-specific instruction.
    """

    def __init__(self, *, text_mode: bool) -> None:
        # Adjacent string literals are concatenated verbatim, so every fragment ends
        # with an explicit space to keep the sentences from running into each other.
        # NOTE(review): the prompt says "via voice" even when text_mode is True —
        # confirm whether text sessions should receive different wording.
        super().__init__(
            instructions="Your name is Kelly. You would interact with users via voice. "
            "with that in mind keep your responses concise and to the point. "
            "do not use emojis, asterisks, markdown, or other special characters in your responses. "
            "You are curious and friendly, and have a sense of humor. "
            "you will speak english to the user",
        )
        self._text_mode = text_mode

    @override
    def get_init_kwargs(self) -> dict[str, Any]:
        """Return the keyword arguments that reproduce this agent's constructor call.

        Returns:
            A new dict whose single ``"text_mode"`` entry mirrors the ``__init__`` argument.
        """
        return {
            "text_mode": self._text_mode,
        }

    async def on_enter(self) -> None:
        """Generate an uninterruptible opening reply unless the agent is in text mode."""
        # In text mode no greeting is produced here; the text handler drives the
        # session explicitly with the user's message.
        if not self._text_mode:
            logger.debug("greeting the user")
            self.session.generate_reply(allow_interruptions=False)

    # all functions annotated with @function_tool will be passed to the LLM when this
    # agent is active
    # NOTE(review): the tool docstrings presumably double as the tool descriptions sent
    # to the LLM, so their wording is runtime text and is kept unchanged.
    @function_tool
    async def lookup_weather(
        self, context: RunContext, location: str, latitude: str, longitude: str
    ):
        """Called when the user asks for weather related information.
        Ensure the user's location (city or region) is provided.
        When given a location, please estimate the latitude and longitude of the location and
        do not ask the user for them.

        Args:
            location: The location they are asking for
            latitude: The latitude of the location, do not ask user for it
            longitude: The longitude of the location, do not ask user for it
        """

        logger.info(f"Looking up weather for {location}")

        # this will create multiple responses to the user
        context.session.say("Let me check the weather for you")

        # Canned result: this example does not query a real weather service.
        return "sunny with a temperature of 70 degrees."

    @function_tool(flags=ToolFlag.DURABLE)
    async def register_for_weather(self, context: RunContext):
        """Called when the user wants to register for the weather event."""
        logger.info("register_for_weather called")

        # Only text sessions get the extra instruction; voice sessions pass an empty string.
        get_email_task = GetEmailTask(
            extra_instructions=(
                "You are communicating with the user via text messages, "
                "so there is no need to verify the email address with the user multiple times."
            )
            if self._text_mode
            else ""
        )
        get_email_task.configure(llm="openai/gpt-4.1")

        # The task is awaited through EffectCall rather than directly.
        email_result = await EffectCall(get_email_task)
        email_address = email_result.email_address

        logger.info(f"User's email address: {email_address}")

        return "You are now registered for the weather event."


# Server instance whose decorators register the text and RTC handlers below;
# it is handed to cli.run_app in the __main__ guard.
server = AgentServer()


@server.text_handler()
async def text_handler(ctx: TextMessageContext):
    """Handle one incoming text message and send back every event the session yields.

    A new AgentSession is created for each message. When the context carries
    session data the session is rehydrated from it; otherwise the session is
    started with a fresh text-mode MyAgent.
    """
    logger.info(f"text message received: {ctx.text}")

    session = AgentSession(
        llm="openai/gpt-4.1-mini",
        # state_passphrase="my-secret-passphrase",
    )

    if not ctx.session_data:
        # No saved state: begin a new conversation.
        await session.start(agent=MyAgent(text_mode=True))
    else:
        # Saved state present: continue from it.
        await session.rehydrate(ctx.session_data)

    # Forward each event produced for this user input back through the context.
    async for event in session.run(user_input=ctx.text):
        await ctx.send_response(event)


@server.rtc_session()
async def entrypoint(ctx: JobContext):
    """Start a voice AgentSession in the job's room with a voice-mode MyAgent."""
    # Components are created turn detector first, then VAD.
    turn_detector = MultilingualModel()
    vad_model = silero.VAD.load()

    session = AgentSession(
        stt="deepgram/nova-3",
        llm="openai/gpt-4.1-mini",
        tts="cartesia/sonic-2:9626c31c-bec5-4cca-baa8-f8ba9e84c8bc",
        turn_detection=turn_detector,
        vad=vad_model,
        preemptive_generation=True,
    )

    voice_agent = MyAgent(text_mode=False)
    await session.start(agent=voice_agent, room=ctx.room)


# Hand the server to the CLI runner only when this file is executed as a script.
if __name__ == "__main__":
    cli.run_app(server)
12 changes: 11 additions & 1 deletion livekit-agents/livekit/agents/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
JobExecutorType,
JobProcess,
JobRequest,
TextMessageContext,
get_job_context,
)
from .llm.chat_context import (
Expand All @@ -47,7 +48,14 @@
FunctionCall,
FunctionCallOutput,
)
from .llm.tool_context import FunctionTool, ProviderTool, StopResponse, ToolError, function_tool
from .llm.tool_context import (
FunctionTool,
ProviderTool,
StopResponse,
ToolError,
ToolFlag,
function_tool,
)
from .plugin import Plugin
from .types import (
DEFAULT_API_CONNECT_OPTIONS,
Expand Down Expand Up @@ -123,11 +131,13 @@ def __getattr__(name: str) -> typing.Any:
"JobProcess",
"JobContext",
"JobRequest",
"TextMessageContext",
"get_job_context",
"JobExecutorType",
"AutoSubscribe",
"FunctionTool",
"function_tool",
"ToolFlag",
"ProviderTool",
"ChatContext",
"ChatItem",
Expand Down
19 changes: 18 additions & 1 deletion livekit-agents/livekit/agents/beta/workflows/address.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from __future__ import annotations

from dataclasses import dataclass
from typing import TYPE_CHECKING
from typing import TYPE_CHECKING, Any

from ... import llm, stt, tts, vad
from ...llm.tool_context import ToolError, ToolFlag, function_tool
Expand All @@ -11,6 +11,7 @@
from ...voice.speech_handle import SpeechHandle

if TYPE_CHECKING:
from ...voice.agent import _AgentState
from ...voice.audio_recognition import TurnDetectionMode


Expand All @@ -32,6 +33,10 @@ def __init__(
tts: NotGivenOr[tts.TTS | None] = NOT_GIVEN,
allow_interruptions: NotGivenOr[bool] = NOT_GIVEN,
) -> None:
self._init_kwargs = {
"extra_instructions": extra_instructions,
"allow_interruptions": allow_interruptions,
}
super().__init__(
instructions=(
"You are only a single step in a broader system, responsible solely for capturing an address.\n"
Expand Down Expand Up @@ -77,6 +82,18 @@ def __init__(

self._address_update_speech_handle: SpeechHandle | None = None

def get_init_kwargs(self) -> dict[str, Any]:
    """Return the constructor keyword arguments captured in ``__init__``.

    A shallow copy is returned so that a caller mutating the result cannot
    alter the kwargs stored on this instance.

    Returns:
        A new dict holding the ``extra_instructions`` and ``allow_interruptions``
        values recorded by the constructor.
    """
    return dict(self._init_kwargs)

def _get_state(self) -> _AgentState:
    """Return the parent class's state with ``_current_address`` added to ``extra_state``."""
    snapshot = super()._get_state()
    # Stored under "current_address", the same key _set_state reads back.
    snapshot.extra_state["current_address"] = self._current_address
    return snapshot

def _set_state(self, state: _AgentState) -> None:
    """Apply ``state`` through the parent class, then restore ``_current_address``.

    The value is read from ``state.extra_state["current_address"]``; the entry
    is looked up directly, so it is expected to be present.
    """
    super()._set_state(state)
    extra = state.extra_state
    self._current_address = extra["current_address"]

async def on_enter(self) -> None:
self.session.generate_reply(instructions="Ask the user to provide their address.")

Expand Down
19 changes: 18 additions & 1 deletion livekit-agents/livekit/agents/beta/workflows/email_address.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import re
from dataclasses import dataclass
from typing import TYPE_CHECKING
from typing import TYPE_CHECKING, Any

from ... import llm, stt, tts, vad
from ...llm.tool_context import ToolError, ToolFlag, function_tool
Expand All @@ -12,6 +12,7 @@
from ...voice.speech_handle import SpeechHandle

if TYPE_CHECKING:
from ...voice.agent import _AgentState
from ...voice.audio_recognition import TurnDetectionMode

EMAIL_REGEX = (
Expand All @@ -37,6 +38,10 @@ def __init__(
tts: NotGivenOr[tts.TTS | None] = NOT_GIVEN,
allow_interruptions: NotGivenOr[bool] = NOT_GIVEN,
) -> None:
self._init_kwargs = {
"extra_instructions": extra_instructions,
"allow_interruptions": allow_interruptions,
}
super().__init__(
instructions=(
"You are only a single step in a broader system, responsible solely for capturing an email address.\n"
Expand Down Expand Up @@ -78,6 +83,9 @@ def __init__(
# used to ignore the call to confirm_email_address in case the LLM is hallucinating and not asking for user confirmation
self._email_update_speech_handle: SpeechHandle | None = None

def get_init_kwargs(self) -> dict[str, Any]:
    """Return the constructor keyword arguments captured in ``__init__``.

    A shallow copy is returned so that a caller mutating the result cannot
    alter the kwargs stored on this instance.

    Returns:
        A new dict holding the ``extra_instructions`` and ``allow_interruptions``
        values recorded by the constructor.
    """
    return dict(self._init_kwargs)

async def on_enter(self) -> None:
self.session.generate_reply(instructions="Ask the user to provide an email address.")

Expand Down Expand Up @@ -128,3 +136,12 @@ async def decline_email_capture(self, reason: str) -> None:
"""
if not self.done():
self.complete(ToolError(f"couldn't get the email address: {reason}"))

def _get_state(self) -> _AgentState:
    """Return the parent class's state with ``_current_email`` added to ``extra_state``."""
    snapshot = super()._get_state()
    # Stored under "current_email", the same key _set_state reads back.
    snapshot.extra_state["current_email"] = self._current_email
    return snapshot

def _set_state(self, state: _AgentState) -> None:
    """Apply ``state`` through the parent class, then restore ``_current_email``.

    The value is read from ``state.extra_state["current_email"]``; the entry
    is looked up directly, so it is expected to be present.
    """
    super()._set_state(state)
    extra = state.extra_state
    self._current_email = extra["current_email"]
Loading
Loading