Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
ce60b71
add AMD interface
chenghao-mou Feb 5, 2026
26d3aaa
add amd event emitter
chenghao-mou Feb 9, 2026
5672b37
update event emitter
chenghao-mou Feb 9, 2026
7e67624
add amd hold
chenghao-mou Feb 19, 2026
1786e0c
refactor amd result and example
chenghao-mou Feb 20, 2026
1131225
Merge branch 'main' into feat/amd
chenghao-mou Feb 20, 2026
b69884e
minor fixes
chenghao-mou Feb 20, 2026
44d845f
clean up example
chenghao-mou Feb 20, 2026
a3222bd
clean up and refactoring
chenghao-mou Mar 3, 2026
404bc2b
fix example
chenghao-mou Mar 3, 2026
3be962f
disable AMD after first use
chenghao-mou Mar 3, 2026
8517423
Merge branch 'main' into feat/amd
chenghao-mou Mar 15, 2026
0ee8791
refactoring
chenghao-mou Mar 15, 2026
1b78498
use function call for prediction
chenghao-mou Mar 15, 2026
9dc196c
exclude silence from speech duration calculation
chenghao-mou Mar 15, 2026
56c0b38
minor refactoring
chenghao-mou Mar 16, 2026
9baf7df
more fixes
chenghao-mou Mar 16, 2026
865737a
Merge branch 'main' into feat/amd
chenghao-mou Apr 2, 2026
3c845c3
fix type issues
chenghao-mou Apr 2, 2026
348e1a3
move amd out of session
chenghao-mou Apr 2, 2026
25bcf65
use llm prediction when timing out
chenghao-mou Apr 2, 2026
0feb3e7
refactor based on feedback
chenghao-mou Apr 2, 2026
46eca3c
reformat
chenghao-mou Apr 2, 2026
772b731
add docstring
chenghao-mou Apr 2, 2026
0b4bd23
update to use str enum
chenghao-mou Apr 2, 2026
8790091
use string match in example instead
chenghao-mou Apr 2, 2026
a7925e2
rename amd -> machine detection/detector
chenghao-mou Apr 3, 2026
382c9b4
address comments
chenghao-mou Apr 3, 2026
efa2286
clean up autocomplete error
chenghao-mou Apr 3, 2026
265bdf1
rename to amd
chenghao-mou Apr 3, 2026
91dadc0
address comments
chenghao-mou Apr 3, 2026
4cfcae9
rename
chenghao-mou Apr 3, 2026
6751e9a
address more comments
chenghao-mou Apr 3, 2026
8bed855
fix type issues
chenghao-mou Apr 3, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
File renamed without changes.
87 changes: 87 additions & 0 deletions examples/telephony/amd.py
Comment thread
chenghao-mou marked this conversation as resolved.
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
import logging

from dotenv import load_dotenv

from livekit.agents import (
AMD,
Agent,
AgentServer,
AgentSession,
JobContext,
JobProcess,
cli,
inference,
)
from livekit.plugins import silero
from livekit.plugins.turn_detector.multilingual import MultilingualModel

logger = logging.getLogger("basic-agent")

load_dotenv("../agents/.env")


class MyAgent(Agent):
    """Outbound-calling agent for the AMD example.

    The instructions prime the LLM for an outbound call where the callee may
    turn out to be an answering machine, an IVR menu, or a real person.
    """

    def __init__(self) -> None:
        prompt = (
            "You are reaching out to a customer with a phone call. "
            "You are calling to see if they are home. "
            "You might encounter an answering machine with a DTMF menu or IVR system. "
            "If you do, you will try to leave a message to ask them to call back."
        )
        super().__init__(instructions=prompt)


server = AgentServer()


def prewarm(proc: JobProcess):
    """Load the Silero VAD model once per worker process.

    The loaded model is cached in ``proc.userdata`` so each job can reuse it
    instead of paying the load cost per session.
    """
    vad_model = silero.VAD.load()
    proc.userdata["vad"] = vad_model


server.setup_fnc = prewarm


@server.rtc_session()
async def entrypoint(ctx: JobContext):
ctx.log_context_fields = {
"room": ctx.room.name,
}
session = AgentSession(
stt=inference.STT("deepgram/nova-3", language="multi"),
llm=inference.LLM("openai/gpt-4.1-mini"),
tts=inference.TTS("cartesia/sonic-3", voice="9626c31c-bec5-4cca-baa8-f8ba9e84c8bc"),
turn_detection=MultilingualModel(),
vad=ctx.proc.userdata["vad"],
preemptive_generation=True,
)

await session.start(
agent=MyAgent(),
room=ctx.room,
)

async with AMD(session, llm="openai/gpt-5-mini") as detector:
result = await detector.execute()

if result.category == "human":
logger.info("human answered the call, proceeding with normal conversation")
elif result.category == "machine-ivr":
logger.info("ivr menu detected, starting navigation")
elif result.category == "machine-vm":
logger.info("voicemail detected, leaving a message")
speech_handle = session.generate_reply(
instructions=(
"You've reached voicemail. Leave a brief message asking "
"the customer to call back."
),
)
await speech_handle.wait_for_playout()
session.shutdown()
elif result.category == "machine-unavailable":
logger.info("mailbox unavailable, ending call")
session.shutdown()


if __name__ == "__main__":
cli.run_app(server)
File renamed without changes.
File renamed without changes.
8 changes: 8 additions & 0 deletions livekit-agents/livekit/agents/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,11 @@
room_io,
text_transforms,
)
from .voice.amd import (
AMD,
AMDCategory,
AMDResult,
)
from .voice.background_audio import AudioConfig, BackgroundAudioPlayer, BuiltinAudioClip, PlayHandle
from .voice.room_io import RoomInputOptions, RoomIO, RoomOutputOptions
from .voice.run_result import (
Expand Down Expand Up @@ -220,6 +225,9 @@ def __getattr__(name: str) -> typing.Any:
"FunctionCallEvent",
"FunctionCallOutputEvent",
"AgentHandoffEvent",
"AMD",
"AMDCategory",
"AMDResult",
"TurnHandlingOptions",
"EndpointingOptions",
"InterruptionOptions",
Expand Down
20 changes: 16 additions & 4 deletions livekit-agents/livekit/agents/voice/agent_activity.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,8 @@ def __init__(self, agent: Agent, sess: AgentSession) -> None:
self._speech_tasks: list[asyncio.Task[Any]] = []

self._preemptive_generation: _PreemptiveGeneration | None = None
self._authorization_allowed = asyncio.Event()
self._authorization_allowed.set()

self._drain_blocked_tasks: list[asyncio.Task[Any]] = []
self._mcp_tools: list[mcp.MCPToolset] = []
Expand Down Expand Up @@ -1075,6 +1077,12 @@ def _cancel_preemptive_generation(self) -> None:
self._preemptive_generation.speech_handle._cancel()
self._preemptive_generation = None

def _pause_authorization(self) -> None:
    """Hold back speech authorization until `_resume_authorization` is called.

    Clearing the event makes the authorization waits in the reply/TTS tasks
    block, so no new agent speech is authorized while paused (used, presumably,
    while answering-machine detection is in progress — see the AMD feature).
    """
    self._authorization_allowed.clear()

def _resume_authorization(self) -> None:
    """Lift the authorization hold set by `_pause_authorization`.

    Setting the event releases any reply/TTS tasks currently blocked on the
    authorization wait. The event starts set, so authorization is allowed
    by default.
    """
    self._authorization_allowed.set()

def _interrupt_background_speeches(self, force: bool = False) -> list[SpeechHandle]:
interrupted_speeches: list[SpeechHandle] = []
for speech in self._background_speeches:
Expand Down Expand Up @@ -1977,7 +1985,8 @@ async def _tts_task_impl(

# See discussion in https://github.com/livekit/agents/issues/4432
authorization_tasks: list[asyncio.Future[Any]] = [
asyncio.ensure_future(speech_handle._wait_for_authorization())
asyncio.ensure_future(speech_handle._wait_for_authorization()),
asyncio.ensure_future(self._authorization_allowed.wait()),
]
if speech_handle.allow_interruptions:
authorization_tasks.append(asyncio.ensure_future(self._user_silence_event.wait()))
Expand Down Expand Up @@ -2279,7 +2288,8 @@ async def _pipeline_reply_task_impl(
self._session._update_agent_state("thinking")

authorization_tasks: list[asyncio.Future[Any]] = [
asyncio.ensure_future(speech_handle._wait_for_authorization())
asyncio.ensure_future(speech_handle._wait_for_authorization()),
asyncio.ensure_future(self._authorization_allowed.wait()),
]
if speech_handle.allow_interruptions:
authorization_tasks.append(asyncio.ensure_future(self._user_silence_event.wait()))
Expand Down Expand Up @@ -2605,7 +2615,8 @@ async def _realtime_reply_task(

# realtime_reply_task is called only when there's text input, native audio input is handled by _realtime_generation_task
authorization_tasks: list[asyncio.Future[Any]] = [
asyncio.ensure_future(speech_handle._wait_for_authorization())
asyncio.ensure_future(speech_handle._wait_for_authorization()),
asyncio.ensure_future(self._authorization_allowed.wait()),
]
if speech_handle.allow_interruptions:
authorization_tasks.append(asyncio.ensure_future(self._user_silence_event.wait()))
Expand Down Expand Up @@ -2721,7 +2732,8 @@ async def _realtime_generation_task_impl(
tool_ctx = llm.ToolContext(self.tools)

authorization_tasks: list[asyncio.Future[Any]] = [
asyncio.ensure_future(speech_handle._wait_for_authorization())
asyncio.ensure_future(speech_handle._wait_for_authorization()),
asyncio.ensure_future(self._authorization_allowed.wait()),
]
if speech_handle.allow_interruptions:
authorization_tasks.append(asyncio.ensure_future(self._user_silence_event.wait()))
Expand Down
42 changes: 35 additions & 7 deletions livekit-agents/livekit/agents/voice/agent_session.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
from ._utils import _set_participant_attributes
from .agent import Agent, AgentTask
from .agent_activity import AgentActivity
from .amd import AMD
from .events import (
AgentEvent,
AgentState,
Expand Down Expand Up @@ -450,8 +451,14 @@ def __init__(
self._started_at: float | None = None
self._usage_collector = ModelUsageCollector()

# ivr activity
# ivr and AMD
self._ivr_activity: IVRActivity | None = None
self._amd: AMD | None = None

@property
def amd(self) -> AMD | None:
    """The Answering Machine Detection (AMD) instance, or ``None`` if AMD is disabled.

    Also ``None`` after the session closes: ``_aclose_impl`` awaits
    ``self._amd.aclose()`` and resets the attribute.
    """
    return self._amd

def on(self, event: EventTypes, callback: Callable | None = None) -> Callable:
if event == "metrics_collected" and callback is not None:
Expand Down Expand Up @@ -736,13 +743,8 @@ async def start(
tasks.append(task)

if self.options.ivr_detection:
self._ivr_activity = IVRActivity(self)

# inject the IVR activity tools into the session tools
self._tools.extend(self._ivr_activity.tools)

tasks.append(
asyncio.create_task(self._ivr_activity.start(), name="_ivr_activity_start")
asyncio.create_task(self._start_ivr_detection(), name="_ivr_activity_start")
)

current_span.set_attribute(trace_types.ATTR_ROOM_NAME, job_ctx.room.name)
Expand Down Expand Up @@ -908,6 +910,10 @@ async def _aclose_impl(
self._cancel_user_away_timer()
self._on_aec_warmup_expired() # always clear aec warmup when closing the session

if self._amd is not None:
await self._amd.aclose()
self._amd = None

activity = self._activity
while activity and isinstance(agent_task := activity.agent, AgentTask):
# notify AgentTask to complete and wait it to resume the parent agent
Expand Down Expand Up @@ -1059,6 +1065,28 @@ def update_options(
turn_detection=turn_detection,
)

async def _start_ivr_detection(self, transcript: str | None = None) -> None:
    """Start IVR detection on this session.

    This method injects the DTMF tool and enables loop/silence detection,
    allowing the agent to navigate IVR phone trees. Safe to call after AMD resolves.

    Args:
        transcript (str | None, optional): The transcript to start IVR detection with.
    """
    # Idempotent: a second call is a no-op so tools aren't injected twice.
    if self._ivr_activity is not None:
        logger.warning("IVR detection already started, skipping")
        return

    self._ivr_activity = IVRActivity(self)
    # Expose the IVR activity's tools (e.g. DTMF) through the session tool list.
    self._tools.extend(self._ivr_activity.tools)
    await self._ivr_activity.start()
    if transcript is not None:
        logger.debug("IVR detection started with transcript", extra={"transcript": transcript})
        # Replay the transcript captured before IVR detection started (e.g. during
        # AMD) so the activity can react to it. NOTE(review): reaches into a
        # private IVRActivity callback — consider a public replay API.
        self._ivr_activity._on_user_input_transcribed(
            UserInputTranscribedEvent(transcript=transcript, is_final=True)
        )

def say(
self,
text: str | AsyncIterable[str],
Expand Down
4 changes: 4 additions & 0 deletions livekit-agents/livekit/agents/voice/amd/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from .classifier import AMDCategory, AMDResult
from .detector import AMD

__all__ = ["AMD", "AMDCategory", "AMDResult"]
Loading
Loading