Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 37 additions & 13 deletions codex-rs/tui/src/chatwidget/realtime.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ use codex_protocol::protocol::RealtimeConversationClosedEvent;
use codex_protocol::protocol::RealtimeConversationRealtimeEvent;
use codex_protocol::protocol::RealtimeConversationStartedEvent;
use codex_protocol::protocol::RealtimeEvent;
#[cfg(not(target_os = "linux"))]
use std::sync::atomic::AtomicUsize;

const REALTIME_CONVERSATION_PROMPT: &str = "You are in a realtime voice conversation in the Codex TUI. Respond conversationally and concisely.";

Expand All @@ -30,6 +32,10 @@ pub(super) struct RealtimeConversationUiState {
capture: Option<crate::voice::VoiceCapture>,
#[cfg(not(target_os = "linux"))]
audio_player: Option<crate::voice::RealtimeAudioPlayer>,
#[cfg(not(target_os = "linux"))]
// Shared queue depth lets capture suppress echoed speaker audio without
// taking the playback queue lock from the input callback.
playback_queued_samples: Arc<AtomicUsize>,
}

impl RealtimeConversationUiState {
Expand Down Expand Up @@ -192,16 +198,15 @@ impl ChatWidget {
None
}

fn realtime_footer_hint_items() -> Vec<(String, String)> {
vec![("/realtime".to_string(), "stop live voice".to_string())]
}

pub(super) fn start_realtime_conversation(&mut self) {
self.realtime_conversation.phase = RealtimeConversationPhase::Starting;
self.realtime_conversation.requested_close = false;
self.realtime_conversation.session_id = None;
self.realtime_conversation.warned_audio_only_submission = false;
self.set_footer_hint_override(Some(Self::realtime_footer_hint_items()));
self.set_footer_hint_override(Some(vec![(
"/realtime".to_string(),
"stop live voice".to_string(),
)]));
self.submit_op(Op::RealtimeConversationStart(ConversationStartParams {
prompt: REALTIME_CONVERSATION_PROMPT.to_string(),
session_id: None,
Expand Down Expand Up @@ -251,7 +256,10 @@ impl ChatWidget {
self.realtime_conversation.phase = RealtimeConversationPhase::Active;
self.realtime_conversation.session_id = ev.session_id;
self.realtime_conversation.warned_audio_only_submission = false;
self.set_footer_hint_override(Some(Self::realtime_footer_hint_items()));
self.set_footer_hint_override(Some(vec![(
"/realtime".to_string(),
"stop live voice".to_string(),
)]));
self.start_realtime_local_audio();
self.request_redraw();
}
Expand All @@ -264,11 +272,17 @@ impl ChatWidget {
RealtimeEvent::SessionUpdated { session_id, .. } => {
self.realtime_conversation.session_id = Some(session_id);
}
RealtimeEvent::InputAudioSpeechStarted(_) => {}
RealtimeEvent::InputAudioSpeechStarted(_) | RealtimeEvent::ResponseCancelled(_) => {
#[cfg(not(target_os = "linux"))]
if let Some(player) = &self.realtime_conversation.audio_player {
// Once the server detects user speech or the current response is cancelled,
// any buffered assistant audio is stale and should stop gating mic input.
player.clear();
}
}
RealtimeEvent::InputTranscriptDelta(_) => {}
RealtimeEvent::OutputTranscriptDelta(_) => {}
RealtimeEvent::AudioOut(frame) => self.enqueue_realtime_audio_out(&frame),
RealtimeEvent::ResponseCancelled(_) => {}
RealtimeEvent::ConversationItemAdded(_item) => {}
RealtimeEvent::ConversationItemDone { .. } => {}
RealtimeEvent::HandoffRequested(_) => {}
Expand Down Expand Up @@ -296,8 +310,11 @@ impl ChatWidget {
#[cfg(not(target_os = "linux"))]
{
if self.realtime_conversation.audio_player.is_none() {
self.realtime_conversation.audio_player =
crate::voice::RealtimeAudioPlayer::start(&self.config).ok();
self.realtime_conversation.audio_player = crate::voice::RealtimeAudioPlayer::start(
&self.config,
Arc::clone(&self.realtime_conversation.playback_queued_samples),
)
.ok();
}
if let Some(player) = &self.realtime_conversation.audio_player
&& let Err(err) = player.enqueue_frame(frame)
Expand All @@ -324,6 +341,7 @@ impl ChatWidget {
let capture = match crate::voice::VoiceCapture::start_realtime(
&self.config,
self.app_event_tx.clone(),
Arc::clone(&self.realtime_conversation.playback_queued_samples),
) {
Ok(capture) => capture,
Err(err) => {
Expand All @@ -342,8 +360,11 @@ impl ChatWidget {
self.realtime_conversation.capture_stop_flag = Some(stop_flag.clone());
self.realtime_conversation.capture = Some(capture);
if self.realtime_conversation.audio_player.is_none() {
self.realtime_conversation.audio_player =
crate::voice::RealtimeAudioPlayer::start(&self.config).ok();
self.realtime_conversation.audio_player = crate::voice::RealtimeAudioPlayer::start(
&self.config,
Arc::clone(&self.realtime_conversation.playback_queued_samples),
)
.ok();
}

std::thread::spawn(move || {
Expand Down Expand Up @@ -381,7 +402,10 @@ impl ChatWidget {
}
RealtimeAudioDeviceKind::Speaker => {
self.stop_realtime_speaker();
match crate::voice::RealtimeAudioPlayer::start(&self.config) {
match crate::voice::RealtimeAudioPlayer::start(
&self.config,
Arc::clone(&self.realtime_conversation.playback_queued_samples),
) {
Ok(player) => {
self.realtime_conversation.audio_player = Some(player);
}
Expand Down
12 changes: 10 additions & 2 deletions codex-rs/tui/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,7 @@ mod voice {
use std::sync::Mutex;
use std::sync::atomic::AtomicBool;
use std::sync::atomic::AtomicU16;
use std::sync::atomic::AtomicUsize;

pub struct RecordedAudio {
pub data: Vec<i16>,
Expand All @@ -158,7 +159,11 @@ mod voice {
Err("voice input is unavailable in this build".to_string())
}

pub fn start_realtime(_config: &Config, _tx: AppEventSender) -> Result<Self, String> {
pub fn start_realtime(
_config: &Config,
_tx: AppEventSender,
_playback_queued_samples: Arc<AtomicUsize>,
) -> Result<Self, String> {
Err("voice input is unavailable in this build".to_string())
}

Expand Down Expand Up @@ -198,7 +203,10 @@ mod voice {
}

impl RealtimeAudioPlayer {
pub(crate) fn start(_config: &Config) -> Result<Self, String> {
pub(crate) fn start(
_config: &Config,
_queued_samples: Arc<AtomicUsize>,
) -> Result<Self, String> {
Err("voice output is unavailable in this build".to_string())
}

Expand Down
Loading
Loading