From 3ca5cd04f2787e1f41145d7b642d5526c9a17f84 Mon Sep 17 00:00:00 2001 From: Daniel Edrisian Date: Sun, 14 Sep 2025 14:50:48 -0700 Subject: [PATCH 1/4] Fix flaky --- .github/workflows/rust-ci.yml | 74 ++++++++++++++-------------- codex-rs/core/tests/suite/compact.rs | 57 +++++++++++++++++++-- codex-rs/core/tests/suite/review.rs | 37 +++++++++++--- 3 files changed, 121 insertions(+), 47 deletions(-) diff --git a/.github/workflows/rust-ci.yml b/.github/workflows/rust-ci.yml index 280939c611d..18ffe887999 100644 --- a/.github/workflows/rust-ci.yml +++ b/.github/workflows/rust-ci.yml @@ -85,7 +85,7 @@ jobs: # --- CI to validate on different os/targets -------------------------------- lint_build_test: - name: ${{ matrix.runner }} - ${{ matrix.target }}${{ matrix.profile == 'release' && ' (release)' || '' }} + name: ${{ matrix.runner }} - ${{ matrix.target }} [run ${{ matrix.repeat }}]${{ matrix.profile == 'release' && ' (release)' || '' }} runs-on: ${{ matrix.runner }} timeout-minutes: 30 needs: changed @@ -99,47 +99,49 @@ jobs: fail-fast: false matrix: include: - - runner: macos-14 - target: aarch64-apple-darwin - profile: dev - - runner: macos-14 - target: x86_64-apple-darwin - profile: dev - - runner: ubuntu-24.04 - target: x86_64-unknown-linux-musl - profile: dev - - runner: ubuntu-24.04 - target: x86_64-unknown-linux-gnu - profile: dev - - runner: ubuntu-24.04-arm - target: aarch64-unknown-linux-musl - profile: dev - - runner: ubuntu-24.04-arm - target: aarch64-unknown-linux-gnu - profile: dev + # - runner: macos-14 + # target: aarch64-apple-darwin + # profile: dev + # - runner: macos-14 + # target: x86_64-apple-darwin + # profile: dev + # - runner: ubuntu-24.04 + # target: x86_64-unknown-linux-musl + # profile: dev + # - runner: ubuntu-24.04 + # target: x86_64-unknown-linux-gnu + # profile: dev + # - runner: ubuntu-24.04-arm + # target: aarch64-unknown-linux-musl + # profile: dev + # - runner: ubuntu-24.04-arm + # target: aarch64-unknown-linux-gnu + # profile: dev - runner: windows-latest target: x86_64-pc-windows-msvc profile: dev - - runner: windows-11-arm - target: aarch64-pc-windows-msvc - profile: dev + # - runner: windows-11-arm + # target: aarch64-pc-windows-msvc + # profile: dev # Also run representative release builds on Mac and Linux because # there could be release-only build errors we want to catch. # Hopefully this also pre-populates the build cache to speed up # releases. - - runner: macos-14 - target: aarch64-apple-darwin - profile: release - - runner: ubuntu-24.04 - target: x86_64-unknown-linux-musl - profile: release - - runner: windows-latest - target: x86_64-pc-windows-msvc - profile: release - - runner: windows-11-arm - target: aarch64-pc-windows-msvc - profile: release + # - runner: macos-14 + # target: aarch64-apple-darwin + # profile: release + # - runner: ubuntu-24.04 + # target: x86_64-unknown-linux-musl + # profile: release + # - runner: windows-latest + # target: x86_64-pc-windows-msvc + # profile: release + # - runner: windows-11-arm + # target: aarch64-pc-windows-msvc + # profile: release + + repeat: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30] steps: - uses: actions/checkout@v5 @@ -165,7 +167,7 @@ jobs: - name: cargo clippy id: clippy - run: cargo clippy --target ${{ matrix.target }} --all-features --tests --profile ${{ matrix.profile }} -- -D warnings + run: cargo clippy --target ${{ matrix.target }} -p codex-core --tests --profile ${{ matrix.profile }} # Running `cargo build` from the workspace root builds the workspace using # the union of all features from third-party crates. This can mask errors @@ -190,7 +192,7 @@ jobs: # Tests take too long for release builds to run them on every PR. if: ${{ matrix.profile != 'release' }} continue-on-error: true - run: cargo nextest run --all-features --no-fail-fast --target ${{ matrix.target }} + run: cargo nextest run -p codex-core --no-fail-fast --target ${{ matrix.target }} env: RUST_BACKTRACE: 1 diff --git a/codex-rs/core/tests/suite/compact.rs b/codex-rs/core/tests/suite/compact.rs index 72f4021e83c..a8bd9bfbd47 100644 --- a/codex-rs/core/tests/suite/compact.rs +++ b/codex-rs/core/tests/suite/compact.rs @@ -1,6 +1,7 @@ #![expect(clippy::unwrap_used)] use codex_core::CodexAuth; +use codex_core::CodexConversation; use codex_core::ConversationManager; use codex_core::ModelProviderInfo; use codex_core::NewConversation; @@ -16,6 +17,8 @@ use core_test_support::load_default_config_for_test; use core_test_support::wait_for_event; use serde_json::Value; use tempfile::TempDir; +use tokio::time::Duration; +use tokio::time::timeout; use wiremock::BodyPrintLimit; use wiremock::Mock; use wiremock::MockServer; @@ -126,6 +129,22 @@ async fn start_mock_server() -> MockServer { .await } +async fn wait_for_non_auto_task_complete(codex: &CodexConversation, wait_time: Duration) { + loop { + let wait_budget = wait_time.max(Duration::from_secs(5)); + let event = match timeout(wait_budget, codex.next_event()).await { + Ok(Ok(ev)) => ev, + Ok(Err(_)) => panic!("stream ended unexpectedly"), + Err(_) => panic!("timeout waiting for event"), + }; + if let EventMsg::TaskComplete(_) = &event.msg + && !event.id.starts_with("auto-compact-") + { + break; + } + } +} + pub(super) const FIRST_REPLY: &str = "FIRST_REPLY"; pub(super) const SUMMARY_TEXT: &str = "SUMMARY_ONLY_CONTEXT"; pub(super) const SUMMARIZE_TRIGGER: &str = "Start Summarization"; @@ -366,7 +385,8 @@ async fn summarize_context_three_requests_and_instructions() { ); } -#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +#[cfg_attr(windows, tokio::test(flavor = "multi_thread", worker_threads = 4))] +#[cfg_attr(not(windows), tokio::test(flavor = "multi_thread", worker_threads = 2))] async fn auto_compact_runs_after_token_limit_hit() { if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() { println!( @@ -453,7 +473,8 @@ async fn auto_compact_runs_after_token_limit_hit() { }) .await .unwrap(); - wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await; + + wait_for_non_auto_task_complete(&codex, Duration::from_secs(20)).await; codex .submit(Op::UserInput { @@ -463,13 +484,39 @@ async fn auto_compact_runs_after_token_limit_hit() { }) .await .unwrap(); - wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await; + + wait_for_non_auto_task_complete(&codex, Duration::from_secs(20)).await; // wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await; let requests = server.received_requests().await.unwrap(); - assert_eq!(requests.len(), 3, "auto compact should add a third request"); + assert!( + requests.len() >= 3, + "auto compact should add at least a third request, got {}", + requests.len() + ); + let is_auto_compact = |req: &wiremock::Request| { + std::str::from_utf8(&req.body) + .unwrap_or("") + .contains("You have exceeded the maximum number of tokens") + }; + let auto_compact_count = requests.iter().filter(|req| is_auto_compact(req)).count(); + assert_eq!( + auto_compact_count, 1, + "expected exactly one auto compact request" + ); + let auto_compact_index = requests + .iter() + .enumerate() + .find_map(|(idx, req)| is_auto_compact(req).then_some(idx)) + .expect("auto compact request missing"); + assert_eq!( + auto_compact_index, 2, + "auto compact should add a third request" + ); - let body3 = requests[2].body_json::().unwrap(); + let body3 = requests[auto_compact_index] + .body_json::() + .unwrap(); let instructions = body3 .get("instructions") .and_then(|v| v.as_str()) diff --git a/codex-rs/core/tests/suite/review.rs b/codex-rs/core/tests/suite/review.rs index 21d447e25d2..cf5d650ee86 100644 --- a/codex-rs/core/tests/suite/review.rs +++ b/codex-rs/core/tests/suite/review.rs @@ -16,6 +16,7 @@ use codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR; use core_test_support::load_default_config_for_test; use core_test_support::load_sse_fixture_with_id_from_str; use core_test_support::wait_for_event; +use core_test_support::wait_for_event_with_timeout; use pretty_assertions::assert_eq; use std::path::PathBuf; use std::sync::Arc; @@ -118,7 +119,8 @@ async fn review_op_emits_lifecycle_and_review_output() { /// When the model returns plain text that is not JSON, ensure the child /// lifecycle still occurs and the plain text is surfaced via /// ExitedReviewMode(Some(..)) as the overall_explanation. -#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +#[cfg_attr(windows, tokio::test(flavor = "multi_thread", worker_threads = 4))] +#[cfg_attr(not(windows), tokio::test(flavor = "multi_thread", worker_threads = 2))] async fn review_op_with_plain_text_emits_review_fallback() { if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() { println!( @@ -168,7 +170,8 @@ async fn review_op_with_plain_text_emits_review_fallback() { /// When the model returns structured JSON in a review, ensure no AgentMessage /// is emitted; the UI consumes the structured result via ExitedReviewMode. -#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +#[cfg_attr(windows, tokio::test(flavor = "multi_thread", worker_threads = 4))] +#[cfg_attr(not(windows), tokio::test(flavor = "multi_thread", worker_threads = 2))] async fn review_does_not_emit_agent_message_on_structured_output() { if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() { println!( @@ -293,7 +296,8 @@ async fn review_uses_custom_review_model_from_config() { /// When a review session begins, it must not prepend prior chat history from /// the parent session. The request `input` should contain only the review /// prompt from the user. -#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +#[cfg_attr(windows, tokio::test(flavor = "multi_thread", worker_threads = 4))] +#[cfg_attr(not(windows), tokio::test(flavor = "multi_thread", worker_threads = 2))] async fn review_input_isolated_from_parent_history() { if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() { println!( @@ -393,9 +397,26 @@ async fn review_input_isolated_from_parent_history() { .await .unwrap(); - let _entered = wait_for_event(&codex, |ev| matches!(ev, EventMsg::EnteredReviewMode(_))).await; - let _closed = wait_for_event(&codex, |ev| matches!(ev, EventMsg::ExitedReviewMode(None))).await; - let _complete = wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await; + let _entered = wait_for_event_with_timeout( + &codex, + |ev| matches!(ev, EventMsg::EnteredReviewMode(_)), + tokio::time::Duration::from_secs(20), + ) + .await; + + let _closed = wait_for_event_with_timeout( + &codex, + |ev| matches!(ev, EventMsg::ExitedReviewMode(_)), // or ExitedReviewMode(None) as appropriate + tokio::time::Duration::from_secs(20), + ) + .await; + + let _complete = wait_for_event_with_timeout( + &codex, + |ev| matches!(ev, EventMsg::TaskComplete(_)), + tokio::time::Duration::from_secs(20), + ) + .await; // Assert the request `input` contains only the single review user message. let request = &server.received_requests().await.unwrap()[0]; @@ -507,6 +528,8 @@ async fn start_responses_server_with_sse(sse_raw: &str, expected_requests: usize .respond_with( ResponseTemplate::new(200) .insert_header("content-type", "text/event-stream") + .insert_header("connection", "close") + .insert_header("content-length", sse.len().to_string()) .set_body_raw(sse.clone(), "text/event-stream"), ) .expect(expected_requests as u64) @@ -527,6 +550,8 @@ where { let model_provider = ModelProviderInfo { base_url: Some(format!("{}/v1", server.uri())), + // Give Windows runners breathing room. + stream_idle_timeout_ms: Some(if cfg!(windows) { 10_000 } else { 1_000 }), ..built_in_model_providers()["openai"].clone() }; let mut config = load_default_config_for_test(codex_home); From c5ad3cb029dfe5296a2a86901413752263f19b73 Mon Sep 17 00:00:00 2001 From: Daniel Edrisian Date: Sun, 14 Sep 2025 14:57:28 -0700 Subject: [PATCH 2/4] rev --- codex-rs/core/tests/suite/review.rs | 28 ++++------------------------ 1 file changed, 4 insertions(+), 24 deletions(-) diff --git a/codex-rs/core/tests/suite/review.rs b/codex-rs/core/tests/suite/review.rs index cf5d650ee86..b79c59ba5f1 100644 --- a/codex-rs/core/tests/suite/review.rs +++ b/codex-rs/core/tests/suite/review.rs @@ -22,6 +22,7 @@ use std::path::PathBuf; use std::sync::Arc; use tempfile::TempDir; use tokio::io::AsyncWriteExt as _; +use tokio::time::Duration; use uuid::Uuid; use wiremock::Mock; use wiremock::MockServer; @@ -397,26 +398,9 @@ async fn review_input_isolated_from_parent_history() { .await .unwrap(); - let _entered = wait_for_event_with_timeout( - &codex, - |ev| matches!(ev, EventMsg::EnteredReviewMode(_)), - tokio::time::Duration::from_secs(20), - ) - .await; - - let _closed = wait_for_event_with_timeout( - &codex, - |ev| matches!(ev, EventMsg::ExitedReviewMode(_)), // or ExitedReviewMode(None) as appropriate - tokio::time::Duration::from_secs(20), - ) - .await; - - let _complete = wait_for_event_with_timeout( - &codex, - |ev| matches!(ev, EventMsg::TaskComplete(_)), - tokio::time::Duration::from_secs(20), - ) - .await; + let _entered = wait_for_event(&codex, |ev| matches!(ev, EventMsg::EnteredReviewMode(_))).await; + let _closed = wait_for_event(&codex, |ev| matches!(ev, EventMsg::ExitedReviewMode(None))).await; + let _complete = wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await; // Assert the request `input` contains only the single review user message. let request = &server.received_requests().await.unwrap()[0]; @@ -528,8 +512,6 @@ async fn start_responses_server_with_sse(sse_raw: &str, expected_requests: usize .respond_with( ResponseTemplate::new(200) .insert_header("content-type", "text/event-stream") - .insert_header("connection", "close") - .insert_header("content-length", sse.len().to_string()) .set_body_raw(sse.clone(), "text/event-stream"), ) .expect(expected_requests as u64) @@ -550,8 +532,6 @@ where { let model_provider = ModelProviderInfo { base_url: Some(format!("{}/v1", server.uri())), - // Give Windows runners breathing room. - stream_idle_timeout_ms: Some(if cfg!(windows) { 10_000 } else { 1_000 }), ..built_in_model_providers()["openai"].clone() }; let mut config = load_default_config_for_test(codex_home); From d6d52b5b873d400dd7d6a5524c125e2ab377982b Mon Sep 17 00:00:00 2001 From: Daniel Edrisian Date: Sun, 14 Sep 2025 15:07:37 -0700 Subject: [PATCH 3/4] Clean up --- codex-rs/core/tests/suite/compact.rs | 23 ++--------------------- codex-rs/core/tests/suite/review.rs | 2 -- 2 files changed, 2 insertions(+), 23 deletions(-) diff --git a/codex-rs/core/tests/suite/compact.rs b/codex-rs/core/tests/suite/compact.rs index a8bd9bfbd47..fdf1a4e48bd 100644 --- a/codex-rs/core/tests/suite/compact.rs +++ b/codex-rs/core/tests/suite/compact.rs @@ -1,7 +1,6 @@ #![expect(clippy::unwrap_used)] use codex_core::CodexAuth; -use codex_core::CodexConversation; use codex_core::ConversationManager; use codex_core::ModelProviderInfo; use codex_core::NewConversation; @@ -17,8 +16,6 @@ use core_test_support::load_default_config_for_test; use core_test_support::wait_for_event; use serde_json::Value; use tempfile::TempDir; -use tokio::time::Duration; -use tokio::time::timeout; use wiremock::BodyPrintLimit; use wiremock::Mock; use wiremock::MockServer; @@ -129,22 +126,6 @@ async fn start_mock_server() -> MockServer { .await } -async fn wait_for_non_auto_task_complete(codex: &CodexConversation, wait_time: Duration) { - loop { - let wait_budget = wait_time.max(Duration::from_secs(5)); - let event = match timeout(wait_budget, codex.next_event()).await { - Ok(Ok(ev)) => ev, - Ok(Err(_)) => panic!("stream ended unexpectedly"), - Err(_) => panic!("timeout waiting for event"), - }; - if let EventMsg::TaskComplete(_) = &event.msg - && !event.id.starts_with("auto-compact-") - { - break; - } - } -} - pub(super) const FIRST_REPLY: &str = "FIRST_REPLY"; pub(super) const SUMMARY_TEXT: &str = "SUMMARY_ONLY_CONTEXT"; pub(super) const SUMMARIZE_TRIGGER: &str = "Start Summarization"; @@ -474,7 +455,7 @@ async fn auto_compact_runs_after_token_limit_hit() { .await .unwrap(); - wait_for_non_auto_task_complete(&codex, Duration::from_secs(20)).await; + wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await; codex .submit(Op::UserInput { @@ -485,7 +466,7 @@ async fn auto_compact_runs_after_token_limit_hit() { .await .unwrap(); - wait_for_non_auto_task_complete(&codex, Duration::from_secs(20)).await; + wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await; // wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await; let requests = server.received_requests().await.unwrap(); diff --git a/codex-rs/core/tests/suite/review.rs b/codex-rs/core/tests/suite/review.rs index b79c59ba5f1..9891c536e70 100644 --- a/codex-rs/core/tests/suite/review.rs +++ b/codex-rs/core/tests/suite/review.rs @@ -16,13 +16,11 @@ use codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR; use core_test_support::load_default_config_for_test; use core_test_support::load_sse_fixture_with_id_from_str; use core_test_support::wait_for_event; -use core_test_support::wait_for_event_with_timeout; use pretty_assertions::assert_eq; use std::path::PathBuf; use std::sync::Arc; use tempfile::TempDir; use tokio::io::AsyncWriteExt as _; -use tokio::time::Duration; use uuid::Uuid; use wiremock::Mock; use wiremock::MockServer; From a898da72399e55d2f974d4a597c0e22a3999c4a9 Mon Sep 17 00:00:00 2001 From: Daniel Edrisian Date: Sun, 14 Sep 2025 16:11:47 -0700 Subject: [PATCH 4/4] Add comments, and revert rust-ci changes. --- .github/workflows/rust-ci.yml | 74 ++++++++++++++-------------- codex-rs/core/tests/suite/compact.rs | 1 + codex-rs/core/tests/suite/review.rs | 3 ++ 3 files changed, 40 insertions(+), 38 deletions(-) diff --git a/.github/workflows/rust-ci.yml b/.github/workflows/rust-ci.yml index 18ffe887999..280939c611d 100644 --- a/.github/workflows/rust-ci.yml +++ b/.github/workflows/rust-ci.yml @@ -85,7 +85,7 @@ jobs: # --- CI to validate on different os/targets -------------------------------- lint_build_test: - name: ${{ matrix.runner }} - ${{ matrix.target }} [run ${{ matrix.repeat }}]${{ matrix.profile == 'release' && ' (release)' || '' }} + name: ${{ matrix.runner }} - ${{ matrix.target }}${{ matrix.profile == 'release' && ' (release)' || '' }} runs-on: ${{ matrix.runner }} timeout-minutes: 30 needs: changed @@ -99,49 +99,47 @@ jobs: fail-fast: false matrix: include: - # - runner: macos-14 - # target: aarch64-apple-darwin - # profile: dev - # - runner: macos-14 - # target: x86_64-apple-darwin - # profile: dev - # - runner: ubuntu-24.04 - # target: x86_64-unknown-linux-musl - # profile: dev - # - runner: ubuntu-24.04 - # target: x86_64-unknown-linux-gnu - # profile: dev - # - runner: ubuntu-24.04-arm - # target: aarch64-unknown-linux-musl - # profile: dev - # - runner: ubuntu-24.04-arm - # target: aarch64-unknown-linux-gnu - # profile: dev + - runner: macos-14 + target: aarch64-apple-darwin + profile: dev + - runner: macos-14 + target: x86_64-apple-darwin + profile: dev + - runner: ubuntu-24.04 + target: x86_64-unknown-linux-musl + profile: dev + - runner: ubuntu-24.04 + target: x86_64-unknown-linux-gnu + profile: dev + - runner: ubuntu-24.04-arm + target: aarch64-unknown-linux-musl + profile: dev + - runner: ubuntu-24.04-arm + target: aarch64-unknown-linux-gnu + profile: dev - runner: windows-latest target: x86_64-pc-windows-msvc profile: dev - # - runner: windows-11-arm - # target: aarch64-pc-windows-msvc - # profile: dev + - runner: windows-11-arm + target: aarch64-pc-windows-msvc + profile: dev # Also run representative release builds on Mac and Linux because # there could be release-only build errors we want to catch. # Hopefully this also pre-populates the build cache to speed up # releases. - # - runner: macos-14 - # target: aarch64-apple-darwin - # profile: release - # - runner: ubuntu-24.04 - # target: x86_64-unknown-linux-musl - # profile: release - # - runner: windows-latest - # target: x86_64-pc-windows-msvc - # profile: release - # - runner: windows-11-arm - # target: aarch64-pc-windows-msvc - # profile: release - - repeat: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30] + - runner: macos-14 + target: aarch64-apple-darwin + profile: release + - runner: ubuntu-24.04 + target: x86_64-unknown-linux-musl + profile: release + - runner: windows-latest + target: x86_64-pc-windows-msvc + profile: release + - runner: windows-11-arm + target: aarch64-pc-windows-msvc + profile: release steps: - uses: actions/checkout@v5 @@ -167,7 +165,7 @@ jobs: - name: cargo clippy id: clippy - run: cargo clippy --target ${{ matrix.target }} -p codex-core --tests --profile ${{ matrix.profile }} + run: cargo clippy --target ${{ matrix.target }} --all-features --tests --profile ${{ matrix.profile }} -- -D warnings # Running `cargo build` from the workspace root builds the workspace using # the union of all features from third-party crates. This can mask errors @@ -192,7 +190,7 @@ jobs: # Tests take too long for release builds to run them on every PR. if: ${{ matrix.profile != 'release' }} continue-on-error: true - run: cargo nextest run -p codex-core --no-fail-fast --target ${{ matrix.target }} + run: cargo nextest run --all-features --no-fail-fast --target ${{ matrix.target }} env: RUST_BACKTRACE: 1 diff --git a/codex-rs/core/tests/suite/compact.rs b/codex-rs/core/tests/suite/compact.rs index fdf1a4e48bd..361315f7243 100644 --- a/codex-rs/core/tests/suite/compact.rs +++ b/codex-rs/core/tests/suite/compact.rs @@ -366,6 +366,7 @@ async fn summarize_context_three_requests_and_instructions() { ); } +// Windows CI only: bump to 4 workers to prevent SSE/event starvation and test timeouts. #[cfg_attr(windows, tokio::test(flavor = "multi_thread", worker_threads = 4))] #[cfg_attr(not(windows), tokio::test(flavor = "multi_thread", worker_threads = 2))] async fn auto_compact_runs_after_token_limit_hit() { diff --git a/codex-rs/core/tests/suite/review.rs b/codex-rs/core/tests/suite/review.rs index 9891c536e70..26e0f1107a5 100644 --- a/codex-rs/core/tests/suite/review.rs +++ b/codex-rs/core/tests/suite/review.rs @@ -118,6 +118,7 @@ async fn review_op_emits_lifecycle_and_review_output() { /// When the model returns plain text that is not JSON, ensure the child /// lifecycle still occurs and the plain text is surfaced via /// ExitedReviewMode(Some(..)) as the overall_explanation. +// Windows CI only: bump to 4 workers to prevent SSE/event starvation and test timeouts. #[cfg_attr(windows, tokio::test(flavor = "multi_thread", worker_threads = 4))] #[cfg_attr(not(windows), tokio::test(flavor = "multi_thread", worker_threads = 2))] async fn review_op_with_plain_text_emits_review_fallback() { @@ -169,6 +170,7 @@ async fn review_op_with_plain_text_emits_review_fallback() { /// When the model returns structured JSON in a review, ensure no AgentMessage /// is emitted; the UI consumes the structured result via ExitedReviewMode. +// Windows CI only: bump to 4 workers to prevent SSE/event starvation and test timeouts. #[cfg_attr(windows, tokio::test(flavor = "multi_thread", worker_threads = 4))] #[cfg_attr(not(windows), tokio::test(flavor = "multi_thread", worker_threads = 2))] async fn review_does_not_emit_agent_message_on_structured_output() { @@ -295,6 +297,7 @@ async fn review_uses_custom_review_model_from_config() { /// When a review session begins, it must not prepend prior chat history from /// the parent session. The request `input` should contain only the review /// prompt from the user. +// Windows CI only: bump to 4 workers to prevent SSE/event starvation and test timeouts. #[cfg_attr(windows, tokio::test(flavor = "multi_thread", worker_threads = 4))] #[cfg_attr(not(windows), tokio::test(flavor = "multi_thread", worker_threads = 2))] async fn review_input_isolated_from_parent_history() {