diff --git a/.github/workflows/extended.yml b/.github/workflows/extended.yml index e9eb27dd96527..9768d475c9e8c 100644 --- a/.github/workflows/extended.yml +++ b/.github/workflows/extended.yml @@ -66,9 +66,10 @@ jobs: # Check crate compiles and base cargo check passes linux-build-lib: name: linux build test - runs-on: ubuntu-latest + runs-on: ${{ github.repository_owner == 'apache' && format('runs-on={0},family=m8a,cpu=8,image=ubuntu24-full-x64,extras=s3-cache,disk=large,tag=datafusion', github.run_id) || 'ubuntu-latest' }} # note: do not use amd/rust container to preserve disk space steps: + - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # v2.0.3 - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: ref: ${{ github.event.inputs.pr_head_sha }} # will be empty if triggered by push @@ -80,7 +81,9 @@ jobs: source $HOME/.cargo/env rustup toolchain install - name: Install Protobuf Compiler - run: sudo apt-get install -y protobuf-compiler + run: | + sudo apt-get update + sudo apt-get install -y protobuf-compiler - name: Prepare cargo build run: | cargo check --profile ci --all-targets @@ -90,9 +93,11 @@ jobs: linux-test-extended: name: cargo test 'extended_tests' (amd64) needs: [linux-build-lib] - runs-on: ubuntu-latest + runs-on: ${{ github.repository_owner == 'apache' && format('runs-on={0},family=m8a,cpu=32,image=ubuntu24-full-x64,extras=s3-cache,disk=large,tag=datafusion,spot=false', github.run_id) || 'ubuntu-latest' }} + # spot=false because the tests are long, https://runs-on.com/configuration/spot-instances/#disable-spot-pricing # note: do not use amd/rust container to preserve disk space steps: + - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # v2.0.3 - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: ref: ${{ github.event.inputs.pr_head_sha }} # will be empty if triggered by push @@ -106,7 +111,9 @@ jobs: source $HOME/.cargo/env rustup toolchain install - name: Install 
Protobuf Compiler - run: sudo apt-get install -y protobuf-compiler + run: | + sudo apt-get update + sudo apt-get install -y protobuf-compiler # For debugging, test binaries can be large. - name: Show available disk space run: | @@ -133,10 +140,11 @@ jobs: # Check answers are correct when hash values collide hash-collisions: name: cargo test hash collisions (amd64) - runs-on: ubuntu-latest + runs-on: ${{ github.repository_owner == 'apache' && format('runs-on={0},family=m8a,cpu=16,image=ubuntu24-full-x64,extras=s3-cache,disk=large,tag=datafusion', github.run_id) || 'ubuntu-latest' }} container: image: amd64/rust steps: + - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # v2.0.3 - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: ref: ${{ github.event.inputs.pr_head_sha }} # will be empty if triggered by push @@ -154,10 +162,12 @@ jobs: sqllogictest-sqlite: name: "Run sqllogictests with the sqlite test suite" - runs-on: ubuntu-latest + runs-on: ${{ github.repository_owner == 'apache' && format('runs-on={0},family=m8a,cpu=48,image=ubuntu24-full-x64,extras=s3-cache,disk=large,tag=datafusion,spot=false', github.run_id) || 'ubuntu-latest' }} + # spot=false because the tests are long, https://runs-on.com/configuration/spot-instances/#disable-spot-pricing container: image: amd64/rust steps: + - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # v2.0.3 - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: ref: ${{ github.event.inputs.pr_head_sha }} # will be empty if triggered by push diff --git a/datafusion/sqllogictest/bin/sqllogictests.rs b/datafusion/sqllogictest/bin/sqllogictests.rs index 3571377354eb4..463b7b03a760c 100644 --- a/datafusion/sqllogictest/bin/sqllogictests.rs +++ b/datafusion/sqllogictest/bin/sqllogictests.rs @@ -44,9 +44,11 @@ use datafusion::common::runtime::SpawnedTask; use futures::FutureExt; use std::ffi::OsStr; use std::fs; -use std::io::{IsTerminal, stdout}; +use 
std::io::{IsTerminal, stderr, stdout}; use std::path::{Path, PathBuf}; use std::str::FromStr; +use std::sync::Arc; +use std::sync::atomic::{AtomicUsize, Ordering}; #[cfg(feature = "postgres")] mod postgres_container; @@ -110,6 +112,13 @@ async fn run_tests() -> Result<()> { options.warn_on_ignored(); + // Print parallelism info for debugging CI performance + eprintln!( + "Running with {} test threads (available parallelism: {})", + options.test_threads, + get_available_parallelism() + ); + #[cfg(feature = "postgres")] initialize_postgres_container(&options).await?; @@ -147,6 +156,10 @@ async fn run_tests() -> Result<()> { } let num_tests = test_files.len(); + // For CI environments without TTY, print progress periodically + let is_ci = !stderr().is_terminal(); + let completed_count = Arc::new(AtomicUsize::new(0)); + let errors: Vec<_> = futures::stream::iter(test_files) .map(|test_file| { let validator = if options.include_sqlite @@ -162,10 +175,12 @@ async fn run_tests() -> Result<()> { let filters = options.filters.clone(); let relative_path = test_file.relative_path.clone(); + let relative_path_for_timing = test_file.relative_path.clone(); let currently_running_sql_tracker = CurrentlyExecutingSqlTracker::new(); let currently_running_sql_tracker_clone = currently_running_sql_tracker.clone(); + let file_start = Instant::now(); SpawnedTask::spawn(async move { match ( options.postgres_runner, @@ -227,14 +242,38 @@ async fn run_tests() -> Result<()> { ) .await? 
} + }; + // Log slow files (>30s) for CI debugging + let elapsed = file_start.elapsed(); + if elapsed.as_secs() > 30 { + eprintln!( + "Slow file: {} took {:.1}s", + relative_path_for_timing.display(), + elapsed.as_secs_f64() + ); + } - Ok(()) as Result<()> + Ok(()) }) .join() .map(move |result| (result, relative_path, currently_running_sql_tracker)) }) // run up to num_cpus streams in parallel .buffer_unordered(options.test_threads) + .inspect({ + let completed_count = Arc::clone(&completed_count); + move |_| { + let completed = completed_count.fetch_add(1, Ordering::Relaxed) + 1; + // In CI (no TTY), print progress every 50 files and once on the final file + if is_ci && (completed.is_multiple_of(50) || completed == num_tests) { + eprintln!( + "Progress: {}/{} files completed ({:.0}%)", + completed, + num_tests, + (completed as f64 / num_tests as f64) * 100.0 + ); + } + } + }) .flat_map(|(result, test_file_path, current_sql)| { // Filter out any Ok() leaving only the DataFusionErrors futures::stream::iter(match result {