Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 16 additions & 6 deletions .github/workflows/extended.yml
Original file line number Diff line number Diff line change
Expand Up @@ -66,9 +66,10 @@ jobs:
# Check crate compiles and base cargo check passes
linux-build-lib:
name: linux build test
runs-on: ubuntu-latest
runs-on: ${{ github.repository_owner == 'apache' && format('runs-on={0},family=m8a,cpu=8,image=ubuntu24-full-x64,extras=s3-cache,disk=large,tag=datafusion', github.run_id) || 'ubuntu-latest' }}
# note: do not use amd/rust container to preserve disk space
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # v2.0.3
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
ref: ${{ github.event.inputs.pr_head_sha }} # will be empty if triggered by push
Expand All @@ -80,7 +81,9 @@ jobs:
source $HOME/.cargo/env
rustup toolchain install
- name: Install Protobuf Compiler
run: sudo apt-get install -y protobuf-compiler
run: |
sudo apt-get update
sudo apt-get install -y protobuf-compiler
- name: Prepare cargo build
run: |
cargo check --profile ci --all-targets
Expand All @@ -90,9 +93,11 @@ jobs:
linux-test-extended:
name: cargo test 'extended_tests' (amd64)
needs: [linux-build-lib]
runs-on: ubuntu-latest
runs-on: ${{ github.repository_owner == 'apache' && format('runs-on={0},family=m8a,cpu=32,image=ubuntu24-full-x64,extras=s3-cache,disk=large,tag=datafusion,spot=false', github.run_id) || 'ubuntu-latest' }}
# spot=false because the tests are long, https://runs-on.com/configuration/spot-instances/#disable-spot-pricing
# note: do not use amd/rust container to preserve disk space
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # v2.0.3
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
ref: ${{ github.event.inputs.pr_head_sha }} # will be empty if triggered by push
Expand All @@ -106,7 +111,9 @@ jobs:
source $HOME/.cargo/env
rustup toolchain install
- name: Install Protobuf Compiler
run: sudo apt-get install -y protobuf-compiler
run: |
sudo apt-get update
sudo apt-get install -y protobuf-compiler
# For debugging, test binaries can be large.
- name: Show available disk space
run: |
Expand All @@ -133,10 +140,11 @@ jobs:
# Check answers are correct when hash values collide
hash-collisions:
name: cargo test hash collisions (amd64)
runs-on: ubuntu-latest
runs-on: ${{ github.repository_owner == 'apache' && format('runs-on={0},family=m8a,cpu=16,image=ubuntu24-full-x64,extras=s3-cache,disk=large,tag=datafusion', github.run_id) || 'ubuntu-latest' }}
container:
image: amd64/rust
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # v2.0.3
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
ref: ${{ github.event.inputs.pr_head_sha }} # will be empty if triggered by push
Expand All @@ -154,10 +162,12 @@ jobs:

sqllogictest-sqlite:
name: "Run sqllogictests with the sqlite test suite"
runs-on: ubuntu-latest
runs-on: ${{ github.repository_owner == 'apache' && format('runs-on={0},family=m8a,cpu=48,image=ubuntu24-full-x64,extras=s3-cache,disk=large,tag=datafusion,spot=false', github.run_id) || 'ubuntu-latest' }}
# spot=false because the tests are long, https://runs-on.com/configuration/spot-instances/#disable-spot-pricing
container:
image: amd64/rust
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # v2.0.3
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
ref: ${{ github.event.inputs.pr_head_sha }} # will be empty if triggered by push
Expand Down
43 changes: 41 additions & 2 deletions datafusion/sqllogictest/bin/sqllogictests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,11 @@ use datafusion::common::runtime::SpawnedTask;
use futures::FutureExt;
use std::ffi::OsStr;
use std::fs;
use std::io::{IsTerminal, stdout};
use std::io::{IsTerminal, stderr, stdout};
use std::path::{Path, PathBuf};
use std::str::FromStr;
use std::sync::Arc;
use std::sync::atomic::{AtomicUsize, Ordering};

#[cfg(feature = "postgres")]
mod postgres_container;
Expand Down Expand Up @@ -110,6 +112,13 @@ async fn run_tests() -> Result<()> {

options.warn_on_ignored();

// Print parallelism info for debugging CI performance
eprintln!(
"Running with {} test threads (available parallelism: {})",
options.test_threads,
get_available_parallelism()
);

#[cfg(feature = "postgres")]
initialize_postgres_container(&options).await?;

Expand Down Expand Up @@ -147,6 +156,10 @@ async fn run_tests() -> Result<()> {
}

let num_tests = test_files.len();
// For CI environments without TTY, print progress periodically
let is_ci = !stderr().is_terminal();
let completed_count = Arc::new(AtomicUsize::new(0));

let errors: Vec<_> = futures::stream::iter(test_files)
.map(|test_file| {
let validator = if options.include_sqlite
Expand All @@ -162,10 +175,12 @@ async fn run_tests() -> Result<()> {
let filters = options.filters.clone();

let relative_path = test_file.relative_path.clone();
let relative_path_for_timing = test_file.relative_path.clone();

let currently_running_sql_tracker = CurrentlyExecutingSqlTracker::new();
let currently_running_sql_tracker_clone =
currently_running_sql_tracker.clone();
let file_start = Instant::now();
SpawnedTask::spawn(async move {
match (
options.postgres_runner,
Expand Down Expand Up @@ -227,14 +242,38 @@ async fn run_tests() -> Result<()> {
)
.await?
}
};
// Log slow files (>30s) for CI debugging
let elapsed = file_start.elapsed();
if elapsed.as_secs() > 30 {
eprintln!(
"Slow file: {} took {:.1}s",
relative_path_for_timing.display(),
elapsed.as_secs_f64()
);
}
Ok(()) as Result<()>
Ok(())
})
.join()
.map(move |result| (result, relative_path, currently_running_sql_tracker))
})
// run up to `options.test_threads` streams in parallel
.buffer_unordered(options.test_threads)
.inspect({
let completed_count = Arc::clone(&completed_count);
move |_| {
let completed = completed_count.fetch_add(1, Ordering::Relaxed) + 1;
// In CI (no TTY), print progress every 10% or every 50 files
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// In CI (no TTY), print progress every 10% or every 50 files
// In CI (no TTY), print progress after every 50 files and at the end

there is no logic for every 10%

if is_ci && (completed.is_multiple_of(50) || completed == num_tests) {
eprintln!(
"Progress: {}/{} files completed ({:.0}%)",
completed,
num_tests,
(completed as f64 / num_tests as f64) * 100.0
);
}
}
})
.flat_map(|(result, test_file_path, current_sql)| {
// Filter out any Ok() leaving only the DataFusionErrors
futures::stream::iter(match result {
Expand Down