diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 84eb8bd596e14..1b7958542966c 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -314,18 +314,6 @@ jobs: fetch-depth: 1 - name: Setup Rust toolchain run: rustup toolchain install stable - - name: Setup Minio - S3-compatible storage - run: | - docker run -d --name minio-container \ - -p 9000:9000 \ - -e MINIO_ROOT_USER=TEST-DataFusionLogin -e MINIO_ROOT_PASSWORD=TEST-DataFusionPassword \ - -v $(pwd)/datafusion/core/tests/data:/source quay.io/minio/minio \ - server /data - docker exec minio-container /bin/sh -c "\ - mc ready local - mc alias set localminio http://localhost:9000 TEST-DataFusionLogin TEST-DataFusionPassword && \ - mc mb localminio/data && \ - mc cp -r /source/* localminio/data" - name: Run tests (excluding doctests) env: RUST_BACKTRACE: 1 @@ -337,9 +325,6 @@ jobs: run: cargo test --profile ci -p datafusion-cli --lib --tests --bins - name: Verify Working Directory Clean run: git diff --exit-code - - name: Minio Output - if: ${{ !cancelled() }} - run: docker logs minio-container linux-test-example: diff --git a/Cargo.lock b/Cargo.lock index e9750cc0e2a7b..cb13a55b56dae 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1983,6 +1983,8 @@ dependencies = [ "regex", "rstest", "rustyline", + "testcontainers", + "testcontainers-modules", "tokio", "url", ] diff --git a/Cargo.toml b/Cargo.toml index 434e608b49dc2..29534cbdb38b5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -174,6 +174,8 @@ rstest = "0.25.0" serde_json = "1" sqlparser = { version = "0.55.0", default-features = false, features = ["std", "visitor"] } tempfile = "3" +testcontainers = { version = "0.24", features = ["default"] } +testcontainers-modules = { version = "0.12" } tokio = { version = "1.46", features = ["macros", "rt", "sync"] } url = "2.5.4" diff --git a/datafusion-cli/CONTRIBUTING.md b/datafusion-cli/CONTRIBUTING.md index 4b464dffc57ce..3e72214f6c226 100644 --- a/datafusion-cli/CONTRIBUTING.md +++ 
b/datafusion-cli/CONTRIBUTING.md @@ -29,47 +29,26 @@ cargo test ## Running Storage Integration Tests -By default, storage integration tests are not run. To run them you will need to set `TEST_STORAGE_INTEGRATION=1` and -then provide the necessary configuration for that object store. +By default, storage integration tests are not run. These tests use the `testcontainers` crate to start up a local MinIO server using Docker on port 9000. -For some of the tests, [snapshots](https://datafusion.apache.org/contributor-guide/testing.html#snapshot-testing) are used. - -### AWS - -To test the S3 integration against [Minio](https://github.com/minio/minio) - -First start up a container with Minio and load test files. +To run them you will need to set `TEST_STORAGE_INTEGRATION`: ```shell -docker run -d \ - --name datafusion-test-minio \ - -p 9000:9000 \ - -e MINIO_ROOT_USER=TEST-DataFusionLogin \ - -e MINIO_ROOT_PASSWORD=TEST-DataFusionPassword \ - -v $(pwd)/../datafusion/core/tests/data:/source \ - quay.io/minio/minio server /data - -docker exec datafusion-test-minio /bin/sh -c "\ - mc ready local - mc alias set localminio http://localhost:9000 TEST-DataFusionLogin TEST-DataFusionPassword && \ - mc mb localminio/data && \ - mc cp -r /source/* localminio/data" +TEST_STORAGE_INTEGRATION=1 cargo test ``` -Setup environment +For some of the tests, [snapshots](https://datafusion.apache.org/contributor-guide/testing.html#snapshot-testing) are used. -```shell -export TEST_STORAGE_INTEGRATION=1 -export AWS_ACCESS_KEY_ID=TEST-DataFusionLogin -export AWS_SECRET_ACCESS_KEY=TEST-DataFusionPassword -export AWS_ENDPOINT=http://127.0.0.1:9000 -export AWS_ALLOW_HTTP=true -``` +### AWS -Note that `AWS_ENDPOINT` is set without slash at the end. +S3 integration is tested against [Minio](https://github.com/minio/minio) with [TestContainers](https://github.com/testcontainers/testcontainers-rs). +This requires Docker to be running on your machine and port 9000 to be free. 
-Run tests +If you see an error mentioning "failed to load IMDS session token" such as -```shell -cargo test -``` +> ---- object_storage::tests::s3_object_store_builder_resolves_region_when_none_provided stdout ---- +> Error: ObjectStore(Generic { store: "S3", source: "Error getting credentials from provider: an error occurred while loading credentials: failed to load IMDS session token" }) + +You may need to disable fetching S3 credentials from the environment using the `AWS_EC2_METADATA_DISABLED` environment variable, for example: + +> $ AWS_EC2_METADATA_DISABLED=true TEST_STORAGE_INTEGRATION=1 cargo test diff --git a/datafusion-cli/Cargo.toml b/datafusion-cli/Cargo.toml index f4e33fe2c19c3..28e56f493f659 100644 --- a/datafusion-cli/Cargo.toml +++ b/datafusion-cli/Cargo.toml @@ -72,3 +72,5 @@ insta = { workspace = true } insta-cmd = "0.6.0" predicates = "3.0" rstest = { workspace = true } +testcontainers = { workspace = true } +testcontainers-modules = { workspace = true, features = ["minio"] } diff --git a/datafusion-cli/tests/cli_integration.rs b/datafusion-cli/tests/cli_integration.rs index 108651281dfcc..e6ba7d9a9d87b 100644 --- a/datafusion-cli/tests/cli_integration.rs +++ b/datafusion-cli/tests/cli_integration.rs @@ -21,7 +21,12 @@ use rstest::rstest; use insta::{glob, Settings}; use insta_cmd::{assert_cmd_snapshot, get_cargo_bin}; +use std::path::PathBuf; use std::{env, fs}; +use testcontainers::core::{CmdWaitFor, ExecCommand, Mount}; +use testcontainers::runners::AsyncRunner; +use testcontainers::{ContainerAsync, ImageExt, TestcontainersError}; +use testcontainers_modules::minio; fn cli() -> Command { Command::new(get_cargo_bin("datafusion-cli")) @@ -35,6 +40,83 @@ fn make_settings() -> Settings { settings } +async fn setup_minio_container() -> ContainerAsync { + const MINIO_ROOT_USER: &str = "TEST-DataFusionLogin"; + const MINIO_ROOT_PASSWORD: &str = "TEST-DataFusionPassword"; + + let data_path = + 
PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../datafusion/core/tests/data"); + + let absolute_data_path = data_path + .canonicalize() + .expect("Failed to get absolute path for test data"); + + let container = minio::MinIO::default() + .with_env_var("MINIO_ROOT_USER", MINIO_ROOT_USER) + .with_env_var("MINIO_ROOT_PASSWORD", MINIO_ROOT_PASSWORD) + .with_mount(Mount::bind_mount( + absolute_data_path.to_str().unwrap(), + "/source", + )) + .start() + .await; + + match container { + Ok(container) => { + // We wait for MinIO to be healthy and preprare test files. We do it via CLI to avoid s3 dependency + let commands = [ + ExecCommand::new(["/usr/bin/mc", "ready", "local"]), + ExecCommand::new([ + "/usr/bin/mc", + "alias", + "set", + "localminio", + "http://localhost:9000", + MINIO_ROOT_USER, + MINIO_ROOT_PASSWORD, + ]), + ExecCommand::new(["/usr/bin/mc", "mb", "localminio/data"]), + ExecCommand::new([ + "/usr/bin/mc", + "cp", + "-r", + "/source/", + "localminio/data/", + ]), + ]; + + for command in commands { + let command = + command.with_cmd_ready_condition(CmdWaitFor::Exit { code: Some(0) }); + + let cmd_ref = format!("{command:?}"); + + if let Err(e) = container.exec(command).await { + let stdout = container.stdout_to_vec().await.unwrap_or_default(); + let stderr = container.stderr_to_vec().await.unwrap_or_default(); + + panic!( + "Failed to execute command: {}\nError: {}\nStdout: {:?}\nStderr: {:?}", + cmd_ref, + e, + String::from_utf8_lossy(&stdout), + String::from_utf8_lossy(&stderr) + ); + } + } + + container + } + + Err(TestcontainersError::Client(e)) => { + panic!("Failed to start MinIO container. 
Ensure Docker is running and accessible: {e}"); + } + Err(e) => { + panic!("Failed to start MinIO container: {e}"); + } + } +} + #[cfg(test)] #[ctor::ctor] fn init() { @@ -165,12 +247,22 @@ async fn test_cli() { return; } + let container = setup_minio_container().await; + let settings = make_settings(); let _bound = settings.bind_to_scope(); + let port = container.get_host_port_ipv4(9000).await.unwrap(); + glob!("sql/integration/*.sql", |path| { let input = fs::read_to_string(path).unwrap(); - assert_cmd_snapshot!(cli().pass_stdin(input)) + assert_cmd_snapshot!(cli() + .env_clear() + .env("AWS_ACCESS_KEY_ID", "TEST-DataFusionLogin") + .env("AWS_SECRET_ACCESS_KEY", "TEST-DataFusionPassword") + .env("AWS_ENDPOINT", format!("http://localhost:{port}")) + .env("AWS_ALLOW_HTTP", "true") + .pass_stdin(input)) }); } @@ -186,20 +278,17 @@ async fn test_aws_options() { let settings = make_settings(); let _bound = settings.bind_to_scope(); - let access_key_id = - env::var("AWS_ACCESS_KEY_ID").expect("AWS_ACCESS_KEY_ID is not set"); - let secret_access_key = - env::var("AWS_SECRET_ACCESS_KEY").expect("AWS_SECRET_ACCESS_KEY is not set"); - let endpoint_url = env::var("AWS_ENDPOINT").expect("AWS_ENDPOINT is not set"); + let container = setup_minio_container().await; + let port = container.get_host_port_ipv4(9000).await.unwrap(); let input = format!( r#"CREATE EXTERNAL TABLE CARS STORED AS CSV LOCATION 's3://data/cars.csv' OPTIONS( - 'aws.access_key_id' '{access_key_id}', - 'aws.secret_access_key' '{secret_access_key}', - 'aws.endpoint' '{endpoint_url}', + 'aws.access_key_id' 'TEST-DataFusionLogin', + 'aws.secret_access_key' 'TEST-DataFusionPassword', + 'aws.endpoint' 'http://localhost:{port}', 'aws.allow_http' 'true' ); diff --git a/datafusion/sqllogictest/Cargo.toml b/datafusion/sqllogictest/Cargo.toml index 1ac4efac554f7..950bf5fabd70e 100644 --- a/datafusion/sqllogictest/Cargo.toml +++ b/datafusion/sqllogictest/Cargo.toml @@ -60,8 +60,8 @@ rust_decimal = { version = "1.37.2", 
features = ["tokio-pg"] } sqllogictest = "0.28.3" sqlparser = { workspace = true } tempfile = { workspace = true } -testcontainers = { version = "0.24", features = ["default"], optional = true } -testcontainers-modules = { version = "0.12", features = ["postgres"], optional = true } +testcontainers = { workspace = true, optional = true } +testcontainers-modules = { workspace = true, features = ["postgres"], optional = true } thiserror = "2.0.12" tokio = { workspace = true } tokio-postgres = { version = "0.7.12", optional = true }