From 5021bc5ccd23614261af5f5b1ed10f51ffb60b63 Mon Sep 17 00:00:00 2001 From: blaginin Date: Fri, 27 Jun 2025 19:55:00 +0100 Subject: [PATCH 01/11] WIP: Testcontainers minio --- Cargo.lock | 2 + Cargo.toml | 2 + datafusion-cli/Cargo.toml | 3 ++ datafusion-cli/tests/cli_integration.rs | 61 ++++++++++++++++++++++++- datafusion/sqllogictest/Cargo.toml | 4 +- 5 files changed, 69 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a623a57a1accc..1175662b123e8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1981,6 +1981,8 @@ dependencies = [ "regex", "rstest", "rustyline", + "testcontainers", + "testcontainers-modules", "tokio", "url", ] diff --git a/Cargo.toml b/Cargo.toml index 41a739f2afc2a..02a06319ce622 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -174,6 +174,8 @@ sqlparser = { version = "0.55.0", default-features = false, features = ["std", " tempfile = "3" tokio = { version = "1.45", features = ["macros", "rt", "sync"] } url = "2.5.4" +testcontainers = { version = "0.24", features = ["default"] } +testcontainers-modules = { version = "0.12"} [profile.release] codegen-units = 1 diff --git a/datafusion-cli/Cargo.toml b/datafusion-cli/Cargo.toml index 63662e56ca756..e06b19751eab5 100644 --- a/datafusion-cli/Cargo.toml +++ b/datafusion-cli/Cargo.toml @@ -65,6 +65,7 @@ rustyline = "16.0" tokio = { workspace = true, features = ["macros", "rt", "rt-multi-thread", "sync", "parking_lot", "signal"] } url = { workspace = true } + [dev-dependencies] assert_cmd = "2.0" ctor = { workspace = true } @@ -72,3 +73,5 @@ insta = { workspace = true } insta-cmd = "0.6.0" predicates = "3.0" rstest = { workspace = true } +testcontainers = { workspace = true } +testcontainers-modules = { workspace = true, features = ["minio"] } \ No newline at end of file diff --git a/datafusion-cli/tests/cli_integration.rs b/datafusion-cli/tests/cli_integration.rs index 108651281dfcc..d1f1b2253ef90 100644 --- a/datafusion-cli/tests/cli_integration.rs +++ 
b/datafusion-cli/tests/cli_integration.rs @@ -21,7 +21,12 @@ use rstest::rstest; use insta::{glob, Settings}; use insta_cmd::{assert_cmd_snapshot, get_cargo_bin}; +use std::path::PathBuf; use std::{env, fs}; +use testcontainers::core::{ExecCommand, IntoContainerPort, Mount}; +use testcontainers::runners::AsyncRunner; +use testcontainers::{ContainerAsync, Image, ImageExt}; +use testcontainers_modules::minio; fn cli() -> Command { Command::new(get_cargo_bin("datafusion-cli")) @@ -35,6 +40,52 @@ fn make_settings() -> Settings { settings } +async fn setup_minio_container() -> ContainerAsync { + let data_path = + PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../datafusion/core/tests/data"); + + let absolute_data_path = data_path + .canonicalize() + .expect("Failed to get absolute path for test data"); + + let container = minio::MinIO::default() + .with_mapped_port(16433, 9000.tcp()) + .with_env_var("MINIO_ROOT_USER", "TEST-DataFusionLogin") + .with_env_var("MINIO_ROOT_PASSWORD", "TEST-DataFusionPassword") + .with_mount(Mount::bind_mount( + absolute_data_path.to_str().unwrap(), + "/source", + )) + .start() + .await + .expect("Failed to start MinIO container"); + + for command in [ + "mc ready local", + "mc alias set localminio http://localhost:9000 TEST-DataFusionLogin TEST-DataFusionPassword", + "mc mb localminio/data", + "mc cp -r /source/* localminio/data/"] { + let mut res = container.exec( + ExecCommand::new(["/bin/sh", "-c", command]) + ).await.unwrap(); + + let status_code = res.exit_code().await.unwrap().unwrap_or(0); // todo: is this correct? 
+ if status_code == 0 { + continue; + } + + let stdout = res.stdout_to_vec().await.unwrap(); + let stderr = res.stderr_to_vec().await.unwrap(); + + panic!("Command `{}` failed with status code: {:?}\nstdout: {}\nstderr: {}", + command, status_code, String::from_utf8_lossy(&stdout), String::from_utf8_lossy(&stderr)); + } + + // print stdout and stderr of the container + + container +} + #[cfg(test)] #[ctor::ctor] fn init() { @@ -165,12 +216,20 @@ async fn test_cli() { return; } + let _container = setup_minio_container().await; + let settings = make_settings(); let _bound = settings.bind_to_scope(); glob!("sql/integration/*.sql", |path| { let input = fs::read_to_string(path).unwrap(); - assert_cmd_snapshot!(cli().pass_stdin(input)) + assert_cmd_snapshot!(cli() + .env_clear() + .env("AWS_ACCESS_KEY_ID", "TEST-DataFusionLogin") + .env("AWS_SECRET_ACCESS_KEY", "TEST-DataFusionPassword") + .env("AWS_ENDPOINT", "http://localhost:16433") + .env("AWS_ALLOW_HTTP", "true") + .pass_stdin(input)) }); } diff --git a/datafusion/sqllogictest/Cargo.toml b/datafusion/sqllogictest/Cargo.toml index 54c53f7375c4f..55fc7b44c323f 100644 --- a/datafusion/sqllogictest/Cargo.toml +++ b/datafusion/sqllogictest/Cargo.toml @@ -60,8 +60,8 @@ rust_decimal = { version = "1.37.2", features = ["tokio-pg"] } sqllogictest = "0.28.3" sqlparser = { workspace = true } tempfile = { workspace = true } -testcontainers = { version = "0.24", features = ["default"], optional = true } -testcontainers-modules = { version = "0.12", features = ["postgres"], optional = true } +testcontainers = { workspace = true, optional = true } +testcontainers-modules = { workspace = true, features = ["postgres"], optional = true } thiserror = "2.0.12" tokio = { workspace = true } tokio-postgres = { version = "0.7.12", optional = true } From 9f987fd59d7db45b9fea9f3faf1154becfe5a3ef Mon Sep 17 00:00:00 2001 From: blaginin Date: Tue, 1 Jul 2025 19:49:51 +0100 Subject: [PATCH 02/11] Fix wait conditions & update docs --- 
datafusion-cli/CONTRIBUTING.md | 44 ++----------- datafusion-cli/tests/cli_integration.rs | 83 +++++++++++++++---------- 2 files changed, 55 insertions(+), 72 deletions(-) diff --git a/datafusion-cli/CONTRIBUTING.md b/datafusion-cli/CONTRIBUTING.md index 4b464dffc57ce..a7d958b423c28 100644 --- a/datafusion-cli/CONTRIBUTING.md +++ b/datafusion-cli/CONTRIBUTING.md @@ -29,47 +29,15 @@ cargo test ## Running Storage Integration Tests -By default, storage integration tests are not run. To run them you will need to set `TEST_STORAGE_INTEGRATION=1` and -then provide the necessary configuration for that object store. - -For some of the tests, [snapshots](https://datafusion.apache.org/contributor-guide/testing.html#snapshot-testing) are used. - -### AWS - -To test the S3 integration against [Minio](https://github.com/minio/minio) - -First start up a container with Minio and load test files. - +By default, storage integration tests are not run. To run them you will need to set `TEST_STORAGE_INTEGRATION`: ```shell -docker run -d \ - --name datafusion-test-minio \ - -p 9000:9000 \ - -e MINIO_ROOT_USER=TEST-DataFusionLogin \ - -e MINIO_ROOT_PASSWORD=TEST-DataFusionPassword \ - -v $(pwd)/../datafusion/core/tests/data:/source \ - quay.io/minio/minio server /data - -docker exec datafusion-test-minio /bin/sh -c "\ - mc ready local - mc alias set localminio http://localhost:9000 TEST-DataFusionLogin TEST-DataFusionPassword && \ - mc mb localminio/data && \ - mc cp -r /source/* localminio/data" +TEST_STORAGE_INTEGRATION=1 cargo test ``` -Setup environment -```shell -export TEST_STORAGE_INTEGRATION=1 -export AWS_ACCESS_KEY_ID=TEST-DataFusionLogin -export AWS_SECRET_ACCESS_KEY=TEST-DataFusionPassword -export AWS_ENDPOINT=http://127.0.0.1:9000 -export AWS_ALLOW_HTTP=true -``` - -Note that `AWS_ENDPOINT` is set without slash at the end. +For some of the tests, [snapshots](https://datafusion.apache.org/contributor-guide/testing.html#snapshot-testing) are used. 
-Run tests +### AWS -```shell -cargo test -``` +S3 integration is tested against [Minio](https://github.com/minio/minio) with [TestContainers](https://github.com/testcontainers/testcontainers-rs) +This requires Docker to be running on your machine. diff --git a/datafusion-cli/tests/cli_integration.rs b/datafusion-cli/tests/cli_integration.rs index d1f1b2253ef90..cbf7512c7fe91 100644 --- a/datafusion-cli/tests/cli_integration.rs +++ b/datafusion-cli/tests/cli_integration.rs @@ -22,8 +22,9 @@ use rstest::rstest; use insta::{glob, Settings}; use insta_cmd::{assert_cmd_snapshot, get_cargo_bin}; use std::path::PathBuf; +use std::time::Duration; use std::{env, fs}; -use testcontainers::core::{ExecCommand, IntoContainerPort, Mount}; +use testcontainers::core::{CmdWaitFor, ExecCommand, IntoContainerPort, Mount}; use testcontainers::runners::AsyncRunner; use testcontainers::{ContainerAsync, Image, ImageExt}; use testcontainers_modules::minio; @@ -41,6 +42,9 @@ fn make_settings() -> Settings { } async fn setup_minio_container() -> ContainerAsync { + const MINIO_ROOT_USER: &str = "TEST-DataFusionLogin"; + const MINIO_ROOT_PASSWORD: &str = "TEST-DataFusionPassword"; + let data_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../datafusion/core/tests/data"); @@ -49,9 +53,8 @@ async fn setup_minio_container() -> ContainerAsync { .expect("Failed to get absolute path for test data"); let container = minio::MinIO::default() - .with_mapped_port(16433, 9000.tcp()) - .with_env_var("MINIO_ROOT_USER", "TEST-DataFusionLogin") - .with_env_var("MINIO_ROOT_PASSWORD", "TEST-DataFusionPassword") + .with_env_var("MINIO_ROOT_USER", MINIO_ROOT_USER) + .with_env_var("MINIO_ROOT_PASSWORD", MINIO_ROOT_PASSWORD) .with_mount(Mount::bind_mount( absolute_data_path.to_str().unwrap(), "/source", @@ -60,29 +63,42 @@ async fn setup_minio_container() -> ContainerAsync { .await .expect("Failed to start MinIO container"); - for command in [ - "mc ready local", - "mc alias set localminio 
http://localhost:9000 TEST-DataFusionLogin TEST-DataFusionPassword", - "mc mb localminio/data", - "mc cp -r /source/* localminio/data/"] { - let mut res = container.exec( - ExecCommand::new(["/bin/sh", "-c", command]) - ).await.unwrap(); - - let status_code = res.exit_code().await.unwrap().unwrap_or(0); // todo: is this correct? - if status_code == 0 { - continue; + // We wait for MinIO to be healthy and preprare test files. We do it via CLI to avoid s3 dependency + let commands = [ + ExecCommand::new(["/usr/bin/mc", "ready", "local"]), + ExecCommand::new([ + "/usr/bin/mc", + "alias", + "set", + "localminio", + "http://localhost:9000", + MINIO_ROOT_USER, + MINIO_ROOT_PASSWORD, + ]), + ExecCommand::new(["/usr/bin/mc", "mb", "localminio/data"]), + ExecCommand::new(["/usr/bin/mc", "cp", "-r", "/source/", "localminio/data/"]), + ]; + + for command in commands { + let command = + command.with_cmd_ready_condition(CmdWaitFor::Exit { code: Some(0) }); + + let cmd_ref = format!("{:?}", command); + + if let Err(e) = container.exec(command).await { + let stdout = container.stdout_to_vec().await.unwrap_or_default(); + let stderr = container.stderr_to_vec().await.unwrap_or_default(); + + panic!( + "Failed to execute command: {}\nError: {}\nStdout: {:?}\nStderr: {:?}", + cmd_ref, + e, + String::from_utf8_lossy(&stdout), + String::from_utf8_lossy(&stderr) + ); } - - let stdout = res.stdout_to_vec().await.unwrap(); - let stderr = res.stderr_to_vec().await.unwrap(); - - panic!("Command `{}` failed with status code: {:?}\nstdout: {}\nstderr: {}", - command, status_code, String::from_utf8_lossy(&stdout), String::from_utf8_lossy(&stderr)); } - // print stdout and stderr of the container - container } @@ -216,18 +232,20 @@ async fn test_cli() { return; } - let _container = setup_minio_container().await; + let container = setup_minio_container().await; let settings = make_settings(); let _bound = settings.bind_to_scope(); + let port = container.get_host_port_ipv4(9000).await.unwrap(); + 
glob!("sql/integration/*.sql", |path| { let input = fs::read_to_string(path).unwrap(); assert_cmd_snapshot!(cli() .env_clear() .env("AWS_ACCESS_KEY_ID", "TEST-DataFusionLogin") .env("AWS_SECRET_ACCESS_KEY", "TEST-DataFusionPassword") - .env("AWS_ENDPOINT", "http://localhost:16433") + .env("AWS_ENDPOINT", format!("http://localhost:{port}")) .env("AWS_ALLOW_HTTP", "true") .pass_stdin(input)) }); @@ -245,20 +263,17 @@ async fn test_aws_options() { let settings = make_settings(); let _bound = settings.bind_to_scope(); - let access_key_id = - env::var("AWS_ACCESS_KEY_ID").expect("AWS_ACCESS_KEY_ID is not set"); - let secret_access_key = - env::var("AWS_SECRET_ACCESS_KEY").expect("AWS_SECRET_ACCESS_KEY is not set"); - let endpoint_url = env::var("AWS_ENDPOINT").expect("AWS_ENDPOINT is not set"); + let container = setup_minio_container().await; + let port = container.get_host_port_ipv4(9000).await.unwrap(); let input = format!( r#"CREATE EXTERNAL TABLE CARS STORED AS CSV LOCATION 's3://data/cars.csv' OPTIONS( - 'aws.access_key_id' '{access_key_id}', - 'aws.secret_access_key' '{secret_access_key}', - 'aws.endpoint' '{endpoint_url}', + 'aws.access_key_id' 'TEST-DataFusionLogin', + 'aws.secret_access_key' 'TEST-DataFusionPassword', + 'aws.endpoint' 'http://localhost:{port}', 'aws.allow_http' 'true' ); From ed8c85e080e1bed5ee63b393a4285660862f2ef9 Mon Sep 17 00:00:00 2001 From: blaginin Date: Tue, 1 Jul 2025 19:56:35 +0100 Subject: [PATCH 03/11] Prettier --- datafusion-cli/CONTRIBUTING.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion-cli/CONTRIBUTING.md b/datafusion-cli/CONTRIBUTING.md index a7d958b423c28..64ac3dbeb204d 100644 --- a/datafusion-cli/CONTRIBUTING.md +++ b/datafusion-cli/CONTRIBUTING.md @@ -30,11 +30,11 @@ cargo test ## Running Storage Integration Tests By default, storage integration tests are not run. 
To run them you will need to set `TEST_STORAGE_INTEGRATION`: + ```shell TEST_STORAGE_INTEGRATION=1 cargo test ``` - For some of the tests, [snapshots](https://datafusion.apache.org/contributor-guide/testing.html#snapshot-testing) are used. ### AWS From 6dba785bf5b07d4562e2ffcdaab35331679df8d5 Mon Sep 17 00:00:00 2001 From: blaginin Date: Tue, 1 Jul 2025 23:01:49 +0100 Subject: [PATCH 04/11] Remove CI step + fmt --- .github/workflows/rust.yml | 12 ------------ Cargo.toml | 4 ++-- datafusion-cli/Cargo.toml | 3 +-- 3 files changed, 3 insertions(+), 16 deletions(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index ecb25483ce07e..f59287c29ff1b 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -286,18 +286,6 @@ jobs: fetch-depth: 1 - name: Setup Rust toolchain run: rustup toolchain install stable - - name: Setup Minio - S3-compatible storage - run: | - docker run -d --name minio-container \ - -p 9000:9000 \ - -e MINIO_ROOT_USER=TEST-DataFusionLogin -e MINIO_ROOT_PASSWORD=TEST-DataFusionPassword \ - -v $(pwd)/datafusion/core/tests/data:/source quay.io/minio/minio \ - server /data - docker exec minio-container /bin/sh -c "\ - mc ready local - mc alias set localminio http://localhost:9000 TEST-DataFusionLogin TEST-DataFusionPassword && \ - mc mb localminio/data && \ - mc cp -r /source/* localminio/data" - name: Run tests (excluding doctests) env: RUST_BACKTRACE: 1 diff --git a/Cargo.toml b/Cargo.toml index 12ed239970c1c..b9d2ea255326b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -173,10 +173,10 @@ rstest = "0.25.0" serde_json = "1" sqlparser = { version = "0.55.0", default-features = false, features = ["std", "visitor"] } tempfile = "3" +testcontainers = { version = "0.24", features = ["default"] } +testcontainers-modules = { version = "0.12" } tokio = { version = "1.45", features = ["macros", "rt", "sync"] } url = "2.5.4" -testcontainers = { version = "0.24", features = ["default"] } -testcontainers-modules = { version = 
"0.12"} [profile.release] codegen-units = 1 diff --git a/datafusion-cli/Cargo.toml b/datafusion-cli/Cargo.toml index e06b19751eab5..773c43ce695dd 100644 --- a/datafusion-cli/Cargo.toml +++ b/datafusion-cli/Cargo.toml @@ -65,7 +65,6 @@ rustyline = "16.0" tokio = { workspace = true, features = ["macros", "rt", "rt-multi-thread", "sync", "parking_lot", "signal"] } url = { workspace = true } - [dev-dependencies] assert_cmd = "2.0" ctor = { workspace = true } @@ -74,4 +73,4 @@ insta-cmd = "0.6.0" predicates = "3.0" rstest = { workspace = true } testcontainers = { workspace = true } -testcontainers-modules = { workspace = true, features = ["minio"] } \ No newline at end of file +testcontainers-modules = { workspace = true, features = ["minio"] } From 3b414439318eb1f98ae278cf1dd69e661b51f057 Mon Sep 17 00:00:00 2001 From: blaginin Date: Tue, 1 Jul 2025 23:06:59 +0100 Subject: [PATCH 05/11] Unused imports --- datafusion-cli/tests/cli_integration.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/datafusion-cli/tests/cli_integration.rs b/datafusion-cli/tests/cli_integration.rs index cbf7512c7fe91..7dfc978779703 100644 --- a/datafusion-cli/tests/cli_integration.rs +++ b/datafusion-cli/tests/cli_integration.rs @@ -22,11 +22,10 @@ use rstest::rstest; use insta::{glob, Settings}; use insta_cmd::{assert_cmd_snapshot, get_cargo_bin}; use std::path::PathBuf; -use std::time::Duration; use std::{env, fs}; -use testcontainers::core::{CmdWaitFor, ExecCommand, IntoContainerPort, Mount}; +use testcontainers::core::{CmdWaitFor, ExecCommand, Mount}; use testcontainers::runners::AsyncRunner; -use testcontainers::{ContainerAsync, Image, ImageExt}; +use testcontainers::{ContainerAsync, ImageExt}; use testcontainers_modules::minio; fn cli() -> Command { From 42ca948fcf9c90db4657b19cc9c1082f410246c3 Mon Sep 17 00:00:00 2001 From: blaginin Date: Tue, 1 Jul 2025 23:25:18 +0100 Subject: [PATCH 06/11] Remove minio output --- .github/workflows/rust.yml | 3 --- 1 file changed, 
3 deletions(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index f59287c29ff1b..bf4a87a17fa41 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -297,9 +297,6 @@ jobs: run: cargo test --profile ci -p datafusion-cli --lib --tests --bins - name: Verify Working Directory Clean run: git diff --exit-code - - name: Minio Output - if: ${{ !cancelled() }} - run: docker logs minio-container linux-test-example: From fa9d1701e0c3affbe63a8cb3cdc9573facb29a0b Mon Sep 17 00:00:00 2001 From: blaginin Date: Mon, 7 Jul 2025 21:48:31 +0100 Subject: [PATCH 07/11] Clippy --- datafusion-cli/tests/cli_integration.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion-cli/tests/cli_integration.rs b/datafusion-cli/tests/cli_integration.rs index 7dfc978779703..8c191d4bb9125 100644 --- a/datafusion-cli/tests/cli_integration.rs +++ b/datafusion-cli/tests/cli_integration.rs @@ -82,7 +82,7 @@ async fn setup_minio_container() -> ContainerAsync { let command = command.with_cmd_ready_condition(CmdWaitFor::Exit { code: Some(0) }); - let cmd_ref = format!("{:?}", command); + let cmd_ref = format!("{command:?}"); if let Err(e) = container.exec(command).await { let stdout = container.stdout_to_vec().await.unwrap_or_default(); From bcddd992fce2de56881c7798c448c7206c7ac180 Mon Sep 17 00:00:00 2001 From: Dmitrii Blaginin Date: Mon, 14 Jul 2025 20:33:02 +0100 Subject: [PATCH 08/11] Update datafusion-cli/CONTRIBUTING.md Co-authored-by: Andrew Lamb --- datafusion-cli/CONTRIBUTING.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/datafusion-cli/CONTRIBUTING.md b/datafusion-cli/CONTRIBUTING.md index 64ac3dbeb204d..5b49af62efbcb 100644 --- a/datafusion-cli/CONTRIBUTING.md +++ b/datafusion-cli/CONTRIBUTING.md @@ -29,7 +29,9 @@ cargo test ## Running Storage Integration Tests -By default, storage integration tests are not run. 
To run them you will need to set `TEST_STORAGE_INTEGRATION`: +By default, storage integration tests are not run. These tests use the `testcontainers` crate to start up a local MinIO server using docker on port 9000. + +To run them you will need to set `TEST_STORAGE_INTEGRATION`: ```shell TEST_STORAGE_INTEGRATION=1 cargo test From d941ae6829e5ee67f1b1af4675b41f05591c0cc0 Mon Sep 17 00:00:00 2001 From: Dmitrii Blaginin Date: Mon, 14 Jul 2025 20:33:41 +0100 Subject: [PATCH 09/11] Update datafusion-cli/CONTRIBUTING.md Co-authored-by: Andrew Lamb --- datafusion-cli/CONTRIBUTING.md | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/datafusion-cli/CONTRIBUTING.md b/datafusion-cli/CONTRIBUTING.md index 5b49af62efbcb..18eef83303669 100644 --- a/datafusion-cli/CONTRIBUTING.md +++ b/datafusion-cli/CONTRIBUTING.md @@ -42,4 +42,14 @@ For some of the tests, [snapshots](https://datafusion.apache.org/contributor-gui ### AWS S3 integration is tested against [Minio](https://github.com/minio/minio) with [TestContainers](https://github.com/testcontainers/testcontainers-rs) -This requires Docker to be running on your machine. +This requires Docker to be running on your machine and port 9000 to be free.
+ +If you see an error about " failed to load IMDS session token" such as + +> ---- object_storage::tests::s3_object_store_builder_resolves_region_when_none_provided stdout ---- +> Error: ObjectStore(Generic { store: "S3", source: "Error getting credentials from provider: an error occurred while loading credentials: failed to load IMDS session token" }) + +You my need to disable trying to fetch S3 credentials from the environment using the `AWS_EC2_METADATA_DISABLED`, for example + + +$ AWS_EC2_METADATA_DISABLED=true TEST_STORAGE_INTEGRATION=1 cargo test From bf7f8575bb0df970e49495563b17c2fdeaba1b5d Mon Sep 17 00:00:00 2001 From: blaginin Date: Mon, 14 Jul 2025 20:34:37 +0100 Subject: [PATCH 10/11] Some typos --- datafusion-cli/CONTRIBUTING.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/datafusion-cli/CONTRIBUTING.md b/datafusion-cli/CONTRIBUTING.md index 18eef83303669..2862d725aa0f1 100644 --- a/datafusion-cli/CONTRIBUTING.md +++ b/datafusion-cli/CONTRIBUTING.md @@ -44,12 +44,12 @@ For some of the tests, [snapshots](https://datafusion.apache.org/contributor-gui S3 integration is tested against [Minio](https://github.com/minio/minio) with [TestContainers](https://github.com/testcontainers/testcontainers-rs) This requires Docker to be running on your machine and port 9000 to be free. 
-If you see an error about " failed to load IMDS session token" such as +If you see an error mentioning "failed to load IMDS session token" such as > ---- object_storage::tests::s3_object_store_builder_resolves_region_when_none_provided stdout ---- > Error: ObjectStore(Generic { store: "S3", source: "Error getting credentials from provider: an error occurred while loading credentials: failed to load IMDS session token" }) -You my need to disable trying to fetch S3 credentials from the environment using the `AWS_EC2_METADATA_DISABLED`, for example +You may need to disable trying to fetch S3 credentials from the environment using the `AWS_EC2_METADATA_DISABLED`, for example: -$ AWS_EC2_METADATA_DISABLED=true TEST_STORAGE_INTEGRATION=1 cargo test +> $ AWS_EC2_METADATA_DISABLED=true TEST_STORAGE_INTEGRATION=1 cargo test From 4deb0eb096b2da3eea39179a4e969f88a316c59a Mon Sep 17 00:00:00 2001 From: blaginin Date: Mon, 14 Jul 2025 20:44:13 +0100 Subject: [PATCH 11/11] Better error when docker isn't running --- datafusion-cli/CONTRIBUTING.md | 1 - datafusion-cli/tests/cli_integration.rs | 94 +++++++++++++++---------- 2 files changed, 55 insertions(+), 40 deletions(-) diff --git a/datafusion-cli/CONTRIBUTING.md b/datafusion-cli/CONTRIBUTING.md index 2862d725aa0f1..3e72214f6c226 100644 --- a/datafusion-cli/CONTRIBUTING.md +++ b/datafusion-cli/CONTRIBUTING.md @@ -51,5 +51,4 @@ If you see an error mentioning "failed to load IMDS session token" such as You may need to disable trying to fetch S3 credentials from the environment using the `AWS_EC2_METADATA_DISABLED`, for example: - > $ AWS_EC2_METADATA_DISABLED=true TEST_STORAGE_INTEGRATION=1 cargo test diff --git a/datafusion-cli/tests/cli_integration.rs b/datafusion-cli/tests/cli_integration.rs index 8c191d4bb9125..e6ba7d9a9d87b 100644 --- a/datafusion-cli/tests/cli_integration.rs +++ b/datafusion-cli/tests/cli_integration.rs @@ -25,7 +25,7 @@ use std::path::PathBuf; use std::{env, fs}; use testcontainers::core::{CmdWaitFor,
ExecCommand, Mount}; use testcontainers::runners::AsyncRunner; -use testcontainers::{ContainerAsync, ImageExt}; +use testcontainers::{ContainerAsync, ImageExt, TestcontainersError}; use testcontainers_modules::minio; fn cli() -> Command { @@ -59,46 +59,62 @@ async fn setup_minio_container() -> ContainerAsync { "/source", )) .start() - .await - .expect("Failed to start MinIO container"); - - // We wait for MinIO to be healthy and preprare test files. We do it via CLI to avoid s3 dependency - let commands = [ - ExecCommand::new(["/usr/bin/mc", "ready", "local"]), - ExecCommand::new([ - "/usr/bin/mc", - "alias", - "set", - "localminio", - "http://localhost:9000", - MINIO_ROOT_USER, - MINIO_ROOT_PASSWORD, - ]), - ExecCommand::new(["/usr/bin/mc", "mb", "localminio/data"]), - ExecCommand::new(["/usr/bin/mc", "cp", "-r", "/source/", "localminio/data/"]), - ]; - - for command in commands { - let command = - command.with_cmd_ready_condition(CmdWaitFor::Exit { code: Some(0) }); - - let cmd_ref = format!("{command:?}"); - - if let Err(e) = container.exec(command).await { - let stdout = container.stdout_to_vec().await.unwrap_or_default(); - let stderr = container.stderr_to_vec().await.unwrap_or_default(); - - panic!( - "Failed to execute command: {}\nError: {}\nStdout: {:?}\nStderr: {:?}", - cmd_ref, - e, - String::from_utf8_lossy(&stdout), - String::from_utf8_lossy(&stderr) - ); + .await; + + match container { + Ok(container) => { + // We wait for MinIO to be healthy and prepare test files.
We do it via CLI to avoid s3 dependency + let commands = [ + ExecCommand::new(["/usr/bin/mc", "ready", "local"]), + ExecCommand::new([ + "/usr/bin/mc", + "alias", + "set", + "localminio", + "http://localhost:9000", + MINIO_ROOT_USER, + MINIO_ROOT_PASSWORD, + ]), + ExecCommand::new(["/usr/bin/mc", "mb", "localminio/data"]), + ExecCommand::new([ + "/usr/bin/mc", + "cp", + "-r", + "/source/", + "localminio/data/", + ]), + ]; + + for command in commands { + let command = + command.with_cmd_ready_condition(CmdWaitFor::Exit { code: Some(0) }); + + let cmd_ref = format!("{command:?}"); + + if let Err(e) = container.exec(command).await { + let stdout = container.stdout_to_vec().await.unwrap_or_default(); + let stderr = container.stderr_to_vec().await.unwrap_or_default(); + + panic!( + "Failed to execute command: {}\nError: {}\nStdout: {:?}\nStderr: {:?}", + cmd_ref, + e, + String::from_utf8_lossy(&stdout), + String::from_utf8_lossy(&stderr) + ); + } + } + + container } - } - container + Err(TestcontainersError::Client(e)) => { + panic!("Failed to start MinIO container. Ensure Docker is running and accessible: {e}"); + } + Err(e) => { + panic!("Failed to start MinIO container: {e}"); + } + } } #[cfg(test)]