Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 0 additions & 15 deletions .github/workflows/rust.yml
Original file line number Diff line number Diff line change
Expand Up @@ -314,18 +314,6 @@ jobs:
fetch-depth: 1
- name: Setup Rust toolchain
run: rustup toolchain install stable
- name: Setup Minio - S3-compatible storage
run: |
docker run -d --name minio-container \
-p 9000:9000 \
-e MINIO_ROOT_USER=TEST-DataFusionLogin -e MINIO_ROOT_PASSWORD=TEST-DataFusionPassword \
-v $(pwd)/datafusion/core/tests/data:/source quay.io/minio/minio \
server /data
docker exec minio-container /bin/sh -c "\
mc ready local
mc alias set localminio http://localhost:9000 TEST-DataFusionLogin TEST-DataFusionPassword && \
mc mb localminio/data && \
mc cp -r /source/* localminio/data"
- name: Run tests (excluding doctests)
env:
RUST_BACKTRACE: 1
Expand All @@ -337,9 +325,6 @@ jobs:
run: cargo test --profile ci -p datafusion-cli --lib --tests --bins
- name: Verify Working Directory Clean
run: git diff --exit-code
- name: Minio Output
if: ${{ !cancelled() }}
run: docker logs minio-container


linux-test-example:
Expand Down
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,8 @@ rstest = "0.25.0"
serde_json = "1"
sqlparser = { version = "0.55.0", default-features = false, features = ["std", "visitor"] }
tempfile = "3"
testcontainers = { version = "0.24", features = ["default"] }
testcontainers-modules = { version = "0.12" }
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fyi, I am not sure why but locally I am unable to properly run minio with version 0.12, I have this in my Cargo.toml:

# version 0.12.0 seems to cause minio to hang
testcontainers-modules = { version = "=0.11.6", features = ["postgres", "minio", "blocking"] }

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

unable to properly run minio

can you please post the error?

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It just hangs locally (NOT this code, but my own tests). I think it's because of something related to waiting for minio to come up - I'm not doing the mc commands like you are.

tokio = { version = "1.46", features = ["macros", "rt", "sync"] }
url = "2.5.4"

Expand Down
49 changes: 14 additions & 35 deletions datafusion-cli/CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,47 +29,26 @@ cargo test

## Running Storage Integration Tests

By default, storage integration tests are not run. To run them you will need to set `TEST_STORAGE_INTEGRATION=1` and
then provide the necessary configuration for that object store.
By default, storage integration tests are not run. These test use the `testcontainers` crate to start up a local MinIO server using docker on port 9000.

For some of the tests, [snapshots](https://datafusion.apache.org/contributor-guide/testing.html#snapshot-testing) are used.

### AWS

To test the S3 integration against [Minio](https://github.com/minio/minio)

First start up a container with Minio and load test files.
To run them you will need to set `TEST_STORAGE_INTEGRATION`:

```shell
docker run -d \
--name datafusion-test-minio \
-p 9000:9000 \
-e MINIO_ROOT_USER=TEST-DataFusionLogin \
-e MINIO_ROOT_PASSWORD=TEST-DataFusionPassword \
-v $(pwd)/../datafusion/core/tests/data:/source \
quay.io/minio/minio server /data

docker exec datafusion-test-minio /bin/sh -c "\
mc ready local
mc alias set localminio http://localhost:9000 TEST-DataFusionLogin TEST-DataFusionPassword && \
mc mb localminio/data && \
mc cp -r /source/* localminio/data"
TEST_STORAGE_INTEGRATION=1 cargo test
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When I first ran this command without docker running, several commands failed

Once I started docker it worked great

TEST_STORAGE_INTEGRATION=1 cargo test

---- test_aws_options stdout ----

thread 'test_aws_options' panicked at datafusion-cli/tests/cli_integration.rs:63:10:
Failed to start MinIO container: Client(CreateContainer(HyperLegacyError { err: hyper_util::client::legacy::Error(Connect, Os { code: 61, kind: ConnectionRefused, message: "Connection refused" }) }))
note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace

---- test_cli stdout ----

thread 'test_cli' panicked at datafusion-cli/tests/cli_integration.rs:63:10:
Failed to start MinIO container: Client(CreateContainer(HyperLegacyError { err: hyper_util::client::legacy::Error(Connect, Os { code: 61, kind: ConnectionRefused, message: "Connection refused" }) }))

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note it didn't show any errors about starting the container in the logs / output

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if ! docker info > /dev/null 2>&1; then
  echo "This script requires docker to be running. Please start docker and try again."
  exit 1
fi

???

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks, updated in 4deb0eb

```

Setup environment
For some of the tests, [snapshots](https://datafusion.apache.org/contributor-guide/testing.html#snapshot-testing) are used.

```shell
export TEST_STORAGE_INTEGRATION=1
export AWS_ACCESS_KEY_ID=TEST-DataFusionLogin
export AWS_SECRET_ACCESS_KEY=TEST-DataFusionPassword
export AWS_ENDPOINT=http://127.0.0.1:9000
export AWS_ALLOW_HTTP=true
```
### AWS

Note that `AWS_ENDPOINT` is set without slash at the end.
S3 integration is tested against [Minio](https://github.com/minio/minio) with [TestContainers](https://github.com/testcontainers/testcontainers-rs)
This requires Docker to be running on your machine and port 9000 to be free.

Run tests
If you see an error mentioning "failed to load IMDS session token" such as

```shell
cargo test
```
> ---- object_storage::tests::s3_object_store_builder_resolves_region_when_none_provided stdout ----
> Error: ObjectStore(Generic { store: "S3", source: "Error getting credentials from provider: an error occurred while loading credentials: failed to load IMDS session token" })

You my need to disable trying to fetch S3 credentials from the environment using the `AWS_EC2_METADATA_DISABLED`, for example:

> $ AWS_EC2_METADATA_DISABLED=true TEST_STORAGE_INTEGRATION=1 cargo test
2 changes: 2 additions & 0 deletions datafusion-cli/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -72,3 +72,5 @@ insta = { workspace = true }
insta-cmd = "0.6.0"
predicates = "3.0"
rstest = { workspace = true }
testcontainers = { workspace = true }
testcontainers-modules = { workspace = true, features = ["minio"] }
107 changes: 98 additions & 9 deletions datafusion-cli/tests/cli_integration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,12 @@ use rstest::rstest;

use insta::{glob, Settings};
use insta_cmd::{assert_cmd_snapshot, get_cargo_bin};
use std::path::PathBuf;
use std::{env, fs};
use testcontainers::core::{CmdWaitFor, ExecCommand, Mount};
use testcontainers::runners::AsyncRunner;
use testcontainers::{ContainerAsync, ImageExt, TestcontainersError};
use testcontainers_modules::minio;

fn cli() -> Command {
Command::new(get_cargo_bin("datafusion-cli"))
Expand All @@ -35,6 +40,83 @@ fn make_settings() -> Settings {
settings
}

async fn setup_minio_container() -> ContainerAsync<minio::MinIO> {
const MINIO_ROOT_USER: &str = "TEST-DataFusionLogin";
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it is very nice that these environment variables get setup via the test harness now rather than having to be setup outside

const MINIO_ROOT_PASSWORD: &str = "TEST-DataFusionPassword";

let data_path =
PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../datafusion/core/tests/data");

let absolute_data_path = data_path
.canonicalize()
.expect("Failed to get absolute path for test data");

let container = minio::MinIO::default()
.with_env_var("MINIO_ROOT_USER", MINIO_ROOT_USER)
.with_env_var("MINIO_ROOT_PASSWORD", MINIO_ROOT_PASSWORD)
.with_mount(Mount::bind_mount(
absolute_data_path.to_str().unwrap(),
"/source",
))
.start()
.await;

match container {
Ok(container) => {
// We wait for MinIO to be healthy and preprare test files. We do it via CLI to avoid s3 dependency
let commands = [
ExecCommand::new(["/usr/bin/mc", "ready", "local"]),
ExecCommand::new([
"/usr/bin/mc",
"alias",
"set",
"localminio",
"http://localhost:9000",
MINIO_ROOT_USER,
MINIO_ROOT_PASSWORD,
]),
ExecCommand::new(["/usr/bin/mc", "mb", "localminio/data"]),
ExecCommand::new([
"/usr/bin/mc",
"cp",
"-r",
"/source/",
"localminio/data/",
]),
];

for command in commands {
let command =
command.with_cmd_ready_condition(CmdWaitFor::Exit { code: Some(0) });

let cmd_ref = format!("{command:?}");

if let Err(e) = container.exec(command).await {
let stdout = container.stdout_to_vec().await.unwrap_or_default();
let stderr = container.stderr_to_vec().await.unwrap_or_default();

panic!(
"Failed to execute command: {}\nError: {}\nStdout: {:?}\nStderr: {:?}",
cmd_ref,
e,
String::from_utf8_lossy(&stdout),
String::from_utf8_lossy(&stderr)
);
}
}

container
}

Err(TestcontainersError::Client(e)) => {
panic!("Failed to start MinIO container. Ensure Docker is running and accessible: {e}");
}
Err(e) => {
panic!("Failed to start MinIO container: {e}");
}
}
}

#[cfg(test)]
#[ctor::ctor]
fn init() {
Expand Down Expand Up @@ -165,12 +247,22 @@ async fn test_cli() {
return;
}

let container = setup_minio_container().await;

let settings = make_settings();
let _bound = settings.bind_to_scope();

let port = container.get_host_port_ipv4(9000).await.unwrap();

glob!("sql/integration/*.sql", |path| {
let input = fs::read_to_string(path).unwrap();
assert_cmd_snapshot!(cli().pass_stdin(input))
assert_cmd_snapshot!(cli()
.env_clear()
.env("AWS_ACCESS_KEY_ID", "TEST-DataFusionLogin")
.env("AWS_SECRET_ACCESS_KEY", "TEST-DataFusionPassword")
.env("AWS_ENDPOINT", format!("http://localhost:{port}"))
.env("AWS_ALLOW_HTTP", "true")
.pass_stdin(input))
});
}

Expand All @@ -186,20 +278,17 @@ async fn test_aws_options() {
let settings = make_settings();
let _bound = settings.bind_to_scope();

let access_key_id =
env::var("AWS_ACCESS_KEY_ID").expect("AWS_ACCESS_KEY_ID is not set");
let secret_access_key =
env::var("AWS_SECRET_ACCESS_KEY").expect("AWS_SECRET_ACCESS_KEY is not set");
let endpoint_url = env::var("AWS_ENDPOINT").expect("AWS_ENDPOINT is not set");
let container = setup_minio_container().await;
let port = container.get_host_port_ipv4(9000).await.unwrap();

let input = format!(
r#"CREATE EXTERNAL TABLE CARS
STORED AS CSV
LOCATION 's3://data/cars.csv'
OPTIONS(
'aws.access_key_id' '{access_key_id}',
'aws.secret_access_key' '{secret_access_key}',
'aws.endpoint' '{endpoint_url}',
'aws.access_key_id' 'TEST-DataFusionLogin',
'aws.secret_access_key' 'TEST-DataFusionPassword',
'aws.endpoint' 'http://localhost:{port}',
'aws.allow_http' 'true'
);

Expand Down
4 changes: 2 additions & 2 deletions datafusion/sqllogictest/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,8 @@ rust_decimal = { version = "1.37.2", features = ["tokio-pg"] }
sqllogictest = "0.28.3"
sqlparser = { workspace = true }
tempfile = { workspace = true }
testcontainers = { version = "0.24", features = ["default"], optional = true }
testcontainers-modules = { version = "0.12", features = ["postgres"], optional = true }
testcontainers = { workspace = true, optional = true }
testcontainers-modules = { workspace = true, features = ["postgres"], optional = true }
thiserror = "2.0.12"
tokio = { workspace = true }
tokio-postgres = { version = "0.7.12", optional = true }
Expand Down