diff --git a/datafusion/core/tests/sqllogictests/README.md b/datafusion/core/tests/sqllogictests/README.md index 550e4a558eb54..648e0a3eaacb3 100644 --- a/datafusion/core/tests/sqllogictests/README.md +++ b/datafusion/core/tests/sqllogictests/README.md @@ -23,7 +23,22 @@ This is the Datafusion implementation of [sqllogictest](https://www.sqlite.org/s #### Running tests -`cargo test -p datafusion --test sqllogictests` +```shell +cargo test -p datafusion --test sqllogictests +``` + +Run tests with debug logging enabled: + +```shell +RUST_LOG=debug cargo test -p datafusion --test sqllogictests +``` + +Run only the tests in `information_schema.slt`: + +```shell +# information_schema.slt matches due to substring matching `information` +cargo test -p datafusion --test sqllogictests -- information +``` #### sqllogictests diff --git a/datafusion/core/tests/sqllogictests/src/main.rs b/datafusion/core/tests/sqllogictests/src/main.rs index 5fa261b36978b..ad7cb20e976b3 100644 --- a/datafusion/core/tests/sqllogictests/src/main.rs +++ b/datafusion/core/tests/sqllogictests/src/main.rs @@ -20,9 +20,10 @@ use datafusion::arrow::csv::WriterBuilder; use datafusion::arrow::record_batch::RecordBatch; use datafusion::prelude::{SessionConfig, SessionContext}; use datafusion_sql::parser::{DFParser, Statement}; +use log::info; use normalize::normalize_batch; use sqlparser::ast::Statement as SQLStatement; -use std::path::Path; +use std::path::{Path, PathBuf}; use std::time::Duration; use crate::error::{DFSqlLogicTestError, Result}; @@ -76,56 +77,96 @@ pub async fn main() -> Result<()> { #[tokio::main] #[cfg(not(target_family = "windows"))] pub async fn main() -> Result<()> { - let paths = std::fs::read_dir(TEST_DIRECTORY).unwrap(); + // Enable logging (e.g. 
set RUST_LOG=debug to see debug logs) + env_logger::init(); - // run each file using its own new SessionContext + // run each file using its own new DB // // Note: can't use tester.run_parallel_async() // as that will reuse the same SessionContext // // We could run these tests in parallel eventually if we wanted. - for path in paths { - // TODO better error handling - let path = path.unwrap().path(); + let files = get_test_files(); + info!("Running test files {:?}", files); - run_file(&path).await?; + for path in files { + println!("Running: {}", path.display()); + + let file_name = path.file_name().unwrap().to_str().unwrap().to_string(); + + let ctx = context_for_test_file(&file_name).await; + + let mut tester = sqllogictest::Runner::new(DataFusion { ctx, file_name }); + tester.run_file_async(path).await?; } Ok(()) } -/// Run the tests in the specified `.slt` file -async fn run_file(path: &Path) -> Result<()> { - println!("Running: {}", path.display()); - - let file_name = path.file_name().unwrap().to_str().unwrap().to_string(); - - let ctx = context_for_test_file(&file_name).await; +/// Gets a list of test files to execute. 
If there were arguments +/// passed to the program, treat them as cargo test filters (substring match on filenames) +fn get_test_files() -> Vec<PathBuf> { + info!("Test directory: {}", TEST_DIRECTORY); + + let args: Vec<_> = std::env::args().collect(); + + // treat args after the first as filters to run (substring matching) + let filters = if !args.is_empty() { + args.iter() + .skip(1) + .map(|arg| arg.as_str()) + .collect::<Vec<_>>() + } else { + vec![] + }; + + // default to all files in test directory filtering based on name + std::fs::read_dir(TEST_DIRECTORY) + .unwrap() + .map(|path| path.unwrap().path()) + .filter(|path| check_test_file(&filters, path.as_path())) + .collect() +} - let mut tester = sqllogictest::Runner::new(DataFusion { ctx, file_name }); - tester.run_file_async(path).await?; +/// Because this test can be run as a cargo test, commands like +/// +/// ```shell +/// cargo test foo +/// ``` +/// +/// will end up passing `foo` as a command line argument. +/// +/// To be compatible with this, treat the command line arguments as a +/// filter that does a substring match on each input. 
+/// Returns true if this path should be run +fn check_test_file(filters: &[&str], path: &Path) -> bool { + if filters.is_empty() { + return true; + } - Ok(()) + // otherwise check if any filter matches + let path_str = path.to_string_lossy(); + filters.iter().any(|filter| path_str.contains(filter)) } /// Create a SessionContext, configured for the specific test async fn context_for_test_file(file_name: &str) -> SessionContext { match file_name { "aggregate.slt" => { - println!("Registering aggregate tables"); + info!("Registering aggregate tables"); let ctx = SessionContext::new(); setup::register_aggregate_tables(&ctx).await; ctx } "information_schema.slt" => { - println!("Enabling information schema"); + info!("Enabling information schema"); SessionContext::with_config( SessionConfig::new().with_information_schema(true), ) } _ => { - println!("Using default SessionContex"); + info!("Using default SessionContext"); SessionContext::new() } }