From 42af19e5674db7ac457ecfa369f8f2ec31ba3c59 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Sun, 4 Dec 2022 07:31:31 -0500 Subject: [PATCH 1/5] sqllogictest: A logging and command line filter --- datafusion/core/tests/sqllogictests/README.md | 16 ++++- .../core/tests/sqllogictests/src/main.rs | 60 +++++++++++++------ 2 files changed, 56 insertions(+), 20 deletions(-) diff --git a/datafusion/core/tests/sqllogictests/README.md b/datafusion/core/tests/sqllogictests/README.md index 550e4a558eb54..0073bd3d925e7 100644 --- a/datafusion/core/tests/sqllogictests/README.md +++ b/datafusion/core/tests/sqllogictests/README.md @@ -23,7 +23,21 @@ This is the Datafusion implementation of [sqllogictest](https://www.sqlite.org/s #### Running tests -`cargo test -p datafusion --test sqllogictests` +```shell +cargo test -p datafusion --test sqllogictests +``` + +Run tests with debug logging enabled: + +```shell +RUST_LOG=debug cargo test -p datafusion --test sqllogictests +``` + +Run only the tests in `information_schema.slt`: + +```shell +cargo test -p datafusion --test sqllogictests -- information_schema.slt +``` #### sqllogictests diff --git a/datafusion/core/tests/sqllogictests/src/main.rs b/datafusion/core/tests/sqllogictests/src/main.rs index fc27773c010d0..663540695cccd 100644 --- a/datafusion/core/tests/sqllogictests/src/main.rs +++ b/datafusion/core/tests/sqllogictests/src/main.rs @@ -19,7 +19,8 @@ use async_trait::async_trait; use datafusion::arrow::csv::WriterBuilder; use datafusion::arrow::record_batch::RecordBatch; use datafusion::prelude::{SessionConfig, SessionContext}; -use std::path::Path; +use log::info; +use std::path::PathBuf; use std::time::Duration; use sqllogictest::TestError; @@ -70,39 +71,60 @@ pub async fn main() -> Result<()> { #[tokio::main] #[cfg(not(target_family = "windows"))] pub async fn main() -> Result<()> { - let paths = std::fs::read_dir(TEST_DIRECTORY).unwrap(); + // Enable logging (e.g. set RUST_LOG=debug to see debug logs) - // run each file using its own new SessionContext + use log::info; + env_logger::init(); + + // run each file using its own new DB // // Note: can't use tester.run_parallel_async() // as that will reuse the same SessionContext // // We could run these tests in parallel eventually if we wanted. - for path in paths { - // TODO better error handling - let path = path.unwrap().path(); - - run_file(&path).await?; - } - - Ok(()) -} + let files = get_test_files(); + info!("Running test files {:?}", files); -/// Run the tests in the specified `.slt` file -async fn run_file(path: &Path) -> Result<()> { - println!("Running: {}", path.display()); + for path in files { + println!("Running: {}", path.display()); - let file_name = path.file_name().unwrap().to_str().unwrap().to_string(); + let file_name = path.file_name().unwrap().to_str().unwrap().to_string(); - let ctx = context_for_test_file(&file_name).await; + let ctx = context_for_test_file(&file_name).await; - let mut tester = sqllogictest::Runner::new(DataFusion { ctx, file_name }); - tester.run_file_async(path).await?; + let mut tester = sqllogictest::Runner::new(DataFusion { ctx, file_name }); + tester.run_file_async(path).await?; + } Ok(()) } +/// Gets a list of test files to execute. If there were arguments +/// passed to the program treat them as filenames +/// +fn get_test_files() -> Vec { + info!("Test directory: {}", TEST_DIRECTORY); + + let args: Vec<_> = std::env::args().collect(); + + if args.len() > 1 { + let test_path = PathBuf::from(TEST_DIRECTORY); + + // treat args after the first as filenames in the test directory + args.into_iter() + .skip(1) + .map(|arg| test_path.join(arg)) + .collect::>() + } else { + // default to all files in test directory + std::fs::read_dir(TEST_DIRECTORY) + .unwrap() + .map(|path| path.unwrap().path()) + .collect() + } +} + /// Create a SessionContext, configured for the specific test async fn context_for_test_file(file_name: &str) -> SessionContext { match file_name { From fbd597a3a338743990e3f839ac709b4bcfa07eba Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Sun, 4 Dec 2022 07:40:04 -0500 Subject: [PATCH 2/5] Reduce some println to info --- datafusion/core/tests/sqllogictests/src/main.rs | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/datafusion/core/tests/sqllogictests/src/main.rs b/datafusion/core/tests/sqllogictests/src/main.rs index 663540695cccd..62530123d924f 100644 --- a/datafusion/core/tests/sqllogictests/src/main.rs +++ b/datafusion/core/tests/sqllogictests/src/main.rs @@ -72,8 +72,6 @@ pub async fn main() -> Result<()> { #[cfg(not(target_family = "windows"))] pub async fn main() -> Result<()> { // Enable logging (e.g. set RUST_LOG=debug to see debug logs) - - use log::info; env_logger::init(); // run each file using its own new DB @@ -129,19 +127,19 @@ fn get_test_files() -> Vec { async fn context_for_test_file(file_name: &str) -> SessionContext { match file_name { "aggregate.slt" => { - println!("Registering aggregate tables"); + info!("Registering aggregate tables"); let ctx = SessionContext::new(); setup::register_aggregate_tables(&ctx).await; ctx } "information_schema.slt" => { - println!("Enabling information schema"); + info!("Enabling information schema"); SessionContext::with_config( SessionConfig::new().with_information_schema(true), ) } _ => { - println!("Using default SessionContex"); + info!("Using default SessionContex"); SessionContext::new() } } From fb30383020cc5393961888209b74d2e726758267 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Sun, 4 Dec 2022 08:59:02 -0500 Subject: [PATCH 3/5] Be compatible with Rust test runner --- .../core/tests/sqllogictests/src/main.rs | 49 +++++++++++++------ 1 file changed, 35 insertions(+), 14 deletions(-) diff --git a/datafusion/core/tests/sqllogictests/src/main.rs b/datafusion/core/tests/sqllogictests/src/main.rs index 62530123d924f..0f067b733cead 100644 --- a/datafusion/core/tests/sqllogictests/src/main.rs +++ b/datafusion/core/tests/sqllogictests/src/main.rs @@ -20,7 +20,7 @@ use datafusion::arrow::csv::WriterBuilder; use datafusion::arrow::record_batch::RecordBatch; use datafusion::prelude::{SessionConfig, SessionContext}; use log::info; -use std::path::PathBuf; +use std::path::{Path, PathBuf}; use std::time::Duration; use sqllogictest::TestError; @@ -99,28 +99,49 @@ pub async fn main() -> Result<()> { } /// Gets a list of test files to execute. If there were arguments -/// passed to the program treat them as filenames -/// +/// passed to the program treat it as a cargo test filter (substring match on filenames) fn get_test_files() -> Vec { info!("Test directory: {}", TEST_DIRECTORY); let args: Vec<_> = std::env::args().collect(); - if args.len() > 1 { - let test_path = PathBuf::from(TEST_DIRECTORY); - - // treat args after the first as filenames in the test directory - args.into_iter() + // treat args after the first as filters to run (substring matching) + let filters = if !args.is_empty() { + args.iter() .skip(1) - .map(|arg| test_path.join(arg)) + .map(|arg| arg.as_str()) .collect::>() } else { - // default to all files in test directory - std::fs::read_dir(TEST_DIRECTORY) - .unwrap() - .map(|path| path.unwrap().path()) - .collect() + vec![] + }; + + // default to all files in test directory filtering based on name + std::fs::read_dir(TEST_DIRECTORY) + .unwrap() + .map(|path| path.unwrap().path()) + .filter(|path| check_test_file(&filters, path.as_path())) + .collect() +} + +/// because this test can be run as a cargo test, commands like +/// +/// ```shell +/// cargo test foo +/// ``` +/// +/// Will end up passing `foo` as a command line argument. +/// +/// be compatible with this, treat the command line arguments as a +/// filter and that does a substring match on each input. +/// returns true f this path should be run +fn check_test_file(filters: &[&str], path: &Path) -> bool { + if filters.is_empty() { + return true; } + + // otherwise check if any filter matches + let path_str = path.to_string_lossy(); + filters.iter().any(|filter| path_str.contains(filter)) } /// Create a SessionContext, configured for the specific test From 3be240924fe3262cd4e29b6cdf3dcc450a097d5f Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Mon, 5 Dec 2022 12:42:04 -0500 Subject: [PATCH 4/5] Fix typo, cargo fmt --- datafusion/core/tests/sqllogictests/src/main.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/datafusion/core/tests/sqllogictests/src/main.rs b/datafusion/core/tests/sqllogictests/src/main.rs index 47199e0919d0d..ad7cb20e976b3 100644 --- a/datafusion/core/tests/sqllogictests/src/main.rs +++ b/datafusion/core/tests/sqllogictests/src/main.rs @@ -19,11 +19,11 @@ use async_trait::async_trait; use datafusion::arrow::csv::WriterBuilder; use datafusion::arrow::record_batch::RecordBatch; use datafusion::prelude::{SessionConfig, SessionContext}; -use log::info; -use std::path::{Path, PathBuf}; use datafusion_sql::parser::{DFParser, Statement}; +use log::info; use normalize::normalize_batch; use sqlparser::ast::Statement as SQLStatement; +use std::path::{Path, PathBuf}; use std::time::Duration; use crate::error::{DFSqlLogicTestError, Result}; @@ -166,7 +166,7 @@ async fn context_for_test_file(file_name: &str) -> SessionContext { ) } _ => { - info!("Using default SessionContex"); + info!("Using default SessionContext"); SessionContext::new() } } From fd98463a02895c35e74f36ff1134e4f40cab8b20 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Mon, 5 Dec 2022 12:44:41 -0500 Subject: [PATCH 5/5] Add a note about substring matching --- datafusion/core/tests/sqllogictests/README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/datafusion/core/tests/sqllogictests/README.md b/datafusion/core/tests/sqllogictests/README.md index 0073bd3d925e7..648e0a3eaacb3 100644 --- a/datafusion/core/tests/sqllogictests/README.md +++ b/datafusion/core/tests/sqllogictests/README.md @@ -36,7 +36,8 @@ RUST_LOG=debug cargo test -p datafusion --test sqllogictests Run only the tests in `information_schema.slt`: ```shell -cargo test -p datafusion --test sqllogictests -- information_schema.slt +# information_schema.slt matches due to substring matching `information` +cargo test -p datafusion --test sqllogictests -- information ``` #### sqllogictests