From 5d6faf274bbe520f99950112b553d53408ef961e Mon Sep 17 00:00:00 2001 From: Yevhenii Melnyk Date: Mon, 23 Jan 2023 22:46:39 +0100 Subject: [PATCH 1/5] [sqllogictest] Read subdirectories in `test_files` --- .../core/tests/sqllogictests/src/main.rs | 54 ++++++++++++------- .../{ => pg_compat}/pg_compat_simple.slt | 0 .../{ => pg_compat}/pg_compat_types.slt | 0 .../{ => pg_compat}/pg_compat_union.slt | 0 .../{ => pg_compat}/pg_compat_window.slt | 0 5 files changed, 36 insertions(+), 18 deletions(-) rename datafusion/core/tests/sqllogictests/test_files/{ => pg_compat}/pg_compat_simple.slt (100%) rename datafusion/core/tests/sqllogictests/test_files/{ => pg_compat}/pg_compat_types.slt (100%) rename datafusion/core/tests/sqllogictests/test_files/{ => pg_compat}/pg_compat_union.slt (100%) rename datafusion/core/tests/sqllogictests/test_files/{ => pg_compat}/pg_compat_window.slt (100%) diff --git a/datafusion/core/tests/sqllogictests/src/main.rs b/datafusion/core/tests/sqllogictests/src/main.rs index 2f3febb25d610..a744b9f021673 100644 --- a/datafusion/core/tests/sqllogictests/src/main.rs +++ b/datafusion/core/tests/sqllogictests/src/main.rs @@ -29,7 +29,7 @@ mod engines; mod setup; mod utils; -const TEST_DIRECTORY: &str = "tests/sqllogictests/test_files"; +const TEST_DIRECTORY: &str = "tests/sqllogictests/test_files/"; const PG_COMPAT_FILE_PREFIX: &str = "pg_compat_"; #[tokio::main] @@ -47,13 +47,7 @@ pub async fn main() -> Result<(), Box> { let options = Options::new(); - let files: Vec<_> = read_test_files(&options); - - info!("Running test files {:?}", files); - - for path in files { - let file_name = path.file_name().unwrap().to_str().unwrap().to_string(); - + for (path, file_name) in read_test_files(&options) { if options.complete_mode { run_complete_file(&path, file_name).await?; } else if options.postgres_runner { @@ -110,13 +104,38 @@ async fn run_complete_file( Ok(()) } -fn read_test_files(options: &Options) -> Vec { - std::fs::read_dir(TEST_DIRECTORY) - .unwrap() - .map(|path| path.unwrap().path()) - .filter(|path| options.check_test_file(path.as_path())) - .filter(|path| options.check_pg_compat_file(path.as_path())) - .collect() +fn read_test_files<'a>( + options: &'a Options, +) -> Box + 'a> { + Box::new( + read_dir_recursive(TEST_DIRECTORY) + .map(|path| { + ( + path.clone(), + path.to_string_lossy() + .strip_prefix(TEST_DIRECTORY) + .unwrap() + .to_string(), + ) + }) + .filter(|(_, file_name)| options.check_test_file(file_name)) + .filter(|(path, _)| options.check_pg_compat_file(path.as_path())), + ) +} + +fn read_dir_recursive>(path: P) -> Box> { + Box::new( + std::fs::read_dir(path) + .expect("Readable directory") + .map(|path| path.expect("Readable entry").path()) + .flat_map(|path| { + if path.is_dir() { + read_dir_recursive(path) + } else { + Box::new(std::iter::once(path)) + } + }), + ) } /// Create a SessionContext, configured for the specific test @@ -185,14 +204,13 @@ impl Options { /// To be compatible with this, treat the command line arguments as a /// filter and that does a substring match on each input. returns /// true f this path should be run - fn check_test_file(&self, path: &Path) -> bool { + fn check_test_file(&self, file_name: &str) -> bool { if self.filters.is_empty() { return true; } // otherwise check if any filter matches - let path_str = path.to_string_lossy(); - self.filters.iter().any(|filter| path_str.contains(filter)) + self.filters.iter().any(|filter| file_name.contains(filter)) } /// Postgres runner executes only tests in files with specific names diff --git a/datafusion/core/tests/sqllogictests/test_files/pg_compat_simple.slt b/datafusion/core/tests/sqllogictests/test_files/pg_compat/pg_compat_simple.slt similarity index 100% rename from datafusion/core/tests/sqllogictests/test_files/pg_compat_simple.slt rename to datafusion/core/tests/sqllogictests/test_files/pg_compat/pg_compat_simple.slt diff --git a/datafusion/core/tests/sqllogictests/test_files/pg_compat_types.slt b/datafusion/core/tests/sqllogictests/test_files/pg_compat/pg_compat_types.slt similarity index 100% rename from datafusion/core/tests/sqllogictests/test_files/pg_compat_types.slt rename to datafusion/core/tests/sqllogictests/test_files/pg_compat/pg_compat_types.slt diff --git a/datafusion/core/tests/sqllogictests/test_files/pg_compat_union.slt b/datafusion/core/tests/sqllogictests/test_files/pg_compat/pg_compat_union.slt similarity index 100% rename from datafusion/core/tests/sqllogictests/test_files/pg_compat_union.slt rename to datafusion/core/tests/sqllogictests/test_files/pg_compat/pg_compat_union.slt diff --git a/datafusion/core/tests/sqllogictests/test_files/pg_compat_window.slt b/datafusion/core/tests/sqllogictests/test_files/pg_compat/pg_compat_window.slt similarity index 100% rename from datafusion/core/tests/sqllogictests/test_files/pg_compat_window.slt rename to datafusion/core/tests/sqllogictests/test_files/pg_compat/pg_compat_window.slt From a24fbfe192a288d951f19c27264ae8ee0d674a4e Mon Sep 17 00:00:00 2001 From: Yevhenii Melnyk Date: Tue, 24 Jan 2023 00:07:46 +0100 Subject: [PATCH 2/5] Update the relative path type --- .../src/engines/datafusion/mod.rs | 13 +++-- .../sqllogictests/src/engines/postgres/mod.rs | 31 ++++++---- .../core/tests/sqllogictests/src/main.rs | 56 ++++++++++--------- 3 files changed, 57 insertions(+), 43 deletions(-) diff --git a/datafusion/core/tests/sqllogictests/src/engines/datafusion/mod.rs b/datafusion/core/tests/sqllogictests/src/engines/datafusion/mod.rs index cb4990a42de57..7da453d775d9e 100644 --- a/datafusion/core/tests/sqllogictests/src/engines/datafusion/mod.rs +++ b/datafusion/core/tests/sqllogictests/src/engines/datafusion/mod.rs @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +use std::path::PathBuf; use std::time::Duration; use sqllogictest::DBOutput; @@ -36,12 +37,12 @@ mod util; pub struct DataFusion { ctx: SessionContext, - file_name: String, + relative_path: PathBuf, } impl DataFusion { - pub fn new(ctx: SessionContext, file_name: String) -> Self { - Self { ctx, file_name } + pub fn new(ctx: SessionContext, relative_path: PathBuf) -> Self { + Self { ctx, relative_path } } } @@ -50,7 +51,11 @@ impl sqllogictest::AsyncDB for DataFusion { type Error = DFSqlLogicTestError; async fn run(&mut self, sql: &str) -> Result { - println!("[{}] Running query: \"{}\"", self.file_name, sql); + println!( + "[{}] Running query: \"{}\"", + self.relative_path.display(), + sql + ); let result = run_query(&self.ctx, sql).await?; Ok(result) } diff --git a/datafusion/core/tests/sqllogictests/src/engines/postgres/mod.rs b/datafusion/core/tests/sqllogictests/src/engines/postgres/mod.rs index dab5b4d38598d..6fc3341411abd 100644 --- a/datafusion/core/tests/sqllogictests/src/engines/postgres/mod.rs +++ b/datafusion/core/tests/sqllogictests/src/engines/postgres/mod.rs @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +use std::path::{Path, PathBuf}; use std::str::FromStr; use async_trait::async_trait; @@ -47,13 +48,13 @@ pub type Result = std::result::Result; pub struct Postgres { client: tokio_postgres::Client, join_handle: JoinHandle<()>, - /// Filename, for display purposes - file_name: String, + /// Relative test file path + relative_path: PathBuf, } impl Postgres { - /// Creates a runner for executing queries against an existing - /// posgres connection. `file_name` is used for display output + /// Creates a runner for executing queries against an existing postgres connection. + /// `relative_path` is used for display output and to create a postgres schema. /// /// The database connection details can be overridden by the /// `PG_URI` environment variable. @@ -65,9 +66,7 @@ impl Postgres { /// ``` /// /// See https://docs.rs/tokio-postgres/latest/tokio_postgres/config/struct.Config.html#url for format - pub async fn connect(file_name: impl Into) -> Result { - let file_name = file_name.into(); - + pub async fn connect(relative_path: PathBuf) -> Result { let uri = std::env::var("PG_URI").map_or(PG_URI.to_string(), std::convert::identity); @@ -89,7 +88,7 @@ impl Postgres { } }); - let schema = schema_name(&file_name); + let schema = schema_name(&relative_path); // create a new clean schema for running the test debug!("Creating new empty schema '{schema}'"); @@ -108,7 +107,7 @@ impl Postgres { Ok(Self { client, join_handle, - file_name, + relative_path, }) } @@ -188,8 +187,12 @@ fn no_quotes(t: &str) -> &str { /// Given a file name like pg_compat_foo.slt /// return a schema name -fn schema_name(file_name: &str) -> &str { - file_name +fn schema_name(relative_path: &Path) -> &str { + relative_path + .file_name() + .unwrap() + .to_str() + .unwrap() .split('.') .next() .unwrap_or("default_schema") @@ -249,7 +252,11 @@ impl sqllogictest::AsyncDB for Postgres { type Error = Error; async fn run(&mut self, sql: &str) -> Result { - println!("[{}] Running query: \"{}\"", self.file_name, sql); + println!( + "[{}] Running query: \"{}\"", + self.relative_path.display(), + sql + ); let lower_sql = sql.trim_start().to_ascii_lowercase(); diff --git a/datafusion/core/tests/sqllogictests/src/main.rs b/datafusion/core/tests/sqllogictests/src/main.rs index a744b9f021673..4fb6cc26ad146 100644 --- a/datafusion/core/tests/sqllogictests/src/main.rs +++ b/datafusion/core/tests/sqllogictests/src/main.rs @@ -47,34 +47,37 @@ pub async fn main() -> Result<(), Box> { let options = Options::new(); - for (path, file_name) in read_test_files(&options) { + for (path, relative_path) in read_test_files(&options) { if options.complete_mode { - run_complete_file(&path, file_name).await?; + run_complete_file(&path, relative_path).await?; } else if options.postgres_runner { - run_test_file_with_postgres(&path, file_name).await?; + run_test_file_with_postgres(&path, relative_path).await?; } else { - run_test_file(&path, file_name).await?; + run_test_file(&path, relative_path).await?; } } Ok(()) } -async fn run_test_file(path: &PathBuf, file_name: String) -> Result<(), Box> { - println!("Running with DataFusion runner: {}", path.display()); - let ctx = context_for_test_file(&file_name).await; - let mut runner = sqllogictest::Runner::new(DataFusion::new(ctx, file_name)); +async fn run_test_file( + path: &Path, + relative_path: PathBuf, +) -> Result<(), Box> { + info!("Running with DataFusion runner: {}", path.display()); + let ctx = context_for_test_file(&relative_path).await; + let mut runner = sqllogictest::Runner::new(DataFusion::new(ctx, relative_path)); runner.run_file_async(path).await?; Ok(()) } async fn run_test_file_with_postgres( - path: &PathBuf, - file_name: String, + path: &Path, + relative_path: PathBuf, ) -> Result<(), Box> { info!("Running with Postgres runner: {}", path.display()); - let postgres_client = Postgres::connect(file_name).await?; + let postgres_client = Postgres::connect(relative_path).await?; sqllogictest::Runner::new(postgres_client) .run_file_async(path) @@ -84,17 +87,15 @@ async fn run_test_file_with_postgres( } async fn run_complete_file( - path: &PathBuf, - file_name: String, + path: &Path, + relative_path: PathBuf, ) -> Result<(), Box> { use sqllogictest::{default_validator, update_test_file}; info!("Using complete mode to complete: {}", path.display()); - let ctx = context_for_test_file(&file_name).await; - let mut runner = sqllogictest::Runner::new(DataFusion::new(ctx, file_name)); - - info!("Using complete mode to complete {}", path.display()); + let ctx = context_for_test_file(&relative_path).await; + let mut runner = sqllogictest::Runner::new(DataFusion::new(ctx, relative_path)); let col_separator = " "; let validator = default_validator; update_test_file(path, &mut runner, col_separator, validator) @@ -106,19 +107,18 @@ async fn run_complete_file( fn read_test_files<'a>( options: &'a Options, -) -> Box + 'a> { +) -> Box + 'a> { Box::new( read_dir_recursive(TEST_DIRECTORY) .map(|path| { ( path.clone(), - path.to_string_lossy() - .strip_prefix(TEST_DIRECTORY) - .unwrap() - .to_string(), + PathBuf::from( + path.to_string_lossy().strip_prefix(TEST_DIRECTORY).unwrap(), + ), ) }) - .filter(|(_, file_name)| options.check_test_file(file_name)) + .filter(|(_, relative_path)| options.check_test_file(relative_path)) .filter(|(path, _)| options.check_pg_compat_file(path.as_path())), ) } @@ -139,8 +139,8 @@ fn read_dir_recursive>(path: P) -> Box SessionContext { - match file_name { +async fn context_for_test_file(relative_path: &Path) -> SessionContext { + match relative_path.file_name().unwrap().to_str().unwrap() { "aggregate.slt" | "select.slt" => { info!("Registering aggregate tables"); let ctx = SessionContext::new(); @@ -204,13 +204,15 @@ impl Options { /// To be compatible with this, treat the command line arguments as a /// filter and that does a substring match on each input. returns /// true f this path should be run - fn check_test_file(&self, file_name: &str) -> bool { + fn check_test_file(&self, relative_path: &Path) -> bool { if self.filters.is_empty() { return true; } // otherwise check if any filter matches - self.filters.iter().any(|filter| file_name.contains(filter)) + self.filters + .iter() + .any(|filter| relative_path.to_string_lossy().contains(filter)) } /// Postgres runner executes only tests in files with specific names From 276db164c47936371ae082eee60f56e1415d60de Mon Sep 17 00:00:00 2001 From: Yevhenii Melnyk Date: Tue, 24 Jan 2023 17:53:56 +0100 Subject: [PATCH 3/5] expect instead of unwrap --- .../core/tests/sqllogictests/src/engines/postgres/mod.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/datafusion/core/tests/sqllogictests/src/engines/postgres/mod.rs b/datafusion/core/tests/sqllogictests/src/engines/postgres/mod.rs index 6fc3341411abd..dc2947d3c9fd5 100644 --- a/datafusion/core/tests/sqllogictests/src/engines/postgres/mod.rs +++ b/datafusion/core/tests/sqllogictests/src/engines/postgres/mod.rs @@ -190,9 +190,9 @@ fn no_quotes(t: &str) -> &str { fn schema_name(relative_path: &Path) -> &str { relative_path .file_name() - .unwrap() - .to_str() - .unwrap() + .map(|name| name.to_str()) + .flatten() + .expect("A file with a UTF-8 name") .split('.') .next() .unwrap_or("default_schema") From 93f4ffde681e73a2e8ac0ba6269a7076ef5de236 Mon Sep 17 00:00:00 2001 From: Yevhenii Melnyk Date: Tue, 24 Jan 2023 18:07:39 +0100 Subject: [PATCH 4/5] Keep only ascii characters in Postgres schema name --- .../sqllogictests/src/engines/postgres/mod.rs | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/datafusion/core/tests/sqllogictests/src/engines/postgres/mod.rs b/datafusion/core/tests/sqllogictests/src/engines/postgres/mod.rs index dc2947d3c9fd5..4023a524aef4b 100644 --- a/datafusion/core/tests/sqllogictests/src/engines/postgres/mod.rs +++ b/datafusion/core/tests/sqllogictests/src/engines/postgres/mod.rs @@ -187,16 +187,18 @@ fn no_quotes(t: &str) -> &str { /// Given a file name like pg_compat_foo.slt /// return a schema name -fn schema_name(relative_path: &Path) -> &str { +fn schema_name(relative_path: &Path) -> String { relative_path .file_name() - .map(|name| name.to_str()) - .flatten() - .expect("A file with a UTF-8 name") - .split('.') - .next() - .unwrap_or("default_schema") - .trim_start_matches("pg_") + .map(|name| { + name.to_string_lossy() + .chars() + .filter(|ch| ch.is_ascii_alphabetic()) + .collect::() + .trim_start_matches("pg_") + .to_string() + }) + .unwrap_or("default_schema".to_string()) } impl Drop for Postgres { From f8b9d4747e3b706d70dec61bc2574bf312292b72 Mon Sep 17 00:00:00 2001 From: Yevhenii Melnyk Date: Tue, 24 Jan 2023 23:23:49 +0100 Subject: [PATCH 5/5] clippy --- datafusion/core/tests/sqllogictests/src/engines/postgres/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/core/tests/sqllogictests/src/engines/postgres/mod.rs b/datafusion/core/tests/sqllogictests/src/engines/postgres/mod.rs index 4023a524aef4b..cfefecf69b7f2 100644 --- a/datafusion/core/tests/sqllogictests/src/engines/postgres/mod.rs +++ b/datafusion/core/tests/sqllogictests/src/engines/postgres/mod.rs @@ -198,7 +198,7 @@ fn schema_name(relative_path: &Path) -> String { .trim_start_matches("pg_") .to_string() }) - .unwrap_or("default_schema".to_string()) + .unwrap_or_else(|| "default_schema".to_string()) } impl Drop for Postgres {