From 32acd9dfd80f08df0d5c8663a48f1c2a0ab39781 Mon Sep 17 00:00:00 2001 From: zhuliquan Date: Sat, 23 Nov 2024 00:34:54 +0800 Subject: [PATCH 1/3] test: allow external_access_plan run on windows --- datafusion/core/tests/parquet/external_access_plan.rs | 9 ++++++++- datafusion/core/tests/parquet/mod.rs | 2 -- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/datafusion/core/tests/parquet/external_access_plan.rs b/datafusion/core/tests/parquet/external_access_plan.rs index 96267eeff5a7c..f391e730bddc1 100644 --- a/datafusion/core/tests/parquet/external_access_plan.rs +++ b/datafusion/core/tests/parquet/external_access_plan.rs @@ -319,7 +319,14 @@ impl TestFull { file_size, } = get_test_data(); - let mut partitioned_file = PartitionedFile::new(file_name, *file_size); + let new_file_name = if cfg!(target_os = "windows") { + // Windows path separator is different from Unix + file_name.replace("\\", "/") + } else { + file_name.clone() + }; + + let mut partitioned_file = PartitionedFile::new(new_file_name, *file_size); // add the access plan, if any, as an extension if let Some(access_plan) = access_plan { diff --git a/datafusion/core/tests/parquet/mod.rs b/datafusion/core/tests/parquet/mod.rs index cd298d1c5543a..3f68222a2ce3c 100644 --- a/datafusion/core/tests/parquet/mod.rs +++ b/datafusion/core/tests/parquet/mod.rs @@ -43,8 +43,6 @@ use std::sync::Arc; use tempfile::NamedTempFile; mod custom_reader; -// Don't run on windows as tempfiles don't seem to work the same -#[cfg(not(target_os = "windows"))] mod external_access_plan; mod file_statistics; #[cfg(not(target_family = "windows"))] From b65d2a3a770596bc33d90f11a908ae29065068bf Mon Sep 17 00:00:00 2001 From: zhuliquan Date: Sat, 23 Nov 2024 20:10:32 +0800 Subject: [PATCH 2/3] fix: avoid '%' in path for external_access_plan.rs and remove temp file --- .gitignore | 3 + .../tests/parquet/external_access_plan.rs | 67 ++++++++++--------- 2 files changed, 37 insertions(+), 33 deletions(-) diff --git a/.gitignore b/.gitignore index 05570eacf630c..8195760513f7c 100644 --- a/.gitignore +++ b/.gitignore @@ -64,6 +64,9 @@ datafusion/sqllogictests/test_files/tpch/data/* # Scratch temp dir for sqllogictests datafusion/sqllogictest/test_files/scratch* +# temp file for core +datafusion/core/*.parquet + # rat filtered_rat.txt rat.txt diff --git a/datafusion/core/tests/parquet/external_access_plan.rs b/datafusion/core/tests/parquet/external_access_plan.rs index f391e730bddc1..6dbde30dc6eb0 100644 --- a/datafusion/core/tests/parquet/external_access_plan.rs +++ b/datafusion/core/tests/parquet/external_access_plan.rs @@ -33,6 +33,7 @@ use datafusion_physical_plan::ExecutionPlan; use parquet::arrow::arrow_reader::{RowSelection, RowSelector}; use parquet::arrow::ArrowWriter; use parquet::file::properties::WriterProperties; +use std::path::Path; use std::sync::{Arc, OnceLock}; use tempfile::NamedTempFile; @@ -314,9 +315,9 @@ impl TestFull { let TestData { _temp_file: _, - schema, - file_name, - file_size, + ref schema, + ref file_name, + ref file_size, } = get_test_data(); let new_file_name = if cfg!(target_os = "windows") { @@ -362,6 +363,8 @@ impl TestFull { pretty_format_batches(&results).unwrap() ); + std::fs::remove_file(file_name).unwrap(); + Ok(MetricsFinder::find_metrics(plan.as_ref()).unwrap()) } } @@ -376,45 +379,43 @@ struct TestData { file_size: u64, } -static TEST_DATA: OnceLock = OnceLock::new(); +static _TEST_DATA: OnceLock = OnceLock::new(); /// Return a parquet file with 2 row groups each with 5 rows -fn get_test_data() -> &'static TestData { - TEST_DATA.get_or_init(|| { - let scenario = Scenario::UTF8; - let row_per_group = 5; +fn get_test_data() -> TestData { + let scenario = Scenario::UTF8; + let row_per_group = 5; - let mut temp_file = tempfile::Builder::new() - .prefix("user_access_plan") - .suffix(".parquet") - .tempfile() - .expect("tempfile creation"); + let mut temp_file = tempfile::Builder::new() + .prefix("user_access_plan") + .suffix(".parquet") + .tempfile_in(Path::new("")) + .expect("tempfile creation"); - let props = WriterProperties::builder() - .set_max_row_group_size(row_per_group) - .build(); + let props = WriterProperties::builder() + .set_max_row_group_size(row_per_group) + .build(); - let batches = create_data_batch(scenario); - let schema = batches[0].schema(); + let batches = create_data_batch(scenario); + let schema = batches[0].schema(); - let mut writer = - ArrowWriter::try_new(&mut temp_file, schema.clone(), Some(props)).unwrap(); + let mut writer = + ArrowWriter::try_new(&mut temp_file, schema.clone(), Some(props)).unwrap(); - for batch in batches { - writer.write(&batch).expect("writing batch"); - } - writer.close().unwrap(); + for batch in batches { + writer.write(&batch).expect("writing batch"); + } + writer.close().unwrap(); - let file_name = temp_file.path().to_string_lossy().to_string(); - let file_size = temp_file.path().metadata().unwrap().len(); + let file_name = temp_file.path().to_string_lossy().to_string(); + let file_size = temp_file.path().metadata().unwrap().len(); - TestData { - _temp_file: temp_file, - schema, - file_name, - file_size, - } - }) + TestData { + _temp_file: temp_file, + schema, + file_name, + file_size, + } } /// Return the total value of the specified metric name From 9696952a5d084edfc908b11fb2a82c876539c9e2 Mon Sep 17 00:00:00 2001 From: zhuliquan Date: Mon, 25 Nov 2024 21:28:58 +0800 Subject: [PATCH 3/3] minor: remove singleton TEST_DATA for external_access_plan --- datafusion/core/tests/parquet/external_access_plan.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/datafusion/core/tests/parquet/external_access_plan.rs b/datafusion/core/tests/parquet/external_access_plan.rs index 6dbde30dc6eb0..fa23f5c699e2d 100644 --- a/datafusion/core/tests/parquet/external_access_plan.rs +++ b/datafusion/core/tests/parquet/external_access_plan.rs @@ -34,7 +34,7 @@ use parquet::arrow::arrow_reader::{RowSelection, RowSelector}; use parquet::arrow::ArrowWriter; use parquet::file::properties::WriterProperties; use std::path::Path; -use std::sync::{Arc, OnceLock}; +use std::sync::Arc; use tempfile::NamedTempFile; #[tokio::test] @@ -379,8 +379,6 @@ struct TestData { file_size: u64, } -static _TEST_DATA: OnceLock = OnceLock::new(); - /// Return a parquet file with 2 row groups each with 5 rows fn get_test_data() -> TestData { let scenario = Scenario::UTF8;