diff --git a/.gitignore b/.gitignore index 05570eacf630c..8195760513f7c 100644 --- a/.gitignore +++ b/.gitignore @@ -64,6 +64,9 @@ datafusion/sqllogictests/test_files/tpch/data/* # Scratch temp dir for sqllogictests datafusion/sqllogictest/test_files/scratch* +# temp file for core +datafusion/core/*.parquet + # rat filtered_rat.txt rat.txt diff --git a/datafusion/core/tests/parquet/external_access_plan.rs b/datafusion/core/tests/parquet/external_access_plan.rs index 96267eeff5a7c..fa23f5c699e2d 100644 --- a/datafusion/core/tests/parquet/external_access_plan.rs +++ b/datafusion/core/tests/parquet/external_access_plan.rs @@ -33,7 +33,8 @@ use datafusion_physical_plan::ExecutionPlan; use parquet::arrow::arrow_reader::{RowSelection, RowSelector}; use parquet::arrow::ArrowWriter; use parquet::file::properties::WriterProperties; -use std::sync::{Arc, OnceLock}; +use std::path::Path; +use std::sync::Arc; use tempfile::NamedTempFile; #[tokio::test] @@ -314,12 +315,19 @@ impl TestFull { let TestData { _temp_file: _, - schema, - file_name, - file_size, + ref schema, + ref file_name, + ref file_size, } = get_test_data(); - let mut partitioned_file = PartitionedFile::new(file_name, *file_size); + let new_file_name = if cfg!(target_os = "windows") { + // Windows path separator is different from Unix + file_name.replace("\\", "/") + } else { + file_name.clone() + }; + + let mut partitioned_file = PartitionedFile::new(new_file_name, *file_size); // add the access plan, if any, as an extension if let Some(access_plan) = access_plan { @@ -355,6 +363,8 @@ impl TestFull { pretty_format_batches(&results).unwrap() ); + std::fs::remove_file(file_name).unwrap(); + Ok(MetricsFinder::find_metrics(plan.as_ref()).unwrap()) } } @@ -369,45 +379,41 @@ struct TestData { file_size: u64, } -static TEST_DATA: OnceLock = OnceLock::new(); - /// Return a parquet file with 2 row groups each with 5 rows -fn get_test_data() -> &'static TestData { - TEST_DATA.get_or_init(|| { - let scenario = Scenario::UTF8; - let row_per_group = 5; +fn get_test_data() -> TestData { + let scenario = Scenario::UTF8; + let row_per_group = 5; - let mut temp_file = tempfile::Builder::new() - .prefix("user_access_plan") - .suffix(".parquet") - .tempfile() - .expect("tempfile creation"); + let mut temp_file = tempfile::Builder::new() + .prefix("user_access_plan") + .suffix(".parquet") + .tempfile_in(Path::new("")) + .expect("tempfile creation"); - let props = WriterProperties::builder() - .set_max_row_group_size(row_per_group) - .build(); + let props = WriterProperties::builder() + .set_max_row_group_size(row_per_group) + .build(); - let batches = create_data_batch(scenario); - let schema = batches[0].schema(); + let batches = create_data_batch(scenario); + let schema = batches[0].schema(); - let mut writer = - ArrowWriter::try_new(&mut temp_file, schema.clone(), Some(props)).unwrap(); + let mut writer = + ArrowWriter::try_new(&mut temp_file, schema.clone(), Some(props)).unwrap(); - for batch in batches { - writer.write(&batch).expect("writing batch"); - } - writer.close().unwrap(); + for batch in batches { + writer.write(&batch).expect("writing batch"); + } + writer.close().unwrap(); - let file_name = temp_file.path().to_string_lossy().to_string(); - let file_size = temp_file.path().metadata().unwrap().len(); + let file_name = temp_file.path().to_string_lossy().to_string(); + let file_size = temp_file.path().metadata().unwrap().len(); - TestData { - _temp_file: temp_file, - schema, - file_name, - file_size, - } - }) + TestData { + _temp_file: temp_file, + schema, + file_name, + file_size, + } } /// Return the total value of the specified metric name diff --git a/datafusion/core/tests/parquet/mod.rs b/datafusion/core/tests/parquet/mod.rs index cd298d1c5543a..3f68222a2ce3c 100644 --- a/datafusion/core/tests/parquet/mod.rs +++ b/datafusion/core/tests/parquet/mod.rs @@ -43,8 +43,6 @@ use std::sync::Arc; use tempfile::NamedTempFile; mod custom_reader; -// Don't run on windows as tempfiles don't seem to work the same -#[cfg(not(target_os = "windows"))] mod external_access_plan; mod file_statistics; #[cfg(not(target_family = "windows"))]