From 78ee392940a99597b2485465d0ad8dfccd06d6de Mon Sep 17 00:00:00 2001 From: marvinlanhenke Date: Wed, 5 Jun 2024 11:54:12 +0200 Subject: [PATCH 1/6] feat: add make_batch + basic test --- .../core/tests/parquet/arrow_statistics.rs | 44 ++++++++++- datafusion/core/tests/parquet/mod.rs | 78 ++++++++++++++++++- 2 files changed, 117 insertions(+), 5 deletions(-) diff --git a/datafusion/core/tests/parquet/arrow_statistics.rs b/datafusion/core/tests/parquet/arrow_statistics.rs index 19cc4db4d20e7..dd91014f3368b 100644 --- a/datafusion/core/tests/parquet/arrow_statistics.rs +++ b/datafusion/core/tests/parquet/arrow_statistics.rs @@ -24,14 +24,14 @@ use std::sync::Arc; use crate::parquet::{struct_array, Scenario}; use arrow::compute::kernels::cast_utils::Parser; use arrow::datatypes::{ - i256, Date32Type, Date64Type, TimestampMicrosecondType, TimestampMillisecondType, - TimestampNanosecondType, TimestampSecondType, + i256, Date32Type, Date64Type, IntervalYearMonthType, TimestampMicrosecondType, + TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType, }; use arrow_array::{ make_array, Array, ArrayRef, BinaryArray, BooleanArray, Date32Array, Date64Array, Decimal128Array, Decimal256Array, FixedSizeBinaryArray, Float16Array, Float32Array, - Float64Array, Int16Array, Int32Array, Int64Array, Int8Array, LargeBinaryArray, - LargeStringArray, RecordBatch, StringArray, Time32MillisecondArray, + Float64Array, Int16Array, Int32Array, Int64Array, Int8Array, IntervalYearMonthArray, + LargeBinaryArray, LargeStringArray, RecordBatch, StringArray, Time32MillisecondArray, Time32SecondArray, Time64MicrosecondArray, Time64NanosecondArray, TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray, TimestampSecondArray, UInt16Array, UInt32Array, UInt64Array, UInt8Array, @@ -1073,6 +1073,42 @@ async fn test_dates_64_diff_rg_sizes() { .run(); } +#[tokio::test] +#[should_panic] +// Statistics for `Intervals` are not supported yet, see for ref: +// https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#interval +async fn test_interval_diff_rg_sizes() { + // This creates a parquet files of 3 columns: + // "year_month" --> IntervalYearMonthArray + // "day_time" --> IntervalDayTimeArray + // "month_day_nano" --> IntervalMonthDayNanoArray + // + // The file is created by 4 record batches (each has a null row) + // each has 5 rows but then will be split into 2 row groups with size 13, 7 + let reader = TestReader { + scenario: Scenario::Interval, + row_per_group: 13, + } + .build() + .await; + + Test { + reader: &reader, + expected_min: Arc::new(IntervalYearMonthArray::from(vec![ + IntervalYearMonthType::make_value(1, 1), + IntervalYearMonthType::make_value(4, 4), + ])), + expected_max: Arc::new(IntervalYearMonthArray::from(vec![ + IntervalYearMonthType::make_value(6, 6), + IntervalYearMonthType::make_value(8, 8), + ])), + expected_null_counts: UInt64Array::from(vec![2, 2]), + expected_row_counts: UInt64Array::from(vec![13, 7]), + column_name: "year_month", + } + .run(); +} + #[tokio::test] async fn test_uint() { // This creates a parquet files of 4 columns named "u8", "u16", "u32", "u64" diff --git a/datafusion/core/tests/parquet/mod.rs b/datafusion/core/tests/parquet/mod.rs index f36a9a194a8f5..52a2ef594eec1 100644 --- a/datafusion/core/tests/parquet/mod.rs +++ b/datafusion/core/tests/parquet/mod.rs @@ -17,7 +17,9 @@ //! Parquet integration tests use arrow::array::Decimal128Array; -use arrow::datatypes::i256; +use arrow::datatypes::{ + i256, IntervalDayTimeType, IntervalMonthDayNanoType, IntervalYearMonthType, +}; use arrow::{ array::{ make_array, Array, ArrayRef, BinaryArray, BooleanArray, Date32Array, Date64Array, @@ -33,6 +35,10 @@ use arrow::{ record_batch::RecordBatch, util::pretty::pretty_format_batches, }; +use arrow_array::{ + IntervalDayTimeArray, IntervalMonthDayNanoArray, IntervalYearMonthArray, +}; +use arrow_schema::IntervalUnit; use chrono::{Datelike, Duration, TimeDelta}; use datafusion::{ datasource::{physical_plan::ParquetExec, provider_as_source, TableProvider}, @@ -80,6 +86,7 @@ enum Scenario { Time32Millisecond, Time64Nanosecond, Time64Microsecond, + Interval, /// 7 Rows, for each i8, i16, i32, i64, u8, u16, u32, u64, f32, f64 /// -MIN, -100, -1, 0, 1, 100, MAX NumericLimits, @@ -925,6 +932,69 @@ fn make_dict_batch() -> RecordBatch { .unwrap() } +fn make_interval_batch(offset: i32) -> RecordBatch { + let schema = Schema::new(vec![ + Field::new( + "year_month", + DataType::Interval(IntervalUnit::YearMonth), + true, + ), + Field::new("day_time", DataType::Interval(IntervalUnit::DayTime), true), + Field::new( + "month_day_nano", + DataType::Interval(IntervalUnit::MonthDayNano), + true, + ), + ]); + let schema = Arc::new(schema); + + let ym_arr = IntervalYearMonthArray::from(vec![ + Some(IntervalYearMonthType::make_value(1 + offset, 1 + offset)), + Some(IntervalYearMonthType::make_value(2 + offset, 2 + offset)), + Some(IntervalYearMonthType::make_value(3 + offset, 3 + offset)), + None, + Some(IntervalYearMonthType::make_value(5 + offset, 5 + offset)), + ]); + + let dt_arr = IntervalDayTimeArray::from(vec![ + Some(IntervalDayTimeType::make_value(1 + offset, 1 + offset)), + Some(IntervalDayTimeType::make_value(2 + offset, 2 + offset)), + Some(IntervalDayTimeType::make_value(3 + offset, 3 + offset)), + None, + Some(IntervalDayTimeType::make_value(5 + offset, 5 + offset)), + ]); + + let mdn_arr = IntervalMonthDayNanoArray::from(vec![ + Some(IntervalMonthDayNanoType::make_value( + 1 + offset, + 1 + offset, + 1 + (offset as i64), + )), + Some(IntervalMonthDayNanoType::make_value( + 2 + offset, + 2 + offset, + 2 + (offset as i64), + )), + Some(IntervalMonthDayNanoType::make_value( + 3 + offset, + 3 + offset, + 3 + (offset as i64), + )), + None, + Some(IntervalMonthDayNanoType::make_value( + 5 + offset, + 5 + offset, + 5 + (offset as i64), + )), + ]); + + RecordBatch::try_new( + schema, + vec![Arc::new(ym_arr), Arc::new(dt_arr), Arc::new(mdn_arr)], + ) + .unwrap() +} + fn create_data_batch(scenario: Scenario) -> Vec { match scenario { Scenario::Boolean => { @@ -1346,6 +1416,12 @@ fn create_data_batch(scenario: Scenario) -> Vec { ]), ] } + Scenario::Interval => vec![ + make_interval_batch(0), + make_interval_batch(1), + make_interval_batch(2), + make_interval_batch(3), + ], } } From afb994581e1bb0758a7338027d89f47369914d00 Mon Sep 17 00:00:00 2001 From: marvinlanhenke Date: Wed, 5 Jun 2024 12:07:08 +0200 Subject: [PATCH 2/6] feat: add get_statistics stub --- .../src/datasource/physical_plan/parquet/statistics.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/datafusion/core/src/datasource/physical_plan/parquet/statistics.rs b/datafusion/core/src/datasource/physical_plan/parquet/statistics.rs index 8d0d30bf41fc6..6b336f169e25a 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet/statistics.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet/statistics.rs @@ -256,6 +256,13 @@ macro_rules! get_statistic { Some(DataType::Float16) => { Some(ScalarValue::Float16(from_bytes_to_f16(s.$bytes_func()))) } + Some(DataType::Interval(unit)) => { + match unit { + IntervalUnit::YearMonth => unimplemented!("Interval statistics not yet supported by parquet") + IntervalUnit::DayTime => unimplemented!("Interval statistics not yet supported by parquet") + IntervalUnit::MonthDayNano => unimplemented!("Interval statistics not yet supported by parquet") + } + } _ => None, } } From 73e70a2e4c537a158b3601ab86a37f985d860f4a Mon Sep 17 00:00:00 2001 From: marvinlanhenke Date: Wed, 5 Jun 2024 12:07:18 +0200 Subject: [PATCH 3/6] chore: add comment --- datafusion/core/tests/parquet/mod.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/datafusion/core/tests/parquet/mod.rs b/datafusion/core/tests/parquet/mod.rs index 52a2ef594eec1..97a28e2e17e20 100644 --- a/datafusion/core/tests/parquet/mod.rs +++ b/datafusion/core/tests/parquet/mod.rs @@ -964,6 +964,8 @@ fn make_interval_batch(offset: i32) -> RecordBatch { Some(IntervalDayTimeType::make_value(5 + offset, 5 + offset)), ]); + // Not yet implemented, see for ref: + // https://github.com/apache/arrow-rs/blob/master/parquet/src/arrow/arrow_writer/mod.rs#L747 let mdn_arr = IntervalMonthDayNanoArray::from(vec![ Some(IntervalMonthDayNanoType::make_value( 1 + offset, From ba3b8ead3ab64643d940a21e9f4ccacd0a7375ae Mon Sep 17 00:00:00 2001 From: marvinlanhenke Date: Wed, 5 Jun 2024 12:19:02 +0200 Subject: [PATCH 4/6] feat: add test cases --- .../physical_plan/parquet/statistics.rs | 9 ++-- .../core/tests/parquet/arrow_statistics.rs | 42 +++++++++++++++++-- 2 files changed, 44 insertions(+), 7 deletions(-) diff --git a/datafusion/core/src/datasource/physical_plan/parquet/statistics.rs b/datafusion/core/src/datasource/physical_plan/parquet/statistics.rs index 6b336f169e25a..9f7eab1f50d63 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet/statistics.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet/statistics.rs @@ -19,7 +19,10 @@ // TODO: potentially move this to arrow-rs: https://github.com/apache/arrow-rs/issues/4328 -use arrow::{array::ArrayRef, datatypes::i256, datatypes::DataType, datatypes::TimeUnit}; +use arrow::{ + array::ArrayRef, datatypes::i256, datatypes::DataType, datatypes::IntervalUnit, + datatypes::TimeUnit, +}; use arrow_array::{new_empty_array, new_null_array, UInt64Array}; use arrow_schema::{Field, FieldRef, Schema}; use datafusion_common::{ @@ -258,8 +261,8 @@ macro_rules! get_statistic { } Some(DataType::Interval(unit)) => { match unit { - IntervalUnit::YearMonth => unimplemented!("Interval statistics not yet supported by parquet") - IntervalUnit::DayTime => unimplemented!("Interval statistics not yet supported by parquet") + IntervalUnit::YearMonth => unimplemented!("Interval statistics not yet supported by parquet"), + IntervalUnit::DayTime => unimplemented!("Interval statistics not yet supported by parquet"), IntervalUnit::MonthDayNano => unimplemented!("Interval statistics not yet supported by parquet") } } diff --git a/datafusion/core/tests/parquet/arrow_statistics.rs b/datafusion/core/tests/parquet/arrow_statistics.rs index dd91014f3368b..0b49178551d86 100644 --- a/datafusion/core/tests/parquet/arrow_statistics.rs +++ b/datafusion/core/tests/parquet/arrow_statistics.rs @@ -24,14 +24,16 @@ use std::sync::Arc; use crate::parquet::{struct_array, Scenario}; use arrow::compute::kernels::cast_utils::Parser; use arrow::datatypes::{ - i256, Date32Type, Date64Type, IntervalYearMonthType, TimestampMicrosecondType, - TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType, + i256, Date32Type, Date64Type, IntervalDayTimeType, IntervalMonthDayNanoType, + IntervalYearMonthType, TimestampMicrosecondType, TimestampMillisecondType, + TimestampNanosecondType, TimestampSecondType, }; use arrow_array::{ make_array, Array, ArrayRef, BinaryArray, BooleanArray, Date32Array, Date64Array, Decimal128Array, Decimal256Array, FixedSizeBinaryArray, Float16Array, Float32Array, - Float64Array, Int16Array, Int32Array, Int64Array, Int8Array, IntervalYearMonthArray, - LargeBinaryArray, LargeStringArray, RecordBatch, StringArray, Time32MillisecondArray, + Float64Array, Int16Array, Int32Array, Int64Array, Int8Array, IntervalDayTimeArray, + IntervalMonthDayNanoArray, IntervalYearMonthArray, LargeBinaryArray, + LargeStringArray, RecordBatch, StringArray, Time32MillisecondArray, Time32SecondArray, Time64MicrosecondArray, Time64NanosecondArray, TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray, TimestampSecondArray, UInt16Array, UInt32Array, UInt64Array, UInt8Array, @@ -1107,6 +1109,38 @@ async fn test_interval_diff_rg_sizes() { column_name: "year_month", } .run(); + + Test { + reader: &reader, + expected_min: Arc::new(IntervalDayTimeArray::from(vec![ + IntervalDayTimeType::make_value(1, 1), + IntervalDayTimeType::make_value(4, 4), + ])), + expected_max: Arc::new(IntervalDayTimeArray::from(vec![ + IntervalDayTimeType::make_value(6, 6), + IntervalDayTimeType::make_value(8, 8), + ])), + expected_null_counts: UInt64Array::from(vec![2, 2]), + expected_row_counts: UInt64Array::from(vec![13, 7]), + column_name: "day_time", + } + .run(); + + Test { + reader: &reader, + expected_min: Arc::new(IntervalMonthDayNanoArray::from(vec![ + IntervalMonthDayNanoType::make_value(1, 1, 1), + IntervalMonthDayNanoType::make_value(4, 4, 4), + ])), + expected_max: Arc::new(IntervalMonthDayNanoArray::from(vec![ + IntervalMonthDayNanoType::make_value(6, 6, 6), + IntervalMonthDayNanoType::make_value(8, 8, 8), + ])), + expected_null_counts: UInt64Array::from(vec![2, 2]), + expected_row_counts: UInt64Array::from(vec![13, 7]), + column_name: "month_day_nano", + } + .run(); } #[tokio::test] From 9cf0c642de615902ef71d14f7e6853a9b35715cc Mon Sep 17 00:00:00 2001 From: marvinlanhenke Date: Wed, 5 Jun 2024 20:09:31 +0200 Subject: [PATCH 5/6] fix: use different values for multiple fields --- .../core/tests/parquet/arrow_statistics.rs | 24 +++++++------- datafusion/core/tests/parquet/mod.rs | 32 +++++++++---------- 2 files changed, 28 insertions(+), 28 deletions(-) diff --git a/datafusion/core/tests/parquet/arrow_statistics.rs b/datafusion/core/tests/parquet/arrow_statistics.rs index 0a4b9e559525e..96697dbcb8005 100644 --- a/datafusion/core/tests/parquet/arrow_statistics.rs +++ b/datafusion/core/tests/parquet/arrow_statistics.rs @@ -1096,12 +1096,12 @@ async fn test_interval_diff_rg_sizes() { Test { reader: &reader, expected_min: Arc::new(IntervalYearMonthArray::from(vec![ - IntervalYearMonthType::make_value(1, 1), - IntervalYearMonthType::make_value(4, 4), + IntervalYearMonthType::make_value(1, 10), + IntervalYearMonthType::make_value(4, 13), ])), expected_max: Arc::new(IntervalYearMonthArray::from(vec![ - IntervalYearMonthType::make_value(6, 6), - IntervalYearMonthType::make_value(8, 8), + IntervalYearMonthType::make_value(6, 51), + IntervalYearMonthType::make_value(8, 53), ])), expected_null_counts: UInt64Array::from(vec![2, 2]), expected_row_counts: UInt64Array::from(vec![13, 7]), @@ -1112,12 +1112,12 @@ async fn test_interval_diff_rg_sizes() { Test { reader: &reader, expected_min: Arc::new(IntervalDayTimeArray::from(vec![ - IntervalDayTimeType::make_value(1, 1), - IntervalDayTimeType::make_value(4, 4), + IntervalDayTimeType::make_value(1, 10), + IntervalDayTimeType::make_value(4, 13), ])), expected_max: Arc::new(IntervalDayTimeArray::from(vec![ - IntervalDayTimeType::make_value(6, 6), - IntervalDayTimeType::make_value(8, 8), + IntervalDayTimeType::make_value(6, 51), + IntervalDayTimeType::make_value(8, 53), ])), expected_null_counts: UInt64Array::from(vec![2, 2]), expected_row_counts: UInt64Array::from(vec![13, 7]), @@ -1128,12 +1128,12 @@ async fn test_interval_diff_rg_sizes() { Test { reader: &reader, expected_min: Arc::new(IntervalMonthDayNanoArray::from(vec![ - IntervalMonthDayNanoType::make_value(1, 1, 1), - IntervalMonthDayNanoType::make_value(4, 4, 4), + IntervalMonthDayNanoType::make_value(1, 10, 100), + IntervalMonthDayNanoType::make_value(4, 13, 103), ])), expected_max: Arc::new(IntervalMonthDayNanoArray::from(vec![ - IntervalMonthDayNanoType::make_value(6, 6, 6), - IntervalMonthDayNanoType::make_value(8, 8, 8), + IntervalMonthDayNanoType::make_value(6, 51, 501), + IntervalMonthDayNanoType::make_value(8, 53, 503), ])), expected_null_counts: UInt64Array::from(vec![2, 2]), expected_row_counts: UInt64Array::from(vec![13, 7]), diff --git a/datafusion/core/tests/parquet/mod.rs b/datafusion/core/tests/parquet/mod.rs index 97a28e2e17e20..44bd822104882 100644 --- a/datafusion/core/tests/parquet/mod.rs +++ b/datafusion/core/tests/parquet/mod.rs @@ -949,19 +949,19 @@ fn make_interval_batch(offset: i32) -> RecordBatch { let schema = Arc::new(schema); let ym_arr = IntervalYearMonthArray::from(vec![ - Some(IntervalYearMonthType::make_value(1 + offset, 1 + offset)), - Some(IntervalYearMonthType::make_value(2 + offset, 2 + offset)), - Some(IntervalYearMonthType::make_value(3 + offset, 3 + offset)), + Some(IntervalYearMonthType::make_value(1 + offset, 10 + offset)), + Some(IntervalYearMonthType::make_value(2 + offset, 20 + offset)), + Some(IntervalYearMonthType::make_value(3 + offset, 30 + offset)), None, - Some(IntervalYearMonthType::make_value(5 + offset, 5 + offset)), + Some(IntervalYearMonthType::make_value(5 + offset, 50 + offset)), ]); let dt_arr = IntervalDayTimeArray::from(vec![ - Some(IntervalDayTimeType::make_value(1 + offset, 1 + offset)), - Some(IntervalDayTimeType::make_value(2 + offset, 2 + offset)), - Some(IntervalDayTimeType::make_value(3 + offset, 3 + offset)), + Some(IntervalDayTimeType::make_value(1 + offset, 10 + offset)), + Some(IntervalDayTimeType::make_value(2 + offset, 20 + offset)), + Some(IntervalDayTimeType::make_value(3 + offset, 30 + offset)), None, - Some(IntervalDayTimeType::make_value(5 + offset, 5 + offset)), + Some(IntervalDayTimeType::make_value(5 + offset, 50 + offset)), ]); // Not yet implemented, see for ref: @@ -969,24 +969,24 @@ fn make_interval_batch(offset: i32) -> RecordBatch { let mdn_arr = IntervalMonthDayNanoArray::from(vec![ Some(IntervalMonthDayNanoType::make_value( 1 + offset, - 1 + offset, - 1 + (offset as i64), + 10 + offset, + 100 + (offset as i64), )), Some(IntervalMonthDayNanoType::make_value( 2 + offset, - 2 + offset, - 2 + (offset as i64), + 20 + offset, + 200 + (offset as i64), )), Some(IntervalMonthDayNanoType::make_value( 3 + offset, - 3 + offset, - 3 + (offset as i64), + 30 + offset, + 300 + (offset as i64), )), None, Some(IntervalMonthDayNanoType::make_value( 5 + offset, - 5 + offset, - 5 + (offset as i64), + 50 + offset, + 500 + (offset as i64), )), ]); From 090be376baa2dd63c86c03dd64c04a81d59246bf Mon Sep 17 00:00:00 2001 From: marvinlanhenke Date: Wed, 5 Jun 2024 20:31:30 +0200 Subject: [PATCH 6/6] fix: test-cases + comments --- .../core/tests/parquet/arrow_statistics.rs | 65 +++++++++++-------- datafusion/core/tests/parquet/mod.rs | 2 +- 2 files changed, 38 insertions(+), 29 deletions(-) diff --git a/datafusion/core/tests/parquet/arrow_statistics.rs b/datafusion/core/tests/parquet/arrow_statistics.rs index 96697dbcb8005..b378b2a6c3df6 100644 --- a/datafusion/core/tests/parquet/arrow_statistics.rs +++ b/datafusion/core/tests/parquet/arrow_statistics.rs @@ -24,8 +24,7 @@ use std::sync::Arc; use crate::parquet::{struct_array, Scenario}; use arrow::compute::kernels::cast_utils::Parser; use arrow::datatypes::{ - i256, Date32Type, Date64Type, IntervalDayTimeType, IntervalMonthDayNanoType, - IntervalYearMonthType, TimestampMicrosecondType, TimestampMillisecondType, + i256, Date32Type, Date64Type, TimestampMicrosecondType, TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType, }; use arrow_array::{ @@ -1076,8 +1075,11 @@ async fn test_dates_64_diff_rg_sizes() { #[tokio::test] #[should_panic] -// Statistics for `Intervals` are not supported yet, see for ref: -// https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#interval +// Currently this test `should_panic` since statistics for `Intervals` +// are not supported and `IntervalMonthDayNano` cannot be written +// to parquet yet. +// Refer to issue: https://github.com/apache/arrow-rs/issues/5847 +// and https://github.com/apache/arrow-rs/blob/master/parquet/src/arrow/arrow_writer/mod.rs#L747 async fn test_interval_diff_rg_sizes() { // This creates a parquet files of 3 columns: // "year_month" --> IntervalYearMonthArray @@ -1093,48 +1095,55 @@ async fn test_interval_diff_rg_sizes() { .build() .await; + // TODO: expected values need to be changed once issue is resolved + // expected_min: Arc::new(IntervalYearMonthArray::from(vec![ + // IntervalYearMonthType::make_value(1, 10), + // IntervalYearMonthType::make_value(4, 13), + // ])), + // expected_max: Arc::new(IntervalYearMonthArray::from(vec![ + // IntervalYearMonthType::make_value(6, 51), + // IntervalYearMonthType::make_value(8, 53), + // ])), Test { reader: &reader, - expected_min: Arc::new(IntervalYearMonthArray::from(vec![ - IntervalYearMonthType::make_value(1, 10), - IntervalYearMonthType::make_value(4, 13), - ])), - expected_max: Arc::new(IntervalYearMonthArray::from(vec![ - IntervalYearMonthType::make_value(6, 51), - IntervalYearMonthType::make_value(8, 53), - ])), + expected_min: Arc::new(IntervalYearMonthArray::from(vec![None, None])), + expected_max: Arc::new(IntervalYearMonthArray::from(vec![None, None])), expected_null_counts: UInt64Array::from(vec![2, 2]), expected_row_counts: UInt64Array::from(vec![13, 7]), column_name: "year_month", } .run(); + // expected_min: Arc::new(IntervalDayTimeArray::from(vec![ + // IntervalDayTimeType::make_value(1, 10), + // IntervalDayTimeType::make_value(4, 13), + // ])), + // expected_max: Arc::new(IntervalDayTimeArray::from(vec![ + // IntervalDayTimeType::make_value(6, 51), + // IntervalDayTimeType::make_value(8, 53), + // ])), Test { reader: &reader, - expected_min: Arc::new(IntervalDayTimeArray::from(vec![ - IntervalDayTimeType::make_value(1, 10), - IntervalDayTimeType::make_value(4, 13), - ])), - expected_max: Arc::new(IntervalDayTimeArray::from(vec![ - IntervalDayTimeType::make_value(6, 51), - IntervalDayTimeType::make_value(8, 53), - ])), + expected_min: Arc::new(IntervalDayTimeArray::from(vec![None, None])), + expected_max: Arc::new(IntervalDayTimeArray::from(vec![None, None])), expected_null_counts: UInt64Array::from(vec![2, 2]), expected_row_counts: UInt64Array::from(vec![13, 7]), column_name: "day_time", } .run(); + // expected_min: Arc::new(IntervalMonthDayNanoArray::from(vec![ + // IntervalMonthDayNanoType::make_value(1, 10, 100), + // IntervalMonthDayNanoType::make_value(4, 13, 103), + // ])), + // expected_max: Arc::new(IntervalMonthDayNanoArray::from(vec![ + // IntervalMonthDayNanoType::make_value(6, 51, 501), + // IntervalMonthDayNanoType::make_value(8, 53, 503), + // ])), Test { reader: &reader, - expected_min: Arc::new(IntervalMonthDayNanoArray::from(vec![ - IntervalMonthDayNanoType::make_value(1, 10, 100), - IntervalMonthDayNanoType::make_value(4, 13, 103), - ])), - expected_max: Arc::new(IntervalMonthDayNanoArray::from(vec![ - IntervalMonthDayNanoType::make_value(6, 51, 501), - IntervalMonthDayNanoType::make_value(8, 53, 503), - ])), + expected_min: Arc::new(IntervalMonthDayNanoArray::from(vec![None, None])), + expected_max: Arc::new(IntervalMonthDayNanoArray::from(vec![None, None])), expected_null_counts: UInt64Array::from(vec![2, 2]), expected_row_counts: UInt64Array::from(vec![13, 7]), column_name: "month_day_nano", diff --git a/datafusion/core/tests/parquet/mod.rs b/datafusion/core/tests/parquet/mod.rs index 44bd822104882..99769a3367228 100644 --- a/datafusion/core/tests/parquet/mod.rs +++ b/datafusion/core/tests/parquet/mod.rs @@ -964,7 +964,7 @@ fn make_interval_batch(offset: i32) -> RecordBatch { Some(IntervalDayTimeType::make_value(5 + offset, 50 + offset)), ]); - // Not yet implemented, see for ref: + // Not yet implemented, refer to: // https://github.com/apache/arrow-rs/blob/master/parquet/src/arrow/arrow_writer/mod.rs#L747 let mdn_arr = IntervalMonthDayNanoArray::from(vec![ Some(IntervalMonthDayNanoType::make_value(