diff --git a/datafusion/core/tests/parquet/arrow_statistics.rs b/datafusion/core/tests/parquet/arrow_statistics.rs index 0e23e6824027c..2ea18d7cf8232 100644 --- a/datafusion/core/tests/parquet/arrow_statistics.rs +++ b/datafusion/core/tests/parquet/arrow_statistics.rs @@ -30,8 +30,7 @@ use arrow::datatypes::{ use arrow_array::{ make_array, Array, ArrayRef, BinaryArray, BooleanArray, Date32Array, Date64Array, Decimal128Array, Decimal256Array, FixedSizeBinaryArray, Float16Array, Float32Array, - Float64Array, Int16Array, Int32Array, Int64Array, Int8Array, IntervalDayTimeArray, - IntervalMonthDayNanoArray, IntervalYearMonthArray, LargeBinaryArray, + Float64Array, Int16Array, Int32Array, Int64Array, Int8Array, LargeBinaryArray, LargeStringArray, RecordBatch, StringArray, Time32MillisecondArray, Time32SecondArray, Time64MicrosecondArray, Time64NanosecondArray, TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray, @@ -1061,84 +1060,6 @@ async fn test_dates_64_diff_rg_sizes() { .run(); } -#[tokio::test] -#[should_panic] -// Currently this test `should_panic` since statistics for `Intervals` -// are not supported and `IntervalMonthDayNano` cannot be written -// to parquet yet. 
-// Refer to issue: https://github.com/apache/arrow-rs/issues/5847 -// and https://github.com/apache/arrow-rs/blob/master/parquet/src/arrow/arrow_writer/mod.rs#L747 -async fn test_interval_diff_rg_sizes() { - // This creates a parquet files of 3 columns: - // "year_month" --> IntervalYearMonthArray - // "day_time" --> IntervalDayTimeArray - // "month_day_nano" --> IntervalMonthDayNanoArray - // - // The file is created by 4 record batches (each has a null row) - // each has 5 rows but then will be split into 2 row groups with size 13, 7 - let reader = TestReader { - scenario: Scenario::Interval, - row_per_group: 13, - } - .build() - .await; - - // TODO: expected values need to be changed once issue is resolved - // expected_min: Arc::new(IntervalYearMonthArray::from(vec![ - // IntervalYearMonthType::make_value(1, 10), - // IntervalYearMonthType::make_value(4, 13), - // ])), - // expected_max: Arc::new(IntervalYearMonthArray::from(vec![ - // IntervalYearMonthType::make_value(6, 51), - // IntervalYearMonthType::make_value(8, 53), - // ])), - Test { - reader: &reader, - expected_min: Arc::new(IntervalYearMonthArray::from(vec![None, None])), - expected_max: Arc::new(IntervalYearMonthArray::from(vec![None, None])), - expected_null_counts: UInt64Array::from(vec![2, 2]), - expected_row_counts: UInt64Array::from(vec![13, 7]), - column_name: "year_month", - } - .run(); - - // expected_min: Arc::new(IntervalDayTimeArray::from(vec![ - // IntervalDayTimeType::make_value(1, 10), - // IntervalDayTimeType::make_value(4, 13), - // ])), - // expected_max: Arc::new(IntervalDayTimeArray::from(vec![ - // IntervalDayTimeType::make_value(6, 51), - // IntervalDayTimeType::make_value(8, 53), - // ])), - Test { - reader: &reader, - expected_min: Arc::new(IntervalDayTimeArray::from(vec![None, None])), - expected_max: Arc::new(IntervalDayTimeArray::from(vec![None, None])), - expected_null_counts: UInt64Array::from(vec![2, 2]), - expected_row_counts: UInt64Array::from(vec![13, 7]), - 
column_name: "day_time", - } - .run(); - - // expected_min: Arc::new(IntervalMonthDayNanoArray::from(vec![ - // IntervalMonthDayNanoType::make_value(1, 10, 100), - // IntervalMonthDayNanoType::make_value(4, 13, 103), - // ])), - // expected_max: Arc::new(IntervalMonthDayNanoArray::from(vec![ - // IntervalMonthDayNanoType::make_value(6, 51, 501), - // IntervalMonthDayNanoType::make_value(8, 53, 503), - // ])), - Test { - reader: &reader, - expected_min: Arc::new(IntervalMonthDayNanoArray::from(vec![None, None])), - expected_max: Arc::new(IntervalMonthDayNanoArray::from(vec![None, None])), - expected_null_counts: UInt64Array::from(vec![2, 2]), - expected_row_counts: UInt64Array::from(vec![13, 7]), - column_name: "month_day_nano", - } - .run(); -} - #[tokio::test] async fn test_uint() { // This creates a parquet files of 4 columns named "u8", "u16", "u32", "u64" diff --git a/datafusion/core/tests/parquet/mod.rs b/datafusion/core/tests/parquet/mod.rs index 5ab268beb92f9..9546ab30c9e01 100644 --- a/datafusion/core/tests/parquet/mod.rs +++ b/datafusion/core/tests/parquet/mod.rs @@ -18,9 +18,7 @@ //! 
Parquet integration tests use crate::parquet::utils::MetricsFinder; use arrow::array::Decimal128Array; -use arrow::datatypes::{ - i256, IntervalDayTimeType, IntervalMonthDayNanoType, IntervalYearMonthType, -}; +use arrow::datatypes::i256; use arrow::{ array::{ make_array, Array, ArrayRef, BinaryArray, BooleanArray, Date32Array, Date64Array, @@ -36,10 +34,6 @@ use arrow::{ record_batch::RecordBatch, util::pretty::pretty_format_batches, }; -use arrow_array::{ - IntervalDayTimeArray, IntervalMonthDayNanoArray, IntervalYearMonthArray, -}; -use arrow_schema::IntervalUnit; use chrono::{Datelike, Duration, TimeDelta}; use datafusion::{ datasource::{provider_as_source, TableProvider}, @@ -92,7 +86,6 @@ enum Scenario { Time32Millisecond, Time64Nanosecond, Time64Microsecond, - Interval, /// 7 Rows, for each i8, i16, i32, i64, u8, u16, u32, u64, f32, f64 /// -MIN, -100, -1, 0, 1, 100, MAX NumericLimits, @@ -921,71 +914,6 @@ fn make_dict_batch() -> RecordBatch { .unwrap() } -fn make_interval_batch(offset: i32) -> RecordBatch { - let schema = Schema::new(vec![ - Field::new( - "year_month", - DataType::Interval(IntervalUnit::YearMonth), - true, - ), - Field::new("day_time", DataType::Interval(IntervalUnit::DayTime), true), - Field::new( - "month_day_nano", - DataType::Interval(IntervalUnit::MonthDayNano), - true, - ), - ]); - let schema = Arc::new(schema); - - let ym_arr = IntervalYearMonthArray::from(vec![ - Some(IntervalYearMonthType::make_value(1 + offset, 10 + offset)), - Some(IntervalYearMonthType::make_value(2 + offset, 20 + offset)), - Some(IntervalYearMonthType::make_value(3 + offset, 30 + offset)), - None, - Some(IntervalYearMonthType::make_value(5 + offset, 50 + offset)), - ]); - - let dt_arr = IntervalDayTimeArray::from(vec![ - Some(IntervalDayTimeType::make_value(1 + offset, 10 + offset)), - Some(IntervalDayTimeType::make_value(2 + offset, 20 + offset)), - Some(IntervalDayTimeType::make_value(3 + offset, 30 + offset)), - None, - Some(IntervalDayTimeType::make_value(5 
+ offset, 50 + offset)), - ]); - - // Not yet implemented, refer to: - // https://github.com/apache/arrow-rs/blob/master/parquet/src/arrow/arrow_writer/mod.rs#L747 - let mdn_arr = IntervalMonthDayNanoArray::from(vec![ - Some(IntervalMonthDayNanoType::make_value( - 1 + offset, - 10 + offset, - 100 + (offset as i64), - )), - Some(IntervalMonthDayNanoType::make_value( - 2 + offset, - 20 + offset, - 200 + (offset as i64), - )), - Some(IntervalMonthDayNanoType::make_value( - 3 + offset, - 30 + offset, - 300 + (offset as i64), - )), - None, - Some(IntervalMonthDayNanoType::make_value( - 5 + offset, - 50 + offset, - 500 + (offset as i64), - )), - ]); - - RecordBatch::try_new( - schema, - vec![Arc::new(ym_arr), Arc::new(dt_arr), Arc::new(mdn_arr)], - ) - .unwrap() -} - fn create_data_batch(scenario: Scenario) -> Vec<RecordBatch> { match scenario { Scenario::Boolean => { @@ -1407,12 +1335,6 @@ fn create_data_batch(scenario: Scenario) -> Vec<RecordBatch> { ]), ] } - Scenario::Interval => vec![ - make_interval_batch(0), - make_interval_batch(1), - make_interval_batch(2), - make_interval_batch(3), - ], } }