diff --git a/datafusion/core/tests/fuzz_cases/record_batch_generator.rs b/datafusion/core/tests/fuzz_cases/record_batch_generator.rs index baeb198f7ed8..d2ee4e891c71 100644 --- a/datafusion/core/tests/fuzz_cases/record_batch_generator.rs +++ b/datafusion/core/tests/fuzz_cases/record_batch_generator.rs @@ -19,13 +19,14 @@ use std::sync::Arc; use arrow::array::{ArrayRef, RecordBatch}; use arrow::datatypes::{ - BooleanType, DataType, Date32Type, Date64Type, Decimal128Type, Decimal256Type, Field, - Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, - IntervalDayTimeType, IntervalMonthDayNanoType, IntervalUnit, IntervalYearMonthType, - Schema, Time32MillisecondType, Time32SecondType, Time64MicrosecondType, - Time64NanosecondType, TimeUnit, TimestampMicrosecondType, TimestampMillisecondType, - TimestampNanosecondType, TimestampSecondType, UInt16Type, UInt32Type, UInt64Type, - UInt8Type, + BooleanType, DataType, Date32Type, Date64Type, Decimal128Type, Decimal256Type, + DurationMicrosecondType, DurationMillisecondType, DurationNanosecondType, + DurationSecondType, Field, Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, + Int8Type, IntervalDayTimeType, IntervalMonthDayNanoType, IntervalUnit, + IntervalYearMonthType, Schema, Time32MillisecondType, Time32SecondType, + Time64MicrosecondType, Time64NanosecondType, TimeUnit, TimestampMicrosecondType, + TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType, UInt16Type, + UInt32Type, UInt64Type, UInt8Type, }; use arrow_schema::{ DECIMAL128_MAX_PRECISION, DECIMAL128_MAX_SCALE, DECIMAL256_MAX_PRECISION, @@ -85,6 +86,23 @@ pub fn get_supported_types_columns(rng_seed: u64) -> Vec { "interval_month_day_nano", DataType::Interval(IntervalUnit::MonthDayNano), ), + // Internal error: AggregationFuzzer task error: JoinError::Panic(Id(29108), "called `Option::unwrap()` on a `None` value", ...). 
+ // ColumnDescr::new( + // "duration_seconds", + // DataType::Duration(TimeUnit::Second), + // ), + ColumnDescr::new( + "duration_milliseconds", + DataType::Duration(TimeUnit::Millisecond), + ), + ColumnDescr::new( + "duration_microsecond", + DataType::Duration(TimeUnit::Microsecond), + ), + ColumnDescr::new( + "duration_nanosecond", + DataType::Duration(TimeUnit::Nanosecond), + ), ColumnDescr::new("decimal128", { let precision: u8 = rng.gen_range(1..=DECIMAL128_MAX_PRECISION); let scale: i8 = rng.gen_range( @@ -484,6 +502,46 @@ impl RecordBatchGenerator { IntervalMonthDayNanoType ) } + DataType::Duration(TimeUnit::Second) => { + generate_primitive_array!( + self, + num_rows, + max_num_distinct, + batch_gen_rng, + array_gen_rng, + DurationSecondType + ) + } + DataType::Duration(TimeUnit::Millisecond) => { + generate_primitive_array!( + self, + num_rows, + max_num_distinct, + batch_gen_rng, + array_gen_rng, + DurationMillisecondType + ) + } + DataType::Duration(TimeUnit::Microsecond) => { + generate_primitive_array!( + self, + num_rows, + max_num_distinct, + batch_gen_rng, + array_gen_rng, + DurationMicrosecondType + ) + } + DataType::Duration(TimeUnit::Nanosecond) => { + generate_primitive_array!( + self, + num_rows, + max_num_distinct, + batch_gen_rng, + array_gen_rng, + DurationNanosecondType + ) + } DataType::Timestamp(TimeUnit::Second, None) => { generate_primitive_array!( self, diff --git a/test-utils/src/array_gen/primitive.rs b/test-utils/src/array_gen/primitive.rs index 58d39c14e65d..1897b0d3db29 100644 --- a/test-utils/src/array_gen/primitive.rs +++ b/test-utils/src/array_gen/primitive.rs @@ -66,6 +66,7 @@ impl PrimitiveArrayGenerator { | DataType::Time32(_) | DataType::Time64(_) | DataType::Interval(_) + | DataType::Duration(_) | DataType::Binary | DataType::LargeBinary | DataType::BinaryView diff --git a/test-utils/src/array_gen/random_data.rs b/test-utils/src/array_gen/random_data.rs index a7297d45fdf0..3989e4842fac 100644 --- 
a/test-utils/src/array_gen/random_data.rs +++ b/test-utils/src/array_gen/random_data.rs @@ -17,13 +17,14 @@ use arrow::array::ArrowPrimitiveType; use arrow::datatypes::{ - i256, Date32Type, Date64Type, Decimal128Type, Decimal256Type, Float32Type, - Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, IntervalDayTime, - IntervalDayTimeType, IntervalMonthDayNano, IntervalMonthDayNanoType, - IntervalYearMonthType, Time32MillisecondType, Time32SecondType, - Time64MicrosecondType, Time64NanosecondType, TimestampMicrosecondType, - TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType, UInt16Type, - UInt32Type, UInt64Type, UInt8Type, + i256, Date32Type, Date64Type, Decimal128Type, Decimal256Type, + DurationMicrosecondType, DurationMillisecondType, DurationNanosecondType, + DurationSecondType, Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, + Int8Type, IntervalDayTime, IntervalDayTimeType, IntervalMonthDayNano, + IntervalMonthDayNanoType, IntervalYearMonthType, Time32MillisecondType, + Time32SecondType, Time64MicrosecondType, Time64NanosecondType, + TimestampMicrosecondType, TimestampMillisecondType, TimestampNanosecondType, + TimestampSecondType, UInt16Type, UInt32Type, UInt64Type, UInt8Type, }; use rand::distributions::Standard; use rand::prelude::Distribution; @@ -71,6 +72,11 @@ basic_random_data!(TimestampSecondType); basic_random_data!(TimestampMillisecondType); basic_random_data!(TimestampMicrosecondType); basic_random_data!(TimestampNanosecondType); +// Note DurationSecondType is restricted to i64::MIN / 1000 to i64::MAX / 1000 +// due to https://github.com/apache/arrow-rs/issues/7533 so handle it specially below +basic_random_data!(DurationMillisecondType); +basic_random_data!(DurationMicrosecondType); +basic_random_data!(DurationNanosecondType); impl RandomNativeData for Date64Type { fn generate_random_native_data(rng: &mut StdRng) -> Self::Native { @@ -100,6 +106,15 @@ impl RandomNativeData for IntervalMonthDayNanoType { } } +// 
Restrict Duration(Seconds) to i64::MIN / 1000 to i64::MAX / 1000 to +// avoid panics on pretty printing. See +// https://github.com/apache/arrow-rs/issues/7533 +impl RandomNativeData for DurationSecondType { + fn generate_random_native_data(rng: &mut StdRng) -> Self::Native { + rng.gen::<i64>() / 1000 + } +} + impl RandomNativeData for Decimal256Type { fn generate_random_native_data(rng: &mut StdRng) -> Self::Native { i256::from_parts(rng.gen::<u128>(), rng.gen::<i128>())