From ba2ffa73293be1f5ef29317db96ed55aeade6576 Mon Sep 17 00:00:00 2001 From: PinkCrow007 <1053603622@qq.com> Date: Wed, 17 Sep 2025 22:43:30 -0400 Subject: [PATCH 1/2] init --- parquet-variant-compute/src/variant_array.rs | 8 ++- parquet-variant-compute/src/variant_get.rs | 69 +++++++++++++++++++- parquet/tests/variant_integration.rs | 5 +- 3 files changed, 77 insertions(+), 5 deletions(-) diff --git a/parquet-variant-compute/src/variant_array.rs b/parquet-variant-compute/src/variant_array.rs index 4abffa65c23f..b90b28778717 100644 --- a/parquet-variant-compute/src/variant_array.rs +++ b/parquet-variant-compute/src/variant_array.rs @@ -21,7 +21,7 @@ use crate::type_conversion::primitive_conversion_single_value; use arrow::array::{Array, ArrayData, ArrayRef, AsArray, BinaryViewArray, StructArray}; use arrow::buffer::NullBuffer; use arrow::datatypes::{ - Float16Type, Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, UInt16Type, + Date32Type, Float16Type, Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, UInt16Type, UInt32Type, UInt64Type, UInt8Type, }; use arrow_schema::{ArrowError, DataType, Field, FieldRef, Fields}; @@ -556,6 +556,12 @@ fn typed_value_to_variant(typed_value: &ArrayRef, index: usize) -> Variant<'_, ' let value = boolean_array.value(index); Variant::from(value) } + DataType::Date32 => { + let array = typed_value.as_primitive::(); + let value = array.value(index); + let date = Date32Type::to_naive_date(value); + Variant::from(date) + } DataType::FixedSizeBinary(binary_len) => { let array = typed_value.as_fixed_size_binary(); // Try to treat 16 byte FixedSizeBinary as UUID diff --git a/parquet-variant-compute/src/variant_get.rs b/parquet-variant-compute/src/variant_get.rs index 9d32c7f5a613..900eeafb976c 100644 --- a/parquet-variant-compute/src/variant_get.rs +++ b/parquet-variant-compute/src/variant_get.rs @@ -298,7 +298,7 @@ mod test { use std::sync::Arc; use arrow::array::{ - Array, ArrayRef, BinaryViewArray, Float16Array, Float32Array, Float64Array, Int16Array, + Array, ArrayRef, BinaryViewArray, Date32Array, Float16Array, Float32Array, Float64Array, Int16Array, Int32Array, Int64Array, Int8Array, StringArray, StructArray, UInt16Array, UInt32Array, UInt64Array, UInt8Array, }; @@ -526,6 +526,26 @@ mod test { assert_eq!(result.value(3), Variant::from("world")); } + #[test] + fn get_variant_partially_shredded_date32_as_variant() { + let array = partially_shredded_date32_variant_array(); + let options = GetOptions::new(); + let result = variant_get(&array, options).unwrap(); + + // expect the result is a VariantArray + let result: &VariantArray = result.as_any().downcast_ref().unwrap(); + assert_eq!(result.len(), 4); + + // Expect the values are the same as the original values + use chrono::NaiveDate; + let date1 = NaiveDate::from_ymd_opt(2025, 9, 17).unwrap(); + let date2 = NaiveDate::from_ymd_opt(2025, 9, 9).unwrap(); + assert_eq!(result.value(0), Variant::from(date1)); + assert!(!result.is_valid(1)); + assert_eq!(result.value(2), Variant::from("n/a")); + assert_eq!(result.value(3), Variant::from(date2)); + } + #[test] fn get_variant_partially_shredded_binary_view_as_variant() { let array = partially_shredded_binary_view_variant_array(); @@ -1138,6 +1158,53 @@ mod test { ) } + /// Return a VariantArray that represents a partially "shredded" variant for Date32 + fn partially_shredded_date32_variant_array() -> ArrayRef { + let (metadata, string_value) = { + let mut builder = parquet_variant::VariantBuilder::new(); + builder.append_value("n/a"); + builder.finish() + }; + + // Create the null buffer for the overall array + let nulls = NullBuffer::from(vec![ + true, // row 0 non null + false, // row 1 is null + true, // row 2 non null + true, // row 3 non null + ]); + + // metadata is the same for all rows + let metadata = BinaryViewArray::from_iter_values(std::iter::repeat_n(&metadata, 4)); + + // See https://docs.google.com/document/d/1pw0AWoMQY3SjD7R4LgbPvMjG_xSCtXp3rZHkVp9jpZ4/edit?disco=AAABml8WQrY + // about why row1 is an empty but non null, value. + let values = BinaryViewArray::from(vec![ + None, // row 0 is shredded, so no value + Some(b"" as &[u8]), // row 1 is null, so empty value + Some(&string_value), // copy the string value "N/A" + None, // row 3 is shredded, so no value + ]); + + let typed_value = Date32Array::from(vec![ + Some(20348), // row 0 is shredded, 2025-09-17 + None, // row 1 is null + None, // row 2 is a string, not a date + Some(20340), // row 3 is shredded, 2025-09-09 + ]); + + let struct_array = StructArrayBuilder::new() + .with_field("metadata", Arc::new(metadata), true) + .with_field("typed_value", Arc::new(typed_value), true) + .with_field("value", Arc::new(values), true) + .with_nulls(nulls) + .build(); + + Arc::new( + VariantArray::try_new(Arc::new(struct_array)).expect("should create variant array"), + ) + } + /// Return a VariantArray that represents a partially "shredded" variant for BinaryView fn partially_shredded_binary_view_variant_array() -> ArrayRef { let (metadata, string_value) = { diff --git a/parquet/tests/variant_integration.rs b/parquet/tests/variant_integration.rs index 97fb6b880108..ebce056cc4ad 100644 --- a/parquet/tests/variant_integration.rs +++ b/parquet/tests/variant_integration.rs @@ -92,9 +92,8 @@ variant_test_case!(14); variant_test_case!(15); variant_test_case!(16); variant_test_case!(17); -// https://github.com/apache/arrow-rs/issues/8330 -variant_test_case!(18, "Unsupported typed_value type: Date32"); -variant_test_case!(19, "Unsupported typed_value type: Date32"); +variant_test_case!(18); +variant_test_case!(19); // https://github.com/apache/arrow-rs/issues/8331 variant_test_case!( 20, From e6b24d14e3d9daffe9805f535916648ba0478e5e Mon Sep 17 00:00:00 2001 From: PinkCrow007 <1053603622@qq.com> Date: Wed, 17 Sep 2025 22:58:24 -0400 Subject: [PATCH 2/2] format --- parquet-variant-compute/src/variant_array.rs | 4 ++-- parquet-variant-compute/src/variant_get.rs | 14 +++++++------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/parquet-variant-compute/src/variant_array.rs b/parquet-variant-compute/src/variant_array.rs index b90b28778717..faaa1611ef06 100644 --- a/parquet-variant-compute/src/variant_array.rs +++ b/parquet-variant-compute/src/variant_array.rs @@ -21,8 +21,8 @@ use crate::type_conversion::primitive_conversion_single_value; use arrow::array::{Array, ArrayData, ArrayRef, AsArray, BinaryViewArray, StructArray}; use arrow::buffer::NullBuffer; use arrow::datatypes::{ - Date32Type, Float16Type, Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, UInt16Type, - UInt32Type, UInt64Type, UInt8Type, + Date32Type, Float16Type, Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, + UInt16Type, UInt32Type, UInt64Type, UInt8Type, }; use arrow_schema::{ArrowError, DataType, Field, FieldRef, Fields}; use parquet_variant::Uuid; diff --git a/parquet-variant-compute/src/variant_get.rs b/parquet-variant-compute/src/variant_get.rs index 900eeafb976c..654513ac128e 100644 --- a/parquet-variant-compute/src/variant_get.rs +++ b/parquet-variant-compute/src/variant_get.rs @@ -298,9 +298,9 @@ mod test { use std::sync::Arc; use arrow::array::{ - Array, ArrayRef, BinaryViewArray, Date32Array, Float16Array, Float32Array, Float64Array, Int16Array, - Int32Array, Int64Array, Int8Array, StringArray, StructArray, UInt16Array, UInt32Array, - UInt64Array, UInt8Array, + Array, ArrayRef, BinaryViewArray, Date32Array, Float16Array, Float32Array, Float64Array, + Int16Array, Int32Array, Int64Array, Int8Array, StringArray, StructArray, UInt16Array, + UInt32Array, UInt64Array, UInt8Array, }; use arrow::buffer::NullBuffer; use arrow::compute::CastOptions; @@ -1187,10 +1187,10 @@ mod test { ]); let typed_value = Date32Array::from(vec![ - Some(20348), // row 0 is shredded, 2025-09-17 - None, // row 1 is null - None, // row 2 is a string, not a date - Some(20340), // row 3 is shredded, 2025-09-09 + Some(20348), // row 0 is shredded, 2025-09-17 + None, // row 1 is null + None, // row 2 is a string, not a date + Some(20340), // row 3 is shredded, 2025-09-09 ]); let struct_array = StructArrayBuilder::new()