From bc7295649ac6cd938239446dde1cc637d527f384 Mon Sep 17 00:00:00 2001 From: Peter Nguyen Date: Mon, 15 Sep 2025 11:31:43 -0700 Subject: [PATCH 1/2] Add support for fixed sized binary --- parquet-variant-compute/src/variant_array.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/parquet-variant-compute/src/variant_array.rs b/parquet-variant-compute/src/variant_array.rs index f42fa51f512c..050ba053cb78 100644 --- a/parquet-variant-compute/src/variant_array.rs +++ b/parquet-variant-compute/src/variant_array.rs @@ -595,6 +595,11 @@ fn typed_value_to_variant(typed_value: &ArrayRef, index: usize) -> Variant<'_, ' let value = boolean_array.value(index); Variant::from(value) } + DataType::FixedSizeBinary(_) => { + let array = typed_value.as_fixed_size_binary(); + let value = array.value(index); + Variant::from(value) + } DataType::Int8 => { primitive_conversion_single_value!(Int8Type, typed_value, index) } From 5689f2c83438498a7d5386a62c996c9c36050b1d Mon Sep 17 00:00:00 2001 From: Peter Nguyen Date: Mon, 15 Sep 2025 11:32:01 -0700 Subject: [PATCH 2/2] Add test --- parquet-variant-compute/src/variant_get.rs | 77 ++++++++++++++++++++++ 1 file changed, 77 insertions(+) diff --git a/parquet-variant-compute/src/variant_get.rs b/parquet-variant-compute/src/variant_get.rs index f9026735db1a..aecf4d69c358 100644 --- a/parquet-variant-compute/src/variant_get.rs +++ b/parquet-variant-compute/src/variant_get.rs @@ -490,6 +490,23 @@ mod test { assert_eq!(result.value(3), Variant::from(false)); } + #[test] + fn get_variant_partially_shredded_fixed_size_binary_as_variant() { + let array = partially_shredded_fixed_size_binary_variant_array(); + let options = GetOptions::new(); + let result = variant_get(&array, options).unwrap(); + + // Expect the result to be a VariantArray + let result: &VariantArray = result.as_any().downcast_ref().unwrap(); + assert_eq!(result.len(), 4); + + // Expect the values to match the original values + assert_eq!(result.value(0), 
Variant::from(&[1u8, 2u8, 3u8][..])); + assert!(!result.is_valid(1)); + assert_eq!(result.value(2), Variant::from("n/a")); + assert_eq!(result.value(3), Variant::from(&[4u8, 5u8, 6u8][..])); + } + /// Shredding: extract a value as an Int32Array #[test] fn get_variant_shredded_int32_as_int32_safe_cast() { @@ -938,6 +955,66 @@ mod test { ) } + /// Return a VariantArray that represents a partially "shredded" variant for fixed size binary + fn partially_shredded_fixed_size_binary_variant_array() -> ArrayRef { + let (metadata, string_value) = { + let mut builder = parquet_variant::VariantBuilder::new(); + builder.append_value("n/a"); + builder.finish() + }; + + // Create the null buffer for the overall array + let nulls = NullBuffer::from(vec![ + true, // row 0 non null + false, // row 1 is null + true, // row 2 non null + true, // row 3 non null + ]); + + // metadata is the same for all rows + let metadata = BinaryViewArray::from_iter_values(std::iter::repeat_n(&metadata, 4)); + + // See https://docs.google.com/document/d/1pw0AWoMQY3SjD7R4LgbPvMjG_xSCtXp3rZHkVp9jpZ4/edit?disco=AAABml8WQrY + // about why row 1 is an empty, but non-null, value. 
+ let values = BinaryViewArray::from(vec![ + None, // row 0 is shredded, so no value + Some(b"" as &[u8]), // row 1 is null, so empty value + Some(&string_value), // copy the string value "n/a" + None, // row 3 is shredded, so no value + ]); + + // Create fixed size binary array with 3-byte values + let data = vec![ + 1u8, 2u8, 3u8, // row 0 is shredded + 0u8, 0u8, 0u8, // row 1 is null (value doesn't matter) + 0u8, 0u8, 0u8, // row 2 is a string (value doesn't matter) + 4u8, 5u8, 6u8, // row 3 is shredded + ]; + let typed_value_nulls = arrow::buffer::NullBuffer::from(vec![ + true, // row 0 has value + false, // row 1 is null + false, // row 2 is string + true, // row 3 has value + ]); + let typed_value = arrow::array::FixedSizeBinaryArray::try_new( + 3, // byte width + arrow::buffer::Buffer::from(data), + Some(typed_value_nulls), + ) + .expect("should create fixed size binary array"); + + let struct_array = StructArrayBuilder::new() + .with_field("metadata", Arc::new(metadata), true) + .with_field("typed_value", Arc::new(typed_value), true) + .with_field("value", Arc::new(values), true) + .with_nulls(nulls) + .build(); + + Arc::new( + VariantArray::try_new(Arc::new(struct_array)).expect("should create variant array"), + ) + } + /// Return a VariantArray that represents an "all null" variant /// for the following example (3 null values): ///