From 569a8471c7bbd0867b3959dec17885ca1a1c7c78 Mon Sep 17 00:00:00 2001 From: "Konstantin.Tarasov" Date: Mon, 28 Jul 2025 10:53:53 -0400 Subject: [PATCH 1/3] [Variant] impl FromIterator for VariantPath --- parquet-variant/src/path.rs | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/parquet-variant/src/path.rs b/parquet-variant/src/path.rs index 42dbdb3abc2d..6b058aa8a3ee 100644 --- a/parquet-variant/src/path.rs +++ b/parquet-variant/src/path.rs @@ -43,10 +43,10 @@ use std::{borrow::Cow, ops::Deref}; /// // access the field "foo" and then the first element in a variant list value /// let path = VariantPath::from("foo").join(0); /// // this is the same as the previous one -/// let path2 = VariantPath::new(vec!["foo".into(), 0.into()]); +/// let path2 = VariantPath::from_iter(["foo".into(), 0.into()]); /// assert_eq!(path, path2); /// // you can also create a path from a vector of `VariantPathElement` directly -/// let path3 = VariantPath::new(vec![ +/// let path3 = VariantPath::from_iter([ /// VariantPathElement::field("foo"), /// VariantPathElement::index(0) /// ]); @@ -109,6 +109,13 @@ impl<'a> From for VariantPath<'a> { } } +/// Create from iter +impl<'a> FromIterator<VariantPathElement<'a>> for VariantPath<'a> { + fn from_iter<T: IntoIterator<Item = VariantPathElement<'a>>>(iter: T) -> Self { + VariantPath::new(Vec::from_iter(iter)) + } +} + impl<'a> Deref for VariantPath<'a> { type Target = [VariantPathElement<'a>]; From 70c6f1f397155c61fd7c6f03e9c8978317f2c43a Mon Sep 17 00:00:00 2001 From: "Konstantin.Tarasov" Date: Thu, 7 Aug 2025 14:08:02 -0400 Subject: [PATCH 2/3] Implement `DataType::Boolean` support for `cast_to_variant` --- .../src/cast_to_variant.rs | 49 ++++++++++++++++--- 1 file changed, 41 insertions(+), 8 deletions(-) diff --git a/parquet-variant-compute/src/cast_to_variant.rs b/parquet-variant-compute/src/cast_to_variant.rs index 446baf30384c..adced93f2740 100644 --- a/parquet-variant-compute/src/cast_to_variant.rs +++ b/parquet-variant-compute/src/cast_to_variant.rs @@ -41,9 +41,9 @@ 
macro_rules! primitive_conversion { } /// Convert the input array to a `VariantArray` row by row, using `method` -/// to downcast the generic array to a specific array type and `cast_fn` -/// to transform each element to a type compatible with Variant -macro_rules! cast_conversion { +/// requiring a generic type to downcast the generic array to a specific +/// array type and `cast_fn` to transform each element to a type compatible with Variant +macro_rules! generic_conversion { ($t:ty, $method:ident, $cast_fn:expr, $input:expr, $builder:expr) => {{ let array = $input.$method::<$t>(); for i in 0..array.len() { @@ -57,6 +57,23 @@ macro_rules! cast_conversion { }}; } +/// Convert the input array to a `VariantArray` row by row, using `method` +/// not requiring a generic type to downcast the generic array to a specific +/// array type and `cast_fn` to transform each element to a type compatible with Variant +macro_rules! non_generic_conversion { + ($method:ident, $cast_fn:expr, $input:expr, $builder:expr) => {{ + let array = $input.$method(); + for i in 0..array.len() { + if array.is_null(i) { + $builder.append_null(); + continue; + } + let cast_value = $cast_fn(array.value(i)); + $builder.append_variant(Variant::from(cast_value)); + } + }}; +} + /// Casts a typed arrow [`Array`] to a [`VariantArray`]. This is useful when you /// need to convert a specific data type /// @@ -86,14 +103,18 @@ pub fn cast_to_variant(input: &dyn Array) -> Result { let input_type = input.data_type(); // todo: handle other types like Boolean, Strings, Date, Timestamp, etc. 
match input_type { + DataType::Boolean => { + non_generic_conversion!(as_boolean, |v| v, input, builder); + } + DataType::Binary => { - cast_conversion!(BinaryType, as_bytes, |v| v, input, builder); + generic_conversion!(BinaryType, as_bytes, |v| v, input, builder); } DataType::LargeBinary => { - cast_conversion!(LargeBinaryType, as_bytes, |v| v, input, builder); + generic_conversion!(LargeBinaryType, as_bytes, |v| v, input, builder); } DataType::BinaryView => { - cast_conversion!(BinaryViewType, as_byte_view, |v| v, input, builder); + generic_conversion!(BinaryViewType, as_byte_view, |v| v, input, builder); } DataType::Int8 => { primitive_conversion!(Int8Type, input, builder); @@ -120,7 +141,7 @@ pub fn cast_to_variant(input: &dyn Array) -> Result { primitive_conversion!(UInt64Type, input, builder); } DataType::Float16 => { - cast_conversion!( + generic_conversion!( Float16Type, as_primitive, |v: f16| -> f32 { v.into() }, @@ -151,7 +172,7 @@ pub fn cast_to_variant(input: &dyn Array) -> Result { mod tests { use super::*; use arrow::array::{ - ArrayRef, Float16Array, Float32Array, Float64Array, GenericByteBuilder, + ArrayRef, BooleanArray, Float16Array, Float32Array, Float64Array, GenericByteBuilder, GenericByteViewBuilder, Int16Array, Int32Array, Int64Array, Int8Array, UInt16Array, UInt32Array, UInt64Array, UInt8Array, }; @@ -212,6 +233,18 @@ mod tests { ); } + #[test] + fn test_cast_to_variant_bool() { + run_test( + Arc::new(BooleanArray::from(vec![Some(true), None, Some(false)])), + vec![ + Some(Variant::BooleanTrue), + None, + Some(Variant::BooleanFalse), + ], + ); + } + #[test] fn test_cast_to_variant_int8() { run_test( From 5764287d5d555e91e9e2ea3d9b8ba8e063a39977 Mon Sep 17 00:00:00 2001 From: "Konstantin.Tarasov" Date: Thu, 7 Aug 2025 14:23:07 -0400 Subject: [PATCH 3/3] cargo fmt --- parquet-variant-compute/src/cast_to_variant.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/parquet-variant-compute/src/cast_to_variant.rs 
b/parquet-variant-compute/src/cast_to_variant.rs index adced93f2740..e9e2bb3dd8f0 100644 --- a/parquet-variant-compute/src/cast_to_variant.rs +++ b/parquet-variant-compute/src/cast_to_variant.rs @@ -41,7 +41,7 @@ macro_rules! primitive_conversion { } /// Convert the input array to a `VariantArray` row by row, using `method` -/// requiring a generic type to downcast the generic array to a specific +/// requiring a generic type to downcast the generic array to a specific /// array type and `cast_fn` to transform each element to a type compatible with Variant macro_rules! generic_conversion { ($t:ty, $method:ident, $cast_fn:expr, $input:expr, $builder:expr) => {{ @@ -58,7 +58,7 @@ macro_rules! generic_conversion { } /// Convert the input array to a `VariantArray` row by row, using `method` -/// not requiring a generic type to downcast the generic array to a specific +/// not requiring a generic type to downcast the generic array to a specific /// array type and `cast_fn` to transform each element to a type compatible with Variant macro_rules! non_generic_conversion { ($method:ident, $cast_fn:expr, $input:expr, $builder:expr) => {{