From 41dcbf5ea0df072f301bd4ae3d58978574f39327 Mon Sep 17 00:00:00 2001 From: klion26 Date: Mon, 16 Mar 2026 17:34:29 +0800 Subject: [PATCH 1/8] WIP --- arrow-cast/src/cast/mod.rs | 48 ++++++++-- arrow-cast/src/cast/string.rs | 36 ++++--- parquet-variant/Cargo.toml | 2 + parquet-variant/src/utils.rs | 17 ---- parquet-variant/src/variant.rs | 169 ++++++++++++--------------------- 5 files changed, 128 insertions(+), 144 deletions(-) diff --git a/arrow-cast/src/cast/mod.rs b/arrow-cast/src/cast/mod.rs index 67efb5742485..184695f84889 100644 --- a/arrow-cast/src/cast/mod.rs +++ b/arrow-cast/src/cast/mod.rs @@ -71,6 +71,7 @@ use arrow_select::take::take; use num_traits::{NumCast, ToPrimitive, cast::AsPrimitive}; pub use decimal::{DecimalCast, rescale_decimal}; +pub use string::cast_single_string_to_boolean_default; /// CastOptions provides a way to override the default cast behaviors #[derive(Debug, Clone, PartialEq, Eq, Hash)] @@ -2464,7 +2465,7 @@ where R::Native: NumCast, { from.try_unary(|value| { - num_traits::cast::cast::(value).ok_or_else(|| { + num_cast::(value).ok_or_else(|| { ArrowError::CastError(format!( "Can't cast value {:?} to type {}", value, @@ -2474,6 +2475,17 @@ where }) } +/// Natural cast between numeric types +/// Return None if the input `value` can't be casted to type `O`. 
+#[inline] +pub fn num_cast(value: I) -> Option +where + I: NumCast, + O: NumCast, +{ + num_traits::cast::cast::(value) +} + // Natural cast between numeric types // If the value of T can't be casted to R, it will be converted to null fn numeric_cast(from: &PrimitiveArray) -> PrimitiveArray @@ -2483,7 +2495,7 @@ where T::Native: NumCast, R::Native: NumCast, { - from.unary_opt::<_, R>(num_traits::cast::cast::) + from.unary_opt::<_, R>(num_cast::) } fn cast_numeric_to_binary( @@ -2540,16 +2552,23 @@ where for i in 0..from.len() { if from.is_null(i) { b.append_null(); - } else if from.value(i) != T::default_value() { - b.append_value(true); } else { - b.append_value(false); + b.append_value(cast_num_to_bool::(from.value(i))); } } Ok(b.finish()) } +/// Cast numeric types to boolean +#[inline] +pub fn cast_num_to_bool(value: I) -> bool +where + I: Default + PartialEq, +{ + value != I::default() +} + /// Cast Boolean types to numeric /// /// `false` returns 0 while `true` returns 1 @@ -2575,11 +2594,8 @@ where let iter = (0..from.len()).map(|i| { if from.is_null(i) { None - } else if from.value(i) { - // a workaround to cast a primitive to T::Native, infallible - num_traits::cast::cast(1) } else { - Some(T::default_value()) + single_bool_to_numeric::(from.value(i)) } }); // Benefit: @@ -2589,6 +2605,20 @@ where unsafe { PrimitiveArray::::from_trusted_len_iter(iter) } } +/// Cast a single bool value to a numeric value. +#[inline] +pub fn single_bool_to_numeric(value: bool) -> Option +where + O: num_traits::NumCast + Default, +{ + if value { + // a workaround to cast a primitive to type O, infallible + num_traits::cast::cast(1) + } else { + Some(O::default()) + } +} + /// Helper function to cast from one `BinaryArray` or 'LargeBinaryArray' to 'FixedSizeBinaryArray'. 
fn cast_binary_to_fixed_size_binary( array: &dyn Array, diff --git a/arrow-cast/src/cast/string.rs b/arrow-cast/src/cast/string.rs index 77696ae0d8cc..2fd29b388090 100644 --- a/arrow-cast/src/cast/string.rs +++ b/arrow-cast/src/cast/string.rs @@ -401,18 +401,7 @@ where let output_array = array .iter() .map(|value| match value { - Some(value) => match value.to_ascii_lowercase().trim() { - "t" | "tr" | "tru" | "true" | "y" | "ye" | "yes" | "on" | "1" => Ok(Some(true)), - "f" | "fa" | "fal" | "fals" | "false" | "n" | "no" | "of" | "off" | "0" => { - Ok(Some(false)) - } - invalid_value => match cast_options.safe { - true => Ok(None), - false => Err(ArrowError::CastError(format!( - "Cannot cast value '{invalid_value}' to value of Boolean type", - ))), - }, - }, + Some(value) => cast_single_string_to_boolean(value, cast_options), None => Ok(None), }) .collect::>()?; @@ -420,6 +409,29 @@ where Ok(Arc::new(output_array)) } +fn cast_single_string_to_boolean( + value: &str, + cast_options: &CastOptions, +) -> Result, ArrowError> { + match value.to_ascii_lowercase().trim() { + "t" | "tr" | "tru" | "true" | "y" | "ye" | "yes" | "on" | "1" => Ok(Some(true)), + "f" | "fa" | "fal" | "fals" | "false" | "n" | "no" | "of" | "off" | "0" => Ok(Some(false)), + invalid_value => match cast_options.safe { + true => Ok(None), + false => Err(ArrowError::CastError(format!( + "Cannot cast value '{invalid_value}' to value of Boolean type", + ))), + }, + } +} + +/// Cast a single string to boolean with default cast option(safe=true). 
+pub fn cast_single_string_to_boolean_default(value: &str) -> Option { + cast_single_string_to_boolean(value, &CastOptions::default()) + .ok() + .flatten() +} + pub(crate) fn cast_utf8_to_boolean( from: &dyn Array, cast_options: &CastOptions, diff --git a/parquet-variant/Cargo.toml b/parquet-variant/Cargo.toml index 51671d518910..7d5064331e4c 100644 --- a/parquet-variant/Cargo.toml +++ b/parquet-variant/Cargo.toml @@ -29,10 +29,12 @@ edition = { workspace = true } rust-version = { workspace = true } [dependencies] +arrow = { workspace = true , features = ["canonical_extension_types"] } arrow-schema = { workspace = true } chrono = { workspace = true } half = { version = "2.1", default-features = false } indexmap = "2.10.0" +num-traits = { version = "0.2", default-features = false } uuid = { version = "1.18.0", features = ["v4"]} simdutf8 = { workspace = true , optional = true } diff --git a/parquet-variant/src/utils.rs b/parquet-variant/src/utils.rs index 0984a601b213..10bbfa986778 100644 --- a/parquet-variant/src/utils.rs +++ b/parquet-variant/src/utils.rs @@ -146,10 +146,6 @@ pub(crate) const fn expect_size_of(expected: usize) { } } -pub(crate) fn fits_precision(n: impl Into) -> bool { - n.into().unsigned_abs().leading_zeros() >= (i64::BITS - N) -} - /// Parse a path string into a vector of [`VariantPathElement`]. 
/// /// # Syntax @@ -274,16 +270,3 @@ fn parse_in_bracket(s: &str, i: usize) -> Result<(VariantPathElement<'_>, usize) Ok((element, end + 1)) } - -#[cfg(test)] -mod test { - use super::*; - - #[test] - fn test_fits_precision() { - assert!(fits_precision::<10>(1023)); - assert!(!fits_precision::<10>(1024)); - assert!(fits_precision::<10>(-1023)); - assert!(!fits_precision::<10>(-1024)); - } -} diff --git a/parquet-variant/src/variant.rs b/parquet-variant/src/variant.rs index 53fb3c4b1e10..7e39a88047e7 100644 --- a/parquet-variant/src/variant.rs +++ b/parquet-variant/src/variant.rs @@ -28,11 +28,14 @@ use crate::decoder::{ self, VariantBasicType, VariantPrimitiveType, get_basic_type, get_primitive_type, }; use crate::path::{VariantPath, VariantPathElement}; -use crate::utils::{first_byte_from_slice, fits_precision, slice_from_slice}; -use std::ops::Deref; - +use crate::utils::{first_byte_from_slice, slice_from_slice}; +use arrow::compute::{ + cast_num_to_bool, cast_single_string_to_boolean_default, num_cast, single_bool_to_numeric, +}; use arrow_schema::ArrowError; use chrono::{DateTime, NaiveDate, NaiveDateTime, NaiveTime, Timelike, Utc}; +use num_traits::NumCast; +use std::ops::Deref; mod decimal; mod list; @@ -499,6 +502,14 @@ impl<'m, 'v> Variant<'m, 'v> { match self { Variant::BooleanTrue => Some(true), Variant::BooleanFalse => Some(false), + Variant::Int8(i) => Some(cast_num_to_bool::(*i)), + Variant::Int16(i) => Some(cast_num_to_bool::(*i)), + Variant::Int32(i) => Some(cast_num_to_bool::(*i)), + Variant::Int64(i) => Some(cast_num_to_bool::(*i)), + Variant::Float(f) => Some(cast_num_to_bool::(*f)), + Variant::Double(d) => Some(cast_num_to_bool::(*d)), + Variant::ShortString(s) => cast_single_string_to_boolean_default(s.0), + Variant::String(s) => cast_single_string_to_boolean_default(s), _ => None, } } @@ -760,6 +771,26 @@ impl<'m, 'v> Variant<'m, 'v> { } } + fn as_num(&self) -> Option + where + T: NumCast + Default, + { + match *self { + Variant::BooleanFalse 
=> single_bool_to_numeric::(false), + Variant::BooleanTrue => single_bool_to_numeric::(true), + Variant::Int8(i) => num_cast::(i), + Variant::Int16(i) => num_cast::(i), + Variant::Int32(i) => num_cast::(i), + Variant::Int64(i) => num_cast::(i), + Variant::Float(f) => num_cast::(f), + Variant::Double(d) => num_cast::(d), + Variant::Decimal4(d) if d.scale() == 0 => num_cast::<_, T>(d.integer()), + Variant::Decimal8(d) if d.scale() == 0 => num_cast::<_, T>(d.integer()), + Variant::Decimal16(d) if d.scale() == 0 => num_cast::<_, T>(d.integer()), + _ => None, + } + } + /// Converts this variant to an `i8` if possible. /// /// Returns `Some(i8)` for integer variants that fit in `i8` range, @@ -783,16 +814,7 @@ impl<'m, 'v> Variant<'m, 'v> { /// assert_eq!(v3.as_int8(), None); /// ``` pub fn as_int8(&self) -> Option { - match *self { - Variant::Int8(i) => Some(i), - Variant::Int16(i) => i.try_into().ok(), - Variant::Int32(i) => i.try_into().ok(), - Variant::Int64(i) => i.try_into().ok(), - Variant::Decimal4(d) if d.scale() == 0 => d.integer().try_into().ok(), - Variant::Decimal8(d) if d.scale() == 0 => d.integer().try_into().ok(), - Variant::Decimal16(d) if d.scale() == 0 => d.integer().try_into().ok(), - _ => None, - } + self.as_num::() } /// Converts this variant to an `i16` if possible. @@ -818,16 +840,7 @@ impl<'m, 'v> Variant<'m, 'v> { /// assert_eq!(v3.as_int16(), None); /// ``` pub fn as_int16(&self) -> Option { - match *self { - Variant::Int8(i) => Some(i.into()), - Variant::Int16(i) => Some(i), - Variant::Int32(i) => i.try_into().ok(), - Variant::Int64(i) => i.try_into().ok(), - Variant::Decimal4(d) if d.scale() == 0 => d.integer().try_into().ok(), - Variant::Decimal8(d) if d.scale() == 0 => d.integer().try_into().ok(), - Variant::Decimal16(d) if d.scale() == 0 => d.integer().try_into().ok(), - _ => None, - } + self.as_num::() } /// Converts this variant to an `i32` if possible. 
@@ -853,16 +866,7 @@ impl<'m, 'v> Variant<'m, 'v> { /// assert_eq!(v3.as_int32(), None); /// ``` pub fn as_int32(&self) -> Option { - match *self { - Variant::Int8(i) => Some(i.into()), - Variant::Int16(i) => Some(i.into()), - Variant::Int32(i) => Some(i), - Variant::Int64(i) => i.try_into().ok(), - Variant::Decimal4(d) if d.scale() == 0 => Some(d.integer()), - Variant::Decimal8(d) if d.scale() == 0 => d.integer().try_into().ok(), - Variant::Decimal16(d) if d.scale() == 0 => d.integer().try_into().ok(), - _ => None, - } + self.as_num::<_>() } /// Converts this variant to an `i64` if possible. @@ -884,32 +888,7 @@ impl<'m, 'v> Variant<'m, 'v> { /// assert_eq!(v2.as_int64(), None); /// ``` pub fn as_int64(&self) -> Option { - match *self { - Variant::Int8(i) => Some(i.into()), - Variant::Int16(i) => Some(i.into()), - Variant::Int32(i) => Some(i.into()), - Variant::Int64(i) => Some(i), - Variant::Decimal4(d) if d.scale() == 0 => Some(d.integer().into()), - Variant::Decimal8(d) if d.scale() == 0 => Some(d.integer()), - Variant::Decimal16(d) if d.scale() == 0 => d.integer().try_into().ok(), - _ => None, - } - } - - fn generic_convert_unsigned_primitive(&self) -> Option - where - T: TryFrom + TryFrom + TryFrom + TryFrom + TryFrom, - { - match *self { - Variant::Int8(i) => i.try_into().ok(), - Variant::Int16(i) => i.try_into().ok(), - Variant::Int32(i) => i.try_into().ok(), - Variant::Int64(i) => i.try_into().ok(), - Variant::Decimal4(d) if d.scale() == 0 => d.integer().try_into().ok(), - Variant::Decimal8(d) if d.scale() == 0 => d.integer().try_into().ok(), - Variant::Decimal16(d) if d.scale() == 0 => d.integer().try_into().ok(), - _ => None, - } + self.as_num::() } /// Converts this variant to a `u8` if possible. @@ -945,7 +924,7 @@ impl<'m, 'v> Variant<'m, 'v> { /// assert_eq!(v5.as_u8(), None); /// ``` pub fn as_u8(&self) -> Option { - self.generic_convert_unsigned_primitive::() + self.as_num::() } /// Converts this variant to an `u16` if possible. 
@@ -981,7 +960,7 @@ impl<'m, 'v> Variant<'m, 'v> { /// assert_eq!(v5.as_u16(), None); /// ``` pub fn as_u16(&self) -> Option { - self.generic_convert_unsigned_primitive::() + self.as_num::() } /// Converts this variant to an `u32` if possible. @@ -1017,7 +996,7 @@ impl<'m, 'v> Variant<'m, 'v> { /// assert_eq!(v5.as_u32(), None); /// ``` pub fn as_u32(&self) -> Option { - self.generic_convert_unsigned_primitive::() + self.as_num::() } /// Converts this variant to an `u64` if possible. @@ -1053,7 +1032,7 @@ impl<'m, 'v> Variant<'m, 'v> { /// assert_eq!(v5.as_u64(), None); /// ``` pub fn as_u64(&self) -> Option { - self.generic_convert_unsigned_primitive::() + self.as_num::() } /// Converts this variant to tuple with a 4-byte unscaled value if possible. @@ -1085,10 +1064,9 @@ impl<'m, 'v> Variant<'m, 'v> { /// ``` pub fn as_decimal4(&self) -> Option { match *self { - Variant::Int8(i) => i32::from(i).try_into().ok(), - Variant::Int16(i) => i32::from(i).try_into().ok(), - Variant::Int32(i) => i.try_into().ok(), - Variant::Int64(i) => i32::try_from(i).ok()?.try_into().ok(), + Variant::Int8(_) | Variant::Int16(_) | Variant::Int32(_) | Variant::Int64(_) => { + self.as_num::().and_then(|x| x.try_into().ok()) + } Variant::Decimal4(decimal4) => Some(decimal4), Variant::Decimal8(decimal8) => decimal8.try_into().ok(), Variant::Decimal16(decimal16) => decimal16.try_into().ok(), @@ -1125,10 +1103,9 @@ impl<'m, 'v> Variant<'m, 'v> { /// ``` pub fn as_decimal8(&self) -> Option { match *self { - Variant::Int8(i) => i64::from(i).try_into().ok(), - Variant::Int16(i) => i64::from(i).try_into().ok(), - Variant::Int32(i) => i64::from(i).try_into().ok(), - Variant::Int64(i) => i.try_into().ok(), + Variant::Int8(_) | Variant::Int16(_) | Variant::Int32(_) | Variant::Int64(_) => { + self.as_num::().and_then(|x| x.try_into().ok()) + } Variant::Decimal4(decimal4) => Some(decimal4.into()), Variant::Decimal8(decimal8) => Some(decimal8), Variant::Decimal16(decimal16) => decimal16.try_into().ok(), 
@@ -1157,10 +1134,9 @@ impl<'m, 'v> Variant<'m, 'v> { /// ``` pub fn as_decimal16(&self) -> Option { match *self { - Variant::Int8(i) => i128::from(i).try_into().ok(), - Variant::Int16(i) => i128::from(i).try_into().ok(), - Variant::Int32(i) => i128::from(i).try_into().ok(), - Variant::Int64(i) => i128::from(i).try_into().ok(), + Variant::Int8(_) | Variant::Int16(_) | Variant::Int32(_) | Variant::Int64(_) => { + self.as_num::().and_then(|x| x.try_into().ok()) + } Variant::Decimal4(decimal4) => Some(decimal4.into()), Variant::Decimal8(decimal8) => Some(decimal8.into()), Variant::Decimal16(decimal16) => Some(decimal16), @@ -1195,15 +1171,7 @@ impl<'m, 'v> Variant<'m, 'v> { /// let v4 = Variant::from("hello!"); /// assert_eq!(v4.as_f16(), None); pub fn as_f16(&self) -> Option { - match *self { - Variant::Float(i) => Some(f16::from_f32(i)), - Variant::Double(i) => Some(f16::from_f64(i)), - Variant::Int8(i) => Some(i.into()), - Variant::Int16(i) if fits_precision::<11>(i) => Some(f16::from_f32(i as _)), - Variant::Int32(i) if fits_precision::<11>(i) => Some(f16::from_f32(i as _)), - Variant::Int64(i) if fits_precision::<11>(i) => Some(f16::from_f32(i as _)), - _ => None, - } + self.as_num::() } /// Converts this variant to an `f32` if possible. @@ -1234,15 +1202,7 @@ impl<'m, 'v> Variant<'m, 'v> { /// ``` #[allow(clippy::cast_possible_truncation)] pub fn as_f32(&self) -> Option { - match *self { - Variant::Float(i) => Some(i), - Variant::Double(i) => Some(i as f32), - Variant::Int8(i) => Some(i.into()), - Variant::Int16(i) => Some(i.into()), - Variant::Int32(i) if fits_precision::<24>(i) => Some(i as _), - Variant::Int64(i) if fits_precision::<24>(i) => Some(i as _), - _ => None, - } + self.as_num::() } /// Converts this variant to an `f64` if possible. 
@@ -1272,15 +1232,7 @@ impl<'m, 'v> Variant<'m, 'v> { /// assert_eq!(v4.as_f64(), None); /// ``` pub fn as_f64(&self) -> Option { - match *self { - Variant::Float(i) => Some(i.into()), - Variant::Double(i) => Some(i), - Variant::Int8(i) => Some(i.into()), - Variant::Int16(i) => Some(i.into()), - Variant::Int32(i) => Some(i.into()), - Variant::Int64(i) if fits_precision::<53>(i) => Some(i as _), - _ => None, - } + self.as_num::() } /// Converts this variant to an `Object` if it is an [`VariantObject`]. @@ -1527,7 +1479,8 @@ impl From for Variant<'_, '_> { if let Ok(value) = i8::try_from(value) { Variant::Int8(value) } else { - Variant::Int16(i16::from(value)) + // It will always fit in i16 because u8 max is 255 and i16 max is 32767 + Variant::Int16(num_cast::(value).unwrap()) } } } @@ -1538,7 +1491,8 @@ impl From for Variant<'_, '_> { if let Ok(value) = i16::try_from(value) { Variant::Int16(value) } else { - Variant::Int32(i32::from(value)) + // It will always fit in i32 because u16 max is 65535 and i32 max is 2147483647 + Variant::Int32(num_cast::(value).unwrap()) } } } @@ -1548,7 +1502,8 @@ impl From for Variant<'_, '_> { if let Ok(value) = i32::try_from(value) { Variant::Int32(value) } else { - Variant::Int64(i64::from(value)) + // It will always fit in i64 because u32 max is 4294967295 and i64 max is 9223372036854775807 + Variant::Int64(num_cast::(value).unwrap()) } } } @@ -1560,7 +1515,9 @@ impl From for Variant<'_, '_> { Variant::Int64(value) } else { // u64 max is 18446744073709551615, which fits in i128 - Variant::Decimal16(VariantDecimal16::try_new(i128::from(value), 0).unwrap()) + Variant::Decimal16( + VariantDecimal16::try_new(num_cast::(value).unwrap(), 0).unwrap(), + ) } } } From 74d9e5135a998416c8438e8fde65977dd1b3e3de Mon Sep 17 00:00:00 2001 From: klion26 Date: Mon, 16 Mar 2026 20:07:18 +0800 Subject: [PATCH 2/8] fix test --- parquet-variant-compute/src/shred_variant.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/parquet-variant-compute/src/shred_variant.rs b/parquet-variant-compute/src/shred_variant.rs index c60c602baa37..994be7723b04 100644 --- a/parquet-variant-compute/src/shred_variant.rs +++ b/parquet-variant-compute/src/shred_variant.rs @@ -1128,7 +1128,7 @@ mod tests { .downcast_ref::() .unwrap(); assert_eq!(typed_value_int32.value(0), 42); - assert!(typed_value_int32.is_null(1)); // float doesn't convert to int32 + assert_eq!(typed_value_int32.value(1), 3); assert!(typed_value_int32.is_null(2)); // string doesn't convert to int32 // Test Float64 target From 2f63276537f400c819680276ed329e81463625c0 Mon Sep 17 00:00:00 2001 From: klion26 Date: Mon, 16 Mar 2026 20:21:39 +0800 Subject: [PATCH 3/8] fix tests --- parquet-variant-compute/src/variant_get.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/parquet-variant-compute/src/variant_get.rs b/parquet-variant-compute/src/variant_get.rs index f9985084cc49..c595e9fd08cf 100644 --- a/parquet-variant-compute/src/variant_get.rs +++ b/parquet-variant-compute/src/variant_get.rs @@ -2626,7 +2626,7 @@ mod test { #[test] fn test_error_message_boolean_type_display() { let mut builder = VariantArrayBuilder::new(1); - builder.append_variant(Variant::Int32(123)); + builder.append_variant(Variant::Null); let variant_array: ArrayRef = ArrayRef::from(builder.build()); // Request Boolean with strict casting to force an error @@ -2647,7 +2647,7 @@ mod test { #[test] fn test_error_message_numeric_type_display() { let mut builder = VariantArrayBuilder::new(1); - builder.append_variant(Variant::BooleanTrue); + builder.append_variant(Variant::Null); let variant_array: ArrayRef = ArrayRef::from(builder.build()); // Request Boolean with strict casting to force an error From a0f711c0d1fe16848d5ae10a901f18d7c3d69b64 Mon Sep 17 00:00:00 2001 From: klion26 Date: Thu, 19 Mar 2026 16:25:53 +0800 Subject: [PATCH 4/8] address comment --- parquet-variant/src/variant.rs | 231 ++++++++++++++++++++------------- 1 file changed, 140 
insertions(+), 91 deletions(-) diff --git a/parquet-variant/src/variant.rs b/parquet-variant/src/variant.rs index 7e39a88047e7..10b620e53178 100644 --- a/parquet-variant/src/variant.rs +++ b/parquet-variant/src/variant.rs @@ -478,7 +478,7 @@ impl<'m, 'v> Variant<'m, 'v> { /// Converts this variant to a `bool` if possible. /// - /// Returns `Some(bool)` for boolean variants, + /// Returns `Some(bool)` for boolean, numeric and string variants, /// `None` for non-boolean variants. /// /// # Examples @@ -494,21 +494,29 @@ impl<'m, 'v> Variant<'m, 'v> { /// let v2 = Variant::from(false); /// assert_eq!(v2.as_boolean(), Some(false)); /// + /// // and a numeric variant + /// let v3 = Variant::from(3); + /// assert_eq!(v3.as_boolean(), Some(true)); + /// + /// // and a string variant + /// let v4 = Variant::from("true"); + /// assert_eq!(v4.as_boolean(), Some(true)); + /// /// // but not from other variants - /// let v3 = Variant::from("hello!"); - /// assert_eq!(v3.as_boolean(), None); + /// let v5 = Variant::from("hello!"); + /// assert_eq!(v5.as_boolean(), None); /// ``` pub fn as_boolean(&self) -> Option { match self { Variant::BooleanTrue => Some(true), Variant::BooleanFalse => Some(false), - Variant::Int8(i) => Some(cast_num_to_bool::(*i)), - Variant::Int16(i) => Some(cast_num_to_bool::(*i)), - Variant::Int32(i) => Some(cast_num_to_bool::(*i)), - Variant::Int64(i) => Some(cast_num_to_bool::(*i)), - Variant::Float(f) => Some(cast_num_to_bool::(*f)), - Variant::Double(d) => Some(cast_num_to_bool::(*d)), - Variant::ShortString(s) => cast_single_string_to_boolean_default(s.0), + Variant::Int8(i) => Some(cast_num_to_bool(*i)), + Variant::Int16(i) => Some(cast_num_to_bool(*i)), + Variant::Int32(i) => Some(cast_num_to_bool(*i)), + Variant::Int64(i) => Some(cast_num_to_bool(*i)), + Variant::Float(f) => Some(cast_num_to_bool(*f)), + Variant::Double(d) => Some(cast_num_to_bool(*d)), + Variant::ShortString(s) => cast_single_string_to_boolean_default(s.as_str()), 
Variant::String(s) => cast_single_string_to_boolean_default(s), _ => None, } @@ -778,12 +786,12 @@ impl<'m, 'v> Variant<'m, 'v> { match *self { Variant::BooleanFalse => single_bool_to_numeric::(false), Variant::BooleanTrue => single_bool_to_numeric::(true), - Variant::Int8(i) => num_cast::(i), - Variant::Int16(i) => num_cast::(i), - Variant::Int32(i) => num_cast::(i), - Variant::Int64(i) => num_cast::(i), - Variant::Float(f) => num_cast::(f), - Variant::Double(d) => num_cast::(d), + Variant::Int8(i) => num_cast::<_, T>(i), + Variant::Int16(i) => num_cast::<_, T>(i), + Variant::Int32(i) => num_cast::<_, T>(i), + Variant::Int64(i) => num_cast::<_, T>(i), + Variant::Float(f) => num_cast::<_, T>(f), + Variant::Double(d) => num_cast::<_, T>(d), Variant::Decimal4(d) if d.scale() == 0 => num_cast::<_, T>(d.integer()), Variant::Decimal8(d) if d.scale() == 0 => num_cast::<_, T>(d.integer()), Variant::Decimal16(d) if d.scale() == 0 => num_cast::<_, T>(d.integer()), @@ -793,8 +801,8 @@ impl<'m, 'v> Variant<'m, 'v> { /// Converts this variant to an `i8` if possible. /// - /// Returns `Some(i8)` for integer variants that fit in `i8` range, - /// `None` for non-integer variants or values that would overflow. + /// Returns `Some(i8)` for integer variants that fit in `i8` range and boolean variant, + /// `None` for other variants or values that would overflow. 
/// /// # Examples /// @@ -805,13 +813,17 @@ impl<'m, 'v> Variant<'m, 'v> { /// let v1 = Variant::from(123i64); /// assert_eq!(v1.as_int8(), Some(123i8)); /// + /// // or from boolean variant + /// let v2 = Variant::BooleanFalse; + /// assert_eq!(v2.as_int8(), Some(0)); + /// /// // but not if it would overflow - /// let v2 = Variant::from(1234i64); - /// assert_eq!(v2.as_int8(), None); + /// let v3 = Variant::from(1234i64); + /// assert_eq!(v3.as_int8(), None); /// /// // or if the variant cannot be cast into an integer - /// let v3 = Variant::from("hello!"); - /// assert_eq!(v3.as_int8(), None); + /// let v4 = Variant::from("hello!"); + /// assert_eq!(v4.as_int8(), None); /// ``` pub fn as_int8(&self) -> Option { self.as_num::() @@ -819,8 +831,8 @@ impl<'m, 'v> Variant<'m, 'v> { /// Converts this variant to an `i16` if possible. /// - /// Returns `Some(i16)` for integer variants that fit in `i16` range, - /// `None` for non-integer variants or values that would overflow. + /// Returns `Some(i16)` for integer variants that fit in `i16` range and boolean variant, + /// `None` for other variants or values that would overflow. /// /// # Examples /// @@ -831,13 +843,17 @@ impl<'m, 'v> Variant<'m, 'v> { /// let v1 = Variant::from(123i64); /// assert_eq!(v1.as_int16(), Some(123i16)); /// + /// // or from boolean variant + /// let v2 = Variant::BooleanFalse; + /// assert_eq!(v2.as_int16(), Some(0)); + /// /// // but not if it would overflow - /// let v2 = Variant::from(123456i64); - /// assert_eq!(v2.as_int16(), None); + /// let v3 = Variant::from(123456i64); + /// assert_eq!(v3.as_int16(), None); /// /// // or if the variant cannot be cast into an integer - /// let v3 = Variant::from("hello!"); - /// assert_eq!(v3.as_int16(), None); + /// let v4 = Variant::from("hello!"); + /// assert_eq!(v4.as_int16(), None); /// ``` pub fn as_int16(&self) -> Option { self.as_num::() @@ -845,8 +861,8 @@ impl<'m, 'v> Variant<'m, 'v> { /// Converts this variant to an `i32` if possible. 
/// - /// Returns `Some(i32)` for integer variants that fit in `i32` range, - /// `None` for non-integer variants or values that would overflow. + /// Returns `Some(i32)` for integer variants that fit in `i32` range and boolean variant, + /// `None` for other variants or values that would overflow. /// /// # Examples /// @@ -857,13 +873,17 @@ impl<'m, 'v> Variant<'m, 'v> { /// let v1 = Variant::from(123i64); /// assert_eq!(v1.as_int32(), Some(123i32)); /// + /// // or from boolean variant + /// let v2 = Variant::BooleanFalse; + /// assert_eq!(v2.as_int32(), Some(0)); + /// /// // but not if it would overflow - /// let v2 = Variant::from(12345678901i64); - /// assert_eq!(v2.as_int32(), None); + /// let v3 = Variant::from(12345678901i64); + /// assert_eq!(v3.as_int32(), None); /// /// // or if the variant cannot be cast into an integer - /// let v3 = Variant::from("hello!"); - /// assert_eq!(v3.as_int32(), None); + /// let v4 = Variant::from("hello!"); + /// assert_eq!(v4.as_int32(), None); /// ``` pub fn as_int32(&self) -> Option { self.as_num::<_>() @@ -871,8 +891,8 @@ impl<'m, 'v> Variant<'m, 'v> { /// Converts this variant to an `i64` if possible. /// - /// Returns `Some(i64)` for integer variants that fit in `i64` range, - /// `None` for non-integer variants or values that would overflow. + /// Returns `Some(i64)` for integer variants that fit in `i64` range and boolean variant, + /// `None` for other variants or values that would overflow. 
/// /// # Examples /// @@ -883,9 +903,13 @@ impl<'m, 'v> Variant<'m, 'v> { /// let v1 = Variant::from(123i64); /// assert_eq!(v1.as_int64(), Some(123i64)); /// + /// // or from boolean variant + /// let v2 = Variant::BooleanFalse; + /// assert_eq!(v2.as_int64(), Some(0)); + /// /// // but not a variant that cannot be cast into an integer - /// let v2 = Variant::from("hello!"); - /// assert_eq!(v2.as_int64(), None); + /// let v3 = Variant::from("hello!"); + /// assert_eq!(v3.as_int64(), None); /// ``` pub fn as_int64(&self) -> Option { self.as_num::() @@ -893,8 +917,8 @@ impl<'m, 'v> Variant<'m, 'v> { /// Converts this variant to a `u8` if possible. /// - /// Returns `Some(u8)` for integer variants that fit in `u8` - /// `None` for non-integer variants or values that would overflow. + /// Returns `Some(u8)` for integer variants that fit in `u8` and boolean variant + /// `None` for other variants or values that would overflow. /// /// # Examples /// @@ -910,18 +934,22 @@ impl<'m, 'v> Variant<'m, 'v> { /// let v2 = Variant::from(d); /// assert_eq!(v2.as_u8(), Some(26u8)); /// + /// // or from boolean variant + /// let v3 = Variant::BooleanFalse; + /// assert_eq!(v3.as_u8(), Some(0)); + /// /// // but not a variant that can't fit into the range - /// let v3 = Variant::from(-1); - /// assert_eq!(v3.as_u8(), None); + /// let v4 = Variant::from(-1); + /// assert_eq!(v4.as_u8(), None); /// /// // not a variant that decimal with scale not equal to zero /// let d = VariantDecimal4::try_new(1, 2).unwrap(); - /// let v4 = Variant::from(d); - /// assert_eq!(v4.as_u8(), None); + /// let v5 = Variant::from(d); + /// assert_eq!(v5.as_u8(), None); /// /// // or not a variant that cannot be cast into an integer - /// let v5 = Variant::from("hello!"); - /// assert_eq!(v5.as_u8(), None); + /// let v6 = Variant::from("hello!"); + /// assert_eq!(v6.as_u8(), None); /// ``` pub fn as_u8(&self) -> Option { self.as_num::() @@ -929,8 +957,8 @@ impl<'m, 'v> Variant<'m, 'v> { /// Converts 
this variant to an `u16` if possible. /// - /// Returns `Some(u16)` for integer variants that fit in `u16` - /// `None` for non-integer variants or values that would overflow. + /// Returns `Some(u16)` for integer variants that fit in `u16` or boolean variant + /// `None` for other variants or values that would overflow. /// /// # Examples /// @@ -946,18 +974,22 @@ impl<'m, 'v> Variant<'m, 'v> { /// let v2 = Variant::from(d); /// assert_eq!(v2.as_u16(), Some(u16::MAX)); /// + /// // or from boolean variant + /// let v3 = Variant::BooleanFalse; + /// assert_eq!(v3.as_u16(), Some(0)); + /// /// // but not a variant that can't fit into the range - /// let v3 = Variant::from(-1); - /// assert_eq!(v3.as_u16(), None); + /// let v4 = Variant::from(-1); + /// assert_eq!(v4.as_u16(), None); /// /// // not a variant that decimal with scale not equal to zero /// let d = VariantDecimal4::try_new(1, 2).unwrap(); - /// let v4 = Variant::from(d); - /// assert_eq!(v4.as_u16(), None); + /// let v5 = Variant::from(d); + /// assert_eq!(v5.as_u16(), None); /// /// // or not a variant that cannot be cast into an integer - /// let v5 = Variant::from("hello!"); - /// assert_eq!(v5.as_u16(), None); + /// let v6 = Variant::from("hello!"); + /// assert_eq!(v6.as_u16(), None); /// ``` pub fn as_u16(&self) -> Option { self.as_num::() @@ -965,8 +997,8 @@ impl<'m, 'v> Variant<'m, 'v> { /// Converts this variant to an `u32` if possible. /// - /// Returns `Some(u32)` for integer variants that fit in `u32` - /// `None` for non-integer variants or values that would overflow. + /// Returns `Some(u32)` for integer variants that fit in `u32` and boolean variant + /// `None` for other variants or values that would overflow. 
/// /// # Examples /// @@ -982,18 +1014,22 @@ impl<'m, 'v> Variant<'m, 'v> { /// let v2 = Variant::from(d); /// assert_eq!(v2.as_u32(), Some(u32::MAX)); /// + /// // or from boolean variant + /// let v3 = Variant::BooleanFalse; + /// assert_eq!(v3.as_u32(), Some(0)); + /// /// // but not a variant that can't fit into the range - /// let v3 = Variant::from(-1); - /// assert_eq!(v3.as_u32(), None); + /// let v4 = Variant::from(-1); + /// assert_eq!(v4.as_u32(), None); /// /// // not a variant that decimal with scale not equal to zero /// let d = VariantDecimal8::try_new(1, 2).unwrap(); - /// let v4 = Variant::from(d); - /// assert_eq!(v4.as_u32(), None); + /// let v5 = Variant::from(d); + /// assert_eq!(v5.as_u32(), None); /// /// // or not a variant that cannot be cast into an integer - /// let v5 = Variant::from("hello!"); - /// assert_eq!(v5.as_u32(), None); + /// let v6 = Variant::from("hello!"); + /// assert_eq!(v6.as_u32(), None); /// ``` pub fn as_u32(&self) -> Option { self.as_num::() @@ -1001,8 +1037,8 @@ impl<'m, 'v> Variant<'m, 'v> { /// Converts this variant to an `u64` if possible. /// - /// Returns `Some(u64)` for integer variants that fit in `u64` - /// `None` for non-integer variants or values that would overflow. + /// Returns `Some(u64)` for integer variants that fit in `u64` and boolean variant + /// `None` for other variants or values that would overflow. 
/// /// # Examples /// @@ -1018,18 +1054,22 @@ impl<'m, 'v> Variant<'m, 'v> { /// let v2 = Variant::from(d); /// assert_eq!(v2.as_u64(), Some(u64::MAX)); /// + /// // or from boolean variant + /// let v3 = Variant::BooleanFalse; + /// assert_eq!(v3.as_u64(), Some(0)); + /// /// // but not a variant that can't fit into the range - /// let v3 = Variant::from(-1); - /// assert_eq!(v3.as_u64(), None); + /// let v4 = Variant::from(-1); + /// assert_eq!(v4.as_u64(), None); /// /// // not a variant that decimal with scale not equal to zero /// let d = VariantDecimal16::try_new(1, 2).unwrap(); - /// let v4 = Variant::from(d); - /// assert_eq!(v4.as_u64(), None); + /// let v5 = Variant::from(d); + /// assert_eq!(v5.as_u64(), None); /// /// // or not a variant that cannot be cast into an integer - /// let v5 = Variant::from("hello!"); - /// assert_eq!(v5.as_u64(), None); + /// let v6 = Variant::from("hello!"); + /// assert_eq!(v6.as_u64(), None); /// ``` pub fn as_u64(&self) -> Option { self.as_num::() @@ -1146,8 +1186,8 @@ impl<'m, 'v> Variant<'m, 'v> { /// Converts this variant to an `f16` if possible. /// - /// Returns `Some(f16)` for floating point values, and integers with up to 11 bits of - /// precision. `None` otherwise. + /// Returns `Some(f16)` for floating point values, integer and boolean variants. + /// `None` otherwise. 
/// /// # Example /// @@ -1163,21 +1203,25 @@ impl<'m, 'v> Variant<'m, 'v> { /// let v2 = Variant::from(std::f64::consts::PI); /// assert_eq!(v2.as_f16(), Some(f16::from_f64(std::f64::consts::PI))); /// - /// // and from integers with no more than 11 bits of precision - /// let v3 = Variant::from(2047); - /// assert_eq!(v3.as_f16(), Some(f16::from_f32(2047.0))); + /// // and from boolean + /// let v3 = Variant::BooleanTrue; + /// assert_eq!(v3.as_f16(), Some(f16::from_f32(1.0))); + /// + /// // return inf if overflow + /// let v4 = Variant::from(123456); + /// assert_eq!(v4.as_f16(), Some(f16::INFINITY)); /// /// // but not from other variants - /// let v4 = Variant::from("hello!"); - /// assert_eq!(v4.as_f16(), None); + /// let v5 = Variant::from("hello!"); + /// assert_eq!(v5.as_f16(), None); pub fn as_f16(&self) -> Option<f16> { self.as_num::<f16>() } /// Converts this variant to an `f32` if possible. /// - /// Returns `Some(f32)` for floating point values, and integer values with up to 24 bits of - /// precision. `None` otherwise. + /// Returns `Some(f32)` for floating point values, integer values, and boolean variants. + /// `None` otherwise. 
/// /// # Examples /// @@ -1192,13 +1236,17 @@ impl<'m, 'v> Variant<'m, 'v> { /// let v2 = Variant::from(std::f64::consts::PI); /// assert_eq!(v2.as_f32(), Some(std::f32::consts::PI)); /// - /// // and from integers with no more than 24 bits of precision - /// let v3 = Variant::from(16777215i64); - /// assert_eq!(v3.as_f32(), Some(16777215.0)); + /// // and from boolean variant + /// let v3 = Variant::BooleanTrue; + /// assert_eq!(v3.as_f32(), Some(1.0)); + /// + /// // and return inf if overflow + /// let v4 = Variant::from(f64::MAX); + /// assert_eq!(v4.as_f32(), Some(f32::INFINITY)); /// /// // but not from other variants - /// let v4 = Variant::from("hello!"); - /// assert_eq!(v4.as_f32(), None); + /// let v5 = Variant::from("hello!"); + /// assert_eq!(v5.as_f32(), None); /// ``` #[allow(clippy::cast_possible_truncation)] pub fn as_f32(&self) -> Option<f32> { @@ -1207,13 +1255,14 @@ impl<'m, 'v> Variant<'m, 'v> { /// Converts this variant to an `f64` if possible. /// - /// Returns `Some(f64)` for floating point values, and integer values with up to 53 bits of - /// precision. `None` otherwise. + /// Returns `Some(f64)` for floating point values, integer values, and boolean variants + /// `None` for other variants or can't be represented by an f64. 
/// /// # Examples /// /// ``` /// use parquet_variant::Variant; + /// use parquet_variant::VariantDecimal16; /// /// // you can extract an f64 from a float variant /// let v1 = Variant::from(std::f32::consts::PI); @@ -1223,13 +1272,13 @@ impl<'m, 'v> Variant<'m, 'v> { /// let v2 = Variant::from(std::f64::consts::PI); /// assert_eq!(v2.as_f64(), Some(std::f64::consts::PI)); /// - /// // and from integers with no more than 53 bits of precision - /// let v3 = Variant::from(9007199254740991i64); - /// assert_eq!(v3.as_f64(), Some(9007199254740991.0)); + /// // and from boolean variant + /// let v3 = Variant::BooleanTrue; + /// assert_eq!(v3.as_f64(), Some(1.0f64)); /// /// // but not from other variants - /// let v4 = Variant::from("hello!"); - /// assert_eq!(v4.as_f64(), None); + /// let v5 = Variant::from("hello!"); + /// assert_eq!(v5.as_f64(), None); /// ``` pub fn as_f64(&self) -> Option { self.as_num::() From 966091fd241ec6624465577baa71f3b917e9b534 Mon Sep 17 00:00:00 2001 From: klion26 Date: Fri, 20 Mar 2026 16:37:11 +0800 Subject: [PATCH 5/8] address comments --- parquet-variant/src/variant.rs | 36 ++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/parquet-variant/src/variant.rs b/parquet-variant/src/variant.rs index 10b620e53178..2f0bcc616ca7 100644 --- a/parquet-variant/src/variant.rs +++ b/parquet-variant/src/variant.rs @@ -779,6 +779,10 @@ impl<'m, 'v> Variant<'m, 'v> { } } + /// Converts a boolean or numeric variant to the specified numeric type `T`. + /// + /// Uses Arrow's casting logic to perform the conversion. Returns `Some(T)` if + /// the conversion succeeds, `None` if the variant can't be casted to type `T`. fn as_num(&self) -> Option where T: NumCast + Default, @@ -826,7 +830,7 @@ impl<'m, 'v> Variant<'m, 'v> { /// assert_eq!(v4.as_int8(), None); /// ``` pub fn as_int8(&self) -> Option { - self.as_num::() + self.as_num() } /// Converts this variant to an `i16` if possible. 
@@ -856,7 +860,7 @@ impl<'m, 'v> Variant<'m, 'v> { /// assert_eq!(v4.as_int16(), None); /// ``` pub fn as_int16(&self) -> Option { - self.as_num::() + self.as_num() } /// Converts this variant to an `i32` if possible. @@ -886,7 +890,7 @@ impl<'m, 'v> Variant<'m, 'v> { /// assert_eq!(v4.as_int32(), None); /// ``` pub fn as_int32(&self) -> Option { - self.as_num::<_>() + self.as_num() } /// Converts this variant to an `i64` if possible. @@ -912,7 +916,7 @@ impl<'m, 'v> Variant<'m, 'v> { /// assert_eq!(v3.as_int64(), None); /// ``` pub fn as_int64(&self) -> Option { - self.as_num::() + self.as_num() } /// Converts this variant to a `u8` if possible. @@ -952,7 +956,7 @@ impl<'m, 'v> Variant<'m, 'v> { /// assert_eq!(v6.as_u8(), None); /// ``` pub fn as_u8(&self) -> Option { - self.as_num::() + self.as_num() } /// Converts this variant to an `u16` if possible. @@ -992,7 +996,7 @@ impl<'m, 'v> Variant<'m, 'v> { /// assert_eq!(v6.as_u16(), None); /// ``` pub fn as_u16(&self) -> Option { - self.as_num::() + self.as_num() } /// Converts this variant to an `u32` if possible. @@ -1032,7 +1036,7 @@ impl<'m, 'v> Variant<'m, 'v> { /// assert_eq!(v6.as_u32(), None); /// ``` pub fn as_u32(&self) -> Option { - self.as_num::() + self.as_num() } /// Converts this variant to an `u64` if possible. @@ -1072,7 +1076,7 @@ impl<'m, 'v> Variant<'m, 'v> { /// assert_eq!(v6.as_u64(), None); /// ``` pub fn as_u64(&self) -> Option { - self.as_num::() + self.as_num() } /// Converts this variant to tuple with a 4-byte unscaled value if possible. @@ -1215,7 +1219,7 @@ impl<'m, 'v> Variant<'m, 'v> { /// let v5 = Variant::from("hello!"); /// assert_eq!(v5.as_f16(), None); pub fn as_f16(&self) -> Option { - self.as_num::() + self.as_num() } /// Converts this variant to an `f32` if possible. 
@@ -1250,7 +1254,7 @@ impl<'m, 'v> Variant<'m, 'v> { /// ``` #[allow(clippy::cast_possible_truncation)] pub fn as_f32(&self) -> Option { - self.as_num::() + self.as_num() } /// Converts this variant to an `f64` if possible. @@ -1281,7 +1285,7 @@ impl<'m, 'v> Variant<'m, 'v> { /// assert_eq!(v5.as_f64(), None); /// ``` pub fn as_f64(&self) -> Option { - self.as_num::() + self.as_num() } /// Converts this variant to an `Object` if it is an [`VariantObject`]. @@ -1529,7 +1533,7 @@ impl From for Variant<'_, '_> { Variant::Int8(value) } else { // It will always fit in i16 because u8 max is 255 and i16 max is 32767 - Variant::Int16(num_cast::(value).unwrap()) + Variant::Int16(num_cast(value).unwrap()) } } } @@ -1541,7 +1545,7 @@ impl From for Variant<'_, '_> { Variant::Int16(value) } else { // It will always fit in i32 because u16 max is 65535 and i32 max is 2147483647 - Variant::Int32(num_cast::(value).unwrap()) + Variant::Int32(num_cast(value).unwrap()) } } } @@ -1552,7 +1556,7 @@ impl From for Variant<'_, '_> { Variant::Int32(value) } else { // It will always fit in i64 because u32 max is 4294967295 and i64 max is 9223372036854775807 - Variant::Int64(num_cast::(value).unwrap()) + Variant::Int64(num_cast(value).unwrap()) } } } @@ -1564,9 +1568,7 @@ impl From for Variant<'_, '_> { Variant::Int64(value) } else { // u64 max is 18446744073709551615, which fits in i128 - Variant::Decimal16( - VariantDecimal16::try_new(num_cast::(value).unwrap(), 0).unwrap(), - ) + Variant::Decimal16(VariantDecimal16::try_new(num_cast(value).unwrap(), 0).unwrap()) } } } From a9214bcc4486061a1db794c5fbdbd672393383b0 Mon Sep 17 00:00:00 2001 From: klion26 Date: Fri, 27 Mar 2026 15:26:40 +0800 Subject: [PATCH 6/8] address comments --- arrow-cast/src/cast/mod.rs | 2 +- parquet-variant-compute/src/variant_get.rs | 8 ++-- parquet-variant/src/variant.rs | 45 ++++++++++++++-------- 3 files changed, 33 insertions(+), 22 deletions(-) diff --git a/arrow-cast/src/cast/mod.rs b/arrow-cast/src/cast/mod.rs 
index 184695f84889..0984e4423b37 100644 --- a/arrow-cast/src/cast/mod.rs +++ b/arrow-cast/src/cast/mod.rs @@ -2605,7 +2605,7 @@ where unsafe { PrimitiveArray::::from_trusted_len_iter(iter) } } -/// Cat single bool value to numeric value. +/// Cast single bool value to numeric value. #[inline] pub fn single_bool_to_numeric(value: bool) -> Option where diff --git a/parquet-variant-compute/src/variant_get.rs b/parquet-variant-compute/src/variant_get.rs index c595e9fd08cf..9b7509975290 100644 --- a/parquet-variant-compute/src/variant_get.rs +++ b/parquet-variant-compute/src/variant_get.rs @@ -2626,7 +2626,7 @@ mod test { #[test] fn test_error_message_boolean_type_display() { let mut builder = VariantArrayBuilder::new(1); - builder.append_variant(Variant::Null); + builder.append_variant(Variant::from("abcd")); let variant_array: ArrayRef = ArrayRef::from(builder.build()); // Request Boolean with strict casting to force an error @@ -2647,10 +2647,10 @@ mod test { #[test] fn test_error_message_numeric_type_display() { let mut builder = VariantArrayBuilder::new(1); - builder.append_variant(Variant::Null); + builder.append_variant(Variant::from("abcd")); let variant_array: ArrayRef = ArrayRef::from(builder.build()); - // Request Boolean with strict casting to force an error + // Request Float32 with strict casting to force an error let options = GetOptions { path: VariantPath::default(), as_type: Some(Arc::new(Field::new("result", DataType::Float32, true))), @@ -2671,7 +2671,7 @@ mod test { builder.append_variant(Variant::BooleanFalse); let variant_array: ArrayRef = ArrayRef::from(builder.build()); - // Request Boolean with strict casting to force an error + // Request Timestamp with strict casting to force an error let options = GetOptions { path: VariantPath::default(), as_type: Some(Arc::new(Field::new( diff --git a/parquet-variant/src/variant.rs b/parquet-variant/src/variant.rs index 2f0bcc616ca7..ebcdffbfa5e7 100644 --- a/parquet-variant/src/variant.rs +++ 
b/parquet-variant/src/variant.rs @@ -779,7 +779,8 @@ impl<'m, 'v> Variant<'m, 'v> { } } - /// Converts a boolean or numeric variant to the specified numeric type `T`. + /// Converts a boolean or numeric variant (integers, floating-point, and decimals with scale 0) + /// to the specified numeric type `T`. /// /// Uses Arrow's casting logic to perform the conversion. Returns `Some(T)` if /// the conversion succeeds, `None` if the variant can't be casted to type `T`. @@ -805,7 +806,8 @@ impl<'m, 'v> Variant<'m, 'v> { /// Converts this variant to an `i8` if possible. /// - /// Returns `Some(i8)` for integer variants that fit in `i8` range and boolean variant, + /// Returns `Some(i8)` for boolean and numeric variants (integers, floating-point, + /// and decimals with scale 0) that fit in `i8` range, /// `None` for other variants or values that would overflow. /// /// # Examples @@ -835,7 +837,8 @@ impl<'m, 'v> Variant<'m, 'v> { /// Converts this variant to an `i16` if possible. /// - /// Returns `Some(i16)` for integer variants that fit in `i16` range and boolean variant, + /// Returns `Some(i16)` for boolean and numeric variants (integers, floating-point, + /// and decimals with scale 0) that fit in `i16` range, /// `None` for other variants or values that would overflow. /// /// # Examples @@ -865,7 +868,8 @@ impl<'m, 'v> Variant<'m, 'v> { /// Converts this variant to an `i32` if possible. /// - /// Returns `Some(i32)` for integer variants that fit in `i32` range and boolean variant, + /// Returns `Some(i32)` for boolean and numeric variants (integers, floating-point, + /// and decimals with scale 0) that fit in `i32` range, /// `None` for other variants or values that would overflow. /// /// # Examples @@ -895,7 +899,8 @@ impl<'m, 'v> Variant<'m, 'v> { /// Converts this variant to an `i64` if possible. 
/// - /// Returns `Some(i64)` for integer variants that fit in `i64` range and boolean variant, + /// Returns `Some(i64)` for boolean and numeric variants (integers, floating-point, + /// and decimals with scale 0) that fit in `i64` range, /// `None` for other variants or values that would overflow. /// /// # Examples @@ -921,7 +926,8 @@ impl<'m, 'v> Variant<'m, 'v> { /// Converts this variant to a `u8` if possible. /// - /// Returns `Some(u8)` for integer variants that fit in `u8` and boolean variant + /// Returns `Some(u8)` for boolean and numeric variants (integers, floating-point, + /// and decimals with scale 0) that fit in `u8` range, /// `None` for other variants or values that would overflow. /// /// # Examples @@ -940,7 +946,7 @@ impl<'m, 'v> Variant<'m, 'v> { /// /// // or from boolean variant /// let v3 = Variant::BooleanFalse; - /// assert_eq!(v3.as_int8(), Some(0)); + /// assert_eq!(v3.as_u8(), Some(0)); /// /// // but not a variant that can't fit into the range /// let v4 = Variant::from(-1); @@ -961,7 +967,8 @@ impl<'m, 'v> Variant<'m, 'v> { /// Converts this variant to an `u16` if possible. /// - /// Returns `Some(u16)` for integer variants that fit in `u16` or boolean variant + /// Returns `Some(u16)` for boolean and numeric variants (integers, floating-point, + /// and decimals with scale 0) that fit in `u16` range, /// `None` for other variants or values that would overflow. /// /// # Examples @@ -980,7 +987,7 @@ impl<'m, 'v> Variant<'m, 'v> { /// /// // or from boolean variant /// let v3= Variant::BooleanFalse; - /// assert_eq!(v3.as_int8(), Some(0)); + /// assert_eq!(v3.as_u16(), Some(0)); /// /// // but not a variant that can't fit into the range /// let v4 = Variant::from(-1); @@ -1001,7 +1008,8 @@ impl<'m, 'v> Variant<'m, 'v> { /// Converts this variant to an `u32` if possible. 
/// - /// Returns `Some(u32)` for integer variants that fit in `u32` and boolean variant + /// Returns `Some(u32)` for boolean and numeric variants (integers, floating-point, + /// and decimals with scale 0) that fit in `u32` range, /// `None` for other variants or values that would overflow. /// /// # Examples @@ -1020,7 +1028,7 @@ impl<'m, 'v> Variant<'m, 'v> { /// /// // or from boolean variant /// let v3 = Variant::BooleanFalse; - /// assert_eq!(v3.as_int8(), Some(0)); + /// assert_eq!(v3.as_u32(), Some(0)); /// /// // but not a variant that can't fit into the range /// let v4 = Variant::from(-1); @@ -1041,7 +1049,8 @@ impl<'m, 'v> Variant<'m, 'v> { /// Converts this variant to an `u64` if possible. /// - /// Returns `Some(u64)` for integer variants that fit in `u64` and boolean variant + /// Returns `Some(u64)` for boolean and numeric variants (integers, floating-point, + /// and decimals with scale 0) that fit in `u64` range, /// `None` for other variants or values that would overflow. /// /// # Examples @@ -1060,7 +1069,7 @@ impl<'m, 'v> Variant<'m, 'v> { /// /// // or from boolean variant /// let v3 = Variant::BooleanFalse; - /// assert_eq!(v3.as_int8(), Some(0)); + /// assert_eq!(v3.as_u64(), Some(0)); /// /// // but not a variant that can't fit into the range /// let v4 = Variant::from(-1); @@ -1190,7 +1199,8 @@ impl<'m, 'v> Variant<'m, 'v> { /// Converts this variant to an `f16` if possible. /// - /// Returns `Some(f16)` for floating point values, integer and boolean variants. + /// Returns `Some(f16)` for boolean and numeric variants (integers, floating-point, + /// and decimals with scale 0) that fit in `f16` range, /// `None` otherwise. /// /// # Example @@ -1224,7 +1234,8 @@ impl<'m, 'v> Variant<'m, 'v> { /// Converts this variant to an `f32` if possible. /// - /// Returns `Some(f32)` for floating point values, integer values, and boolean variants. 
+ /// Returns `Some(f32)` for boolean and numeric variants (integers, floating-point, + /// and decimals with scale 0) that fit in `f32` range, /// `None` otherwise. /// /// # Examples @@ -1259,14 +1270,14 @@ impl<'m, 'v> Variant<'m, 'v> { /// Converts this variant to an `f64` if possible. /// - /// Returns `Some(f64)` for floating point values, integer values, and boolean variants + /// Returns `Some(f64)` for boolean and numeric variants (integers, floating-point, + /// and decimals with scale 0) that fit in `f64` range, /// `None` for other variants or can't be represented by an f64. /// /// # Examples /// /// ``` /// use parquet_variant::Variant; - /// use parquet_variant::VariantDecimal16; /// /// // you can extract an f64 from a float variant /// let v1 = Variant::from(std::f32::consts::PI); From d7dda39a99f5bfbb61dbd0aac8aea3d9db180d26 Mon Sep 17 00:00:00 2001 From: klion26 Date: Tue, 31 Mar 2026 17:42:56 +0800 Subject: [PATCH 7/8] address comment to refine code --- arrow-cast/src/cast/string.rs | 1 + parquet-variant/src/variant.rs | 32 ++++++++++++++------------------ 2 files changed, 15 insertions(+), 18 deletions(-) diff --git a/arrow-cast/src/cast/string.rs b/arrow-cast/src/cast/string.rs index 2fd29b388090..6230f86a2610 100644 --- a/arrow-cast/src/cast/string.rs +++ b/arrow-cast/src/cast/string.rs @@ -409,6 +409,7 @@ where Ok(Arc::new(output_array)) } +#[inline] fn cast_single_string_to_boolean( value: &str, cast_options: &CastOptions, diff --git a/parquet-variant/src/variant.rs b/parquet-variant/src/variant.rs index ebcdffbfa5e7..4f89a67b8f2b 100644 --- a/parquet-variant/src/variant.rs +++ b/parquet-variant/src/variant.rs @@ -789,17 +789,17 @@ impl<'m, 'v> Variant<'m, 'v> { T: NumCast + Default, { match *self { - Variant::BooleanFalse => single_bool_to_numeric::<T>(false), - Variant::BooleanTrue => single_bool_to_numeric::<T>(true), - Variant::Int8(i) => num_cast::<_, T>(i), - Variant::Int16(i) => num_cast::<_, T>(i), - Variant::Int32(i) => num_cast::<_, T>(i), 
- Variant::Int64(i) => num_cast::<_, T>(i), - Variant::Float(f) => num_cast::<_, T>(f), - Variant::Double(d) => num_cast::<_, T>(d), - Variant::Decimal4(d) if d.scale() == 0 => num_cast::<_, T>(d.integer()), - Variant::Decimal8(d) if d.scale() == 0 => num_cast::<_, T>(d.integer()), - Variant::Decimal16(d) if d.scale() == 0 => num_cast::<_, T>(d.integer()), + Variant::BooleanFalse => single_bool_to_numeric(false), + Variant::BooleanTrue => single_bool_to_numeric(true), + Variant::Int8(i) => num_cast(i), + Variant::Int16(i) => num_cast(i), + Variant::Int32(i) => num_cast(i), + Variant::Int64(i) => num_cast(i), + Variant::Float(f) => num_cast(f), + Variant::Double(d) => num_cast(d), + Variant::Decimal4(d) if d.scale() == 0 => num_cast(d.integer()), + Variant::Decimal8(d) if d.scale() == 0 => num_cast(d.integer()), + Variant::Decimal16(d) if d.scale() == 0 => num_cast(d.integer()), _ => None, } } @@ -1263,7 +1263,6 @@ impl<'m, 'v> Variant<'m, 'v> { /// let v5 = Variant::from("hello!"); /// assert_eq!(v5.as_f32(), None); /// ``` - #[allow(clippy::cast_possible_truncation)] pub fn as_f32(&self) -> Option { self.as_num() } @@ -1543,8 +1542,7 @@ impl From for Variant<'_, '_> { if let Ok(value) = i8::try_from(value) { Variant::Int8(value) } else { - // It will always fit in i16 because u8 max is 255 and i16 max is 32767 - Variant::Int16(num_cast(value).unwrap()) + Variant::Int16(num_cast(value).unwrap()) // u8 -> i16 is infallible } } } @@ -1555,8 +1553,7 @@ impl From for Variant<'_, '_> { if let Ok(value) = i16::try_from(value) { Variant::Int16(value) } else { - // It will always fit in i32 because u16 max is 65535 and i32 max is 2147483647 - Variant::Int32(num_cast(value).unwrap()) + Variant::Int32(num_cast(value).unwrap()) // u16 -> i32 is infallible } } } @@ -1566,8 +1563,7 @@ impl From for Variant<'_, '_> { if let Ok(value) = i32::try_from(value) { Variant::Int32(value) } else { - // It will always fit in i64 because u32 max is 4294967295 and i64 max is 
9223372036854775807 - Variant::Int64(num_cast(value).unwrap()) + Variant::Int64(num_cast(value).unwrap()) // u32 -> i64 is infallible } } } From 75eb71fd35d94def1228ce111d9c411ff645b7e1 Mon Sep 17 00:00:00 2001 From: klion26 Date: Wed, 1 Apr 2026 16:16:19 +0800 Subject: [PATCH 8/8] update doc --- parquet-variant/src/variant.rs | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/parquet-variant/src/variant.rs b/parquet-variant/src/variant.rs index 4f89a67b8f2b..accff009045a 100644 --- a/parquet-variant/src/variant.rs +++ b/parquet-variant/src/variant.rs @@ -154,6 +154,24 @@ impl Deref for ShortString<'_> { /// [specification]: https://github.com/apache/parquet-format/blob/master/VariantEncoding.md /// [Variant Shredding specification]: https://github.com/apache/parquet-format/blob/master/VariantShredding.md /// +/// # Casting Semantics +/// +/// Scalar conversion semantics intentionally follow Arrow cast behavior where applicable. +/// Conversions in this module delegate to Arrow compute cast helpers such as +/// [`num_cast`], [`cast_num_to_bool`], [`single_bool_to_numeric`], and +/// [`cast_single_string_to_boolean_default`]. +/// +/// - [`Self::as_boolean`] accepts boolean, numeric, and string variants. +/// Numeric zero maps to `false`; non-zero maps to `true`. String parsing follows +/// Arrow UTF8-to-boolean cast rules. +/// - Numeric accessors such as [`Self::as_int8`], [`Self::as_int64`], [`Self::as_u8`], +/// [`Self::as_u64`], [`Self::as_f16`], [`Self::as_f32`], and [`Self::as_f64`] accept +/// boolean and numeric variants (integers, floating-point, and decimals with scale `0`). +/// They return `None` when conversion is not possible. +/// - Decimal accessors such as [`Self::as_decimal4`], [`Self::as_decimal8`], and +/// [`Self::as_decimal16`] accept compatible decimal variants and integer variants. +/// They return `None` when conversion is not possible. +/// /// # Examples: /// /// ## Creating `Variant` from Rust Types