From eac9c972ae10bf243f8d97fba8a92131288e5291 Mon Sep 17 00:00:00 2001 From: Ryan Johnson Date: Fri, 27 Jun 2025 12:28:22 -0700 Subject: [PATCH 1/2] [Variant] impl [Try]From for VariantDecimalXX types --- parquet-variant/src/variant.rs | 75 ++++++----- parquet-variant/src/variant/decimal.rs | 178 +++++++++++++++---------- 2 files changed, 143 insertions(+), 110 deletions(-) diff --git a/parquet-variant/src/variant.rs b/parquet-variant/src/variant.rs index 28583f165897..89ad85298c9f 100644 --- a/parquet-variant/src/variant.rs +++ b/parquet-variant/src/variant.rs @@ -537,6 +537,9 @@ impl<'m, 'v> Variant<'m, 'v> { Variant::Int16(i) => i.try_into().ok(), Variant::Int32(i) => i.try_into().ok(), Variant::Int64(i) => i.try_into().ok(), + Variant::Decimal4(d) if d.scale == 0 => d.integer.try_into().ok(), + Variant::Decimal8(d) if d.scale == 0 => d.integer.try_into().ok(), + Variant::Decimal16(d) if d.scale == 0 => d.integer.try_into().ok(), _ => None, } } @@ -569,6 +572,9 @@ impl<'m, 'v> Variant<'m, 'v> { Variant::Int16(i) => Some(i), Variant::Int32(i) => i.try_into().ok(), Variant::Int64(i) => i.try_into().ok(), + Variant::Decimal4(d) if d.scale == 0 => d.integer.try_into().ok(), + Variant::Decimal8(d) if d.scale == 0 => d.integer.try_into().ok(), + Variant::Decimal16(d) if d.scale == 0 => d.integer.try_into().ok(), _ => None, } } @@ -601,6 +607,9 @@ impl<'m, 'v> Variant<'m, 'v> { Variant::Int16(i) => Some(i.into()), Variant::Int32(i) => Some(i), Variant::Int64(i) => i.try_into().ok(), + Variant::Decimal4(d) if d.scale == 0 => Some(d.integer), + Variant::Decimal8(d) if d.scale == 0 => d.integer.try_into().ok(), + Variant::Decimal16(d) if d.scale == 0 => d.integer.try_into().ok(), _ => None, } } @@ -629,6 +638,9 @@ impl<'m, 'v> Variant<'m, 'v> { Variant::Int16(i) => Some(i.into()), Variant::Int32(i) => Some(i.into()), Variant::Int64(i) => Some(i), + Variant::Decimal4(d) if d.scale == 0 => Some(d.integer.into()), + Variant::Decimal8(d) if d.scale == 0 => Some(d.integer), + Variant::Decimal16(d) if d.scale == 0 => d.integer.try_into().ok(), _ => None, } } @@ -660,23 +672,15 @@ impl<'m, 'v> Variant<'m, 'v> { /// let v4 = Variant::from("hello!"); /// assert_eq!(v4.as_decimal_int32(), None); /// ``` - pub fn as_decimal_int32(&self) -> Option<(i32, u8)> { + pub fn as_decimal4(&self) -> Option { match *self { - Variant::Decimal4(decimal4) => Some((decimal4.integer, decimal4.scale)), - Variant::Decimal8(decimal8) => { - if let Ok(converted_integer) = decimal8.integer.try_into() { - Some((converted_integer, decimal8.scale)) - } else { - None - } - } - Variant::Decimal16(decimal16) => { - if let Ok(converted_integer) = decimal16.integer.try_into() { - Some((converted_integer, decimal16.scale)) - } else { - None - } - } + Variant::Int8(i) => i32::from(i).try_into().ok(), + Variant::Int16(i) => i32::from(i).try_into().ok(), + Variant::Int32(i) => i.try_into().ok(), + Variant::Int64(i) => i32::try_from(i).ok()?.try_into().ok(), + Variant::Decimal4(decimal4) => Some(decimal4), + Variant::Decimal8(decimal8) => decimal8.try_into().ok(), + Variant::Decimal16(decimal16) => decimal16.try_into().ok(), _ => None, } } @@ -708,17 +712,15 @@ impl<'m, 'v> Variant<'m, 'v> { /// let v4 = Variant::from("hello!"); /// assert_eq!(v4.as_decimal_int64(), None); /// ``` - pub fn as_decimal_int64(&self) -> Option<(i64, u8)> { + pub fn as_decimal8(&self) -> Option { match *self { - Variant::Decimal4(decimal) => Some((decimal.integer.into(), decimal.scale)), - Variant::Decimal8(decimal) => Some((decimal.integer, decimal.scale)), - Variant::Decimal16(decimal) => { - if let Ok(converted_integer) = decimal.integer.try_into() { - Some((converted_integer, decimal.scale)) - } else { - None - } - } + Variant::Int8(i) => i64::from(i).try_into().ok(), + Variant::Int16(i) => i64::from(i).try_into().ok(), + Variant::Int32(i) => i64::from(i).try_into().ok(), + Variant::Int64(i) => i.try_into().ok(), + Variant::Decimal4(decimal4) => Some(decimal4.into()), + Variant::Decimal8(decimal8) => Some(decimal8), + Variant::Decimal16(decimal16) => decimal16.try_into().ok(), _ => None, } } @@ -742,11 +744,15 @@ impl<'m, 'v> Variant<'m, 'v> { /// let v2 = Variant::from("hello!"); /// assert_eq!(v2.as_decimal_int128(), None); /// ``` - pub fn as_decimal_int128(&self) -> Option<(i128, u8)> { + pub fn as_decimal16(&self) -> Option { match *self { - Variant::Decimal4(decimal) => Some((decimal.integer.into(), decimal.scale)), - Variant::Decimal8(decimal) => Some((decimal.integer.into(), decimal.scale)), - Variant::Decimal16(decimal) => Some((decimal.integer, decimal.scale)), + Variant::Int8(i) => i128::from(i).try_into().ok(), + Variant::Int16(i) => i128::from(i).try_into().ok(), + Variant::Int32(i) => i128::from(i).try_into().ok(), + Variant::Int64(i) => i128::from(i).try_into().ok(), + Variant::Decimal4(decimal4) => Some(decimal4.into()), + Variant::Decimal8(decimal8) => Some(decimal8.into()), + Variant::Decimal16(decimal16) => Some(decimal16), _ => None, } } @@ -1034,17 +1040,14 @@ mod tests { fn test_variant_decimal_conversion() { let decimal4 = VariantDecimal4::try_new(1234_i32, 2).unwrap(); let variant = Variant::from(decimal4); - assert_eq!(variant.as_decimal_int32(), Some((1234_i32, 2))); + assert_eq!(variant.as_decimal4(), Some(decimal4)); let decimal8 = VariantDecimal8::try_new(12345678901_i64, 2).unwrap(); let variant = Variant::from(decimal8); - assert_eq!(variant.as_decimal_int64(), Some((12345678901_i64, 2))); + assert_eq!(variant.as_decimal8(), Some(decimal8)); let decimal16 = VariantDecimal16::try_new(123456789012345678901234567890_i128, 2).unwrap(); let variant = Variant::from(decimal16); - assert_eq!( - variant.as_decimal_int128(), - Some((123456789012345678901234567890_i128, 2)) - ); + assert_eq!(variant.as_decimal16(), Some(decimal16)); } } diff --git a/parquet-variant/src/variant/decimal.rs b/parquet-variant/src/variant/decimal.rs index c92fd1df8293..9469bc01aa24 100644 --- a/parquet-variant/src/variant/decimal.rs +++ b/parquet-variant/src/variant/decimal.rs @@ -17,13 +17,38 @@ use arrow_schema::ArrowError; use std::fmt; -// Macro to format decimal values, using only integer arithmetic to avoid floating point precision loss +// All decimal types use the same try_new implementation +macro_rules! decimal_try_new { + ($integer:ident, $scale:ident) => {{ + // Validate that scale doesn't exceed precision + if $scale > Self::MAX_PRECISION { + return Err(ArrowError::InvalidArgumentError(format!( + "Scale {} is larger than max precision {}", + $scale, + Self::MAX_PRECISION, + ))); + } + + // Validate that the integer value fits within the precision + if $integer.unsigned_abs() > Self::MAX_UNSCALED_VALUE { + return Err(ArrowError::InvalidArgumentError(format!( + "{} is wider than max precision {}", + $integer, + Self::MAX_PRECISION + ))); + } + + Ok(Self { $integer, $scale }) + }}; +} + +// All decimal values format the same way, using integer arithmetic to avoid floating point precision loss macro_rules! format_decimal { ($f:expr, $integer:expr, $scale:expr, $int_type:ty) => {{ let integer = if $scale == 0 { $integer } else { - let divisor = (10 as $int_type).pow($scale as u32); + let divisor = <$int_type>::pow(10, $scale as u32); let remainder = $integer % divisor; if remainder != 0 { // Track the sign explicitly, in case the quotient is zero @@ -55,29 +80,11 @@ pub struct VariantDecimal4 { } impl VariantDecimal4 { - const MAX_PRECISION: u32 = 9; - const MAX_UNSCALED_VALUE: u32 = 10_u32.pow(Self::MAX_PRECISION) - 1; + const MAX_PRECISION: u8 = 9; + const MAX_UNSCALED_VALUE: u32 = u32::pow(10, Self::MAX_PRECISION as u32) - 1; pub fn try_new(integer: i32, scale: u8) -> Result { - // Validate that scale doesn't exceed precision - if scale as u32 > Self::MAX_PRECISION { - return Err(ArrowError::InvalidArgumentError(format!( - "Scale {} of a 4-byte decimal cannot exceed the max precision {}", - scale, - Self::MAX_PRECISION, - ))); - } - - // Validate that the integer value fits within the precision - if integer.unsigned_abs() > Self::MAX_UNSCALED_VALUE { - return Err(ArrowError::InvalidArgumentError(format!( - "{} is too large to store in a 4-byte decimal with max precision {}", - integer, - Self::MAX_PRECISION - ))); - } - - Ok(VariantDecimal4 { integer, scale }) + decimal_try_new!(integer, scale) } } @@ -103,29 +110,11 @@ pub struct VariantDecimal8 { } impl VariantDecimal8 { - const MAX_PRECISION: u32 = 18; - const MAX_UNSCALED_VALUE: u64 = 10_u64.pow(Self::MAX_PRECISION) - 1; + const MAX_PRECISION: u8 = 18; + const MAX_UNSCALED_VALUE: u64 = u64::pow(10, Self::MAX_PRECISION as u32) - 1; pub fn try_new(integer: i64, scale: u8) -> Result { - // Validate that scale doesn't exceed precision - if scale as u32 > Self::MAX_PRECISION { - return Err(ArrowError::InvalidArgumentError(format!( - "Scale {} of an 8-byte decimal cannot exceed the max precision {}", - scale, - Self::MAX_PRECISION, - ))); - } - - // Validate that the integer value fits within the precision - if integer.unsigned_abs() > Self::MAX_UNSCALED_VALUE { - return Err(ArrowError::InvalidArgumentError(format!( - "{} is too large to store in an 8-byte decimal with max precision {}", - integer, - Self::MAX_PRECISION - ))); - } - - Ok(VariantDecimal8 { integer, scale }) + decimal_try_new!(integer, scale) } } @@ -151,29 +140,11 @@ pub struct VariantDecimal16 { } impl VariantDecimal16 { - const MAX_PRECISION: u32 = 38; - const MAX_UNSCALED_VALUE: u128 = 10_u128.pow(Self::MAX_PRECISION) - 1; + const MAX_PRECISION: u8 = 38; + const MAX_UNSCALED_VALUE: u128 = u128::pow(10, Self::MAX_PRECISION as u32) - 1; pub fn try_new(integer: i128, scale: u8) -> Result { - // Validate that scale doesn't exceed precision - if scale as u32 > Self::MAX_PRECISION { - return Err(ArrowError::InvalidArgumentError(format!( - "Scale {} of a 16-byte decimal cannot exceed the max precision {}", - scale, - Self::MAX_PRECISION, - ))); - } - - // Validate that the integer value fits within the precision - if integer.unsigned_abs() > Self::MAX_UNSCALED_VALUE { - return Err(ArrowError::InvalidArgumentError(format!( - "{} is too large to store in a 16-byte decimal with max precision {}", - integer, - Self::MAX_PRECISION - ))); - } - - Ok(VariantDecimal16 { integer, scale }) + decimal_try_new!(integer, scale) } } @@ -183,6 +154,65 @@ impl fmt::Display for VariantDecimal16 { } } +// Infallible conversion from a narrower decimal type to a wider one +macro_rules! impl_from_decimal_for_decimal { + ($from_ty:ty, $for_ty:ty) => { + impl From<$from_ty> for $for_ty { + fn from(decimal: $from_ty) -> Self { + Self { + integer: decimal.integer.into(), + scale: decimal.scale, + } + } + } + }; +} + +impl_from_decimal_for_decimal!(VariantDecimal4, VariantDecimal8); +impl_from_decimal_for_decimal!(VariantDecimal4, VariantDecimal16); +impl_from_decimal_for_decimal!(VariantDecimal8, VariantDecimal16); + +// Fallible conversion from a wider decimal type to a narrower one +macro_rules! impl_try_from_decimal_for_decimal { + ($from_ty:ty, $for_ty:ty) => { + impl TryFrom<$from_ty> for $for_ty { + type Error = ArrowError; + + fn try_from(decimal: $from_ty) -> Result { + let Ok(integer) = decimal.integer.try_into() else { + return Err(ArrowError::InvalidArgumentError(format!( + "Value {} is wider than max precision {}", + decimal.integer, + Self::MAX_PRECISION + ))); + }; + Self::try_new(integer, decimal.scale) + } + } + }; +} + +impl_try_from_decimal_for_decimal!(VariantDecimal8, VariantDecimal4); +impl_try_from_decimal_for_decimal!(VariantDecimal16, VariantDecimal4); +impl_try_from_decimal_for_decimal!(VariantDecimal16, VariantDecimal8); + +// Fallible conversion from a decimal's underlying integer type +macro_rules! impl_try_from_int_for_decimal { + ($from_ty:ty, $for_ty:ty) => { + impl TryFrom<$from_ty> for $for_ty { + type Error = ArrowError; + + fn try_from(integer: $from_ty) -> Result { + Self::try_new(integer, 0) + } + } + }; +} + +impl_try_from_int_for_decimal!(i32, VariantDecimal4); +impl_try_from_int_for_decimal!(i64, VariantDecimal8); +impl_try_from_int_for_decimal!(i128, VariantDecimal16); + #[cfg(test)] mod tests { use super::*; @@ -198,7 +228,7 @@ mod tests { assert!(decimal4_too_large .unwrap_err() .to_string() - .contains("too large")); + .contains("wider than max precision")); let decimal4_too_small = VariantDecimal4::try_new(-1_000_000_000_i32, 2); assert!( @@ -208,7 +238,7 @@ mod tests { assert!(decimal4_too_small .unwrap_err() .to_string() - .contains("too large")); + .contains("wider than max precision")); // Test valid edge cases for Decimal4 let decimal4_max_valid = VariantDecimal4::try_new(999_999_999_i32, 2); @@ -232,7 +262,7 @@ mod tests { assert!(decimal8_too_large .unwrap_err() .to_string() - .contains("too large")); + .contains("wider than max precision")); let decimal8_too_small = VariantDecimal8::try_new(-1_000_000_000_000_000_000_i64, 2); assert!( @@ -242,7 +272,7 @@ mod tests { assert!(decimal8_too_small .unwrap_err() .to_string() - .contains("too large")); + .contains("wider than max precision")); // Test valid edge cases for Decimal8 let decimal8_max_valid = VariantDecimal8::try_new(999_999_999_999_999_999_i64, 2); @@ -267,7 +297,7 @@ mod tests { assert!(decimal16_too_large .unwrap_err() .to_string() - .contains("too large")); + .contains("wider than max precision")); let decimal16_too_small = VariantDecimal16::try_new(-100000000000000000000000000000000000000_i128, 2); @@ -278,7 +308,7 @@ mod tests { assert!(decimal16_too_small .unwrap_err() .to_string() - .contains("too large")); + .contains("wider than max precision")); // Test valid edge cases for Decimal16 let decimal16_max_valid = @@ -307,7 +337,7 @@ mod tests { assert!(decimal4_invalid_scale .unwrap_err() .to_string() - .contains("cannot exceed the max precision")); + .contains("larger than max precision")); let decimal4_invalid_scale_large = VariantDecimal4::try_new(123_i32, 20); assert!( @@ -331,7 +361,7 @@ mod tests { assert!(decimal8_invalid_scale .unwrap_err() .to_string() - .contains("cannot exceed the max precision")); + .contains("larger than max precision")); let decimal8_invalid_scale_large = VariantDecimal8::try_new(123_i64, 25); assert!( @@ -355,7 +385,7 @@ mod tests { assert!(decimal16_invalid_scale .unwrap_err() .to_string() - .contains("cannot exceed the max precision")); + .contains("larger than max precision")); let decimal16_invalid_scale_large = VariantDecimal16::try_new(123_i128, 50); assert!( From b743de3dce4b0637aff077c5656c5ed4bc82d8b7 Mon Sep 17 00:00:00 2001 From: Ryan Johnson Date: Fri, 27 Jun 2025 12:44:01 -0700 Subject: [PATCH 2/2] fix doctests --- parquet-variant/src/variant.rs | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/parquet-variant/src/variant.rs b/parquet-variant/src/variant.rs index 89ad85298c9f..c5f16e7cc3d2 100644 --- a/parquet-variant/src/variant.rs +++ b/parquet-variant/src/variant.rs @@ -658,19 +658,19 @@ impl<'m, 'v> Variant<'m, 'v> { /// /// // you can extract decimal parts from smaller or equally-sized decimal variants /// let v1 = Variant::from(VariantDecimal4::try_new(1234_i32, 2).unwrap()); - /// assert_eq!(v1.as_decimal_int32(), Some((1234_i32, 2))); + /// assert_eq!(v1.as_decimal4(), VariantDecimal4::try_new(1234_i32, 2).ok()); /// /// // and from larger decimal variants if they fit /// let v2 = Variant::from(VariantDecimal8::try_new(1234_i64, 2).unwrap()); - /// assert_eq!(v2.as_decimal_int32(), Some((1234_i32, 2))); + /// assert_eq!(v2.as_decimal4(), VariantDecimal4::try_new(1234_i32, 2).ok()); /// /// // but not if the value would overflow i32 /// let v3 = Variant::from(VariantDecimal8::try_new(12345678901i64, 2).unwrap()); - /// assert_eq!(v3.as_decimal_int32(), None); + /// assert_eq!(v3.as_decimal4(), None); /// /// // or if the variant is not a decimal /// let v4 = Variant::from("hello!"); - /// assert_eq!(v4.as_decimal_int32(), None); + /// assert_eq!(v4.as_decimal4(), None); /// ``` pub fn as_decimal4(&self) -> Option { match *self { @@ -694,23 +694,23 @@ impl<'m, 'v> Variant<'m, 'v> { /// # Examples /// /// ``` - /// use parquet_variant::{Variant, VariantDecimal8, VariantDecimal16}; + /// use parquet_variant::{Variant, VariantDecimal4, VariantDecimal8, VariantDecimal16}; /// /// // you can extract decimal parts from smaller or equally-sized decimal variants - /// let v1 = Variant::from(VariantDecimal8::try_new(1234_i64, 2).unwrap()); - /// assert_eq!(v1.as_decimal_int64(), Some((1234_i64, 2))); + /// let v1 = Variant::from(VariantDecimal4::try_new(1234_i32, 2).unwrap()); + /// assert_eq!(v1.as_decimal8(), VariantDecimal8::try_new(1234_i64, 2).ok()); /// /// // and from larger decimal variants if they fit /// let v2 = Variant::from(VariantDecimal16::try_new(1234_i128, 2).unwrap()); - /// assert_eq!(v2.as_decimal_int64(), Some((1234_i64, 2))); + /// assert_eq!(v2.as_decimal8(), VariantDecimal8::try_new(1234_i64, 2).ok()); /// /// // but not if the value would overflow i64 /// let v3 = Variant::from(VariantDecimal16::try_new(2e19 as i128, 2).unwrap()); - /// assert_eq!(v3.as_decimal_int64(), None); + /// assert_eq!(v3.as_decimal8(), None); /// /// // or if the variant is not a decimal /// let v4 = Variant::from("hello!"); - /// assert_eq!(v4.as_decimal_int64(), None); + /// assert_eq!(v4.as_decimal8(), None); /// ``` pub fn as_decimal8(&self) -> Option { match *self { @@ -734,15 +734,15 @@ impl<'m, 'v> Variant<'m, 'v> { /// # Examples /// /// ``` - /// use parquet_variant::{Variant, VariantDecimal16}; + /// use parquet_variant::{Variant, VariantDecimal16, VariantDecimal4}; /// /// // you can extract decimal parts from smaller or equally-sized decimal variants - /// let v1 = Variant::from(VariantDecimal16::try_new(1234_i128, 2).unwrap()); - /// assert_eq!(v1.as_decimal_int128(), Some((1234_i128, 2))); + /// let v1 = Variant::from(VariantDecimal4::try_new(1234_i32, 2).unwrap()); + /// assert_eq!(v1.as_decimal16(), VariantDecimal16::try_new(1234_i128, 2).ok()); /// /// // but not if the variant is not a decimal /// let v2 = Variant::from("hello!"); - /// assert_eq!(v2.as_decimal_int128(), None); + /// assert_eq!(v2.as_decimal16(), None); /// ``` pub fn as_decimal16(&self) -> Option { match *self {