From d7a5f2cde91af23f24dd98e6b46f41a8ded91b5f Mon Sep 17 00:00:00 2001 From: Mark Nash Date: Thu, 17 Jul 2025 12:52:40 -0700 Subject: [PATCH 1/3] Removing validation check from partial equals for variants --- parquet-variant/src/variant/list.rs | 18 ++++++++++++++++++ parquet-variant/src/variant/metadata.rs | 4 +--- parquet-variant/src/variant/object.rs | 22 ++++++++++++++++++++-- 3 files changed, 39 insertions(+), 5 deletions(-) diff --git a/parquet-variant/src/variant/list.rs b/parquet-variant/src/variant/list.rs index 6de6ed830720..334e8dfff9d2 100644 --- a/parquet-variant/src/variant/list.rs +++ b/parquet-variant/src/variant/list.rs @@ -627,4 +627,22 @@ mod tests { assert_eq!(expected_list.get(i).unwrap(), item_str); } } + + #[test] + fn items_are_equal_even_if_not_validated() { + use crate::Variant; + use crate::VariantBuilder; + + let mut builder = VariantBuilder::new(); + + let mut list = builder.new_list(); + list.append_value("hello2"); + list.finish(); + + let (metadata, value) = builder.finish(); + + let variant1 = Variant::new(&metadata, &value); + let variant2 = Variant::new(&metadata, &value).with_full_validation().unwrap(); + assert_eq!(variant1, variant2) + } } diff --git a/parquet-variant/src/variant/metadata.rs b/parquet-variant/src/variant/metadata.rs index f957ebb6f15b..eb2dc8de045d 100644 --- a/parquet-variant/src/variant/metadata.rs +++ b/parquet-variant/src/variant/metadata.rs @@ -356,9 +356,7 @@ impl<'m> VariantMetadata<'m> { impl<'m> PartialEq for VariantMetadata<'m> { fn eq(&self, other: &Self) -> bool { let is_equal = self.is_empty() == other.is_empty() - && self.is_fully_validated() == other.is_fully_validated() - && self.first_value_byte == other.first_value_byte - && self.validated == other.validated; + && self.first_value_byte == other.first_value_byte; let other_field_names: HashSet<&'m str> = HashSet::from_iter(other.iter()); diff --git a/parquet-variant/src/variant/object.rs b/parquet-variant/src/variant/object.rs index bce2ffc876b5..eea2c406ff3b 100644 --- a/parquet-variant/src/variant/object.rs +++ b/parquet-variant/src/variant/object.rs @@ -416,8 +416,7 @@ impl<'m, 'v> PartialEq for VariantObject<'m, 'v> { && self.header == other.header && self.num_elements == other.num_elements && self.first_field_offset_byte == other.first_field_offset_byte - && self.first_value_byte == other.first_value_byte - && self.validated == other.validated; + && self.first_value_byte == other.first_value_byte; // value validation let other_fields: HashMap<&str, Variant> = HashMap::from_iter(other.iter()); @@ -949,4 +948,23 @@ mod tests { // objects are still logically equal assert_eq!(v1, v2); } + + #[test] + fn items_are_equal_even_if_not_validated() { + use crate::Variant; + use crate::VariantBuilder; + + let mut builder = VariantBuilder::new(); + let mut obj = builder.new_object(); + obj.insert("field1", "hello"); + obj.insert("field2", 34); + obj.insert("field3", true); + obj.finish().unwrap(); + + let (metadata, value) = builder.finish(); + + let variant1 = Variant::new(&metadata, &value); + let variant2 = Variant::new(&metadata, &value).with_full_validation().unwrap(); + assert_eq!(variant1, variant2) + } } From 193a13654498415089898f4ce1b36359fb0194a2 Mon Sep 17 00:00:00 2001 From: Mark Nash Date: Thu, 17 Jul 2025 13:05:05 -0700 Subject: [PATCH 2/3] Removing partial equals for variant list validated field --- parquet-variant/src/variant/list.rs | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/parquet-variant/src/variant/list.rs b/parquet-variant/src/variant/list.rs index 334e8dfff9d2..5007c1b94b3f 100644 --- a/parquet-variant/src/variant/list.rs +++ b/parquet-variant/src/variant/list.rs @@ -117,7 +117,7 @@ impl VariantListHeader { /// /// [valid]: VariantMetadata#Validation /// [Variant spec]: https://github.com/apache/parquet-format/blob/master/VariantEncoding.md#value-data-for-array-basic_type3 -#[derive(Debug, Clone, PartialEq)] +#[derive(Debug, Clone)] pub struct VariantList<'m, 'v> { pub metadata: VariantMetadata<'m>, pub value: &'v [u8], @@ -302,6 +302,17 @@ impl<'m, 'v> VariantList<'m, 'v> { } } + +impl<'m, 'v> PartialEq for VariantList<'m, 'v> { + fn eq(&self, other: &Self) -> bool { + self.metadata == other.metadata && + self.value == other.value && + self.header == other.header && + self.num_elements == other.num_elements && + self.first_value_byte == other.first_value_byte + } +} + #[cfg(test)] mod tests { use super::*; From 6517e6bfffb0086fee995e94dc6f34286ef374a0 Mon Sep 17 00:00:00 2001 From: Mark Nash Date: Thu, 17 Jul 2025 13:13:48 -0700 Subject: [PATCH 3/3] cargo fmt --- parquet-variant/src/variant/list.rs | 15 ++++++++------- parquet-variant/src/variant/metadata.rs | 15 +++++++++++++-- parquet-variant/src/variant/object.rs | 4 +++- 3 files changed, 24 insertions(+), 10 deletions(-) diff --git a/parquet-variant/src/variant/list.rs b/parquet-variant/src/variant/list.rs index 5007c1b94b3f..ca15932347c0 100644 --- a/parquet-variant/src/variant/list.rs +++ b/parquet-variant/src/variant/list.rs @@ -302,14 +302,13 @@ impl<'m, 'v> VariantList<'m, 'v> { } } - impl<'m, 'v> PartialEq for VariantList<'m, 'v> { fn eq(&self, other: &Self) -> bool { - self.metadata == other.metadata && - self.value == other.value && - self.header == other.header && - self.num_elements == other.num_elements && - self.first_value_byte == other.first_value_byte + self.metadata == other.metadata + && self.value == other.value + && self.header == other.header + && self.num_elements == other.num_elements + && self.first_value_byte == other.first_value_byte } } @@ -653,7 +652,9 @@ mod tests { let (metadata, value) = builder.finish(); let variant1 = Variant::new(&metadata, &value); - let variant2 = Variant::new(&metadata, &value).with_full_validation().unwrap(); + let variant2 = Variant::new(&metadata, &value) + .with_full_validation() + .unwrap(); assert_eq!(variant1, variant2) } } diff --git a/parquet-variant/src/variant/metadata.rs b/parquet-variant/src/variant/metadata.rs index eb2dc8de045d..6ae9eb8dbd2d 100644 --- a/parquet-variant/src/variant/metadata.rs +++ b/parquet-variant/src/variant/metadata.rs @@ -355,8 +355,8 @@ impl<'m> VariantMetadata<'m> { // checks whether the dictionary entries are equal -- regardless of their sorting order impl<'m> PartialEq for VariantMetadata<'m> { fn eq(&self, other: &Self) -> bool { - let is_equal = self.is_empty() == other.is_empty() - && self.first_value_byte == other.first_value_byte; + let is_equal = + self.is_empty() == other.is_empty() && self.first_value_byte == other.first_value_byte; let other_field_names: HashSet<&'m str> = HashSet::from_iter(other.iter()); @@ -529,4 +529,15 @@ mod tests { "unexpected error: {err:?}" ); } + + #[test] + fn metadata_is_equal() { + let metadata_bytes = vec![0x11, 0, 0]; + let metadata1 = VariantMetadata::try_new(&metadata_bytes).unwrap(); + let metadata2 = VariantMetadata::try_new(&metadata_bytes) + .unwrap() + .with_full_validation() + .unwrap(); + assert_eq!(metadata1, metadata2) + } } diff --git a/parquet-variant/src/variant/object.rs b/parquet-variant/src/variant/object.rs index eea2c406ff3b..d8292f5e007b 100644 --- a/parquet-variant/src/variant/object.rs +++ b/parquet-variant/src/variant/object.rs @@ -964,7 +964,9 @@ mod tests { let (metadata, value) = builder.finish(); let variant1 = Variant::new(&metadata, &value); - let variant2 = Variant::new(&metadata, &value).with_full_validation().unwrap(); + let variant2 = Variant::new(&metadata, &value) + .with_full_validation() + .unwrap(); assert_eq!(variant1, variant2) } }