From ecd2dc46dd27b17bee5a10269d4c16112bc4afeb Mon Sep 17 00:00:00 2001 From: sdf-jkl Date: Thu, 9 Apr 2026 16:52:58 -0400 Subject: [PATCH 1/6] Add the API --- .../src/variant_array_builder.rs | 37 +++++++++++++++++-- 1 file changed, 34 insertions(+), 3 deletions(-) diff --git a/parquet-variant-compute/src/variant_array_builder.rs b/parquet-variant-compute/src/variant_array_builder.rs index 86ece0010042..a5879591d319 100644 --- a/parquet-variant-compute/src/variant_array_builder.rs +++ b/parquet-variant-compute/src/variant_array_builder.rs @@ -150,10 +150,23 @@ impl VariantArrayBuilder { /// Appends a null row to the builder. pub fn append_null(&mut self) { - self.nulls.append_null(); + self.append_nulls(1); + } + + /// Appends `count` null rows to the builder. + pub fn append_nulls(&mut self, count: usize) { + if count == 0 { + return; + } + + self.nulls.append_n_nulls(count); // The subfields are expected to be non-nullable according to the parquet variant spec. - self.metadata_offsets.push(self.metadata_builder.offset()); - self.value_offsets.push(self.value_builder.offset()); + let metadata_offset = self.metadata_builder.offset(); + let value_offset = self.value_builder.offset(); + self.metadata_offsets + .resize(self.metadata_offsets.len() + count, metadata_offset); + self.value_offsets + .resize(self.value_offsets.len() + count, value_offset); } /// Append the [`Variant`] to the builder as the next row @@ -526,6 +539,24 @@ mod test { assert_eq!(list.len(), 2); } + #[test] + fn test_variant_array_builder_append_nulls() { + let mut builder = VariantArrayBuilder::new(6); + builder.append_variant(Variant::from(1i32)); + builder.append_nulls(0); // should be a no-op + builder.append_nulls(3); + builder.append_variant(Variant::from(2i32)); + + let variant_array = builder.build(); + + assert_eq!(variant_array.len(), 5); + assert_eq!(variant_array.value(0), Variant::from(1i32)); + assert!(variant_array.is_null(1)); + assert!(variant_array.is_null(2)); + assert!(variant_array.is_null(3)); + assert_eq!(variant_array.value(4), Variant::from(2i32)); + } + #[test] fn test_extend_variant_array_builder() { let mut b = VariantArrayBuilder::new(3); From f8d3a14b91dce2fb77d71866b7ed7b8042243f88 Mon Sep 17 00:00:00 2001 From: sdf-jkl Date: Thu, 9 Apr 2026 16:58:29 -0400 Subject: [PATCH 2/6] Add fast paths --- parquet-variant-compute/src/cast_to_variant.rs | 7 +++++++ parquet-variant-compute/src/from_json.rs | 17 +++++++++++++---- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/parquet-variant-compute/src/cast_to_variant.rs b/parquet-variant-compute/src/cast_to_variant.rs index b6c968b0678d..1b26ffe07d2a 100644 --- a/parquet-variant-compute/src/cast_to_variant.rs +++ b/parquet-variant-compute/src/cast_to_variant.rs @@ -58,6 +58,13 @@ pub fn cast_to_variant_with_options( input: &dyn Array, options: &CastOptions, ) -> Result { + // Fast path: any all-null input maps to an all-null VariantArray. + if input.null_count() == input.len() { + let mut array_builder = VariantArrayBuilder::new(input.len()); + array_builder.append_nulls(input.len()); + return Ok(array_builder.build()); + } + // Create row builder for the input array type let mut row_builder = make_arrow_to_variant_row_builder(input.data_type(), input, options)?; diff --git a/parquet-variant-compute/src/from_json.rs b/parquet-variant-compute/src/from_json.rs index 0983147132a2..0ca938ac0a24 100644 --- a/parquet-variant-compute/src/from_json.rs +++ b/parquet-variant-compute/src/from_json.rs @@ -26,12 +26,21 @@ use parquet_variant_json::JsonToVariant; /// Macro to convert string array to variant array macro_rules! string_array_to_variant { ($input:expr, $array:expr, $builder:expr) => {{ - for i in 0..$input.len() { - if $input.is_null(i) { - $builder.append_null(); - } else { + let len = $input.len(); + let mut i = 0; + while i < len { + if !$input.is_null(i) { $builder.append_json($array.value(i))?; + i += 1; + continue; } + + let start = i; + i += 1; + while i < len && $input.is_null(i) { + i += 1; + } + $builder.append_nulls(i - start); } }}; } From b04e39e78de7162d0a4dca5be0b85b127c12433a Mon Sep 17 00:00:00 2001 From: sdf-jkl Date: Thu, 9 Apr 2026 17:15:57 -0400 Subject: [PATCH 3/6] Keep old append_null --- parquet-variant-compute/src/variant_array_builder.rs | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/parquet-variant-compute/src/variant_array_builder.rs b/parquet-variant-compute/src/variant_array_builder.rs index a5879591d319..efc9885a044f 100644 --- a/parquet-variant-compute/src/variant_array_builder.rs +++ b/parquet-variant-compute/src/variant_array_builder.rs @@ -150,7 +150,10 @@ impl VariantArrayBuilder { /// Appends a null row to the builder. pub fn append_null(&mut self) { - self.append_nulls(1); + self.nulls.append_null(); + // The subfields are expected to be non-nullable according to the parquet variant spec. + self.metadata_offsets.push(self.metadata_builder.offset()); + self.value_offsets.push(self.value_builder.offset()); } /// Appends `count` null rows to the builder. @@ -158,6 +161,10 @@ impl VariantArrayBuilder { if count == 0 { return; } + if count == 1 { + self.append_null(); + return; + } self.nulls.append_n_nulls(count); // The subfields are expected to be non-nullable according to the parquet variant spec. From c437cf3d3b79c1912082b943e8763fcf8b3a100c Mon Sep 17 00:00:00 2001 From: sdf-jkl Date: Fri, 10 Apr 2026 09:32:20 -0400 Subject: [PATCH 4/6] fix nit --- parquet-variant-compute/src/from_json.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parquet-variant-compute/src/from_json.rs b/parquet-variant-compute/src/from_json.rs index 0ca938ac0a24..619d860ac3c2 100644 --- a/parquet-variant-compute/src/from_json.rs +++ b/parquet-variant-compute/src/from_json.rs @@ -29,7 +29,7 @@ macro_rules! string_array_to_variant { let len = $input.len(); let mut i = 0; while i < len { - if !$input.is_null(i) { + if $input.is_valid(i) { $builder.append_json($array.value(i))?; i += 1; continue; From 55d3d3889654974683e5b1b4fa096bd511e742ca Mon Sep 17 00:00:00 2001 From: sdf-jkl Date: Fri, 10 Apr 2026 11:48:46 -0400 Subject: [PATCH 5/6] match other builders' append_nulls --- .../src/variant_array_builder.rs | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/parquet-variant-compute/src/variant_array_builder.rs b/parquet-variant-compute/src/variant_array_builder.rs index efc9885a044f..f6b95c881af4 100644 --- a/parquet-variant-compute/src/variant_array_builder.rs +++ b/parquet-variant-compute/src/variant_array_builder.rs @@ -156,24 +156,16 @@ impl VariantArrayBuilder { self.value_offsets.push(self.value_builder.offset()); } - /// Appends `count` null rows to the builder. - pub fn append_nulls(&mut self, count: usize) { - if count == 0 { - return; - } - if count == 1 { - self.append_null(); - return; - } - - self.nulls.append_n_nulls(count); + /// Appends `n` null rows to the builder. + pub fn append_nulls(&mut self, n: usize) { + self.nulls.append_n_nulls(n); // The subfields are expected to be non-nullable according to the parquet variant spec. let metadata_offset = self.metadata_builder.offset(); let value_offset = self.value_builder.offset(); self.metadata_offsets - .resize(self.metadata_offsets.len() + count, metadata_offset); + .extend(std::iter::repeat_n(metadata_offset, n)); self.value_offsets - .resize(self.value_offsets.len() + count, value_offset); + .extend(std::iter::repeat_n(value_offset, n)); } /// Append the [`Variant`] to the builder as the next row From 9356c61fcc9a29363eb89d866346940762968768 Mon Sep 17 00:00:00 2001 From: sdf-jkl Date: Fri, 10 Apr 2026 12:48:08 -0400 Subject: [PATCH 6/6] Remove batching nulls logic from `string_array_to_variant` --- parquet-variant-compute/src/from_json.rs | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/parquet-variant-compute/src/from_json.rs b/parquet-variant-compute/src/from_json.rs index 619d860ac3c2..0983147132a2 100644 --- a/parquet-variant-compute/src/from_json.rs +++ b/parquet-variant-compute/src/from_json.rs @@ -26,21 +26,12 @@ use parquet_variant_json::JsonToVariant; /// Macro to convert string array to variant array macro_rules! string_array_to_variant { ($input:expr, $array:expr, $builder:expr) => {{ - let len = $input.len(); - let mut i = 0; - while i < len { - if $input.is_valid(i) { + for i in 0..$input.len() { + if $input.is_null(i) { + $builder.append_null(); + } else { $builder.append_json($array.value(i))?; - i += 1; - continue; } - - let start = i; - i += 1; - while i < len && $input.is_null(i) { - i += 1; - } - $builder.append_nulls(i - start); } }}; }