From 993402fcde114fe77ffcfad5e0f3f828569fc810 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Sat, 13 Dec 2025 15:35:13 +0100 Subject: [PATCH 1/7] Add benchmarks for Utf8View scalars for zip --- arrow/benches/zip_kernels.rs | 155 ++++++++++++++++++++++++++++++++++- 1 file changed, 153 insertions(+), 2 deletions(-) diff --git a/arrow/benches/zip_kernels.rs b/arrow/benches/zip_kernels.rs index 31cbca639717..488918fd5831 100644 --- a/arrow/benches/zip_kernels.rs +++ b/arrow/benches/zip_kernels.rs @@ -133,6 +133,34 @@ where } } +struct GenerateStringView { + str_len: usize, + description: String, + _marker: std::marker::PhantomData, +} + +impl InputGenerator for GenerateStringView { + fn name(&self) -> &str { + self.description.as_str() + } + fn generate_scalar_with_null_value(&self) -> ArrayRef { + new_null_array(&DataType::Utf8View, 1) + } + + fn generate_non_null_scalars(&self, seed: u64, number_of_scalars: usize) -> Vec { + let array = self.generate_array(seed, number_of_scalars, 0.0); + (0..number_of_scalars).map(|i| array.slice(i, 1)).collect() + } + + fn generate_array(&self, _seed: u64, array_length: usize, null_percentage: f32) -> ArrayRef { + Arc::new(create_string_view_array_with_fixed_len( + array_length, + null_percentage, + self.str_len, + )) + } +} + fn mask_cases(len: usize) -> Vec<(&'static str, BooleanArray)> { vec![ ("all_true", create_boolean_array(len, 0.0, 1.0)), @@ -145,10 +173,9 @@ fn mask_cases(len: usize) -> Vec<(&'static str, BooleanArray)> { ("50pct_nulls", create_boolean_array(len, 0.5, 0.5)), ] } +const ARRAY_LEN: usize = 8192; fn bench_zip_on_input_generator(c: &mut Criterion, input_generator: &impl InputGenerator) { - const ARRAY_LEN: usize = 8192; - let mut group = c.benchmark_group(format!("zip_{ARRAY_LEN}_from_{}", input_generator.name()).as_str()); @@ -224,6 +251,61 @@ fn bench_zip_input_on_all_masks( } } +fn bench_zip_on_input_generator_for_scalars( + c: &mut Criterion, + input_generator: &impl InputGenerator, +) { + bench_zip_on_input_generators_for_scalars(c, input_generator, input_generator); +} + +fn bench_zip_on_input_generators_for_scalars( + c: &mut Criterion, + input_generator_1: &impl InputGenerator, + input_generator_2: &impl InputGenerator, +) { + let mut group = c.benchmark_group( + format!( + "zip_{ARRAY_LEN}_from_{} and {}", + input_generator_1.name(), + input_generator_2.name() + ) + .as_str(), + ); + + let null_scalar = input_generator_1.generate_scalar_with_null_value(); + + let [non_null_scalar_1]: [_; 1] = input_generator_1 + .generate_non_null_scalars(42, 1) + .try_into() + .unwrap(); + + let [non_null_scalar_2]: [_; 1] = input_generator_2 + .generate_non_null_scalars(18, 1) + .try_into() + .unwrap(); + + let masks = mask_cases(ARRAY_LEN); + + for (description, truthy, falsy) in &[ + ("null_vs_non_null_scalar", &null_scalar, &non_null_scalar_1), + ( + "non_null_scalar_vs_null_scalar", + &non_null_scalar_1, + &null_scalar, + ), + ("non_nulls_scalars", &non_null_scalar_1, &non_null_scalar_2), + ] { + bench_zip_input_on_all_masks( + description, + &mut group, + &masks, + &Scalar::new(truthy), + &Scalar::new(falsy), + ); + } + group.finish(); +} + fn add_benchmark(c: &mut Criterion) { // Primitive bench_zip_on_input_generator( @@ -273,6 +355,75 @@ fn add_benchmark(c: &mut Criterion) { _marker: std::marker::PhantomData, }, ); + + bench_zip_on_input_generator_for_scalars( + c, + &GenerateStringView { + description: "string_views size 3".to_string(), + str_len: 3, + _marker: std::marker::PhantomData, + }, + ); + + bench_zip_on_input_generator_for_scalars( + c, + &GenerateStringView { + description: "string_views size 10".to_string(), + str_len: 10, + _marker: std::marker::PhantomData, + }, + ); + + bench_zip_on_input_generator_for_scalars( + c, + &GenerateStringView { + description: "string_views size 100".to_string(), + str_len: 10, + _marker: std::marker::PhantomData, + }, + ); + + bench_zip_on_input_generators_for_scalars( + c, + &GenerateStringView { + description: "string_views size 3".to_string(), + str_len: 3, + _marker: std::marker::PhantomData, + }, + &GenerateStringView { + description: "string_views size 10".to_string(), + str_len: 10, + _marker: std::marker::PhantomData, + }, + ); + + bench_zip_on_input_generators_for_scalars( + c, + &GenerateStringView { + description: "string_views size 3".to_string(), + str_len: 3, + _marker: std::marker::PhantomData, + }, + &GenerateStringView { + description: "string_views size 100".to_string(), + str_len: 100, + _marker: std::marker::PhantomData, + }, + ); + + bench_zip_on_input_generators_for_scalars( + c, + &GenerateStringView { + description: "string_views size 10".to_string(), + str_len: 10, + _marker: std::marker::PhantomData, + }, + &GenerateStringView { + description: "string_views size 100".to_string(), + str_len: 100, + _marker: std::marker::PhantomData, + }, + ); } criterion_group!(benches, add_benchmark); From 8414bdab40fd66c41f86f6238b66f941a13b6ee3 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Mon, 15 Dec 2025 01:25:46 +0100 Subject: [PATCH 2/7] Simplify generator code --- arrow/benches/zip_kernels.rs | 91 +++++++++++++++--------------------- 1 file changed, 38 insertions(+), 53 deletions(-) diff --git a/arrow/benches/zip_kernels.rs b/arrow/benches/zip_kernels.rs index 488918fd5831..2fdf55ed5fd5 100644 --- a/arrow/benches/zip_kernels.rs +++ b/arrow/benches/zip_kernels.rs @@ -133,34 +133,6 @@ where } } -struct GenerateStringView { - str_len: usize, - description: String, - _marker: std::marker::PhantomData, -} - -impl InputGenerator for GenerateStringView { - fn name(&self) -> &str { - self.description.as_str() - } - fn generate_scalar_with_null_value(&self) -> ArrayRef { - new_null_array(&DataType::Utf8View, 1) - } - - fn generate_non_null_scalars(&self, seed: u64, number_of_scalars: usize) -> Vec { - let array = self.generate_array(seed, number_of_scalars, 0.0); - (0..number_of_scalars).map(|i| array.slice(i, 1)).collect() - } - - fn generate_array(&self, _seed: u64, array_length: usize, null_percentage: f32) -> ArrayRef { - Arc::new(create_string_view_array_with_fixed_len( - array_length, - null_percentage, - self.str_len, - )) - } -} - fn mask_cases(len: usize) -> Vec<(&'static str, BooleanArray)> { vec![ ("all_true", create_boolean_array(len, 0.0, 1.0)), @@ -251,17 +223,14 @@ fn bench_zip_input_on_all_masks( } } -fn bench_zip_on_input_generator_for_scalars( - c: &mut Criterion, - input_generator: &impl InputGenerator, -) { - bench_zip_on_input_generators_for_scalars(c, input_generator, input_generator); +fn bench_zip_on_string_view_scalar(c: &mut Criterion, input_generator: &GenerateStringView) { + bench_zip_on_string_view_scalars(c, input_generator, input_generator); } -fn bench_zip_on_input_generators_for_scalars( +fn bench_zip_on_string_view_scalars( c: &mut Criterion, - input_generator_1: &impl InputGenerator, - input_generator_2: &impl InputGenerator, + input_generator_1: &GenerateStringView, + input_generator_2: &GenerateStringView, ) { let mut group = c.benchmark_group( format!( @@ -272,17 +241,10 @@ fn bench_zip_on_input_generators_for_scalars( .as_str(), ); - let null_scalar = input_generator_1.generate_scalar_with_null_value(); + let null_scalar = input_generator_1.generate_null(); - let [non_null_scalar_1]: [_; 1] = input_generator_1 - .generate_non_null_scalars(42, 1) - .try_into() - .unwrap(); - - let [non_null_scalar_2]: [_; 1] = input_generator_2 - .generate_non_null_scalars(18, 1) - .try_into() - .unwrap(); + let non_null_scalar_1 = input_generator_1.generate(); + let non_null_scalar_2 = input_generator_2.generate(); let masks = mask_cases(ARRAY_LEN); @@ -306,6 +268,29 @@ fn bench_zip_on_input_generators_for_scalars( group.finish(); } +struct GenerateStringView { + str_len: usize, + description: String, + _marker: std::marker::PhantomData, +} + +impl GenerateStringView { + fn name(&self) -> &str { + self.description.as_str() + } + fn generate_null(&self) -> ArrayRef { + new_null_array(&DataType::Utf8View, 1) + } + + fn generate(&self) -> ArrayRef { + Arc::new(create_string_view_array_with_fixed_len( + 1, + 0.0, + self.str_len, + )) + } +} + fn add_benchmark(c: &mut Criterion) { // Primitive bench_zip_on_input_generator( @@ -356,7 +341,7 @@ fn add_benchmark(c: &mut Criterion) { }, ); - bench_zip_on_input_generator_for_scalars( + bench_zip_on_string_view_scalar( c, &GenerateStringView { description: "string_views size 3".to_string(), @@ -365,7 +350,7 @@ fn add_benchmark(c: &mut Criterion) { }, ); - bench_zip_on_input_generator_for_scalars( + bench_zip_on_string_view_scalar( c, &GenerateStringView { description: "string_views size 10".to_string(), @@ -374,16 +359,16 @@ fn add_benchmark(c: &mut Criterion) { }, ); - bench_zip_on_input_generator_for_scalars( + bench_zip_on_string_view_scalar( c, &GenerateStringView { description: "string_views size 100".to_string(), - str_len: 10, + str_len: 100, _marker: std::marker::PhantomData, }, ); - bench_zip_on_input_generators_for_scalars( + bench_zip_on_string_view_scalars( c, &GenerateStringView { description: "string_views size 3".to_string(), @@ -397,7 +382,7 @@ fn add_benchmark(c: &mut Criterion) { }, ); - bench_zip_on_input_generators_for_scalars( + bench_zip_on_string_view_scalars( c, &GenerateStringView { description: "string_views size 3".to_string(), @@ -411,7 +396,7 @@ fn add_benchmark(c: &mut Criterion) { }, ); - bench_zip_on_input_generators_for_scalars( + bench_zip_on_string_view_scalars( c, &GenerateStringView { description: "string_views size 10".to_string(), From df8499ac77eda340fc48d817e3f189ccc711ec38 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Mon, 15 Dec 2025 23:33:17 +0100 Subject: [PATCH 3/7] Return scalar instead of array from generator --- arrow/benches/zip_kernels.rs | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/arrow/benches/zip_kernels.rs b/arrow/benches/zip_kernels.rs index 2fdf55ed5fd5..8bf0acda10c1 100644 --- a/arrow/benches/zip_kernels.rs +++ b/arrow/benches/zip_kernels.rs @@ -241,14 +241,14 @@ fn bench_zip_on_string_view_scalars( .as_str(), ); - let null_scalar = input_generator_1.generate_null(); + let null_scalar = input_generator_1.generate_null_scalar(); - let non_null_scalar_1 = input_generator_1.generate(); - let non_null_scalar_2 = input_generator_2.generate(); + let non_null_scalar_1 = input_generator_1.generate_scalar(); + let non_null_scalar_2 = input_generator_2.generate_scalar(); let masks = mask_cases(ARRAY_LEN); - for (description, truthy, falsy) in &[ + for (description, truthy, falsy) in [ ("null_vs_non_null_scalar", &null_scalar, &non_null_scalar_1), ( "non_null_scalar_vs_null_scalar", @@ -257,13 +257,7 @@ fn bench_zip_on_string_view_scalars( ), ("non_nulls_scalars", &non_null_scalar_1, &non_null_scalar_2), ] { - bench_zip_input_on_all_masks( - description, - &mut group, - &masks, - &Scalar::new(truthy), - &Scalar::new(falsy), - ); + bench_zip_input_on_all_masks(description, &mut group, &masks, truthy, falsy); } group.finish(); } @@ -278,16 +272,16 @@ impl GenerateStringView { fn name(&self) -> &str { self.description.as_str() } - fn generate_null(&self) -> ArrayRef { - new_null_array(&DataType::Utf8View, 1) + fn generate_null_scalar(&self) -> Scalar { + Scalar::new(new_null_array(&DataType::Utf8View, 1)) } - fn generate(&self) -> ArrayRef { - Arc::new(create_string_view_array_with_fixed_len( + fn generate_scalar(&self) -> Scalar { + Scalar::new(Arc::new(create_string_view_array_with_fixed_len( 1, 0.0, self.str_len, - )) + ))) } } From 04ed1ce32464fb33084483502f02f7efd71025b0 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Wed, 17 Dec 2025 23:05:55 +0100 Subject: [PATCH 4/7] Follow previous zip benchmarks in design --- arrow/benches/zip_kernels.rs | 142 +++++++++-------------------------- arrow/src/util/bench_util.rs | 48 +++++++++++- 2 files changed, 81 insertions(+), 109 deletions(-) diff --git a/arrow/benches/zip_kernels.rs b/arrow/benches/zip_kernels.rs index 8bf0acda10c1..f9761e8dd51a 100644 --- a/arrow/benches/zip_kernels.rs +++ b/arrow/benches/zip_kernels.rs @@ -133,6 +133,35 @@ where } } +struct GenerateStringView { + str_len: usize, + description: String, + _marker: std::marker::PhantomData, +} + +impl InputGenerator for GenerateStringView { + fn name(&self) -> &str { + self.description.as_str() + } + fn generate_scalar_with_null_value(&self) -> ArrayRef { + new_null_array(&DataType::Utf8View, 1) + } + + fn generate_non_null_scalars(&self, seed: u64, number_of_scalars: usize) -> Vec { + let array = self.generate_array(seed, number_of_scalars, 0.0); + (0..number_of_scalars).map(|i| array.slice(i, 1)).collect() + } + + fn generate_array(&self, seed: u64, array_length: usize, null_percentage: f32) -> ArrayRef { + Arc::new(create_string_view_array_with_fixed_len_with_seed( + array_length, + null_percentage, + self.str_len, + seed, + )) + } +} + fn mask_cases(len: usize) -> Vec<(&'static str, BooleanArray)> { vec![ ("all_true", create_boolean_array(len, 0.0, 1.0)), @@ -145,9 +174,10 @@ fn mask_cases(len: usize) -> Vec<(&'static str, BooleanArray)> { ("50pct_nulls", create_boolean_array(len, 0.5, 0.5)), ] } -const ARRAY_LEN: usize = 8192; fn bench_zip_on_input_generator(c: &mut Criterion, input_generator: &impl InputGenerator) { + const ARRAY_LEN: usize = 8192; + let mut group = c.benchmark_group(format!("zip_{ARRAY_LEN}_from_{}", input_generator.name()).as_str()); @@ -223,68 +253,6 @@ fn bench_zip_input_on_all_masks( } } -fn bench_zip_on_string_view_scalar(c: &mut Criterion, input_generator: &GenerateStringView) { - bench_zip_on_string_view_scalars(c, input_generator, input_generator); -} - -fn bench_zip_on_string_view_scalars( - c: &mut Criterion, - input_generator_1: &GenerateStringView, - input_generator_2: &GenerateStringView, -) { - let mut group = c.benchmark_group( - format!( - "zip_{ARRAY_LEN}_from_{} and {}", - input_generator_1.name(), - input_generator_2.name() - ) - .as_str(), - ); - - let null_scalar = input_generator_1.generate_null_scalar(); - - let non_null_scalar_1 = input_generator_1.generate_scalar(); - let non_null_scalar_2 = input_generator_2.generate_scalar(); - - let masks = mask_cases(ARRAY_LEN); - - for (description, truthy, falsy) in [ - ("null_vs_non_null_scalar", &null_scalar, &non_null_scalar_1), - ( - "non_null_scalar_vs_null_scalar", - &non_null_scalar_1, - &null_scalar, - ), - ("non_nulls_scalars", &non_null_scalar_1, &non_null_scalar_2), - ] { - bench_zip_input_on_all_masks(description, &mut group, &masks, truthy, falsy); - } - group.finish(); -} - -struct GenerateStringView { - str_len: usize, - description: String, - _marker: std::marker::PhantomData, -} - -impl GenerateStringView { - fn name(&self) -> &str { - self.description.as_str() - } - fn generate_null_scalar(&self) -> Scalar { - Scalar::new(new_null_array(&DataType::Utf8View, 1)) - } - - fn generate_scalar(&self) -> Scalar { - Scalar::new(Arc::new(create_string_view_array_with_fixed_len( - 1, - 0.0, - self.str_len, - ))) - } -} - fn add_benchmark(c: &mut Criterion) { // Primitive bench_zip_on_input_generator( @@ -335,7 +303,7 @@ fn add_benchmark(c: &mut Criterion) { }, ); - bench_zip_on_string_view_scalar( + bench_zip_on_input_generator( c, &GenerateStringView { description: "string_views size 3".to_string(), @@ -344,31 +312,8 @@ fn add_benchmark(c: &mut Criterion) { }, ); - bench_zip_on_string_view_scalar( - c, - &GenerateStringView { - description: "string_views size 10".to_string(), - str_len: 10, - _marker: std::marker::PhantomData, - }, - ); - - bench_zip_on_string_view_scalar( - c, - &GenerateStringView { - description: "string_views size 100".to_string(), - str_len: 100, - _marker: std::marker::PhantomData, - }, - ); - - bench_zip_on_string_view_scalars( + bench_zip_on_input_generator( c, - &GenerateStringView { - description: "string_views size 3".to_string(), - str_len: 3, - _marker: std::marker::PhantomData, - }, &GenerateStringView { description: "string_views size 10".to_string(), str_len: 10, @@ -376,27 +321,8 @@ fn add_benchmark(c: &mut Criterion) { }, ); - bench_zip_on_string_view_scalars( - c, - &GenerateStringView { - description: "string_views size 3".to_string(), - str_len: 3, - _marker: std::marker::PhantomData, - }, - &GenerateStringView { - description: "string_views size 100".to_string(), - str_len: 100, - _marker: std::marker::PhantomData, - }, - ); - - bench_zip_on_string_view_scalars( + bench_zip_on_input_generator( c, - &GenerateStringView { - description: "string_views size 10".to_string(), - str_len: 10, - _marker: std::marker::PhantomData, - }, &GenerateStringView { description: "string_views size 100".to_string(), str_len: 100, diff --git a/arrow/src/util/bench_util.rs b/arrow/src/util/bench_util.rs index 9f83a50f4f8f..3441a2a2dc88 100644 --- a/arrow/src/util/bench_util.rs +++ b/arrow/src/util/bench_util.rs @@ -319,12 +319,58 @@ pub fn create_string_view_array_with_max_len( } /// Creates a random (but fixed-seeded) array of a given size, null density and length +/// +/// Arguments: +/// - `size`: number of string view array +/// - `null_density`: density of nulls in the string view array +/// - `str_len`: size of each string in the string view array pub fn create_string_view_array_with_fixed_len( size: usize, null_density: f32, str_len: usize, ) -> StringViewArray { - let rng = &mut seedable_rng(); + create_string_view_array_with_fixed_len_with_rng( + size, + null_density, + str_len, + &mut seedable_rng(), + ) +} + +/// Creates a string view array of a given size, null density and length +/// +/// Arguments: +/// - `size`: number of string view array +/// - `null_density`: density of nulls in the string view array +/// - `str_len`: size of each string in the string view array +/// - `seed`: seed for the random number generator +pub fn create_string_view_array_with_fixed_len_with_seed( + size: usize, + null_density: f32, + str_len: usize, + seed: u64, +) -> StringViewArray { + create_string_view_array_with_fixed_len_with_rng( + size, + null_density, + str_len, + &mut StdRng::seed_from_u64(seed), + ) +} + +/// Creates a string view array of a given size, null density and length +/// +/// Arguments: +/// - `size`: number of string view array +/// - `null_density`: density of nulls in the string view array +/// - `str_len`: size of each string in the string view array +/// - `rng` random number generator +fn create_string_view_array_with_fixed_len_with_rng( + size: usize, + null_density: f32, + str_len: usize, + rng: &mut StdRng, +) -> StringViewArray { (0..size) .map(|_| { if rng.random::() < null_density { From f0b722ce6b38714e4beedf4b995f36bc29bb53eb Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Thu, 25 Dec 2025 22:17:28 +0100 Subject: [PATCH 5/7] Add range for string view zip benchmarks --- arrow/benches/zip_kernels.rs | 24 ++++------- arrow/src/util/bench_util.rs | 81 +++++++++++++++--------------------- 2 files changed, 42 insertions(+), 63 deletions(-) diff --git a/arrow/benches/zip_kernels.rs b/arrow/benches/zip_kernels.rs index f9761e8dd51a..dd548f4c812a 100644 --- a/arrow/benches/zip_kernels.rs +++ b/arrow/benches/zip_kernels.rs @@ -134,7 +134,7 @@ where } struct GenerateStringView { - str_len: usize, + range_length: std::ops::Range, description: String, _marker: std::marker::PhantomData, } @@ -153,10 +153,11 @@ impl InputGenerator for GenerateStringView { } fn generate_array(&self, seed: u64, array_length: usize, null_percentage: f32) -> ArrayRef { - Arc::new(create_string_view_array_with_fixed_len_with_seed( + Arc::new(create_string_view_array_with_len_range_and_seed( array_length, null_percentage, - self.str_len, + self.range_length.start, + self.range_length.end - 1, seed, )) } @@ -306,17 +307,8 @@ fn add_benchmark(c: &mut Criterion) { bench_zip_on_input_generator( c, &GenerateStringView { - description: "string_views size 3".to_string(), - str_len: 3, - _marker: std::marker::PhantomData, - }, - ); - - bench_zip_on_input_generator( - c, - &GenerateStringView { - description: "string_views size 10".to_string(), - str_len: 10, + description: "string_views size (3..10)".to_string(), + range_length: 3..10, _marker: std::marker::PhantomData, }, ); @@ -324,8 +316,8 @@ fn add_benchmark(c: &mut Criterion) { bench_zip_on_input_generator( c, &GenerateStringView { - description: "string_views size 100".to_string(), - str_len: 100, + description: "string_views size (10..100)".to_string(), + range_length: 10..100, _marker: std::marker::PhantomData, }, ); diff --git a/arrow/src/util/bench_util.rs b/arrow/src/util/bench_util.rs index 3441a2a2dc88..fc9c13368cb3 100644 --- a/arrow/src/util/bench_util.rs +++ b/arrow/src/util/bench_util.rs @@ -208,6 +208,39 @@ pub fn create_string_array_with_len_range_and_prefix_and_seed StringViewArray { + assert!( + min_str_len <= max_str_len, + "min_str_len must be <= max_str_len" + ); + let rng = &mut StdRng::seed_from_u64(seed); + (0..size) + .map(|_| { + if rng.random::() < null_density { + None + } else { + let str_len = rng.random_range(min_str_len..max_str_len); + let value = rng.sample_iter(&Alphanumeric).take(str_len).collect(); + let value = String::from_utf8(value).unwrap(); + Some(value) + } + }) + .collect() +} fn create_string_view_array_with_len_range_and_prefix( size: usize, @@ -319,58 +352,12 @@ pub fn create_string_view_array_with_max_len( } /// Creates a random (but fixed-seeded) array of a given size, null density and length -/// -/// Arguments: -/// - `size`: number of string view array -/// - `null_density`: density of nulls in the string view array -/// - `str_len`: size of each string in the string view array pub fn create_string_view_array_with_fixed_len( size: usize, null_density: f32, str_len: usize, ) -> StringViewArray { - create_string_view_array_with_fixed_len_with_rng( - size, - null_density, - str_len, - &mut seedable_rng(), - ) -} - -/// Creates a string view array of a given size, null density and length -/// -/// Arguments: -/// - `size`: number of string view array -/// - `null_density`: density of nulls in the string view array -/// - `str_len`: size of each string in the string view array -/// - `seed`: seed for the random number generator -pub fn create_string_view_array_with_fixed_len_with_seed( - size: usize, - null_density: f32, - str_len: usize, - seed: u64, -) -> StringViewArray { - create_string_view_array_with_fixed_len_with_rng( - size, - null_density, - str_len, - &mut StdRng::seed_from_u64(seed), - ) -} - -/// Creates a string view array of a given size, null density and length -/// -/// Arguments: -/// - `size`: number of string view array -/// - `null_density`: density of nulls in the string view array -/// - `str_len`: size of each string in the string view array -/// - `rng` random number generator -fn create_string_view_array_with_fixed_len_with_rng( - size: usize, - null_density: f32, - str_len: usize, - rng: &mut StdRng, -) -> StringViewArray { + let rng = &mut seedable_rng(); (0..size) .map(|_| { if rng.random::() < null_density { From a9d02fbbfef4ef364a2a34464c54180fbd82a7b2 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Fri, 26 Dec 2025 11:44:21 +0100 Subject: [PATCH 6/7] Use range as argument --- arrow/benches/zip_kernels.rs | 9 ++++----- arrow/src/util/bench_util.rs | 12 +++--------- 2 files changed, 7 insertions(+), 14 deletions(-) diff --git a/arrow/benches/zip_kernels.rs b/arrow/benches/zip_kernels.rs index dd548f4c812a..3c975634b808 100644 --- a/arrow/benches/zip_kernels.rs +++ b/arrow/benches/zip_kernels.rs @@ -134,7 +134,7 @@ where } struct GenerateStringView { - range_length: std::ops::Range, + range: std::ops::Range, description: String, _marker: std::marker::PhantomData, } @@ -156,8 +156,7 @@ impl InputGenerator for GenerateStringView { Arc::new(create_string_view_array_with_len_range_and_seed( array_length, null_percentage, - self.range_length.start, - self.range_length.end - 1, + self.range.clone(), seed, )) } @@ -308,7 +307,7 @@ fn add_benchmark(c: &mut Criterion) { c, &GenerateStringView { description: "string_views size (3..10)".to_string(), - range_length: 3..10, + range: 3..10, _marker: std::marker::PhantomData, }, ); @@ -317,7 +316,7 @@ fn add_benchmark(c: &mut Criterion) { c, &GenerateStringView { description: "string_views size (10..100)".to_string(), - range_length: 10..100, + range: 10..100, _marker: std::marker::PhantomData, }, ); diff --git a/arrow/src/util/bench_util.rs b/arrow/src/util/bench_util.rs index fc9c13368cb3..1f1dcff9b62a 100644 --- a/arrow/src/util/bench_util.rs +++ b/arrow/src/util/bench_util.rs @@ -213,27 +213,21 @@ pub fn create_string_array_with_len_range_and_prefix_and_seed, seed: u64, ) -> StringViewArray { - assert!( - min_str_len <= max_str_len, - "min_str_len must be <= max_str_len" - ); let rng = &mut StdRng::seed_from_u64(seed); (0..size) .map(|_| { if rng.random::() < null_density { None } else { - let str_len = rng.random_range(min_str_len..max_str_len); + let str_len = rng.random_range(range.clone()); let value = rng.sample_iter(&Alphanumeric).take(str_len).collect(); let value = String::from_utf8(value).unwrap(); Some(value) From 184ccea79919b2fcef261093ef0253cc6718445c Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Fri, 26 Dec 2025 11:46:29 +0100 Subject: [PATCH 7/7] fixup! Use range as argument --- arrow/benches/zip_kernels.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arrow/benches/zip_kernels.rs b/arrow/benches/zip_kernels.rs index 3c975634b808..65f6bb280f00 100644 --- a/arrow/benches/zip_kernels.rs +++ b/arrow/benches/zip_kernels.rs @@ -21,6 +21,7 @@ use rand::distr::{Distribution, StandardUniform}; use rand::prelude::StdRng; use rand::{Rng, SeedableRng}; use std::hint; +use std::ops::Range; use std::sync::Arc; use arrow::array::*; @@ -134,7 +135,7 @@ where } struct GenerateStringView { - range: std::ops::Range, + range: Range, description: String, _marker: std::marker::PhantomData, }