diff --git a/arrow/benches/zip_kernels.rs b/arrow/benches/zip_kernels.rs index 31cbca639717..65f6bb280f00 100644 --- a/arrow/benches/zip_kernels.rs +++ b/arrow/benches/zip_kernels.rs @@ -21,6 +21,7 @@ use rand::distr::{Distribution, StandardUniform}; use rand::prelude::StdRng; use rand::{Rng, SeedableRng}; use std::hint; +use std::ops::Range; use std::sync::Arc; use arrow::array::*; @@ -133,6 +134,35 @@ where } } +struct GenerateStringView { + range: Range, + description: String, + _marker: std::marker::PhantomData, +} + +impl InputGenerator for GenerateStringView { + fn name(&self) -> &str { + self.description.as_str() + } + fn generate_scalar_with_null_value(&self) -> ArrayRef { + new_null_array(&DataType::Utf8View, 1) + } + + fn generate_non_null_scalars(&self, seed: u64, number_of_scalars: usize) -> Vec { + let array = self.generate_array(seed, number_of_scalars, 0.0); + (0..number_of_scalars).map(|i| array.slice(i, 1)).collect() + } + + fn generate_array(&self, seed: u64, array_length: usize, null_percentage: f32) -> ArrayRef { + Arc::new(create_string_view_array_with_len_range_and_seed( + array_length, + null_percentage, + self.range.clone(), + seed, + )) + } +} + fn mask_cases(len: usize) -> Vec<(&'static str, BooleanArray)> { vec![ ("all_true", create_boolean_array(len, 0.0, 1.0)), @@ -273,6 +303,24 @@ fn add_benchmark(c: &mut Criterion) { _marker: std::marker::PhantomData, }, ); + + bench_zip_on_input_generator( + c, + &GenerateStringView { + description: "string_views size (3..10)".to_string(), + range: 3..10, + _marker: std::marker::PhantomData, + }, + ); + + bench_zip_on_input_generator( + c, + &GenerateStringView { + description: "string_views size (10..100)".to_string(), + range: 10..100, + _marker: std::marker::PhantomData, + }, + ); } criterion_group!(benches, add_benchmark); diff --git a/arrow/src/util/bench_util.rs b/arrow/src/util/bench_util.rs index 9f83a50f4f8f..1f1dcff9b62a 100644 --- a/arrow/src/util/bench_util.rs +++ b/arrow/src/util/bench_util.rs @@ -208,6 +208,33 @@ pub fn create_string_array_with_len_range_and_prefix_and_seed, + seed: u64, +) -> StringViewArray { + let rng = &mut StdRng::seed_from_u64(seed); + (0..size) + .map(|_| { + if rng.random::() < null_density { + None + } else { + let str_len = rng.random_range(range.clone()); + let value = rng.sample_iter(&Alphanumeric).take(str_len).collect(); + let value = String::from_utf8(value).unwrap(); + Some(value) + } + }) + .collect() +} fn create_string_view_array_with_len_range_and_prefix( size: usize,