diff --git a/rust/arrow/benches/arithmetic_kernels.rs b/rust/arrow/benches/arithmetic_kernels.rs index c71a66681273..93ad32bbfc0e 100644 --- a/rust/arrow/benches/arithmetic_kernels.rs +++ b/rust/arrow/benches/arithmetic_kernels.rs @@ -19,6 +19,7 @@ extern crate criterion; use criterion::Criterion; +use rand::Rng; use std::sync::Arc; extern crate arrow; @@ -27,10 +28,17 @@ use arrow::array::*; use arrow::compute::kernels::arithmetic::*; use arrow::compute::kernels::limit::*; -fn create_array(size: usize) -> ArrayRef { +fn create_array(size: usize, with_nulls: bool) -> ArrayRef { + // use random numbers to avoid spurious compiler optimizations wrt to branching + let mut rng = rand::thread_rng(); let mut builder = Float32Builder::new(size); - for i in 0..size { - builder.append_value(1.0 + 1.0 * i as f32).unwrap(); + + for _ in 0..size { + if with_nulls && rng.gen::() > 0.5 { + builder.append_null().unwrap(); + } else { + builder.append_value(rng.gen()).unwrap(); + } } Arc::new(builder.finish()) } @@ -64,8 +72,8 @@ fn bench_limit(arr_a: &ArrayRef, max: usize) { } fn add_benchmark(c: &mut Criterion) { - let arr_a = create_array(512); - let arr_b = create_array(512); + let arr_a = create_array(512, false); + let arr_b = create_array(512, false); c.bench_function("add 512", |b| b.iter(|| bench_add(&arr_a, &arr_b))); c.bench_function("subtract 512", |b| { @@ -76,6 +84,15 @@ fn add_benchmark(c: &mut Criterion) { }); c.bench_function("divide 512", |b| b.iter(|| bench_divide(&arr_a, &arr_b))); c.bench_function("limit 512, 512", |b| b.iter(|| bench_limit(&arr_a, 512))); + + let arr_a_nulls = create_array(512, false); + let arr_b_nulls = create_array(512, false); + c.bench_function("add_nulls_512", |b| { + b.iter(|| bench_add(&arr_a_nulls, &arr_b_nulls)) + }); + c.bench_function("divide_nulls_512", |b| { + b.iter(|| bench_divide(&arr_a_nulls, &arr_b_nulls)) + }); } criterion_group!(benches, add_benchmark); diff --git a/rust/arrow/src/compute/kernels/arithmetic.rs b/rust/arrow/src/compute/kernels/arithmetic.rs index 03dc169f7dc3..9b287628de90 100644 --- a/rust/arrow/src/compute/kernels/arithmetic.rs +++ b/rust/arrow/src/compute/kernels/arithmetic.rs @@ -31,7 +31,6 @@ use std::sync::Arc; use num::{One, Zero}; -use crate::array::*; #[cfg(feature = "simd")] use crate::bitmap::Bitmap; use crate::buffer::Buffer; @@ -43,11 +42,15 @@ use crate::compute::util::simd_load_set_invalid; use crate::datatypes; use crate::datatypes::ToByteSlice; use crate::error::{ArrowError, Result}; -use crate::util::bit_util; +use crate::{array::*, util::bit_util}; /// Helper function to perform math lambda function on values from two arrays. If either /// left or right value is null then the output value is also null, so `1 + null` is /// `null`. +/// +/// # Errors +/// +/// This function errors if the arrays have different lengths pub fn math_op( left: &PrimitiveArray, right: &PrimitiveArray, @@ -55,7 +58,47 @@ pub fn math_op( ) -> Result> where T: datatypes::ArrowNumericType, - F: Fn(T::Native, T::Native) -> Result, + F: Fn(T::Native, T::Native) -> T::Native, +{ + if left.len() != right.len() { + return Err(ArrowError::ComputeError( + "Cannot perform math operation on arrays of different length".to_string(), + )); + } + + let null_bit_buffer = + combine_option_bitmap(left.data_ref(), right.data_ref(), left.len())?; + + let values = (0..left.len()) + .map(|i| op(left.value(i), right.value(i))) + .collect::>(); + + let data = ArrayData::new( + T::get_data_type(), + left.len(), + None, + null_bit_buffer, + 0, + vec![Buffer::from(values.to_byte_slice())], + vec![], + ); + Ok(PrimitiveArray::::from(Arc::new(data))) +} + +/// Helper function to divide two arrays. +/// +/// # Errors +/// +/// This function errors if: +/// * the arrays have different lengths +/// * a division by zero is found +fn math_divide( + left: &PrimitiveArray, + right: &PrimitiveArray, +) -> Result> +where + T: datatypes::ArrowNumericType, + T::Native: Div + Zero, { if left.len() != right.len() { return Err(ArrowError::ComputeError( @@ -68,20 +111,32 @@ where let mut values = Vec::with_capacity(left.len()); if let Some(b) = &null_bit_buffer { + // some value is null for i in 0..left.len() { - unsafe { + values.push(unsafe { if bit_util::get_bit_raw(b.raw_data(), i) { - values.push(op(left.value(i), right.value(i))?); + let right_value = right.value(i); + if right_value.is_zero() { + return Err(ArrowError::DivideByZero); + } else { + left.value(i) / right_value + } } else { - values.push(T::default_value()) + T::default_value() } - } + }); } } else { + // no value is null for i in 0..left.len() { - values.push(op(left.value(i), right.value(i))?); + let right_value = right.value(i); + values.push(if right_value.is_zero() { + return Err(ArrowError::DivideByZero); + } else { + left.value(i) / right_value + }); } - } + }; let data = ArrayData::new( T::get_data_type(), @@ -170,7 +225,7 @@ where // Create the combined `Bitmap` let null_bit_buffer = combine_option_bitmap(left.data_ref(), right.data_ref(), left.len())?; - let bitmap = null_bit_buffer.map(Bitmap::from); + let bitmap = null_bit_buffer.clone().map(Bitmap::from); let lanes = T::lanes(); let buffer_size = left.len() * mem::size_of::(); @@ -183,8 +238,6 @@ where if T::mask_any(is_zero) { return Err(ArrowError::DivideByZero); } - let right_no_invalid_zeros = - unsafe { simd_load_set_invalid(right, &bitmap, i, lanes, T::Native::one()) }; let simd_left = T::load(left.value_slice(i, lanes)); let simd_result = T::bin_op(simd_left, right_no_invalid_zeros, |a, b| a / b); @@ -197,8 +250,6 @@ where T::write(simd_result, result_slice); } - let null_bit_buffer = bitmap.map(|b| b.bits); - let data = ArrayData::new( T::get_data_type(), left.len(), @@ -229,7 +280,7 @@ where return simd_math_op(&left, &right, |a, b| a + b); #[allow(unreachable_code)] - math_op(left, right, |a, b| Ok(a + b)) + math_op(left, right, |a, b| a + b) } /// Perform `left - right` operation on two arrays. If either left or right value is null @@ -250,7 +301,7 @@ where return simd_math_op(&left, &right, |a, b| a - b); #[allow(unreachable_code)] - math_op(left, right, |a, b| Ok(a - b)) + math_op(left, right, |a, b| a - b) } /// Perform `left * right` operation on two arrays. If either left or right value is null @@ -271,7 +322,7 @@ where return simd_math_op(&left, &right, |a, b| a * b); #[allow(unreachable_code)] - math_op(left, right, |a, b| Ok(a * b)) + math_op(left, right, |a, b| a * b) } /// Perform `left / right` operation on two arrays. If either left or right value is null @@ -294,13 +345,7 @@ where return simd_divide(&left, &right); #[allow(unreachable_code)] - math_op(left, right, |a, b| { - if b.is_zero() { - Err(ArrowError::DivideByZero) - } else { - Ok(a / b) - } - }) + math_divide(&left, &right) } #[cfg(test)]