Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 22 additions & 5 deletions rust/arrow/benches/arithmetic_kernels.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
extern crate criterion;
use criterion::Criterion;

use rand::Rng;
use std::sync::Arc;

extern crate arrow;
Expand All @@ -27,10 +28,17 @@ use arrow::array::*;
use arrow::compute::kernels::arithmetic::*;
use arrow::compute::kernels::limit::*;

fn create_array(size: usize) -> ArrayRef {
fn create_array(size: usize, with_nulls: bool) -> ArrayRef {
// use random numbers to avoid spurious compiler optimizations wrt to branching
let mut rng = rand::thread_rng();
let mut builder = Float32Builder::new(size);
for i in 0..size {
builder.append_value(1.0 + 1.0 * i as f32).unwrap();

for _ in 0..size {
if with_nulls && rng.gen::<f32>() > 0.5 {
builder.append_null().unwrap();
} else {
builder.append_value(rng.gen()).unwrap();
}
}
Arc::new(builder.finish())
}
Expand Down Expand Up @@ -64,8 +72,8 @@ fn bench_limit(arr_a: &ArrayRef, max: usize) {
}

fn add_benchmark(c: &mut Criterion) {
let arr_a = create_array(512);
let arr_b = create_array(512);
let arr_a = create_array(512, false);
let arr_b = create_array(512, false);

c.bench_function("add 512", |b| b.iter(|| bench_add(&arr_a, &arr_b)));
c.bench_function("subtract 512", |b| {
Expand All @@ -76,6 +84,15 @@ fn add_benchmark(c: &mut Criterion) {
});
c.bench_function("divide 512", |b| b.iter(|| bench_divide(&arr_a, &arr_b)));
c.bench_function("limit 512, 512", |b| b.iter(|| bench_limit(&arr_a, 512)));

let arr_a_nulls = create_array(512, false);
let arr_b_nulls = create_array(512, false);
c.bench_function("add_nulls_512", |b| {
b.iter(|| bench_add(&arr_a_nulls, &arr_b_nulls))
});
c.bench_function("divide_nulls_512", |b| {
b.iter(|| bench_divide(&arr_a_nulls, &arr_b_nulls))
});
}

criterion_group!(benches, add_benchmark);
Expand Down
93 changes: 69 additions & 24 deletions rust/arrow/src/compute/kernels/arithmetic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ use std::sync::Arc;

use num::{One, Zero};

use crate::array::*;
#[cfg(feature = "simd")]
use crate::bitmap::Bitmap;
use crate::buffer::Buffer;
Expand All @@ -43,19 +42,63 @@ use crate::compute::util::simd_load_set_invalid;
use crate::datatypes;
use crate::datatypes::ToByteSlice;
use crate::error::{ArrowError, Result};
use crate::util::bit_util;
use crate::{array::*, util::bit_util};

/// Helper function to perform math lambda function on values from two arrays. If either
/// left or right value is null then the output value is also null, so `1 + null` is
/// `null`.
///
/// # Errors
///
/// This function errors if the arrays have different lengths
pub fn math_op<T, F>(
left: &PrimitiveArray<T>,
right: &PrimitiveArray<T>,
op: F,
) -> Result<PrimitiveArray<T>>
where
T: datatypes::ArrowNumericType,
F: Fn(T::Native, T::Native) -> Result<T::Native>,
F: Fn(T::Native, T::Native) -> T::Native,
{
if left.len() != right.len() {
return Err(ArrowError::ComputeError(
"Cannot perform math operation on arrays of different length".to_string(),
));
}

let null_bit_buffer =
combine_option_bitmap(left.data_ref(), right.data_ref(), left.len())?;

let values = (0..left.len())
.map(|i| op(left.value(i), right.value(i)))
.collect::<Vec<T::Native>>();

let data = ArrayData::new(
T::get_data_type(),
left.len(),
None,
null_bit_buffer,
0,
vec![Buffer::from(values.to_byte_slice())],
vec![],
);
Ok(PrimitiveArray::<T>::from(Arc::new(data)))
}

/// Helper function to divide two arrays.
///
/// # Errors
///
/// This function errors if:
/// * the arrays have different lengths
/// * a division by zero is found
fn math_divide<T>(
left: &PrimitiveArray<T>,
right: &PrimitiveArray<T>,
) -> Result<PrimitiveArray<T>>
where
T: datatypes::ArrowNumericType,
T::Native: Div<Output = T::Native> + Zero,
{
if left.len() != right.len() {
return Err(ArrowError::ComputeError(
Expand All @@ -68,20 +111,32 @@ where

let mut values = Vec::with_capacity(left.len());
if let Some(b) = &null_bit_buffer {
// some value is null
for i in 0..left.len() {
unsafe {
values.push(unsafe {
if bit_util::get_bit_raw(b.raw_data(), i) {
values.push(op(left.value(i), right.value(i))?);
let right_value = right.value(i);
if right_value.is_zero() {
return Err(ArrowError::DivideByZero);
} else {
left.value(i) / right_value
}
} else {
values.push(T::default_value())
T::default_value()
}
}
});
}
} else {
// no value is null
for i in 0..left.len() {
values.push(op(left.value(i), right.value(i))?);
let right_value = right.value(i);
values.push(if right_value.is_zero() {
return Err(ArrowError::DivideByZero);
} else {
left.value(i) / right_value
});
}
}
};

let data = ArrayData::new(
T::get_data_type(),
Expand Down Expand Up @@ -170,7 +225,7 @@ where
// Create the combined `Bitmap`
let null_bit_buffer =
combine_option_bitmap(left.data_ref(), right.data_ref(), left.len())?;
let bitmap = null_bit_buffer.map(Bitmap::from);
let bitmap = null_bit_buffer.clone().map(Bitmap::from);
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this clone necessary?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe not, but I was unable to get a bitmap reference to set the mask for SIMD from a buffer without clone.


let lanes = T::lanes();
let buffer_size = left.len() * mem::size_of::<T::Native>();
Expand All @@ -183,8 +238,6 @@ where
if T::mask_any(is_zero) {
return Err(ArrowError::DivideByZero);
}
let right_no_invalid_zeros =
unsafe { simd_load_set_invalid(right, &bitmap, i, lanes, T::Native::one()) };
let simd_left = T::load(left.value_slice(i, lanes));
let simd_result = T::bin_op(simd_left, right_no_invalid_zeros, |a, b| a / b);

Expand All @@ -197,8 +250,6 @@ where
T::write(simd_result, result_slice);
}

let null_bit_buffer = bitmap.map(|b| b.bits);

let data = ArrayData::new(
T::get_data_type(),
left.len(),
Expand Down Expand Up @@ -229,7 +280,7 @@ where
return simd_math_op(&left, &right, |a, b| a + b);

#[allow(unreachable_code)]
math_op(left, right, |a, b| Ok(a + b))
math_op(left, right, |a, b| a + b)
}

/// Perform `left - right` operation on two arrays. If either left or right value is null
Expand All @@ -250,7 +301,7 @@ where
return simd_math_op(&left, &right, |a, b| a - b);

#[allow(unreachable_code)]
math_op(left, right, |a, b| Ok(a - b))
math_op(left, right, |a, b| a - b)
}

/// Perform `left * right` operation on two arrays. If either left or right value is null
Expand All @@ -271,7 +322,7 @@ where
return simd_math_op(&left, &right, |a, b| a * b);

#[allow(unreachable_code)]
math_op(left, right, |a, b| Ok(a * b))
math_op(left, right, |a, b| a * b)
}

/// Perform `left / right` operation on two arrays. If either left or right value is null
Expand All @@ -294,13 +345,7 @@ where
return simd_divide(&left, &right);

#[allow(unreachable_code)]
math_op(left, right, |a, b| {
if b.is_zero() {
Err(ArrowError::DivideByZero)
} else {
Ok(a / b)
}
})
math_divide(&left, &right)
}

#[cfg(test)]
Expand Down