From 9e38fe94a3faf45eaa46384a55a925f1f3583a04 Mon Sep 17 00:00:00 2001 From: "Heres, Daniel" Date: Sat, 12 Dec 2020 13:02:44 +0100 Subject: [PATCH 1/6] Some minor perf tweaks --- rust/arrow/src/array/array_binary.rs | 6 +++--- rust/arrow/src/array/array_string.rs | 2 +- rust/arrow/src/compute/kernels/comparison.rs | 15 ++++++++++++--- 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/rust/arrow/src/array/array_binary.rs b/rust/arrow/src/array/array_binary.rs index a32bcc075c3..ba58347613a 100644 --- a/rust/arrow/src/array/array_binary.rs +++ b/rust/arrow/src/array/array_binary.rs @@ -84,7 +84,7 @@ impl GenericBinaryArray { /// Returns the element at index `i` as a byte slice. pub fn value(&self, i: usize) -> &[u8] { - assert!(i < self.data.len(), "BinaryArray out of bounds access"); + debug_assert!(i < self.data.len(), "BinaryArray out of bounds access"); let offset = i.checked_add(self.data.offset()).unwrap(); unsafe { let pos = self.value_offset_at(offset); @@ -317,7 +317,7 @@ pub struct FixedSizeBinaryArray { impl FixedSizeBinaryArray { /// Returns the element at index `i` as a byte slice. pub fn value(&self, i: usize) -> &[u8] { - assert!( + debug_assert!( i < self.data.len(), "FixedSizeBinaryArray out of bounds access" ); @@ -468,7 +468,7 @@ pub struct DecimalArray { impl DecimalArray { /// Returns the element at index `i` as i128. pub fn value(&self, i: usize) -> i128 { - assert!(i < self.data.len(), "DecimalArray out of bounds access"); + debug_assert!(i < self.data.len(), "DecimalArray out of bounds access"); let offset = i.checked_add(self.data.offset()).unwrap(); let raw_val = unsafe { let pos = self.value_offset_at(offset); diff --git a/rust/arrow/src/array/array_string.rs b/rust/arrow/src/array/array_string.rs index 5f871b8f595..6a0adbb618f 100644 --- a/rust/arrow/src/array/array_string.rs +++ b/rust/arrow/src/array/array_string.rs @@ -85,7 +85,7 @@ impl GenericStringArray { /// Returns the element at index `i` as &str pub fn value(&self, i: usize) -> &str { - assert!(i < self.data.len(), "StringArray out of bounds access"); + debug_assert!(i < self.data.len(), "StringArray out of bounds access"); let offset = i.checked_add(self.data.offset()).unwrap(); unsafe { let pos = self.value_offset_at(offset); diff --git a/rust/arrow/src/compute/kernels/comparison.rs b/rust/arrow/src/compute/kernels/comparison.rs index 8d0f86574d9..b7cc5e5dba3 100644 --- a/rust/arrow/src/compute/kernels/comparison.rs +++ b/rust/arrow/src/compute/kernels/comparison.rs @@ -47,9 +47,18 @@ macro_rules! compare_op { let null_bit_buffer = combine_option_bitmap($left.data_ref(), $right.data_ref(), $left.len())?; - let mut result = BooleanBufferBuilder::new($left.len()); + let byte_capacity = bit_util::ceil($left.len(), 8); + let actual_capacity = bit_util::round_upto_multiple_of_64(byte_capacity); + let mut buffer = MutableBuffer::new(actual_capacity); + buffer.resize(byte_capacity); + let data = buffer.raw_data_mut(); + for i in 0..$left.len() { - result.append($op($left.value(i), $right.value(i)))?; + if $op($left.value(i), $right.value(i)) { + unsafe { + bit_util::set_bit_raw(data, i); + } + } } let data = ArrayData::new( @@ -58,7 +67,7 @@ macro_rules! compare_op { None, null_bit_buffer, 0, - vec![result.finish()], + vec![buffer.freeze()], vec![], ); Ok(BooleanArray::from(Arc::new(data))) From c5749d9a96cb0cfff471e88d3315142d1ba91456 Mon Sep 17 00:00:00 2001 From: "Heres, Daniel" Date: Sat, 12 Dec 2020 13:08:10 +0100 Subject: [PATCH 2/6] Also do for scalar --- rust/arrow/src/compute/kernels/comparison.rs | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/rust/arrow/src/compute/kernels/comparison.rs b/rust/arrow/src/compute/kernels/comparison.rs index b7cc5e5dba3..20d1fb98ffa 100644 --- a/rust/arrow/src/compute/kernels/comparison.rs +++ b/rust/arrow/src/compute/kernels/comparison.rs @@ -77,11 +77,21 @@ macro_rules! compare_op { macro_rules! compare_op_scalar { ($left: expr, $right:expr, $op:expr) => {{ let null_bit_buffer = $left.data().null_buffer().cloned(); - let mut result = BooleanBufferBuilder::new($left.len()); + + let byte_capacity = bit_util::ceil($left.len(), 8); + let actual_capacity = bit_util::round_upto_multiple_of_64(byte_capacity); + let mut buffer = MutableBuffer::new(actual_capacity); + buffer.resize(byte_capacity); + let data = buffer.raw_data_mut(); + for i in 0..$left.len() { - result.append($op($left.value(i), $right))?; + if $op($left.value(i), $right) { + unsafe { + bit_util::set_bit_raw(data, i); + } + } } - + let data = ArrayData::new( DataType::Boolean, $left.len(), From c64e6f00eb148ca0f9f05f879d0440efba1f99bf Mon Sep 17 00:00:00 2001 From: "Heres, Daniel" Date: Sat, 12 Dec 2020 13:12:07 +0100 Subject: [PATCH 3/6] Fix buffer code --- rust/arrow/src/compute/kernels/comparison.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/arrow/src/compute/kernels/comparison.rs b/rust/arrow/src/compute/kernels/comparison.rs index 20d1fb98ffa..c58b5257648 100644 --- a/rust/arrow/src/compute/kernels/comparison.rs +++ b/rust/arrow/src/compute/kernels/comparison.rs @@ -98,7 +98,7 @@ macro_rules! compare_op_scalar { None, null_bit_buffer, 0, - vec![result.finish()], + vec![buffer.freeze()], vec![], ); Ok(BooleanArray::from(Arc::new(data))) From 3b67f70d670c8ba76f327fb4c8a757155d021f67 Mon Sep 17 00:00:00 2001 From: "Heres, Daniel" Date: Sat, 12 Dec 2020 13:41:53 +0100 Subject: [PATCH 4/6] fmt --- rust/arrow/src/compute/kernels/comparison.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/arrow/src/compute/kernels/comparison.rs b/rust/arrow/src/compute/kernels/comparison.rs index c58b5257648..27902ab5c6c 100644 --- a/rust/arrow/src/compute/kernels/comparison.rs +++ b/rust/arrow/src/compute/kernels/comparison.rs @@ -91,7 +91,7 @@ macro_rules! compare_op_scalar { } } } - + let data = ArrayData::new( DataType::Boolean, $left.len(), From 23c8ff28e56ccb381bcbb321dcbbb946d8fd7db0 Mon Sep 17 00:00:00 2001 From: "Heres, Daniel" Date: Sun, 13 Dec 2020 09:33:53 +0100 Subject: [PATCH 5/6] Revert debug asserts, add comment about safety --- rust/arrow/src/array/array_binary.rs | 6 +++--- rust/arrow/src/array/array_string.rs | 2 +- rust/arrow/src/compute/kernels/comparison.rs | 2 ++ 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/rust/arrow/src/array/array_binary.rs b/rust/arrow/src/array/array_binary.rs index ba58347613a..a32bcc075c3 100644 --- a/rust/arrow/src/array/array_binary.rs +++ b/rust/arrow/src/array/array_binary.rs @@ -84,7 +84,7 @@ impl GenericBinaryArray { /// Returns the element at index `i` as a byte slice. pub fn value(&self, i: usize) -> &[u8] { - debug_assert!(i < self.data.len(), "BinaryArray out of bounds access"); + assert!(i < self.data.len(), "BinaryArray out of bounds access"); let offset = i.checked_add(self.data.offset()).unwrap(); unsafe { let pos = self.value_offset_at(offset); @@ -317,7 +317,7 @@ pub struct FixedSizeBinaryArray { impl FixedSizeBinaryArray { /// Returns the element at index `i` as a byte slice. pub fn value(&self, i: usize) -> &[u8] { - debug_assert!( + assert!( i < self.data.len(), "FixedSizeBinaryArray out of bounds access" ); @@ -468,7 +468,7 @@ pub struct DecimalArray { impl DecimalArray { /// Returns the element at index `i` as i128. pub fn value(&self, i: usize) -> i128 { - debug_assert!(i < self.data.len(), "DecimalArray out of bounds access"); + assert!(i < self.data.len(), "DecimalArray out of bounds access"); let offset = i.checked_add(self.data.offset()).unwrap(); let raw_val = unsafe { let pos = self.value_offset_at(offset); diff --git a/rust/arrow/src/array/array_string.rs b/rust/arrow/src/array/array_string.rs index 6a0adbb618f..5f871b8f595 100644 --- a/rust/arrow/src/array/array_string.rs +++ b/rust/arrow/src/array/array_string.rs @@ -85,7 +85,7 @@ impl GenericStringArray { /// Returns the element at index `i` as &str pub fn value(&self, i: usize) -> &str { - debug_assert!(i < self.data.len(), "StringArray out of bounds access"); + assert!(i < self.data.len(), "StringArray out of bounds access"); let offset = i.checked_add(self.data.offset()).unwrap(); unsafe { let pos = self.value_offset_at(offset); diff --git a/rust/arrow/src/compute/kernels/comparison.rs b/rust/arrow/src/compute/kernels/comparison.rs index 27902ab5c6c..898e4aee6a2 100644 --- a/rust/arrow/src/compute/kernels/comparison.rs +++ b/rust/arrow/src/compute/kernels/comparison.rs @@ -55,6 +55,8 @@ macro_rules! compare_op { for i in 0..$left.len() { if $op($left.value(i), $right.value(i)) { + // SAFETY: this is safe as `data` has at least $left.len() elements. + // and `i` is bound by $left.len() unsafe { bit_util::set_bit_raw(data, i); } From ef3bffacedaa6f00affdf05f9ded4b04d7375972 Mon Sep 17 00:00:00 2001 From: "Heres, Daniel" Date: Tue, 15 Dec 2020 12:42:58 +0100 Subject: [PATCH 6/6] Add safety comment for scalar kenel as well --- rust/arrow/src/compute/kernels/comparison.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/rust/arrow/src/compute/kernels/comparison.rs b/rust/arrow/src/compute/kernels/comparison.rs index 898e4aee6a2..74a01e485b2 100644 --- a/rust/arrow/src/compute/kernels/comparison.rs +++ b/rust/arrow/src/compute/kernels/comparison.rs @@ -88,6 +88,8 @@ macro_rules! compare_op_scalar { for i in 0..$left.len() { if $op($left.value(i), $right) { + // SAFETY: this is safe as `data` has at least $left.len() elements + // and `i` is bound by $left.len() unsafe { bit_util::set_bit_raw(data, i); }