Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
92 changes: 92 additions & 0 deletions rust/arrow/src/buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,18 @@ use crate::util::bit_util;
#[cfg(feature = "simd")]
use std::borrow::BorrowMut;

/// Used to trim offset bits
const TRIM_OFFSET: [u8; 8] = [
0b00000001, 0b00000011, 0b00000111, 0b00001111, 0b00011111, 0b00111111, 0b01111111,
0b11111111,
];

/// Used to trim trailing bits
const TRIM_TRAIL: [u8; 8] = [
0b10000000, 0b11000000, 0b11100000, 0b11110000, 0b11111000, 0b11111100, 0b11111110,
0b11111111,
];

/// Buffer is a contiguous memory region of fixed size and is aligned at a 64-byte
/// boundary. Buffer is immutable.
#[derive(PartialEq, Debug)]
Expand Down Expand Up @@ -227,6 +239,62 @@ impl Buffer {
}
}

/// Returns a slice of this buffer, starting from `offset` and with a length of `len`.
///
/// Note that this is mostly useful when the offset is not a multiple of 8, as a
/// way of realigning the buffer.
pub(crate) fn slice_with_len(&self, offset: usize, len: usize) -> Self {
let mut arr = self.data().to_vec();
let mut arr: &mut [u8] = arr.as_mut();
let offset = self.offset + offset;
// shift by whole bytes
let pop_by = offset / 8;
let offset_rem = offset % 8;

let offset_diff = 8 - offset_rem;
arr = &mut arr[pop_by..];
let shift_by = bit_util::ceil(offset_rem + len, 8);
arr = &mut arr[..shift_by];

let mut left = 0u8;
// determine the length of the output buffer
let total_rem = (len + offset_rem) % 8;

// remove trailing bits at end of bytes
if total_rem > 0 {
if let Some(v) = arr.iter_mut().last() {
*v &= TRIM_OFFSET[total_rem - 1];
}
}

// shorter route if offset is div by 8
if offset_rem == 0 {
return Buffer::from(&arr[0..bit_util::ceil(len, 8)]);
} else {
// reverse array to enable carrying over
arr.reverse();
if let Some(v) = arr.iter_mut().last() {
*v &= TRIM_TRAIL[offset_diff - 1];
}
}

// replace bits from each byte in reverse order
arr.iter_mut().for_each(|byte| {
// assign the bits that we'll copy
let new_left = *byte & TRIM_OFFSET[offset_rem - 1];
// shift bytes to remove offset
*byte >>= offset_rem;
// add the bits copied from the previous byte
*byte |= left;
// shift the bytes we're copying to the next byte
left = new_left << offset_diff;
});

arr.reverse();

Buffer::from(&arr[0..bit_util::ceil(len, 8)])
}

/// Returns a raw pointer for this buffer.
///
/// Note that this should be used cautiously, and the returned pointer should not be
Expand Down Expand Up @@ -896,6 +964,30 @@ mod tests {
let _buf3 = (&buf1 | &buf2).unwrap();
}

#[test]
fn test_buffer_slice_with_len() {
let buf1 = Buffer::from([0b11111111, 0b11111111]);
let buf2 = buf1.slice_with_len(3, 10);
let buf3 = Buffer::from([0b11111111, 0b00000011]);
assert_eq!(buf2, buf3);

let buf1 = Buffer::from([0b01001110, 0b01001111]);
let buf2 = buf1.slice_with_len(4, 9);
let buf3 = Buffer::from([0b11110100, 0b00000000]);
assert_eq!(buf2, buf3);

let buf1 = Buffer::from([0b01001110, 0b01001111, 0b11001111]);
let buf2 = buf1.slice_with_len(7, 17);
let buf3 = Buffer::from([0b10011110, 0b10011110, 0b00000001]);
assert_eq!(buf2, buf3);

// this should remove the first byte, and mask the 24th bit
let buf1 = Buffer::from([0b01001110, 0b01001111, 0b11111111]);
let buf2 = buf1.slice_with_len(0, 17);
let buf3 = Buffer::from([0b01001110, 0b01001111, 0b00000001]);
assert_eq!(buf2, buf3);
}

#[test]
fn test_mutable_new() {
let buf = MutableBuffer::new(63);
Expand Down
105 changes: 77 additions & 28 deletions rust/arrow/src/compute/kernels/boolean.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,21 +46,30 @@ where
));
}

if left.offset() % 8 != 0 || right.offset() % 8 != 0 {
return Err(ArrowError::ComputeError(
"Cannot perform bitwise operation when offsets are not byte-aligned."
.to_string(),
));
}

let left_data = left.data_ref();
let right_data = right.data_ref();
let null_bit_buffer = combine_option_bitmap(&left_data, &right_data, left.len())?;

let left_buffer = &left_data.buffers()[0];
let right_buffer = &right_data.buffers()[0];
let left_offset = &left.offset() / 8;
let right_offset = &right.offset() / 8;
let mut left_offset = &left.offset() / 8;
let mut right_offset = &right.offset() / 8;

// A BooleanArray Buffer does not carry the data's offset, thus we offset with data's offset
let left_buffer = if left.offset() % 8 != 0 {
let b = &left_data.buffers()[0].clone();
let sliced = b.slice_with_len(left.offset(), left.len());
left_offset = 0;
sliced
} else {
left_data.buffers()[0].clone()
};
let right_buffer = if right.offset() % 8 != 0 {
let b = &right_data.buffers()[0].clone();
let sliced = b.slice_with_len(right.offset(), right.len());
right_offset = 0;
sliced
} else {
right_data.buffers()[0].clone()
};

let len = ceil(left.len(), 8);

Expand Down Expand Up @@ -93,14 +102,7 @@ pub fn or(left: &BooleanArray, right: &BooleanArray) -> Result<BooleanArray> {
/// Performs unary `NOT` operation on an arrays. If value is null then the result is also
/// null.
pub fn not(left: &BooleanArray) -> Result<BooleanArray> {
if left.offset() % 8 != 0 {
return Err(ArrowError::ComputeError(
"Cannot perform bitwise operation when offsets are not byte-aligned."
.to_string(),
));
}

let left_offset = left.offset() / 8;
let mut left_offset = left.offset() / 8;
let len = ceil(left.len(), 8);

let data = left.data_ref();
Expand All @@ -109,7 +111,16 @@ pub fn not(left: &BooleanArray) -> Result<BooleanArray> {
.as_ref()
.map(|b| b.bits.slice(left_offset));

let values = buffer_unary_not(&data.buffers()[0], left_offset, len);
let buf = if left.offset() % 8 != 0 {
let b = &data.buffers()[0].clone();
let sliced = b.slice_with_len(left.offset(), left.len());
left_offset = 0;
sliced
} else {
data.buffers()[0].clone()
};

let values = buffer_unary_not(&buf, left_offset, len);

let data = ArrayData::new(
DataType::Boolean,
Expand Down Expand Up @@ -171,6 +182,19 @@ mod tests {
assert_eq!(false, c.value(3));
}

#[test]
fn test_bool_array_not_sliced() {
let a =
BooleanArray::from(vec![true, true, false, false, false, false, true, true]);
let a = a.slice(4, 4);
let a = a.as_any().downcast_ref::<BooleanArray>().unwrap();
let c = not(&a).unwrap();
assert_eq!(true, c.value(0));
assert_eq!(true, c.value(1));
assert_eq!(false, c.value(2));
assert_eq!(false, c.value(3));
}

#[test]
fn test_bool_array_and_nulls() {
let a = BooleanArray::from(vec![None, Some(false), None, Some(false)]);
Expand All @@ -182,6 +206,32 @@ mod tests {
assert_eq!(false, c.is_null(3));
}

#[test]
fn test_bool_array_and_nulls_and_offsets() {
let a =
BooleanArray::from(vec![Some(true), None, Some(false), None, Some(false)]);
let a = a.slice(1, 4);
let a = a.as_any().downcast_ref::<BooleanArray>().unwrap();

let b = BooleanArray::from(vec![
None,
Some(false),
None,
None,
None,
Some(false),
Some(false),
]);
let b = b.slice(3, 4);
let b = b.as_any().downcast_ref::<BooleanArray>().unwrap();

let c = and(&a, &b).unwrap();
assert_eq!(true, c.is_null(0));
assert_eq!(true, c.is_null(1));
assert_eq!(true, c.is_null(2));
assert_eq!(false, c.is_null(3));
}

#[test]
fn test_bool_array_and_sliced_same_offset() {
let a = BooleanArray::from(vec![
Expand Down Expand Up @@ -233,12 +283,12 @@ mod tests {
#[test]
fn test_bool_array_and_sliced_offset1() {
let a = BooleanArray::from(vec![
false, false, false, false, false, false, false, false, false, false, true,
true,
false, false, false, false, false, false, false, false, false, false, false,
true, true,
]);
let b = BooleanArray::from(vec![false, true, false, true]);

let a = a.slice(8, 4);
let a = a.slice(9, 4);
let a = a.as_any().downcast_ref::<BooleanArray>().unwrap();

let c = and(&a, &b).unwrap();
Expand All @@ -253,18 +303,17 @@ mod tests {
fn test_bool_array_and_sliced_offset2() {
let a = BooleanArray::from(vec![false, false, true, true]);
let b = BooleanArray::from(vec![
false, false, false, false, false, false, false, false, false, true, false,
true,
true, true, true, false, false, false, false, false, false, true, false, true,
]);

let b = b.slice(8, 4);
let b = b.slice(7, 4);
let b = b.as_any().downcast_ref::<BooleanArray>().unwrap();

let c = and(&a, &b).unwrap();
assert_eq!(4, c.len());
assert_eq!(false, c.value(0));
assert_eq!(false, c.value(1));
assert_eq!(false, c.value(2));
assert_eq!(true, c.value(3));
assert_eq!(true, c.value(2));
assert_eq!(false, c.value(3));
}
}
6 changes: 2 additions & 4 deletions rust/arrow/src/compute/kernels/take.rs
Original file line number Diff line number Diff line change
Expand Up @@ -225,10 +225,8 @@ fn take_boolean(values: &ArrayRef, indices: &UInt32Array) -> Result<ArrayRef> {
let index = indices.value(i) as usize;
if array.is_null(index) {
bit_util::unset_bit(null_slice, i);
} else {
if array.value(index) {
bit_util::set_bit(val_slice, i);
}
} else if array.value(index) {
bit_util::set_bit(val_slice, i);
}
});

Expand Down
35 changes: 25 additions & 10 deletions rust/arrow/src/compute/util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ use crate::bitmap::Bitmap;
use crate::buffer::{buffer_bin_and, Buffer};
#[cfg(feature = "simd")]
use crate::datatypes::*;
use crate::error::{ArrowError, Result};
use crate::error::Result;
use crate::util::bit_util::ceil;
#[cfg(feature = "simd")]
use num::One;
Expand All @@ -38,19 +38,34 @@ pub(super) fn combine_option_bitmap(
right_data: &ArrayDataRef,
len_in_bits: usize,
) -> Result<Option<Buffer>> {
let left_offset_in_bits = left_data.offset();
let right_offset_in_bits = right_data.offset();
let mut left_offset_in_bits = left_data.offset();
let mut right_offset_in_bits = right_data.offset();

let left_rem = left_offset_in_bits % 8;
let right_rem = right_offset_in_bits % 8;

// check if offsets are aligned, and align as necessary
let left = left_data.null_buffer();
let right = right_data.null_buffer();

if (left.is_some() && left_offset_in_bits % 8 != 0)
|| (right.is_some() && right_offset_in_bits % 8 != 0)
{
return Err(ArrowError::ComputeError(
"Cannot combine option bitmaps that are not byte-aligned.".to_string(),
));
}
// if there are remainders, slice the buffers to realign them
let left = if left.is_some() && left_rem > 0 {
let b = left.cloned().unwrap();
let sliced = b.slice_with_len(left_offset_in_bits, len_in_bits);
left_offset_in_bits = 0;
Some(sliced)
} else {
left.cloned()
};

let right = if right.is_some() && right_rem > 0 {
let b = right.cloned().unwrap();
let sliced = b.slice_with_len(right_offset_in_bits, len_in_bits);
right_offset_in_bits = 0;
Some(sliced)
} else {
right.cloned()
};

let left_offset = left_offset_in_bits / 8;
let right_offset = right_offset_in_bits / 8;
Expand Down
4 changes: 2 additions & 2 deletions rust/arrow/src/util/bit_util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -351,9 +351,9 @@ mod tests {
unsafe {
set_bits_raw(buf.as_mut_ptr(), start, end);
}
for i in start..end {
(start..end).for_each(|i| {
expected[i] = true;
}
});
}

let raw_ptr = buf.as_ptr();
Expand Down