Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 3 additions & 9 deletions rust/arrow/src/array/array_binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -140,9 +140,7 @@ impl<OffsetSize: BinaryOffsetSizeTrait> GenericBinaryArray<OffsetSize> {
.add_buffer(v.data_ref().buffers()[0].clone())
.add_buffer(v.data_ref().child_data()[0].buffers()[0].clone());
if let Some(bitmap) = v.data_ref().null_bitmap() {
builder = builder
.null_count(v.data_ref().null_count())
.null_bit_buffer(bitmap.bits.clone())
builder = builder.null_bit_buffer(bitmap.bits.clone())
}

let data = builder.build();
Expand Down Expand Up @@ -453,9 +451,7 @@ impl From<FixedSizeListArray> for FixedSizeBinaryArray {
.len(v.len())
.add_buffer(v.data_ref().child_data()[0].buffers()[0].clone());
if let Some(bitmap) = v.data_ref().null_bitmap() {
builder = builder
.null_count(v.data_ref().null_count())
.null_bit_buffer(bitmap.bits.clone())
builder = builder.null_bit_buffer(bitmap.bits.clone())
}

let data = builder.build();
Expand Down Expand Up @@ -572,9 +568,7 @@ impl DecimalArray {
.len(v.len())
.add_buffer(v.data_ref().child_data()[0].buffers()[0].clone());
if let Some(bitmap) = v.data_ref().null_bitmap() {
builder = builder
.null_count(v.data_ref().null_count())
.null_bit_buffer(bitmap.bits.clone())
builder = builder.null_bit_buffer(bitmap.bits.clone())
}

let data = builder.build();
Expand Down
4 changes: 1 addition & 3 deletions rust/arrow/src/array/array_string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -116,9 +116,7 @@ impl<OffsetSize: StringOffsetSizeTrait> GenericStringArray<OffsetSize> {
.add_buffer(v.data_ref().buffers()[0].clone())
.add_buffer(v.data_ref().child_data()[0].buffers()[0].clone());
if let Some(bitmap) = v.data().null_bitmap() {
builder = builder
.null_count(v.data_ref().null_count())
.null_bit_buffer(bitmap.bits.clone())
builder = builder.null_bit_buffer(bitmap.bits.clone())
}

let data = builder.build();
Expand Down
14 changes: 5 additions & 9 deletions rust/arrow/src/array/array_struct.rs
Original file line number Diff line number Diff line change
Expand Up @@ -154,8 +154,7 @@ impl TryFrom<Vec<(&str, ArrayRef)>> for StructArray {
.len(len)
.child_data(child_data);
if let Some(null_buffer) = null {
let null_count = len - null_buffer.count_set_bits();
builder = builder.null_count(null_count).null_bit_buffer(null_buffer);
builder = builder.null_bit_buffer(null_buffer);
}

Ok(StructArray::from(builder.build()))
Expand Down Expand Up @@ -237,9 +236,9 @@ impl fmt::Debug for StructArray {
}
}

impl From<(Vec<(Field, ArrayRef)>, Buffer, usize)> for StructArray {
fn from(triple: (Vec<(Field, ArrayRef)>, Buffer, usize)) -> Self {
let (field_types, field_values): (Vec<_>, Vec<_>) = triple.0.into_iter().unzip();
impl From<(Vec<(Field, ArrayRef)>, Buffer)> for StructArray {
fn from(pair: (Vec<(Field, ArrayRef)>, Buffer)) -> Self {
let (field_types, field_values): (Vec<_>, Vec<_>) = pair.0.into_iter().unzip();

// Check the length of the child arrays
let length = field_values[0].len();
Expand All @@ -257,10 +256,9 @@ impl From<(Vec<(Field, ArrayRef)>, Buffer, usize)> for StructArray {
}

let data = ArrayData::builder(DataType::Struct(field_types))
.null_bit_buffer(triple.1)
.null_bit_buffer(pair.1)
.child_data(field_values.into_iter().map(|a| a.data()).collect())
.len(length)
.null_count(triple.2)
.build();
Self::from(data)
}
Expand Down Expand Up @@ -358,15 +356,13 @@ mod tests {

let expected_string_data = ArrayData::builder(DataType::Utf8)
.len(4)
.null_count(2)
.null_bit_buffer(Buffer::from(&[9_u8]))
.add_buffer(Buffer::from(&[0, 3, 3, 3, 7].to_byte_slice()))
.add_buffer(Buffer::from(b"joemark"))
.build();

let expected_int_data = ArrayData::builder(DataType::Int32)
.len(4)
.null_count(1)
.null_bit_buffer(Buffer::from(&[11_u8]))
.add_buffer(Buffer::from(&[1, 2, 0, 4].to_byte_slice()))
.build();
Expand Down
27 changes: 8 additions & 19 deletions rust/arrow/src/array/array_union.rs
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,9 @@
//! # Ok(())
//! # }
//! ```
use crate::array::{make_array, Array, ArrayData, ArrayDataRef, ArrayRef};
use crate::array::{
data::count_nulls, make_array, Array, ArrayData, ArrayDataRef, ArrayRef,
};
use crate::buffer::Buffer;
use crate::datatypes::*;
use crate::error::{ArrowError, Result};
Expand Down Expand Up @@ -118,7 +120,7 @@ impl UnionArray {
type_ids: Buffer,
value_offsets: Option<Buffer>,
child_arrays: Vec<(Field, ArrayRef)>,
bitmap_data: Option<(Buffer, usize)>,
bitmap_data: Option<Buffer>,
) -> Self {
let (field_types, field_values): (Vec<_>, Vec<_>) =
child_arrays.into_iter().unzip();
Expand All @@ -127,8 +129,8 @@ impl UnionArray {
.add_buffer(type_ids)
.child_data(field_values.into_iter().map(|a| a.data()).collect())
.len(len);
if let Some((bitmap, null_count)) = bitmap_data {
builder = builder.null_bit_buffer(bitmap).null_count(null_count);
if let Some(bitmap) = bitmap_data {
builder = builder.null_bit_buffer(bitmap)
}
let data = match value_offsets {
Some(b) => builder.add_buffer(b).build(),
Expand All @@ -143,16 +145,8 @@ impl UnionArray {
child_arrays: Vec<(Field, ArrayRef)>,
bitmap: Option<Buffer>,
) -> Result<Self> {
let bitmap_data = bitmap.map(|b| {
let null_count = type_ids.len() - b.count_set_bits();
(b, null_count)
});

if let Some(b) = &value_offsets {
let nulls = match bitmap_data {
Some((_, n)) => n,
None => 0,
};
let nulls = count_nulls(bitmap.as_ref(), 0, type_ids.len());
if ((type_ids.len() - nulls) * 4) != b.len() {
return Err(ArrowError::InvalidArgumentError(
"Type Ids and Offsets represent a different number of array slots."
Expand Down Expand Up @@ -192,12 +186,7 @@ impl UnionArray {
}
}

Ok(Self::new(
type_ids,
value_offsets,
child_arrays,
bitmap_data,
))
Ok(Self::new(type_ids, value_offsets, child_arrays, bitmap))
}

/// Accesses the child array for `type_id`.
Expand Down
30 changes: 4 additions & 26 deletions rust/arrow/src/array/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -499,9 +499,7 @@ impl BooleanBuilder {
.len(len)
.add_buffer(self.values_builder.finish());
if null_count > 0 {
builder = builder
.null_count(null_count)
.null_bit_buffer(null_bit_buffer);
builder = builder.null_bit_buffer(null_bit_buffer);
}
let data = builder.build();
BooleanArray::from(data)
Expand Down Expand Up @@ -648,9 +646,7 @@ impl<T: ArrowPrimitiveType> PrimitiveBuilder<T> {
.len(len)
.add_buffer(self.values_builder.finish());
if null_count > 0 {
builder = builder
.null_count(null_count)
.null_bit_buffer(null_bit_buffer);
builder = builder.null_bit_buffer(null_bit_buffer);
}
let data = builder.build();
PrimitiveArray::<T>::from(data)
Expand All @@ -669,9 +665,7 @@ impl<T: ArrowPrimitiveType> PrimitiveBuilder<T> {
.len(len)
.add_buffer(self.values_builder.finish());
if null_count > 0 {
builder = builder
.null_count(null_count)
.null_bit_buffer(null_bit_buffer);
builder = builder.null_bit_buffer(null_bit_buffer);
}
builder = builder.add_child_data(values.data());
DictionaryArray::<T>::from(builder.build())
Expand Down Expand Up @@ -778,15 +772,13 @@ where

let offset_buffer = self.offsets_builder.finish();
let null_bit_buffer = self.bitmap_builder.finish();
let nulls = null_bit_buffer.count_set_bits();
self.offsets_builder.append(0);
let data = ArrayData::builder(DataType::List(Box::new(Field::new(
"item",
values_data.data_type().clone(),
true, // TODO: find a consistent way of getting this
))))
.len(len)
.null_count(len - nulls)
.add_buffer(offset_buffer)
.add_child_data(values_data)
.null_bit_buffer(null_bit_buffer)
Expand Down Expand Up @@ -896,15 +888,13 @@ where

let offset_buffer = self.offsets_builder.finish();
let null_bit_buffer = self.bitmap_builder.finish();
let nulls = null_bit_buffer.count_set_bits();
self.offsets_builder.append(0);
let data = ArrayData::builder(DataType::LargeList(Box::new(Field::new(
"item",
values_data.data_type().clone(),
true,
))))
.len(len)
.null_count(len - nulls)
.add_buffer(offset_buffer)
.add_child_data(values_data)
.null_bit_buffer(null_bit_buffer)
Expand Down Expand Up @@ -1027,13 +1017,11 @@ where
}

let null_bit_buffer = self.bitmap_builder.finish();
let nulls = null_bit_buffer.count_set_bits();
let data = ArrayData::builder(DataType::FixedSizeList(
Box::new(Field::new("item", values_data.data_type().clone(), true)),
self.list_len,
))
.len(len)
.null_count(len - nulls)
.add_child_data(values_data)
.null_bit_buffer(null_bit_buffer)
.build();
Expand Down Expand Up @@ -1763,9 +1751,7 @@ impl StructBuilder {
.len(self.len)
.child_data(child_data);
if null_count > 0 {
builder = builder
.null_count(null_count)
.null_bit_buffer(null_bit_buffer);
builder = builder.null_bit_buffer(null_bit_buffer);
}

self.len = 0;
Expand Down Expand Up @@ -1793,8 +1779,6 @@ struct FieldData {
values_buffer: Option<MutableBuffer>,
/// The number of array slots represented by the buffer
slots: usize,
/// The number of null array slots in this child array
null_count: usize,
/// A builder for the bitmap if required (for Sparse Unions)
bitmap_builder: Option<BooleanBufferBuilder>,
}
Expand All @@ -1811,7 +1795,6 @@ impl FieldData {
data_type,
values_buffer: Some(MutableBuffer::new(1)),
slots: 0,
null_count: 0,
bitmap_builder,
}
}
Expand Down Expand Up @@ -1851,7 +1834,6 @@ impl FieldData {
let mutable_buffer = builder_to_mutable_buffer(builder);
self.values_buffer = Some(mutable_buffer);
self.slots += 1;
self.null_count += 1;
b.append(false);
};
Ok(())
Expand Down Expand Up @@ -2024,7 +2006,6 @@ impl UnionBuilder {
values_buffer,
slots,
bitmap_builder,
null_count,
},
) in self.fields.into_iter()
{
Expand All @@ -2033,7 +2014,6 @@ impl UnionBuilder {
.freeze();
let arr_data_builder = ArrayDataBuilder::new(data_type.clone())
.add_buffer(buffer)
.null_count(null_count)
.len(slots);
// .build();
let arr_data_ref = match bitmap_builder {
Expand Down Expand Up @@ -3136,15 +3116,13 @@ mod tests {

let expected_string_data = ArrayData::builder(DataType::Utf8)
.len(4)
.null_count(2)
.null_bit_buffer(Buffer::from(&[9_u8]))
.add_buffer(Buffer::from(&[0, 3, 3, 3, 7].to_byte_slice()))
.add_buffer(Buffer::from(b"joemark"))
.build();

let expected_int_data = ArrayData::builder(DataType::Int32)
.len(4)
.null_count(1)
.null_bit_buffer(Buffer::from(&[11_u8]))
.add_buffer(Buffer::from(&[1, 2, 0, 4].to_byte_slice()))
.build();
Expand Down
20 changes: 13 additions & 7 deletions rust/arrow/src/array/data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -286,12 +286,6 @@ impl ArrayDataBuilder {
self
}

/// Records `n` as the null count on this builder and returns the builder,
/// so the call can be chained with the other builder methods.
#[inline]
pub const fn null_count(mut self, n: usize) -> Self {
self.null_count = Some(n);
self
}

pub fn null_bit_buffer(mut self, buf: Buffer) -> Self {
self.null_bit_buffer = Some(buf);
self
Expand Down Expand Up @@ -373,9 +367,11 @@ mod tests {
let b1 = Buffer::from(&v[..]);
let arr_data = ArrayData::builder(DataType::Int32)
.len(20)
.null_count(10)
.offset(5)
.add_buffer(b1)
.null_bit_buffer(Buffer::from(vec![
0b01011111, 0b10110101, 0b01100011, 0b00011110,
]))
.add_child_data(child_arr_data.clone())
.build();

Expand Down Expand Up @@ -456,4 +452,14 @@ mod tests {
let float_data = ArrayData::builder(DataType::Float32).build();
assert_ne!(int_data, float_data);
}

#[test]
fn test_count_nulls() {
    // Two bytes: 3 set bits in the first, 6 in the second.
    let validity = Buffer::from(vec![0b00010110, 0b10011111]);

    // Whole buffer: 16 slots, 9 set bits, so 7 nulls are expected.
    assert_eq!(count_nulls(Some(&validity), 0, 16), 7);

    // Window of 8 slots starting at offset 4.
    // NOTE(review): presumably bits are numbered least-significant first,
    // which puts 5 set bits in this window — confirm against `count_nulls`.
    assert_eq!(count_nulls(Some(&validity), 4, 8), 3);
}
}
Loading