diff --git a/rust/arrow/src/array/array_binary.rs b/rust/arrow/src/array/array_binary.rs index db4097aee69..40c785b322c 100644 --- a/rust/arrow/src/array/array_binary.rs +++ b/rust/arrow/src/array/array_binary.rs @@ -140,9 +140,7 @@ impl GenericBinaryArray { .add_buffer(v.data_ref().buffers()[0].clone()) .add_buffer(v.data_ref().child_data()[0].buffers()[0].clone()); if let Some(bitmap) = v.data_ref().null_bitmap() { - builder = builder - .null_count(v.data_ref().null_count()) - .null_bit_buffer(bitmap.bits.clone()) + builder = builder.null_bit_buffer(bitmap.bits.clone()) } let data = builder.build(); @@ -453,9 +451,7 @@ impl From for FixedSizeBinaryArray { .len(v.len()) .add_buffer(v.data_ref().child_data()[0].buffers()[0].clone()); if let Some(bitmap) = v.data_ref().null_bitmap() { - builder = builder - .null_count(v.data_ref().null_count()) - .null_bit_buffer(bitmap.bits.clone()) + builder = builder.null_bit_buffer(bitmap.bits.clone()) } let data = builder.build(); @@ -572,9 +568,7 @@ impl DecimalArray { .len(v.len()) .add_buffer(v.data_ref().child_data()[0].buffers()[0].clone()); if let Some(bitmap) = v.data_ref().null_bitmap() { - builder = builder - .null_count(v.data_ref().null_count()) - .null_bit_buffer(bitmap.bits.clone()) + builder = builder.null_bit_buffer(bitmap.bits.clone()) } let data = builder.build(); diff --git a/rust/arrow/src/array/array_string.rs b/rust/arrow/src/array/array_string.rs index 5f871b8f595..5545fce3c45 100644 --- a/rust/arrow/src/array/array_string.rs +++ b/rust/arrow/src/array/array_string.rs @@ -116,9 +116,7 @@ impl GenericStringArray { .add_buffer(v.data_ref().buffers()[0].clone()) .add_buffer(v.data_ref().child_data()[0].buffers()[0].clone()); if let Some(bitmap) = v.data().null_bitmap() { - builder = builder - .null_count(v.data_ref().null_count()) - .null_bit_buffer(bitmap.bits.clone()) + builder = builder.null_bit_buffer(bitmap.bits.clone()) } let data = builder.build(); diff --git a/rust/arrow/src/array/array_struct.rs b/rust/arrow/src/array/array_struct.rs index a447837aa82..a5137cc1008 100644 --- a/rust/arrow/src/array/array_struct.rs +++ b/rust/arrow/src/array/array_struct.rs @@ -154,8 +154,7 @@ impl TryFrom> for StructArray { .len(len) .child_data(child_data); if let Some(null_buffer) = null { - let null_count = len - null_buffer.count_set_bits(); - builder = builder.null_count(null_count).null_bit_buffer(null_buffer); + builder = builder.null_bit_buffer(null_buffer); } Ok(StructArray::from(builder.build())) @@ -237,9 +236,9 @@ impl fmt::Debug for StructArray { } } -impl From<(Vec<(Field, ArrayRef)>, Buffer, usize)> for StructArray { - fn from(triple: (Vec<(Field, ArrayRef)>, Buffer, usize)) -> Self { - let (field_types, field_values): (Vec<_>, Vec<_>) = triple.0.into_iter().unzip(); +impl From<(Vec<(Field, ArrayRef)>, Buffer)> for StructArray { + fn from(pair: (Vec<(Field, ArrayRef)>, Buffer)) -> Self { + let (field_types, field_values): (Vec<_>, Vec<_>) = pair.0.into_iter().unzip(); // Check the length of the child arrays let length = field_values[0].len(); @@ -257,10 +256,9 @@ impl From<(Vec<(Field, ArrayRef)>, Buffer, usize)> for StructArray { } let data = ArrayData::builder(DataType::Struct(field_types)) - .null_bit_buffer(triple.1) + .null_bit_buffer(pair.1) .child_data(field_values.into_iter().map(|a| a.data()).collect()) .len(length) - .null_count(triple.2) .build(); Self::from(data) } @@ -358,7 +356,6 @@ mod tests { let expected_string_data = ArrayData::builder(DataType::Utf8) .len(4) - .null_count(2) .null_bit_buffer(Buffer::from(&[9_u8])) .add_buffer(Buffer::from(&[0, 3, 3, 3, 7].to_byte_slice())) .add_buffer(Buffer::from(b"joemark")) @@ -366,7 +363,6 @@ mod tests { let expected_int_data = ArrayData::builder(DataType::Int32) .len(4) - .null_count(1) .null_bit_buffer(Buffer::from(&[11_u8])) .add_buffer(Buffer::from(&[1, 2, 0, 4].to_byte_slice())) .build(); diff --git a/rust/arrow/src/array/array_union.rs b/rust/arrow/src/array/array_union.rs index ea42843589f..7bced12fa33 100644 --- a/rust/arrow/src/array/array_union.rs +++ b/rust/arrow/src/array/array_union.rs @@ -73,7 +73,9 @@ //! # Ok(()) //! # } //! ``` -use crate::array::{make_array, Array, ArrayData, ArrayDataRef, ArrayRef}; +use crate::array::{ + data::count_nulls, make_array, Array, ArrayData, ArrayDataRef, ArrayRef, +}; use crate::buffer::Buffer; use crate::datatypes::*; use crate::error::{ArrowError, Result}; @@ -118,7 +120,7 @@ impl UnionArray { type_ids: Buffer, value_offsets: Option, child_arrays: Vec<(Field, ArrayRef)>, - bitmap_data: Option<(Buffer, usize)>, + bitmap_data: Option, ) -> Self { let (field_types, field_values): (Vec<_>, Vec<_>) = child_arrays.into_iter().unzip(); @@ -127,8 +129,8 @@ impl UnionArray { .add_buffer(type_ids) .child_data(field_values.into_iter().map(|a| a.data()).collect()) .len(len); - if let Some((bitmap, null_count)) = bitmap_data { - builder = builder.null_bit_buffer(bitmap).null_count(null_count); + if let Some(bitmap) = bitmap_data { + builder = builder.null_bit_buffer(bitmap) } let data = match value_offsets { Some(b) => builder.add_buffer(b).build(), @@ -143,16 +145,8 @@ impl UnionArray { child_arrays: Vec<(Field, ArrayRef)>, bitmap: Option, ) -> Result { - let bitmap_data = bitmap.map(|b| { - let null_count = type_ids.len() - b.count_set_bits(); - (b, null_count) - }); - if let Some(b) = &value_offsets { - let nulls = match bitmap_data { - Some((_, n)) => n, - None => 0, - }; + let nulls = count_nulls(bitmap.as_ref(), 0, type_ids.len()); if ((type_ids.len() - nulls) * 4) != b.len() { return Err(ArrowError::InvalidArgumentError( "Type Ids and Offsets represent a different number of array slots." @@ -192,12 +186,7 @@ impl UnionArray { } } - Ok(Self::new( - type_ids, - value_offsets, - child_arrays, - bitmap_data, - )) + Ok(Self::new(type_ids, value_offsets, child_arrays, bitmap)) } /// Accesses the child array for `type_id`. diff --git a/rust/arrow/src/array/builder.rs b/rust/arrow/src/array/builder.rs index 3dff38be846..6cc765d5181 100644 --- a/rust/arrow/src/array/builder.rs +++ b/rust/arrow/src/array/builder.rs @@ -499,9 +499,7 @@ impl BooleanBuilder { .len(len) .add_buffer(self.values_builder.finish()); if null_count > 0 { - builder = builder - .null_count(null_count) - .null_bit_buffer(null_bit_buffer); + builder = builder.null_bit_buffer(null_bit_buffer); } let data = builder.build(); BooleanArray::from(data) @@ -648,9 +646,7 @@ impl PrimitiveBuilder { .len(len) .add_buffer(self.values_builder.finish()); if null_count > 0 { - builder = builder - .null_count(null_count) - .null_bit_buffer(null_bit_buffer); + builder = builder.null_bit_buffer(null_bit_buffer); } let data = builder.build(); PrimitiveArray::::from(data) @@ -669,9 +665,7 @@ impl PrimitiveBuilder { .len(len) .add_buffer(self.values_builder.finish()); if null_count > 0 { - builder = builder - .null_count(null_count) - .null_bit_buffer(null_bit_buffer); + builder = builder.null_bit_buffer(null_bit_buffer); } builder = builder.add_child_data(values.data()); DictionaryArray::::from(builder.build()) @@ -778,7 +772,6 @@ where let offset_buffer = self.offsets_builder.finish(); let null_bit_buffer = self.bitmap_builder.finish(); - let nulls = null_bit_buffer.count_set_bits(); self.offsets_builder.append(0); let data = ArrayData::builder(DataType::List(Box::new(Field::new( "item", @@ -786,7 +779,6 @@ where true, // TODO: find a consistent way of getting this )))) .len(len) - .null_count(len - nulls) .add_buffer(offset_buffer) .add_child_data(values_data) .null_bit_buffer(null_bit_buffer) @@ -896,7 +888,6 @@ where let offset_buffer = self.offsets_builder.finish(); let null_bit_buffer = self.bitmap_builder.finish(); - let nulls = null_bit_buffer.count_set_bits(); self.offsets_builder.append(0); let data = ArrayData::builder(DataType::LargeList(Box::new(Field::new( "item", @@ -904,7 +895,6 @@ where true, )))) .len(len) - .null_count(len - nulls) .add_buffer(offset_buffer) .add_child_data(values_data) .null_bit_buffer(null_bit_buffer) @@ -1027,13 +1017,11 @@ where } let null_bit_buffer = self.bitmap_builder.finish(); - let nulls = null_bit_buffer.count_set_bits(); let data = ArrayData::builder(DataType::FixedSizeList( Box::new(Field::new("item", values_data.data_type().clone(), true)), self.list_len, )) .len(len) - .null_count(len - nulls) .add_child_data(values_data) .null_bit_buffer(null_bit_buffer) .build(); @@ -1763,9 +1751,7 @@ impl StructBuilder { .len(self.len) .child_data(child_data); if null_count > 0 { - builder = builder - .null_count(null_count) - .null_bit_buffer(null_bit_buffer); + builder = builder.null_bit_buffer(null_bit_buffer); } self.len = 0; @@ -1793,8 +1779,6 @@ struct FieldData { values_buffer: Option, /// The number of array slots represented by the buffer slots: usize, - /// The number of null array slots in this child array - null_count: usize, /// A builder for the bitmap if required (for Sparse Unions) bitmap_builder: Option, } @@ -1811,7 +1795,6 @@ impl FieldData { data_type, values_buffer: Some(MutableBuffer::new(1)), slots: 0, - null_count: 0, bitmap_builder, } } @@ -1851,7 +1834,6 @@ impl FieldData { let mutable_buffer = builder_to_mutable_buffer(builder); self.values_buffer = Some(mutable_buffer); self.slots += 1; - self.null_count += 1; b.append(false); }; Ok(()) @@ -2024,7 +2006,6 @@ impl UnionBuilder { values_buffer, slots, bitmap_builder, - null_count, }, ) in self.fields.into_iter() { @@ -2033,7 +2014,6 @@ impl UnionBuilder { .freeze(); let arr_data_builder = ArrayDataBuilder::new(data_type.clone()) .add_buffer(buffer) - .null_count(null_count) .len(slots); // .build(); let arr_data_ref = match bitmap_builder { @@ -3136,7 +3116,6 @@ mod tests { let expected_string_data = ArrayData::builder(DataType::Utf8) .len(4) - .null_count(2) .null_bit_buffer(Buffer::from(&[9_u8])) .add_buffer(Buffer::from(&[0, 3, 3, 3, 7].to_byte_slice())) .add_buffer(Buffer::from(b"joemark")) @@ -3144,7 +3123,6 @@ mod tests { let expected_int_data = ArrayData::builder(DataType::Int32) .len(4) - .null_count(1) .null_bit_buffer(Buffer::from(&[11_u8])) .add_buffer(Buffer::from(&[1, 2, 0, 4].to_byte_slice())) .build(); diff --git a/rust/arrow/src/array/data.rs b/rust/arrow/src/array/data.rs index d634ed18d5c..caa93518694 100644 --- a/rust/arrow/src/array/data.rs +++ b/rust/arrow/src/array/data.rs @@ -286,12 +286,6 @@ impl ArrayDataBuilder { self } - #[inline] - pub const fn null_count(mut self, n: usize) -> Self { - self.null_count = Some(n); - self - } - pub fn null_bit_buffer(mut self, buf: Buffer) -> Self { self.null_bit_buffer = Some(buf); self @@ -373,9 +367,11 @@ mod tests { let b1 = Buffer::from(&v[..]); let arr_data = ArrayData::builder(DataType::Int32) .len(20) - .null_count(10) .offset(5) .add_buffer(b1) + .null_bit_buffer(Buffer::from(vec![ + 0b01011111, 0b10110101, 0b01100011, 0b00011110, + ])) .add_child_data(child_arr_data.clone()) .build(); @@ -456,4 +452,14 @@ mod tests { let float_data = ArrayData::builder(DataType::Float32).build(); assert_ne!(int_data, float_data); } + + #[test] + fn test_count_nulls() { + let null_buffer = Some(Buffer::from(vec![0b00010110, 0b10011111])); + let count = count_nulls(null_buffer.as_ref(), 0, 16); + assert_eq!(count, 7); + + let count = count_nulls(null_buffer.as_ref(), 4, 8); + assert_eq!(count, 3); + } } diff --git a/rust/arrow/src/array/equal/mod.rs b/rust/arrow/src/array/equal/mod.rs index 3574a8056c3..412d951da5b 100644 --- a/rust/arrow/src/array/equal/mod.rs +++ b/rust/arrow/src/array/equal/mod.rs @@ -946,7 +946,6 @@ mod tests { ])) .null_bit_buffer(Buffer::from(vec![0b00001011])) .len(5) - .null_count(2) .add_child_data(strings.data_ref().clone()) .add_child_data(ints.data_ref().clone()) .build(); @@ -958,7 +957,6 @@ mod tests { ])) .null_bit_buffer(Buffer::from(vec![0b00001011])) .len(5) - .null_count(2) .add_child_data(strings.data_ref().clone()) .add_child_data(ints_non_null.data_ref().clone()) .build(); @@ -974,7 +972,6 @@ mod tests { ])) .null_bit_buffer(Buffer::from(vec![0b00001011])) .len(5) - .null_count(2) .add_child_data(strings.data_ref().clone()) .add_child_data(c_ints_non_null.data_ref().clone()) .build(); @@ -990,7 +987,6 @@ mod tests { )])) .null_bit_buffer(Buffer::from(vec![0b00011110])) .len(5) - .null_count(1) .add_child_data(a.data_ref().clone()) .build(); let a = crate::array::make_array(a); @@ -1009,7 +1005,6 @@ mod tests { ])) .null_bit_buffer(Buffer::from(vec![0b00001011])) .len(5) - .null_count(2) .add_child_data(strings.data_ref().clone()) .add_child_data(ints_non_null.data_ref().clone()) .build(); @@ -1021,7 +1016,6 @@ mod tests { )])) .null_bit_buffer(Buffer::from(vec![0b00011110])) .len(5) - .null_count(1) .add_child_data(b) .build(); let b = crate::array::make_array(b); @@ -1054,7 +1048,6 @@ mod tests { )])) .null_bit_buffer(Buffer::from(vec![0b00001010])) .len(5) - .null_count(3) .add_child_data(strings1.data_ref().clone()) .build(); let a = crate::array::make_array(a); @@ -1066,7 +1059,6 @@ mod tests { )])) .null_bit_buffer(Buffer::from(vec![0b00001010])) .len(5) - .null_count(3) .add_child_data(strings2.data_ref().clone()) .build(); let b = crate::array::make_array(b); @@ -1088,7 +1080,6 @@ mod tests { )])) .null_bit_buffer(Buffer::from(vec![0b00001011])) .len(5) - .null_count(2) .add_child_data(strings3.data_ref().clone()) .build(); let c = crate::array::make_array(c); diff --git a/rust/arrow/src/compute/kernels/comparison.rs b/rust/arrow/src/compute/kernels/comparison.rs index f77ce57f76c..bc5dd315ad2 100644 --- a/rust/arrow/src/compute/kernels/comparison.rs +++ b/rust/arrow/src/compute/kernels/comparison.rs @@ -1029,7 +1029,6 @@ mod tests { let list_data = ArrayData::builder(list_data_type) .len(4) .add_buffer(value_offsets) - .null_count(1) .add_child_data(value_data) .null_bit_buffer(Buffer::from([0b00001011])) .build(); diff --git a/rust/arrow/src/compute/kernels/take.rs b/rust/arrow/src/compute/kernels/take.rs index c1d31f928ff..c85578068e3 100644 --- a/rust/arrow/src/compute/kernels/take.rs +++ b/rust/arrow/src/compute/kernels/take.rs @@ -559,7 +559,6 @@ where // create a new list with taken data and computed null information let list_data = ArrayDataBuilder::new(values.data_type().clone()) .len(indices.len()) - .null_count(null_count) .null_bit_buffer(null_buf.freeze()) .offset(0) .add_child_data(taken.data()) @@ -586,7 +585,6 @@ where let taken = take_impl::(values.values().as_ref(), &list_indices, None)?; // determine null count and null buffer, which are a function of `values` and `indices` - let mut null_count = 0; let num_bytes = bit_util::ceil(indices.len(), 8); let mut null_buf = MutableBuffer::new(num_bytes).with_bitset(num_bytes, true); let null_slice = null_buf.data_mut(); @@ -597,13 +595,11 @@ where })?; if !indices.is_valid(i) || values.is_null(index) { bit_util::unset_bit(null_slice, i); - null_count += 1; } } let list_data = ArrayDataBuilder::new(values.data_type().clone()) .len(indices.len()) - .null_count(null_count) .null_bit_buffer(null_buf.freeze()) .offset(0) .add_child_data(taken.data()) @@ -701,7 +697,6 @@ mod tests { field_types.push(Field::new("b", DataType::Int32, true)); let struct_array_data = ArrayData::builder(DataType::Struct(field_types)) .len(4) - .null_count(0) .add_child_data(boolean_data) .add_child_data(int_data) .build(); @@ -1009,7 +1004,6 @@ mod tests { // construct list array from the two let expected_list_data = ArrayData::builder(list_data_type) .len(5) - .null_count(1) // null buffer remains the same as only the indices have nulls .null_bit_buffer( index.data().null_bitmap().as_ref().unwrap().bits.clone(), @@ -1050,7 +1044,6 @@ mod tests { let list_data = ArrayData::builder(list_data_type.clone()) .len(4) .add_buffer(value_offsets) - .null_count(0) .null_bit_buffer(Buffer::from([0b10111101, 0b00000000])) .add_child_data(value_data) .build(); @@ -1083,7 +1076,6 @@ mod tests { // construct list array from the two let expected_list_data = ArrayData::builder(list_data_type) .len(5) - .null_count(1) // null buffer remains the same as only the indices have nulls .null_bit_buffer( index.data().null_bitmap().as_ref().unwrap().bits.clone(), @@ -1123,7 +1115,6 @@ mod tests { let list_data = ArrayData::builder(list_data_type.clone()) .len(4) .add_buffer(value_offsets) - .null_count(1) .null_bit_buffer(Buffer::from([0b01111101])) .add_child_data(value_data) .build(); @@ -1159,7 +1150,6 @@ mod tests { bit_util::set_bit(&mut null_bits, 4); let expected_list_data = ArrayData::builder(list_data_type) .len(5) - .null_count(2) // null buffer must be recalculated as both values and indices have nulls .null_bit_buffer(Buffer::from(null_bits)) .add_buffer(expected_offsets) @@ -1319,7 +1309,6 @@ mod tests { field_types.push(Field::new("b", DataType::Int32, true)); let struct_array_data = ArrayData::builder(DataType::Struct(field_types)) .len(5) - .null_count(0) .add_child_data(expected_bool_data) .add_child_data(expected_int_data) .build(); @@ -1350,7 +1339,6 @@ mod tests { let struct_array_data = ArrayData::builder(DataType::Struct(field_types)) .len(5) // TODO: see https://issues.apache.org/jira/browse/ARROW-5408 for why count != 2 - .null_count(0) .add_child_data(expected_bool_data) .add_child_data(expected_int_data) .build(); diff --git a/rust/arrow/src/compute/util.rs b/rust/arrow/src/compute/util.rs index 043b5a36533..36684d670fa 100644 --- a/rust/arrow/src/compute/util.rs +++ b/rust/arrow/src/compute/util.rs @@ -335,7 +335,6 @@ pub(super) mod tests { let list_data = ArrayData::builder(list_data_type) .len(list_len) - .null_count(list_null_count) .null_bit_buffer(list_bitmap.freeze()) .add_buffer(value_offsets) .add_child_data(value_data) @@ -401,7 +400,6 @@ pub(super) mod tests { let list_data = ArrayData::builder(list_data_type) .len(list_len) - .null_count(list_null_count) .null_bit_buffer(list_bitmap.freeze()) .add_child_data(child_data) .build(); diff --git a/rust/arrow/src/ipc/reader.rs b/rust/arrow/src/ipc/reader.rs index 809e7177210..5bd465b78eb 100644 --- a/rust/arrow/src/ipc/reader.rs +++ b/rust/arrow/src/ipc/reader.rs @@ -163,7 +163,6 @@ fn create_array( StructArray::from(( struct_arrays, null_buffer, - struct_node.null_count() as usize, )) } else { StructArray::from(struct_arrays) @@ -232,9 +231,7 @@ fn create_primitive_array( .buffers(buffers[1..3].to_vec()) .offset(0); if null_count > 0 { - builder = builder - .null_count(null_count) - .null_bit_buffer(buffers[0].clone()) + builder = builder.null_bit_buffer(buffers[0].clone()) } builder.build() } @@ -245,9 +242,7 @@ fn create_primitive_array( .buffers(buffers[1..2].to_vec()) .offset(0); if null_count > 0 { - builder = builder - .null_count(null_count) - .null_bit_buffer(buffers[0].clone()) + builder = builder.null_bit_buffer(buffers[0].clone()) } builder.build() } @@ -267,9 +262,7 @@ fn create_primitive_array( .buffers(buffers[1..].to_vec()) .offset(0); if null_count > 0 { - builder = builder - .null_count(null_count) - .null_bit_buffer(buffers[0].clone()) + builder = builder.null_bit_buffer(buffers[0].clone()) } let values = Arc::new(Int64Array::from(builder.build())) as ArrayRef; // this cast is infallible, the unwrap is safe @@ -281,9 +274,7 @@ fn create_primitive_array( .buffers(buffers[1..].to_vec()) .offset(0); if null_count > 0 { - builder = builder - .null_count(null_count) - .null_bit_buffer(buffers[0].clone()) + builder = builder.null_bit_buffer(buffers[0].clone()) } builder.build() } @@ -296,9 +287,7 @@ fn create_primitive_array( .buffers(buffers[1..].to_vec()) .offset(0); if null_count > 0 { - builder = builder - .null_count(null_count) - .null_bit_buffer(buffers[0].clone()) + builder = builder.null_bit_buffer(buffers[0].clone()) } let values = Arc::new(Float64Array::from(builder.build())) as ArrayRef; // this cast is infallible, the unwrap is safe @@ -310,9 +299,7 @@ fn create_primitive_array( .buffers(buffers[1..].to_vec()) .offset(0); if null_count > 0 { - builder = builder - .null_count(null_count) - .null_bit_buffer(buffers[0].clone()) + builder = builder.null_bit_buffer(buffers[0].clone()) } builder.build() } @@ -331,9 +318,7 @@ fn create_primitive_array( .buffers(buffers[1..].to_vec()) .offset(0); if null_count > 0 { - builder = builder - .null_count(null_count) - .null_bit_buffer(buffers[0].clone()) + builder = builder.null_bit_buffer(buffers[0].clone()) } builder.build() } @@ -344,9 +329,7 @@ fn create_primitive_array( .buffers(buffers[1..2].to_vec()) .offset(0); if null_count > 0 { - builder = builder - .null_count(null_count) - .null_bit_buffer(buffers[0].clone()) + builder = builder.null_bit_buffer(buffers[0].clone()) } builder.build() } @@ -372,9 +355,7 @@ fn create_list_array( .offset(0) .child_data(vec![child_array.data()]); if null_count > 0 { - builder = builder - .null_count(null_count) - .null_bit_buffer(buffers[0].clone()) + builder = builder.null_bit_buffer(buffers[0].clone()) } make_array(builder.build()) } else if let DataType::LargeList(_) = *data_type { @@ -385,9 +366,7 @@ fn create_list_array( .offset(0) .child_data(vec![child_array.data()]); if null_count > 0 { - builder = builder - .null_count(null_count) - .null_bit_buffer(buffers[0].clone()) + builder = builder.null_bit_buffer(buffers[0].clone()) } make_array(builder.build()) } else if let DataType::FixedSizeList(_, _) = *data_type { @@ -398,9 +377,7 @@ fn create_list_array( .offset(0) .child_data(vec![child_array.data()]); if null_count > 0 { - builder = builder - .null_count(null_count) - .null_bit_buffer(buffers[0].clone()) + builder = builder.null_bit_buffer(buffers[0].clone()) } make_array(builder.build()) } else { @@ -424,9 +401,7 @@ fn create_dictionary_array( .offset(0) .child_data(vec![value_array.data()]); if null_count > 0 { - builder = builder - .null_count(null_count) - .null_bit_buffer(buffers[0].clone()) + builder = builder.null_bit_buffer(buffers[0].clone()) } make_array(builder.build()) } else { diff --git a/rust/arrow/src/json/reader.rs b/rust/arrow/src/json/reader.rs index d8bbd86691d..d43b02cee98 100644 --- a/rust/arrow/src/json/reader.rs +++ b/rust/arrow/src/json/reader.rs @@ -1947,14 +1947,12 @@ mod tests { // build expected output let d = StringArray::from(vec![Some("text"), None, Some("text"), None]); let c = ArrayDataBuilder::new(c_field.data_type().clone()) - .null_count(2) .len(4) .add_child_data(d.data()) .null_bit_buffer(Buffer::from(vec![0b00000101])) .build(); let b = BooleanArray::from(vec![Some(true), Some(false), Some(true), None]); let a = ArrayDataBuilder::new(a_field.data_type().clone()) - .null_count(1) .len(4) .add_child_data(b.data()) .add_child_data(c) @@ -2010,7 +2008,6 @@ mod tests { None, ]); let c = ArrayDataBuilder::new(c_field.data_type().clone()) - .null_count(2) .len(7) .add_child_data(d.data()) .null_bit_buffer(Buffer::from(vec![0b00111011])) @@ -2031,7 +2028,6 @@ mod tests { .null_bit_buffer(Buffer::from(vec![0b00111111])) .build(); let a_list = ArrayDataBuilder::new(a_field.data_type().clone()) - .null_count(1) .len(5) .add_buffer(Buffer::from(vec![0i32, 2, 3, 6, 6, 6].to_byte_slice())) .add_child_data(a) diff --git a/rust/parquet/src/arrow/array_reader.rs b/rust/parquet/src/arrow/array_reader.rs index aae251b3486..3f1b4a85e48 100644 --- a/rust/parquet/src/arrow/array_reader.rs +++ b/rust/parquet/src/arrow/array_reader.rs @@ -932,15 +932,11 @@ impl ArrayReader for ListArrayReader { } let value_offsets = Buffer::from(&offsets.to_byte_slice()); - // null list has def_level = 0 - let null_count = def_levels.iter().filter(|x| x == &&0).count(); - let list_data = ArrayData::builder(self.get_data_type().clone()) .len(offsets.len() - 1) .add_buffer(value_offsets) .add_child_data(batch_values.data()) .null_bit_buffer(null_buf.freeze()) - .null_count(null_count) .offset(next_batch_array.offset()) .build(); @@ -1076,19 +1072,14 @@ impl ArrayReader for StructArrayReader { // calculate bitmap for current array let mut bitmap_builder = BooleanBufferBuilder::new(children_array_len); - let mut null_count = 0; for def_level in def_level_data { let not_null = *def_level >= self.struct_def_level; - if !not_null { - null_count += 1; - } bitmap_builder.append(not_null); } // Now we can build array data let array_data = ArrayDataBuilder::new(self.data_type.clone()) .len(children_array_len) - .null_count(null_count) .null_bit_buffer(bitmap_builder.finish()) .child_data( children_array