From ebdfead93c06c75d611fc21ff2dc728d29eec6c8 Mon Sep 17 00:00:00 2001 From: Felipe Oliveira Carvalho Date: Wed, 1 Mar 2023 14:29:26 -0300 Subject: [PATCH 01/11] Add skeleton of the new APIs for handling null checks correctly for all types Bonus: add ArrayData::IsValid() to make it consistent with Array and ArraySpan. --- cpp/src/arrow/array/array_base.cc | 4 + cpp/src/arrow/array/array_base.h | 37 ++++-- cpp/src/arrow/array/data.cc | 73 ++++++++++++ cpp/src/arrow/array/data.h | 189 ++++++++++++++++++++++++++++-- 4 files changed, 285 insertions(+), 18 deletions(-) diff --git a/cpp/src/arrow/array/array_base.cc b/cpp/src/arrow/array/array_base.cc index 1ccde5222f3..92c7d83c35c 100644 --- a/cpp/src/arrow/array/array_base.cc +++ b/cpp/src/arrow/array/array_base.cc @@ -52,6 +52,10 @@ class ExtensionArray; int64_t Array::null_count() const { return data_->GetNullCount(); } +int64_t Array::ComputeLogicalNullCount() const { + return data_->ComputeLogicalNullCount(); +} + namespace internal { struct ScalarFromArraySlotImpl { diff --git a/cpp/src/arrow/array/array_base.h b/cpp/src/arrow/array/array_base.h index 2333a0c06fb..48d65e30de8 100644 --- a/cpp/src/arrow/array/array_base.h +++ b/cpp/src/arrow/array/array_base.h @@ -55,18 +55,28 @@ class ARROW_EXPORT Array { virtual ~Array() = default; /// \brief Return true if value at index is null. Does not boundscheck - bool IsNull(int64_t i) const { - return null_bitmap_data_ != NULLPTR - ? !bit_util::GetBit(null_bitmap_data_, i + data_->offset) - : data_->null_count == data_->length; - } + bool IsNull(int64_t i) const { return !IsValid(i); } /// \brief Return true if value at index is valid (not null). Does not /// boundscheck bool IsValid(int64_t i) const { - return null_bitmap_data_ != NULLPTR - ? bit_util::GetBit(null_bitmap_data_, i + data_->offset) - : data_->null_count != data_->length; + if (null_bitmap_data_ != NULLPTR) { + return bit_util::GetBit(null_bitmap_data_, i + data_->offset); + } + // Dispatching with a few conditionals like this makes IsNull more + // efficient for how it is used in practice. Making IsNull virtual + // would add a vtable lookup to every call and prevent inlining + + // a potential inner-branch removal. + if (type_id() == Type::SPARSE_UNION) { + return !internal::IsNullSparseUnion(*data_, i); + } + if (type_id() == Type::DENSE_UNION) { + return !internal::IsNullDenseUnion(*data_, i); + } + if (type_id() == Type::RUN_END_ENCODED) { + return !internal::IsNullRunEndEncoded(*data_, i); + } + return data_->null_count != data_->length; } /// \brief Return a Scalar containing the value of this array at i @@ -85,6 +95,17 @@ class ARROW_EXPORT Array { /// function int64_t null_count() const; + /// \brief Computes the logical null count for arrays of all types including + /// those that do not have a validity bitmap like union and run-end encoded + /// arrays + /// + /// If the array has a validity bitmap, this function behaves the same as + /// null_count(). For types that have no validity bitmap, this function will + /// recompute the null count every time it is called. + /// + /// \see GetNullCount + int64_t ComputeLogicalNullCount() const; + std::shared_ptr type() const { return data_->type; } Type::type type_id() const { return data_->type->id(); } diff --git a/cpp/src/arrow/array/data.cc b/cpp/src/arrow/array/data.cc index e23facce639..20f180ce9f1 100644 --- a/cpp/src/arrow/array/data.cc +++ b/cpp/src/arrow/array/data.cc @@ -60,6 +60,35 @@ static inline void AdjustNonNullable(Type::type type_id, int64_t length, } } +namespace internal { + +bool IsNullSparseUnion(const ArrayData& data, int64_t i) { + // TODO(felipecrv): Implement IsNullSparseUnion + return data.null_count == data.length; // mimics the old behavior +} + +bool IsNullDenseUnion(const ArrayData& data, int64_t i) { + // TODO(felipecrv): Implement IsNullDenseUnion + return data.null_count == data.length; // mimics the old behavior +} + +bool IsNullRunEndEncoded(const ArrayData& data, int64_t i) { + // TODO(felipecrv): Implement IsNullDenseUnion + return data.null_count == data.length; // mimics the old behavior +} + +bool UnionMayHaveLogicalNulls(const ArrayData& data) { + // TODO(felipecrv): Implement UnionMayHaveLogicalNulls + return false; // mimics the old behavior +} + +bool RunEndEncodedMayHaveLogicalNulls(const ArrayData& data) { + // TODO(felipecrv): Implement RunEndEncodedMayHaveLogicalNulls + return false; // mimics the old behavior +} + +} // namespace internal + std::shared_ptr ArrayData::Make(std::shared_ptr type, int64_t length, std::vector> buffers, int64_t null_count, int64_t offset) { @@ -132,6 +161,13 @@ int64_t ArrayData::GetNullCount() const { return precomputed; } +int64_t ArrayData::ComputeLogicalNullCount() const { + if (this->buffers[0]) { + return GetNullCount(); + } + return ArraySpan(*this).ComputeLogicalNullCount(); +} + // ---------------------------------------------------------------------- // Methods for ArraySpan @@ -407,6 +443,18 @@ int64_t ArraySpan::GetNullCount() const { return precomputed; } +int64_t ArraySpan::ComputeLogicalNullCount() const { + const auto t = this->type->id(); + // TODO(felipecrv): implement logical null count for unions and REEs + if (t == Type::SPARSE_UNION || t == Type::DENSE_UNION) { + ; // mimic old behavior + } + if (t == Type::RUN_END_ENCODED) { + ; // mimic old behavior + } + return GetNullCount(); +} + int ArraySpan::num_buffers() const { return GetNumBuffers(*this->type); } std::shared_ptr ArraySpan::ToArrayData() const { @@ -445,6 +493,31 @@ std::shared_ptr ArraySpan::ToArray() const { return MakeArray(this->ToArrayData()); } +bool ArraySpan::IsNullSparseUnion(int64_t i) const { + // TODO(felipecrv): Implement IsNullSparseUnion + return null_count == length; // mimics the old behavior +} + +bool ArraySpan::IsNullDenseUnion(int64_t i) const { + // TODO(felipecrv): Implement IsNullDenseUnion + return null_count == length; // mimics the old behavior +} + +bool ArraySpan::IsNullRunEndEncoded(int64_t i) const { + // TODO(felipecrv): Implement IsNullRunEndEncoded + return null_count == length; // mimics the old behavior +} + +bool ArraySpan::UnionMayHaveLogicalNulls() const { + // TODO(felipecrv): Implement UnionMayHaveLogicalNulls + return false; // mimics the old behavior +} + +bool ArraySpan::RunEndEncodedMayHaveLogicalNulls() const { + // TODO(felipecrv): Implement RunEndEncodedMayHaveLogicalNulls + return false; // mimics the old behavior +} + // ---------------------------------------------------------------------- // Implement internal::GetArrayView diff --git a/cpp/src/arrow/array/data.h b/cpp/src/arrow/array/data.h index e024483f665..7ed75131306 100644 --- a/cpp/src/arrow/array/data.h +++ b/cpp/src/arrow/array/data.h @@ -33,6 +33,19 @@ namespace arrow { class Array; +struct ArrayData; + +namespace internal { +// ---------------------------------------------------------------------- +// Null handling for types without a validity bitmap + +bool IsNullSparseUnion(const ArrayData& data, int64_t i); +bool IsNullDenseUnion(const ArrayData& data, int64_t i); +bool IsNullRunEndEncoded(const ArrayData& data, int64_t i); + +bool UnionMayHaveLogicalNulls(const ArrayData& data); +bool RunEndEncodedMayHaveLogicalNulls(const ArrayData& data); +} // namespace internal // When slicing, we do not know the null count of the sliced range without // doing some computation. To avoid doing this eagerly, we set the null count @@ -167,9 +180,23 @@ struct ARROW_EXPORT ArrayData { std::shared_ptr Copy() const { return std::make_shared(*this); } - bool IsNull(int64_t i) const { - return ((buffers[0] != NULLPTR) ? !bit_util::GetBit(buffers[0]->data(), i + offset) - : null_count.load() == length); + bool IsNull(int64_t i) const { return !IsValid(i); } + + bool IsValid(int64_t i) const { + if (buffers[0] != NULLPTR) { + return bit_util::GetBit(buffers[0]->data(), i + offset); + } + const auto type = this->type->id(); + if (type == Type::SPARSE_UNION) { + return !internal::IsNullSparseUnion(*this, i); + } + if (type == Type::DENSE_UNION) { + return !internal::IsNullDenseUnion(*this, i); + } + if (type == Type::RUN_END_ENCODED) { + return !internal::IsNullRunEndEncoded(*this, i); + } + return null_count.load() != length; } // Access a buffer's data as a typed C pointer @@ -229,15 +256,88 @@ struct ARROW_EXPORT ArrayData { void SetNullCount(int64_t v) { null_count.store(v); } - /// \brief Return null count, or compute and set it if it's not known + /// \brief Return physical null count, or compute and set it if it's not known int64_t GetNullCount() const; + /// \brief Return true if the data has a validity bitmap and the physical null + /// count is known to be non-zero or not yet known. + /// + /// Note that this is not the same as MayHaveLogicalNulls, which also checks + /// for the presence of nulls in child data for types like unions and run-end + /// encoded types. + /// + /// \see HasValidityBitmap + /// \see MayHaveLogicalNulls bool MayHaveNulls() const { // If an ArrayData is slightly malformed it may have kUnknownNullCount set // but no buffer return null_count.load() != 0 && buffers[0] != NULLPTR; } + /// \brief Return true if the data has a validity bitmap + bool HasValidityBitmap() const { return buffers[0] != NULLPTR; } + + /// \brief Return true if the validity bitmap may have 0's in it, or if the + /// child arrays (in the case of types without a validity bitmap) may have + /// nulls + /// + /// This is not a drop-in replacement for MayHaveNulls, as historically + /// MayHaveNulls() has been used to check for the presence of a validity + /// bitmap that needs to be checked. + /// + /// Code that previously used MayHaveNulls() and then dealt with the validity + /// bitmap directly can be fixed to handle all types correctly without + /// performance degradation when handling most types by adopting + /// HasValidityBitmap and MayHaveLogicalNulls. + /// + /// BEFORE: + /// + /// uint8_t* validity = array.MayHaveNulls() ? array.buffers[0].data : NULLPTR; + /// for (int64_t i = 0; i < array.length; ++i) { + /// if (validity && !bit_util::GetBit(validity, i)) { + /// continue; // skip a NULL + /// } + /// ... + /// } + /// + /// AFTER: + /// + /// bool all_valid = !array.MayHaveLogicalNulls(); + /// uint8_t* validity = array.HasValidityBitmap() ? array.buffers[0].data : NULLPTR; + /// for (int64_t i = 0; i < array.length; ++i) { + /// bool is_valid = all_valid || + /// (validity && bit_util::GetBit(validity, i)) || + /// array.IsValid(i); + /// if (!is_valid) { + /// continue; // skip a NULL + /// } + /// ... + /// } + bool MayHaveLogicalNulls() const { + if (buffers[0] != NULLPTR) { + return null_count.load() != 0; + } + const auto t = type->id(); + if (t == Type::SPARSE_UNION || t == Type::DENSE_UNION) { + return internal::UnionMayHaveLogicalNulls(*this); + } + if (t == Type::RUN_END_ENCODED) { + return internal::RunEndEncodedMayHaveLogicalNulls(*this); + } + return false; + } + + /// \brief Computes the logical null count for arrays of all types including + /// those that do not have a validity bitmap like union and run-end encoded + /// arrays + /// + /// If the array has a validity bitmap, this function behaves the same as + /// GetNullCount. For types that have no validity bitmap, this function will + /// recompute the null count every time it is called. + /// + /// \see GetNullCount + int64_t ComputeLogicalNullCount() const; + std::shared_ptr type; int64_t length = 0; mutable std::atomic null_count{0}; @@ -329,14 +429,26 @@ struct ARROW_EXPORT ArraySpan { return GetValues(i, this->offset); } + inline bool IsNull(int64_t i) const { return !IsValid(i); } + inline bool IsValid(int64_t i) const { - return ((this->buffers[0].data != NULLPTR) - ? bit_util::GetBit(this->buffers[0].data, i + this->offset) - : this->null_count != this->length); + if (this->buffers[0].data != NULLPTR) { + return bit_util::GetBit(this->buffers[0].data, i + this->offset); + } else { + const auto type = this->type->id(); + if (type == Type::SPARSE_UNION) { + return !IsNullSparseUnion(i); + } + if (type == Type::DENSE_UNION) { + return !IsNullDenseUnion(i); + } + if (type == Type::RUN_END_ENCODED) { + return !IsNullRunEndEncoded(i); + } + return this->null_count != this->length; + } } - inline bool IsNull(int64_t i) const { return !IsValid(i); } - std::shared_ptr ToArrayData() const; std::shared_ptr ToArray() const; @@ -363,14 +475,71 @@ struct ARROW_EXPORT ArraySpan { } } - /// \brief Return null count, or compute and set it if it's not known + /// \brief Return physical null count, or compute and set it if it's not known int64_t GetNullCount() const; + /// \brief Return true if the array has a validity bitmap and the physical null + /// count is known to be non-zero or not yet known + /// + /// Note that this is not the same as MayHaveLogicalNulls, which also checks + /// for the presence of nulls in child data for types like unions and run-end + /// encoded types. + /// + /// \see HasValidityBitmap + /// \see MayHaveLogicalNulls bool MayHaveNulls() const { // If an ArrayData is slightly malformed it may have kUnknownNullCount set // but no buffer return null_count != 0 && buffers[0].data != NULLPTR; } + + /// \brief Return true if the array has a validity bitmap + bool HasValidityBitmap() const { return buffers[0].data != NULLPTR; } + + /// \brief Return true if the validity bitmap may have 0's in it, or if the + /// child arrays (in the case of types without a validity bitmap) may have + /// nulls + /// + /// \see ArrayData::MayHaveLogicalNulls + bool MayHaveLogicalNulls() const { + if (buffers[0].data != NULLPTR) { + return null_count != 0; + } + const auto t = type->id(); + if (t == Type::SPARSE_UNION || t == Type::DENSE_UNION) { + return UnionMayHaveLogicalNulls(); + } + if (t == Type::RUN_END_ENCODED) { + return RunEndEncodedMayHaveLogicalNulls(); + } + return false; + } + + /// \brief Compute the logical null count for arrays of all types including + /// those that do not have a validity bitmap like union and run-end encoded + /// arrays + /// + /// If the array has a validity bitmap, this function behaves the same as + /// GetNullCount. For types that have no validity bitmap, this function will + /// recompute the logical null count every time it is called. + /// + /// \see GetNullCount + int64_t ComputeLogicalNullCount() const; + + private: + bool IsNullSparseUnion(int64_t i) const; + bool IsNullDenseUnion(int64_t i) const; + + /// \brief Return true if the value at logical index i is null + /// + /// This function uses binary-search, so it has a O(log N) cost. + /// Iterating over the whole array and calling IsNull is O(N log N), so + /// for better performance it is recommended to use a + /// ree_util::RunEndEncodedArraySpan to iterate run by run instead. + bool IsNullRunEndEncoded(int64_t i) const; + + bool UnionMayHaveLogicalNulls() const; + bool RunEndEncodedMayHaveLogicalNulls() const; }; namespace internal { From 0be5470d5fdbb2f762752bb5539b51f7aac79eb8 Mon Sep 17 00:00:00 2001 From: Felipe Oliveira Carvalho Date: Wed, 1 Mar 2023 23:29:05 -0300 Subject: [PATCH 02/11] Add ree_util::LogicalNullCount(span) --- cpp/src/arrow/util/ree_util.cc | 36 ++++++++++++++++++++++++++++++++++ cpp/src/arrow/util/ree_util.h | 3 +++ 2 files changed, 39 insertions(+) diff --git a/cpp/src/arrow/util/ree_util.cc b/cpp/src/arrow/util/ree_util.cc index 9324a1adbca..11da64313a8 100644 --- a/cpp/src/arrow/util/ree_util.cc +++ b/cpp/src/arrow/util/ree_util.cc @@ -19,12 +19,48 @@ #include #include "arrow/builder.h" +#include "arrow/util/bit_util.h" #include "arrow/util/logging.h" #include "arrow/util/ree_util.h" namespace arrow { namespace ree_util { +namespace { + +template +int64_t LogicalNullCount(const ArraySpan& span) { + const auto& values = ValuesArray(span); + const auto& values_bitmap = values.buffers[0].data; + int64_t null_count = 0; + + RunEndEncodedArraySpan ree_span(span); + auto end = ree_span.end(); + for (auto it = ree_span.begin(); it != end; ++it) { + const bool is_null = + values_bitmap && + !bit_util::GetBit(values_bitmap, values.offset + it.index_into_array()); + if (is_null) { + null_count += it.run_length(); + } + } + return null_count; +} + +} // namespace + +int64_t LogicalNullCount(const ArraySpan& span) { + const auto type_id = RunEndsArray(span).type->id(); + if (type_id == Type::INT16) { + return LogicalNullCount(span); + } + if (type_id == Type::INT32) { + return LogicalNullCount(span); + } + DCHECK_EQ(type_id, Type::INT64); + return LogicalNullCount(span); +} + int64_t FindPhysicalIndex(const ArraySpan& span, int64_t i, int64_t absolute_offset) { const auto type_id = RunEndsArray(span).type->id(); if (type_id == Type::INT16) { diff --git a/cpp/src/arrow/util/ree_util.h b/cpp/src/arrow/util/ree_util.h index 1eb4d5a4db2..83d828bbe53 100644 --- a/cpp/src/arrow/util/ree_util.h +++ b/cpp/src/arrow/util/ree_util.h @@ -53,6 +53,9 @@ Status ValidateRunEndEncodedChildren(const RunEndEncodedType& type, const std::shared_ptr& values_data, int64_t null_count, int64_t logical_offset); +/// \brief Compute the logical null count of an REE array +int64_t LogicalNullCount(const ArraySpan& span); + namespace internal { /// \brief Uses binary-search to find the physical offset given a logical offset From 111ad6c5d7cec4a23662dfb7dfabfc7aa92146a6 Mon Sep 17 00:00:00 2001 From: Felipe Oliveira Carvalho Date: Thu, 2 Mar 2023 17:50:57 -0300 Subject: [PATCH 03/11] Add union_util.cc/h with functions for calculating logical null count --- cpp/src/arrow/CMakeLists.txt | 1 + cpp/src/arrow/util/union_util.cc | 58 ++++++++++++++++++++++++++++++++ cpp/src/arrow/util/union_util.h | 31 +++++++++++++++++ 3 files changed, 90 insertions(+) create mode 100644 cpp/src/arrow/util/union_util.cc create mode 100644 cpp/src/arrow/util/union_util.h diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt index 143fb13ddcc..22b2bf6e443 100644 --- a/cpp/src/arrow/CMakeLists.txt +++ b/cpp/src/arrow/CMakeLists.txt @@ -230,6 +230,7 @@ set(ARROW_SRCS util/time.cc util/tracing.cc util/trie.cc + util/union_util.cc util/unreachable.cc util/uri.cc util/utf8.cc diff --git a/cpp/src/arrow/util/union_util.cc b/cpp/src/arrow/util/union_util.cc new file mode 100644 index 00000000000..ca3ecf28644 --- /dev/null +++ b/cpp/src/arrow/util/union_util.cc @@ -0,0 +1,58 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "arrow/util/union_util.h" + +#include + +#include "arrow/array/data.h" +#include "arrow/util/checked_cast.h" +#include "arrow/util/logging.h" + +namespace arrow::union_util { + +int64_t LogicalSparseUnionNullCount(const ArraySpan& span) { + const auto* sparse_union_type = + internal::checked_cast(span.type); + DCHECK_LE(span.child_data.size(), 128); + + const int8_t* types = span.GetValues(1); // NOLINT + int64_t null_count = 0; + for (int64_t i = 0; i < span.length; i++) { + const int8_t child_id = sparse_union_type->child_ids()[types[span.offset + i]]; + + null_count += span.child_data[child_id].IsNull(i); + } + return null_count; +} + +int64_t LogicalDenseUnionNullCount(const ArraySpan& span) { + const auto* dense_union_type = internal::checked_cast(span.type); + DCHECK_LE(span.child_data.size(), 128); + + const int8_t* types = span.GetValues(1); // NOLINT + const int32_t* offsets = span.GetValues(2); // NOLINT + int64_t null_count = 0; + for (int64_t i = 0; i < span.length; i++) { + const int8_t child_id = dense_union_type->child_ids()[types[span.offset + i]]; + const int32_t offset = offsets[span.offset + i]; + null_count += span.child_data[child_id].IsNull(offset); + } + return null_count; +} + +} // namespace arrow::union_util diff --git a/cpp/src/arrow/util/union_util.h b/cpp/src/arrow/util/union_util.h new file mode 100644 index 00000000000..0f30d5a3278 --- /dev/null +++ b/cpp/src/arrow/util/union_util.h @@ -0,0 +1,31 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include "arrow/array/data.h" + +namespace arrow { +namespace union_util { + +/// \brief Compute the number of of logical nulls in a sparse union array +int64_t LogicalSparseUnionNullCount(const ArraySpan& span); + +/// \brief Compute the number of of logical nulls in a dense union array +int64_t LogicalDenseUnionNullCount(const ArraySpan& span); + +} // namespace union_util +} // namespace arrow From b07f9a2aa3c22f857c960599227f76466509e5d3 Mon Sep 17 00:00:00 2001 From: Felipe Oliveira Carvalho Date: Wed, 1 Mar 2023 14:10:28 -0300 Subject: [PATCH 04/11] Adopt HasValidityBitmap/MayHaveLogicalNulls --- cpp/src/arrow/array/builder_nested.h | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/cpp/src/arrow/array/builder_nested.h b/cpp/src/arrow/array/builder_nested.h index 3e9328bfdf0..74f7c04d6ad 100644 --- a/cpp/src/arrow/array/builder_nested.h +++ b/cpp/src/arrow/array/builder_nested.h @@ -131,9 +131,13 @@ class BaseListBuilder : public ArrayBuilder { Status AppendArraySlice(const ArraySpan& array, int64_t offset, int64_t length) override { const offset_type* offsets = array.GetValues(1); - const uint8_t* validity = array.MayHaveNulls() ? array.buffers[0].data : NULLPTR; + const bool all_valid = !array.MayHaveLogicalNulls(); + const uint8_t* validity = array.HasValidityBitmap() ? array.buffers[0].data : NULLPTR; for (int64_t row = offset; row < offset + length; row++) { - if (!validity || bit_util::GetBit(validity, array.offset + row)) { + const bool is_valid = + all_valid || (validity && bit_util::GetBit(validity, array.offset + row)) || + array.IsValid(row); + if (is_valid) { ARROW_RETURN_NOT_OK(Append()); int64_t slot_length = offsets[row + 1] - offsets[row]; ARROW_RETURN_NOT_OK(value_builder_->AppendArraySlice(array.child_data[0], @@ -301,9 +305,13 @@ class ARROW_EXPORT MapBuilder : public ArrayBuilder { Status AppendArraySlice(const ArraySpan& array, int64_t offset, int64_t length) override { const int32_t* offsets = array.GetValues(1); - const uint8_t* validity = array.MayHaveNulls() ? array.buffers[0].data : NULLPTR; + const bool all_valid = !array.MayHaveLogicalNulls(); + const uint8_t* validity = array.HasValidityBitmap() ? array.buffers[0].data : NULLPTR; for (int64_t row = offset; row < offset + length; row++) { - if (!validity || bit_util::GetBit(validity, array.offset + row)) { + const bool is_valid = + all_valid || (validity && bit_util::GetBit(validity, array.offset + row)) || + array.IsValid(row); + if (is_valid) { ARROW_RETURN_NOT_OK(Append()); const int64_t slot_length = offsets[row + 1] - offsets[row]; // Add together the inner StructArray offset to the Map/List offset From a9b31943555d5ea3cd8a8ca01b401c06d03e76cc Mon Sep 17 00:00:00 2001 From: Felipe Oliveira Carvalho Date: Wed, 1 Mar 2023 14:33:10 -0300 Subject: [PATCH 05/11] Add implementation that changes how nulls are checked on unions and REEs --- cpp/src/arrow/array/data.cc | 68 ++++++++++++++++++++++++------------- cpp/src/arrow/array/data.h | 2 ++ 2 files changed, 46 insertions(+), 24 deletions(-) diff --git a/cpp/src/arrow/array/data.cc b/cpp/src/arrow/array/data.cc index 20f180ce9f1..18c9eb720a0 100644 --- a/cpp/src/arrow/array/data.cc +++ b/cpp/src/arrow/array/data.cc @@ -34,7 +34,9 @@ #include "arrow/util/bitmap_ops.h" #include "arrow/util/logging.h" #include "arrow/util/macros.h" +#include "arrow/util/ree_util.h" #include "arrow/util/slice_util_internal.h" +#include "arrow/util/union_util.h" namespace arrow { @@ -63,28 +65,31 @@ static inline void AdjustNonNullable(Type::type type_id, int64_t length, namespace internal { bool IsNullSparseUnion(const ArrayData& data, int64_t i) { - // TODO(felipecrv): Implement IsNullSparseUnion - return data.null_count == data.length; // mimics the old behavior + auto* union_type = checked_cast(data.type.get()); + const auto* types = reinterpret_cast(data.buffers[1]->data()); + const int child_id = union_type->child_ids()[types[data.offset + i]]; + return data.child_data[child_id]->IsNull(i); } bool IsNullDenseUnion(const ArrayData& data, int64_t i) { - // TODO(felipecrv): Implement IsNullDenseUnion - return data.null_count == data.length; // mimics the old behavior + auto* union_type = checked_cast(data.type.get()); + const auto* types = reinterpret_cast(data.buffers[1]->data()); + const int child_id = union_type->child_ids()[types[data.offset + i]]; + const auto* offsets = reinterpret_cast(data.buffers[2]->data()); + const int64_t child_offset = offsets[data.offset + i]; + return data.child_data[child_id]->IsNull(child_offset); } bool IsNullRunEndEncoded(const ArrayData& data, int64_t i) { - // TODO(felipecrv): Implement IsNullDenseUnion - return data.null_count == data.length; // mimics the old behavior + return ArraySpan(data).IsNullRunEndEncoded(i); } bool UnionMayHaveLogicalNulls(const ArrayData& data) { - // TODO(felipecrv): Implement UnionMayHaveLogicalNulls - return false; // mimics the old behavior + return ArraySpan(data).MayHaveLogicalNulls(); } bool RunEndEncodedMayHaveLogicalNulls(const ArrayData& data) { - // TODO(felipecrv): Implement RunEndEncodedMayHaveLogicalNulls - return false; // mimics the old behavior + return ArraySpan(data).MayHaveLogicalNulls(); } } // namespace internal @@ -445,12 +450,14 @@ int64_t ArraySpan::GetNullCount() const { int64_t ArraySpan::ComputeLogicalNullCount() const { const auto t = this->type->id(); - // TODO(felipecrv): implement logical null count for unions and REEs - if (t == Type::SPARSE_UNION || t == Type::DENSE_UNION) { - ; // mimic old behavior + if (t == Type::SPARSE_UNION) { + return union_util::LogicalSparseUnionNullCount(*this); + } + if (t == Type::DENSE_UNION) { + return union_util::LogicalDenseUnionNullCount(*this); } if (t == Type::RUN_END_ENCODED) { - ; // mimic old behavior + return ree_util::LogicalNullCount(*this); } return GetNullCount(); } @@ -494,28 +501,41 @@ std::shared_ptr ArraySpan::ToArray() const { } bool ArraySpan::IsNullSparseUnion(int64_t i) const { - // TODO(felipecrv): Implement IsNullSparseUnion - return null_count == length; // mimics the old behavior + auto* union_type = checked_cast(this->type); + const auto* types = reinterpret_cast(this->buffers[1].data); + const int child_id = union_type->child_ids()[types[this->offset + i]]; + return this->child_data[child_id].IsNull(i); } bool ArraySpan::IsNullDenseUnion(int64_t i) const { - // TODO(felipecrv): Implement IsNullDenseUnion - return null_count == length; // mimics the old behavior + auto* union_type = checked_cast(this->type); + const auto* types = reinterpret_cast(this->buffers[1].data); + const auto* offsets = reinterpret_cast(this->buffers[2].data); + const int64_t child_id = union_type->child_ids()[types[this->offset + i]]; + const int64_t child_offset = offsets[this->offset + i]; + return this->child_data[child_id].IsNull(child_offset); } bool ArraySpan::IsNullRunEndEncoded(int64_t i) const { - // TODO(felipecrv): Implement IsNullRunEndEncoded - return null_count == length; // mimics the old behavior + const auto& values = ree_util::ValuesArray(*this); + if (values.MayHaveLogicalNulls()) { + const int64_t physical_offset = ree_util::FindPhysicalIndex(*this, i, this->offset); + return ree_util::ValuesArray(*this).IsNull(physical_offset); + } + return false; } bool ArraySpan::UnionMayHaveLogicalNulls() const { - // TODO(felipecrv): Implement UnionMayHaveLogicalNulls - return false; // mimics the old behavior + for (auto& child : this->child_data) { + if (child.MayHaveLogicalNulls()) { + return true; + } + } + return false; } bool ArraySpan::RunEndEncodedMayHaveLogicalNulls() const { - // TODO(felipecrv): Implement RunEndEncodedMayHaveLogicalNulls - return false; // mimics the old behavior + return ree_util::ValuesArray(*this).MayHaveLogicalNulls(); } // ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/array/data.h b/cpp/src/arrow/array/data.h index 7ed75131306..8d384bca413 100644 --- a/cpp/src/arrow/array/data.h +++ b/cpp/src/arrow/array/data.h @@ -527,6 +527,8 @@ struct ARROW_EXPORT ArraySpan { int64_t ComputeLogicalNullCount() const; private: + friend bool internal::IsNullRunEndEncoded(const ArrayData& span, int64_t i); + bool IsNullSparseUnion(int64_t i) const; bool IsNullDenseUnion(int64_t i) const; From ce5651415305e819c75279051e3707957fd1a5e5 Mon Sep 17 00:00:00 2001 From: Felipe Oliveira Carvalho Date: Wed, 1 Mar 2023 21:26:55 -0300 Subject: [PATCH 06/11] Fix ArrayTest to reflect changes in IsNull/IsValid and add more checks - Add REE to TestNullCount tests - Test new null handling functions in TestMakeArrayOfNull --- cpp/src/arrow/array/array_base.cc | 4 ++ cpp/src/arrow/array/array_test.cc | 88 ++++++++++++++++++++++++------- 2 files changed, 74 insertions(+), 18 deletions(-) diff --git a/cpp/src/arrow/array/array_base.cc b/cpp/src/arrow/array/array_base.cc index 92c7d83c35c..f7b8d7954e1 100644 --- a/cpp/src/arrow/array/array_base.cc +++ b/cpp/src/arrow/array/array_base.cc @@ -179,6 +179,10 @@ struct ScalarFromArraySlotImpl { array_.length()); } + // Skip checking for nulls in RUN_END_ENCODED arrays to avoid potentially + // making two O(log n) searches for the physical index of the slot -- one + // here and another in Visit(const RunEndEncodedArray&) in case the values + // is not null. if (array_.type()->id() != Type::RUN_END_ENCODED && array_.IsNull(index_)) { auto null = MakeNullScalar(array_.type()); if (is_dictionary(array_.type()->id())) { diff --git a/cpp/src/arrow/array/array_test.cc b/cpp/src/arrow/array/array_test.cc index d18b16ba78d..d030460ff05 100644 --- a/cpp/src/arrow/array/array_test.cc +++ b/cpp/src/arrow/array/array_test.cc @@ -37,6 +37,7 @@ #include "arrow/array/builder_binary.h" #include "arrow/array/builder_decimal.h" #include "arrow/array/builder_dict.h" +#include "arrow/array/builder_run_end.h" #include "arrow/array/builder_time.h" #include "arrow/array/data.h" #include "arrow/array/util.h" @@ -78,20 +79,58 @@ class TestArray : public ::testing::Test { MemoryPool* pool_; }; +template +std::shared_ptr MakeScalar(typename ScalarType::ValueType value) { + return std::make_shared(value); +} + TEST_F(TestArray, TestNullCount) { // These are placeholders auto data = std::make_shared(nullptr, 0); auto null_bitmap = std::make_shared(nullptr, 0); - std::unique_ptr arr(new Int32Array(100, data, null_bitmap, 10)); + std::shared_ptr arr(new Int32Array(100, data, null_bitmap, 10)); + ASSERT_EQ(10, arr->ComputeLogicalNullCount()); ASSERT_EQ(10, arr->null_count()); + ASSERT_TRUE(arr->data()->MayHaveNulls()); + ASSERT_TRUE(arr->data()->MayHaveLogicalNulls()); - std::unique_ptr arr_no_nulls(new Int32Array(100, data)); + std::shared_ptr arr_no_nulls(new Int32Array(100, data)); + ASSERT_EQ(0, arr_no_nulls->ComputeLogicalNullCount()); ASSERT_EQ(0, arr_no_nulls->null_count()); + ASSERT_FALSE(arr_no_nulls->data()->MayHaveNulls()); + ASSERT_FALSE(arr_no_nulls->data()->MayHaveLogicalNulls()); - std::unique_ptr arr_default_null_count( + std::shared_ptr arr_default_null_count( new Int32Array(100, data, null_bitmap)); ASSERT_EQ(kUnknownNullCount, arr_default_null_count->data()->null_count); + ASSERT_TRUE(arr_default_null_count->data()->MayHaveNulls()); + ASSERT_TRUE(arr_default_null_count->data()->MayHaveLogicalNulls()); + + RunEndEncodedBuilder ree_builder(pool_, std::make_shared(pool_), + std::make_shared(pool_), + run_end_encoded(int32(), int32())); + ASSERT_OK(ree_builder.AppendScalar(*MakeScalar(2), 2)); + ASSERT_OK(ree_builder.AppendNull()); + ASSERT_OK(ree_builder.AppendScalar(*MakeScalar(4), 3)); + ASSERT_OK(ree_builder.AppendNulls(2)); + ASSERT_OK(ree_builder.AppendScalar(*MakeScalar(8), 5)); + ASSERT_OK(ree_builder.AppendNulls(7)); + ASSERT_OK_AND_ASSIGN(auto ree, ree_builder.Finish()); + + ASSERT_EQ(0, ree->null_count()); + ASSERT_EQ(10, ree->ComputeLogicalNullCount()); + ASSERT_FALSE(ree->data()->MayHaveNulls()); + ASSERT_TRUE(ree->data()->MayHaveLogicalNulls()); + + ASSERT_OK(ree_builder.AppendScalar(*MakeScalar(2), 2)); + ASSERT_OK(ree_builder.AppendScalar(*MakeScalar(4), 3)); + ASSERT_OK(ree_builder.AppendScalar(*MakeScalar(8), 5)); + ASSERT_OK_AND_ASSIGN(auto ree_no_nulls, ree_builder.Finish()); + ASSERT_EQ(0, ree_no_nulls->null_count()); + ASSERT_EQ(0, ree_no_nulls->ComputeLogicalNullCount()); + ASSERT_FALSE(ree_no_nulls->data()->MayHaveNulls()); + ASSERT_FALSE(ree_no_nulls->data()->MayHaveLogicalNulls()); } TEST_F(TestArray, TestSlicePreservesAllNullCount) { @@ -377,20 +416,23 @@ TEST_F(TestArray, TestMakeArrayOfNull) { ASSERT_EQ(array->length(), length); if (is_union(type->id())) { ASSERT_EQ(array->null_count(), 0); + ASSERT_EQ(array->ComputeLogicalNullCount(), length); const auto& union_array = checked_cast(*array); for (int i = 0; i < union_array.num_fields(); ++i) { ASSERT_EQ(union_array.field(i)->null_count(), union_array.field(i)->length()); } } else if (type->id() == Type::RUN_END_ENCODED) { ASSERT_EQ(array->null_count(), 0); + ASSERT_EQ(array->ComputeLogicalNullCount(), length); const auto& ree_array = checked_cast(*array); ASSERT_EQ(ree_array.values()->null_count(), ree_array.values()->length()); } else { ASSERT_EQ(array->null_count(), length); - for (int64_t i = 0; i < length; ++i) { - ASSERT_TRUE(array->IsNull(i)); - ASSERT_FALSE(array->IsValid(i)); - } + ASSERT_EQ(array->ComputeLogicalNullCount(), length); + } + for (int64_t i = 0; i < length; ++i) { + ASSERT_TRUE(array->IsNull(i)); + ASSERT_FALSE(array->IsValid(i)); } } } @@ -482,12 +524,14 @@ void AssertAppendScalar(MemoryPool* pool, const std::shared_ptr& scalar) std::unique_ptr builder; auto null_scalar = MakeNullScalar(scalar->type); ASSERT_OK(MakeBuilderExactIndex(pool, scalar->type, &builder)); - ASSERT_OK(builder->AppendScalar(*scalar)); - ASSERT_OK(builder->AppendScalar(*scalar)); - ASSERT_OK(builder->AppendScalar(*null_scalar)); - ASSERT_OK(builder->AppendScalars({scalar, null_scalar})); - ASSERT_OK(builder->AppendScalar(*scalar, /*n_repeats=*/2)); - ASSERT_OK(builder->AppendScalar(*null_scalar, /*n_repeats=*/2)); + ASSERT_OK(builder->AppendScalar(*scalar)); // [0] = scalar + ASSERT_OK(builder->AppendScalar(*scalar)); // [1] = scalar + ASSERT_OK(builder->AppendScalar(*null_scalar)); // [2] = NULL + ASSERT_OK(builder->AppendScalars({scalar, null_scalar})); // [3, 4] = {scalar, NULL} + ASSERT_OK( + builder->AppendScalar(*scalar, /*n_repeats=*/2)); // [5, 6] = {scalar, scalar} + ASSERT_OK( + builder->AppendScalar(*null_scalar, /*n_repeats=*/2)); // [7, 8] = {NULL, NULL} std::shared_ptr out; FinishAndCheckPadding(builder.get(), &out); @@ -495,22 +539,30 @@ void AssertAppendScalar(MemoryPool* pool, const std::shared_ptr& scalar) AssertTypeEqual(scalar->type, out->type()); ASSERT_EQ(out->length(), 9); - const bool can_check_nulls = internal::HasValidityBitmap(out->type()->id()); + auto out_type_id = out->type()->id(); + const bool has_validity = internal::HasValidityBitmap(out_type_id); // For a dictionary builder, the output dictionary won't necessarily be the same - const bool can_check_values = !is_dictionary(out->type()->id()); + const bool can_check_values = !is_dictionary(out_type_id); - if (can_check_nulls) { + if (has_validity) { ASSERT_EQ(out->null_count(), 4); + } else { + ASSERT_EQ(out->null_count(), 0); + } + if (scalar->is_valid) { + ASSERT_EQ(out->ComputeLogicalNullCount(), 4); + } else { + ASSERT_EQ(out->ComputeLogicalNullCount(), 9); } for (const auto index : {0, 1, 3, 5, 6}) { - ASSERT_FALSE(out->IsNull(index)); + ASSERT_FALSE(out->IsNull(index) && scalar->is_valid); ASSERT_OK_AND_ASSIGN(auto scalar_i, out->GetScalar(index)); ASSERT_OK(scalar_i->ValidateFull()); if (can_check_values) AssertScalarsEqual(*scalar, *scalar_i, /*verbose=*/true); } for (const auto index : {2, 4, 7, 8}) { - ASSERT_EQ(out->IsNull(index), can_check_nulls); + ASSERT_TRUE(out->IsNull(index)); ASSERT_OK_AND_ASSIGN(auto scalar_i, out->GetScalar(index)); ASSERT_OK(scalar_i->ValidateFull()); AssertScalarsEqual(*null_scalar, *scalar_i, /*verbose=*/true); From 8f07725aacc2d5af5d99d255af718e219ef803f2 Mon Sep 17 00:00:00 2001 From: Felipe Oliveira Carvalho Date: Mon, 6 Mar 2023 11:39:51 -0300 Subject: [PATCH 07/11] s/ASSERT_FALSE/ASSERT_NE --- cpp/src/arrow/array/array_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/arrow/array/array_test.cc b/cpp/src/arrow/array/array_test.cc index d030460ff05..1d150e69fa0 100644 --- a/cpp/src/arrow/array/array_test.cc +++ b/cpp/src/arrow/array/array_test.cc @@ -556,7 +556,7 @@ void AssertAppendScalar(MemoryPool* pool, const std::shared_ptr& scalar) } for (const auto index : {0, 1, 3, 5, 6}) { - ASSERT_FALSE(out->IsNull(index) && scalar->is_valid); + ASSERT_NE(out->IsNull(index), scalar->is_valid); ASSERT_OK_AND_ASSIGN(auto scalar_i, out->GetScalar(index)); ASSERT_OK(scalar_i->ValidateFull()); if (can_check_values) AssertScalarsEqual(*scalar, *scalar_i, /*verbose=*/true); From 45122977a9394a22452a73e34b39bb2150effde9 Mon Sep 17 00:00:00 2001 From: Felipe Oliveira Carvalho Date: Mon, 6 Mar 2023 11:42:13 -0300 Subject: [PATCH 08/11] Better fallback logic in MayHaveLogicalNulls() --- cpp/src/arrow/array/data.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/arrow/array/data.h b/cpp/src/arrow/array/data.h index 8d384bca413..ee93485be8f 100644 --- a/cpp/src/arrow/array/data.h +++ b/cpp/src/arrow/array/data.h @@ -324,7 +324,7 @@ struct ARROW_EXPORT ArrayData { if (t == Type::RUN_END_ENCODED) { return internal::RunEndEncodedMayHaveLogicalNulls(*this); } - return false; + return null_count.load() != 0; } /// \brief Computes the logical null count for arrays of all types including @@ -512,7 +512,7 @@ struct ARROW_EXPORT ArraySpan { if (t == Type::RUN_END_ENCODED) { return RunEndEncodedMayHaveLogicalNulls(); } - return false; + return null_count != 0; } /// \brief Compute the logical null count for arrays of all types including From e5c77a11e5b1168ca1505a04524511c01bee4cb2 Mon Sep 17 00:00:00 2001 From: Felipe Oliveira Carvalho Date: Mon, 6 Mar 2023 12:05:13 -0300 Subject: [PATCH 09/11] Export internal functions called from inline functions --- cpp/src/arrow/array/data.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/cpp/src/arrow/array/data.h b/cpp/src/arrow/array/data.h index ee93485be8f..fe43fbcf19d 100644 --- a/cpp/src/arrow/array/data.h +++ b/cpp/src/arrow/array/data.h @@ -39,12 +39,12 @@ namespace internal { // ---------------------------------------------------------------------- // Null handling for types without a validity bitmap -bool IsNullSparseUnion(const ArrayData& data, int64_t i); -bool IsNullDenseUnion(const ArrayData& data, int64_t i); -bool IsNullRunEndEncoded(const ArrayData& data, int64_t i); +ARROW_EXPORT bool IsNullSparseUnion(const ArrayData& data, int64_t i); +ARROW_EXPORT bool IsNullDenseUnion(const ArrayData& data, int64_t i); +ARROW_EXPORT bool IsNullRunEndEncoded(const ArrayData& data, int64_t i); -bool UnionMayHaveLogicalNulls(const ArrayData& data); -bool RunEndEncodedMayHaveLogicalNulls(const ArrayData& data); +ARROW_EXPORT bool UnionMayHaveLogicalNulls(const ArrayData& data); +ARROW_EXPORT bool RunEndEncodedMayHaveLogicalNulls(const ArrayData& data); } // namespace internal // When slicing, we do not know the null count of the sliced range without From 6ea0ab7b0abbd750a89da8d81ab6a058224a865f Mon Sep 17 00:00:00 2001 From: Felipe Oliveira Carvalho Date: Tue, 7 Mar 2023 10:05:36 -0300 Subject: [PATCH 10/11] Fix doxygen comment --- cpp/src/arrow/array/data.h | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/cpp/src/arrow/array/data.h b/cpp/src/arrow/array/data.h index fe43fbcf19d..27a30ade0bb 100644 --- a/cpp/src/arrow/array/data.h +++ b/cpp/src/arrow/array/data.h @@ -290,29 +290,29 @@ struct ARROW_EXPORT ArrayData { /// performance degradation when handling most types by adopting /// HasValidityBitmap and MayHaveLogicalNulls. /// - /// BEFORE: + /// Before: /// - /// uint8_t* validity = array.MayHaveNulls() ? array.buffers[0].data : NULLPTR; - /// for (int64_t i = 0; i < array.length; ++i) { - /// if (validity && !bit_util::GetBit(validity, i)) { - /// continue; // skip a NULL + /// uint8_t* validity = array.MayHaveNulls() ? array.buffers[0].data : NULLPTR; + /// for (int64_t i = 0; i < array.length; ++i) { + /// if (validity && !bit_util::GetBit(validity, i)) { + /// continue; // skip a NULL + /// } + /// ... /// } - /// ... - /// } /// - /// AFTER: + /// After: /// - /// bool all_valid = !array.MayHaveLogicalNulls(); - /// uint8_t* validity = array.HasValidityBitmap() ? array.buffers[0].data : NULLPTR; - /// for (int64_t i = 0; i < array.length; ++i) { - /// bool is_valid = all_valid || - /// (validity && bit_util::GetBit(validity, i)) || - /// array.IsValid(i); - /// if (!is_valid) { - /// continue; // skip a NULL + /// bool all_valid = !array.MayHaveLogicalNulls(); + /// uint8_t* validity = array.HasValidityBitmap() ? array.buffers[0].data : NULLPTR; + /// for (int64_t i = 0; i < array.length; ++i) { + /// bool is_valid = all_valid || + /// (validity && bit_util::GetBit(validity, i)) || + /// array.IsValid(i); + /// if (!is_valid) { + /// continue; // skip a NULL + /// } + /// ... /// } - /// ... - /// } bool MayHaveLogicalNulls() const { if (buffers[0] != NULLPTR) { return null_count.load() != 0; From fa593d7351e3659f648e581c5eb2724d5a0c8fe4 Mon Sep 17 00:00:00 2001 From: Felipe Oliveira Carvalho Date: Tue, 14 Mar 2023 21:29:37 -0300 Subject: [PATCH 11/11] Use MakeScalar from scalar.h --- cpp/src/arrow/array/array_test.cc | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/cpp/src/arrow/array/array_test.cc b/cpp/src/arrow/array/array_test.cc index 1d150e69fa0..602a468fafb 100644 --- a/cpp/src/arrow/array/array_test.cc +++ b/cpp/src/arrow/array/array_test.cc @@ -79,11 +79,6 @@ class TestArray : public ::testing::Test { MemoryPool* pool_; }; -template -std::shared_ptr MakeScalar(typename ScalarType::ValueType value) { - return std::make_shared(value); -} - TEST_F(TestArray, TestNullCount) { // These are placeholders auto data = std::make_shared(nullptr, 0); @@ -110,11 +105,11 @@ TEST_F(TestArray, TestNullCount) { RunEndEncodedBuilder ree_builder(pool_, std::make_shared(pool_), std::make_shared(pool_), run_end_encoded(int32(), int32())); - ASSERT_OK(ree_builder.AppendScalar(*MakeScalar(2), 2)); + ASSERT_OK(ree_builder.AppendScalar(*MakeScalar(2), 2)); ASSERT_OK(ree_builder.AppendNull()); - ASSERT_OK(ree_builder.AppendScalar(*MakeScalar(4), 3)); + ASSERT_OK(ree_builder.AppendScalar(*MakeScalar(4), 3)); ASSERT_OK(ree_builder.AppendNulls(2)); - ASSERT_OK(ree_builder.AppendScalar(*MakeScalar(8), 5)); + ASSERT_OK(ree_builder.AppendScalar(*MakeScalar(8), 5)); ASSERT_OK(ree_builder.AppendNulls(7)); ASSERT_OK_AND_ASSIGN(auto ree, ree_builder.Finish()); @@ -123,9 +118,9 @@ TEST_F(TestArray, TestNullCount) { ASSERT_FALSE(ree->data()->MayHaveNulls()); ASSERT_TRUE(ree->data()->MayHaveLogicalNulls()); - ASSERT_OK(ree_builder.AppendScalar(*MakeScalar(2), 2)); - ASSERT_OK(ree_builder.AppendScalar(*MakeScalar(4), 3)); - ASSERT_OK(ree_builder.AppendScalar(*MakeScalar(8), 5)); + ASSERT_OK(ree_builder.AppendScalar(*MakeScalar(2), 2)); + ASSERT_OK(ree_builder.AppendScalar(*MakeScalar(4), 3)); + ASSERT_OK(ree_builder.AppendScalar(*MakeScalar(8), 5)); ASSERT_OK_AND_ASSIGN(auto ree_no_nulls, ree_builder.Finish()); ASSERT_EQ(0, ree_no_nulls->null_count()); ASSERT_EQ(0, ree_no_nulls->ComputeLogicalNullCount());