diff --git a/cpp/src/arrow/array/array_base.h b/cpp/src/arrow/array/array_base.h index 48d65e30de8..fa2ddd049f5 100644 --- a/cpp/src/arrow/array/array_base.h +++ b/cpp/src/arrow/array/array_base.h @@ -69,16 +69,37 @@ class ARROW_EXPORT Array { // a potential inner-branch removal. if (type_id() == Type::SPARSE_UNION) { return !internal::IsNullSparseUnion(*data_, i); - } - if (type_id() == Type::DENSE_UNION) { + } else if (type_id() == Type::DENSE_UNION) { return !internal::IsNullDenseUnion(*data_, i); - } - if (type_id() == Type::RUN_END_ENCODED) { + } else if (type_id() == Type::RUN_END_ENCODED) { return !internal::IsNullRunEndEncoded(*data_, i); } return data_->null_count != data_->length; } + template <typename ArrowType> + bool IsNullFast(int64_t i) const { + return !IsValidFast<ArrowType>(i); + } + + template <typename ArrowType> + bool IsValidFast(int64_t i) const { + if constexpr (ArrowType::type_id == Type::NA) { + return false; + } else if constexpr (ArrowType::type_id == Type::SPARSE_UNION) { + return !internal::IsNullSparseUnion(*data_, i); + } else if constexpr (ArrowType::type_id == Type::DENSE_UNION) { + return !internal::IsNullDenseUnion(*data_, i); + } else if constexpr (ArrowType::type_id == Type::RUN_END_ENCODED) { + return !internal::IsNullRunEndEncoded(*data_, i); + } else { + if (null_bitmap_data_ != NULLPTR) { + return bit_util::GetBit(null_bitmap_data_, i + data_->offset); + } + return data_->null_count != data_->length; + } + } + /// \brief Return a Scalar containing the value of this array at i Result<std::shared_ptr<Scalar>> GetScalar(int64_t i) const; diff --git a/cpp/src/arrow/array/array_test.cc b/cpp/src/arrow/array/array_test.cc index 602a468fafb..0a8441e662a 100644 --- a/cpp/src/arrow/array/array_test.cc +++ b/cpp/src/arrow/array/array_test.cc @@ -329,6 +329,8 @@ TEST_F(TestArray, TestIsNullIsValid) { for (size_t i = 0; i < null_bitmap.size(); ++i) { EXPECT_EQ(null_bitmap[i] != 0, !arr->IsNull(i)) << i; EXPECT_EQ(null_bitmap[i] != 0, arr->IsValid(i)) << i; + EXPECT_EQ(null_bitmap[i] != 0, !arr->IsNullFast<Int32Type>(i)) 
<< i; + EXPECT_EQ(null_bitmap[i] != 0, arr->IsValidFast<Int32Type>(i)) << i; } } @@ -341,6 +343,8 @@ TEST_F(TestArray, TestIsNullIsValidNoNulls) { for (size_t i = 0; i < size; ++i) { EXPECT_TRUE(arr->IsValid(i)); EXPECT_FALSE(arr->IsNull(i)); + EXPECT_TRUE(arr->IsValidFast<Int32Type>(i)); + EXPECT_FALSE(arr->IsNullFast<Int32Type>(i)); } } @@ -428,6 +432,25 @@ TEST_F(TestArray, TestMakeArrayOfNull) { for (int64_t i = 0; i < length; ++i) { ASSERT_TRUE(array->IsNull(i)); ASSERT_FALSE(array->IsValid(i)); + switch (type->id()) { + case Type::NA: + ASSERT_TRUE(array->IsNullFast<NullType>(i)); + break; + case Type::SPARSE_UNION: + ASSERT_TRUE(array->IsNullFast<SparseUnionType>(i)); + break; + case Type::DENSE_UNION: + ASSERT_TRUE(array->IsNullFast<DenseUnionType>(i)); + break; + case Type::RUN_END_ENCODED: + ASSERT_TRUE(array->IsNullFast<RunEndEncodedType>(i)); + break; + case Type::INT32: // a non-special type for IsNullFast + ASSERT_TRUE(array->IsNullFast<Int32Type>(i)); + break; + default: + break; + } } } } @@ -1788,6 +1811,7 @@ TEST(TestBooleanBuilder, AppendNullsAdvanceBuilder) { ASSERT_TRUE(barr.Value(0)); ASSERT_FALSE(barr.Value(1)); ASSERT_TRUE(barr.IsNull(2)); + ASSERT_TRUE(barr.IsNullFast<BooleanType>(2)); ASSERT_TRUE(barr.Value(3)); } @@ -1821,9 +1845,11 @@ TEST(TestBooleanBuilder, TestStdBoolVectorAppend) { for (int i = 0; i < length; ++i) { if (is_valid[i]) { ASSERT_FALSE(arr.IsNull(i)); + ASSERT_FALSE(arr.IsNullFast<BooleanType>(i)); ASSERT_EQ(values[i], arr.Value(i)); } else { ASSERT_TRUE(arr.IsNull(i)); + ASSERT_TRUE(arr.IsNullFast<BooleanType>(i)); } ASSERT_EQ(values[i], arr_nn.Value(i)); } diff --git a/cpp/src/arrow/array/data.h b/cpp/src/arrow/array/data.h index 82a6e733727..b4788271899 100644 --- a/cpp/src/arrow/array/data.h +++ b/cpp/src/arrow/array/data.h @@ -180,25 +180,46 @@ struct ARROW_EXPORT ArrayData { std::shared_ptr<ArrayData> Copy() const { return std::make_shared<ArrayData>(*this); } - bool IsNull(int64_t i) const { return !IsValid(i); } + inline bool IsNull(int64_t i) const { return !IsValid(i); } - bool IsValid(int64_t i) const { + inline bool IsValid(int64_t i) const { if (buffers[0] != NULLPTR) { 
return bit_util::GetBit(buffers[0]->data(), i + offset); } const auto type = this->type->id(); if (type == Type::SPARSE_UNION) { return !internal::IsNullSparseUnion(*this, i); - } - if (type == Type::DENSE_UNION) { + } else if (type == Type::DENSE_UNION) { return !internal::IsNullDenseUnion(*this, i); - } - if (type == Type::RUN_END_ENCODED) { + } else if (type == Type::RUN_END_ENCODED) { return !internal::IsNullRunEndEncoded(*this, i); } return null_count.load() != length; } + template <typename ArrowType> + bool IsNullFast(int64_t i) const { + return !IsValidFast<ArrowType>(i); + } + + template <typename ArrowType> + bool IsValidFast(int64_t i) const { + if constexpr (ArrowType::type_id == Type::NA) { + return false; + } else if constexpr (ArrowType::type_id == Type::SPARSE_UNION) { + return !internal::IsNullSparseUnion(*this, i); + } else if constexpr (ArrowType::type_id == Type::DENSE_UNION) { + return !internal::IsNullDenseUnion(*this, i); + } else if constexpr (ArrowType::type_id == Type::RUN_END_ENCODED) { + return !internal::IsNullRunEndEncoded(*this, i); + } else { + if (buffers[0] != NULLPTR) { + return bit_util::GetBit(buffers[0]->data(), i + offset); + } + return null_count.load() != length; + } + } + // Access a buffer's data as a typed C pointer template <typename T> inline const T* GetValues(int i, int64_t absolute_offset) const { @@ -434,16 +455,36 @@ struct ARROW_EXPORT ArraySpan { inline bool IsValid(int64_t i) const { if (this->buffers[0].data != NULLPTR) { return bit_util::GetBit(this->buffers[0].data, i + this->offset); + } + const auto type = this->type->id(); + if (type == Type::SPARSE_UNION) { + return !IsNullSparseUnion(i); + } else if (type == Type::DENSE_UNION) { + return !IsNullDenseUnion(i); + } else if (type == Type::RUN_END_ENCODED) { + return !IsNullRunEndEncoded(i); + } + return this->null_count != this->length; + } + + template <typename ArrowType> + inline bool IsNullFast(int64_t i) const { + return !IsValidFast<ArrowType>(i); + } + + template <typename ArrowType> + inline bool IsValidFast(int64_t i) const { + if constexpr (ArrowType::type_id == 
Type::NA) { + return false; + } else if constexpr (ArrowType::type_id == Type::SPARSE_UNION) { + return !IsNullSparseUnion(i); + } else if constexpr (ArrowType::type_id == Type::DENSE_UNION) { + return !IsNullDenseUnion(i); + } else if constexpr (ArrowType::type_id == Type::RUN_END_ENCODED) { + return !IsNullRunEndEncoded(i); } else { - const auto type = this->type->id(); - if (type == Type::SPARSE_UNION) { - return !IsNullSparseUnion(i); - } - if (type == Type::DENSE_UNION) { - return !IsNullDenseUnion(i); - } - if (type == Type::RUN_END_ENCODED) { - return !IsNullRunEndEncoded(i); + if (this->buffers[0].data != NULLPTR) { + return bit_util::GetBit(this->buffers[0].data, i + this->offset); } return this->null_count != this->length; }