-
Notifications
You must be signed in to change notification settings - Fork 4k
GH-34315: [C++] Correct is_null kernel for Union and RunEndEncoded logical nulls #37642
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
010f2bb
3a13fba
1d480e4
0c9b19d
dd6d0e0
448eed9
ca937d8
fb4352a
0d3fb6a
ca2bcc6
0935b4d
cceea95
1eb1187
ddf2bee
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -20,8 +20,11 @@ | |
| #include "arrow/compute/api_scalar.h" | ||
| #include "arrow/compute/kernels/common_internal.h" | ||
|
|
||
| #include "arrow/type.h" | ||
| #include "arrow/util/bit_util.h" | ||
| #include "arrow/util/bitmap_ops.h" | ||
| #include "arrow/util/checked_cast.h" | ||
| #include "arrow/util/ree_util.h" | ||
|
|
||
| namespace arrow { | ||
|
|
||
|
|
@@ -82,6 +85,72 @@ static void SetNanBits(const ArraySpan& arr, uint8_t* out_bitmap, int64_t out_of | |
| } | ||
| } | ||
|
|
||
| static void SetSparseUnionLogicalNullBits(const ArraySpan& span, uint8_t* out_bitmap, | ||
| int64_t out_offset) { | ||
| const auto* sparse_union_type = | ||
| ::arrow::internal::checked_cast<const SparseUnionType*>(span.type); | ||
| DCHECK_LE(span.child_data.size(), 128); | ||
|
|
||
| const int8_t* types = span.GetValues<int8_t>(1); // NOLINT | ||
| for (int64_t i = 0; i < span.length; i++) { | ||
| const int8_t child_id = sparse_union_type->child_ids()[types[i]]; | ||
| if (span.child_data[child_id].IsNull(i + span.offset)) { | ||
| bit_util::SetBit(out_bitmap, i + out_offset); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| static void SetDenseUnionLogicalNullBits(const ArraySpan& span, uint8_t* out_bitmap, | ||
| int64_t out_offset) { | ||
| const auto* dense_union_type = | ||
| ::arrow::internal::checked_cast<const DenseUnionType*>(span.type); | ||
| DCHECK_LE(span.child_data.size(), 128); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Someday if we have more dense union utilities the same trick as for sparse unions could be used here |
||
|
|
||
| const int8_t* types = span.GetValues<int8_t>(1); // NOLINT | ||
| const int32_t* offsets = span.GetValues<int32_t>(2); // NOLINT | ||
| for (int64_t i = 0; i < span.length; i++) { | ||
| const int8_t child_id = dense_union_type->child_ids()[types[i]]; | ||
| const int32_t offset = offsets[i]; | ||
| if (span.child_data[child_id].IsNull(offset)) { | ||
| bit_util::SetBit(out_bitmap, i + out_offset); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| template <typename RunEndCType> | ||
| void SetREELogicalNullBits(const ArraySpan& span, uint8_t* out_bitmap, | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Here it seems we could trivially create an |
||
| int64_t out_offset) { | ||
| const auto& values = arrow::ree_util::ValuesArray(span); | ||
| DCHECK(!is_nested(values.type->id())); | ||
| const auto* values_bitmap = values.MayHaveNulls() ? values.buffers[0].data : NULLPTR; | ||
|
|
||
| if (!values_bitmap) { | ||
| return; | ||
| } | ||
|
|
||
| arrow::ree_util::RunEndEncodedArraySpan<RunEndCType> ree_span(span); | ||
| auto end = ree_span.end(); | ||
| for (auto it = ree_span.begin(); it != end; ++it) { | ||
| if (!bit_util::GetBit(values_bitmap, values.offset + it.index_into_array())) { | ||
| bit_util::SetBitsTo(out_bitmap, it.logical_position() + out_offset, it.run_length(), | ||
| true); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| void SetREELogicalNullBits(const ArraySpan& span, uint8_t* out_bitmap, | ||
| int64_t out_offset) { | ||
| const auto type_id = arrow::ree_util::RunEndsArray(span).type->id(); | ||
| if (type_id == Type::INT16) { | ||
| SetREELogicalNullBits<int16_t>(span, out_bitmap, out_offset); | ||
| } else if (type_id == Type::INT32) { | ||
| SetREELogicalNullBits<int32_t>(span, out_bitmap, out_offset); | ||
| } else { | ||
| DCHECK_EQ(type_id, Type::INT64); | ||
| SetREELogicalNullBits<int64_t>(span, out_bitmap, out_offset); | ||
| } | ||
| } | ||
|
|
||
| Status IsNullExec(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) { | ||
| const ArraySpan& arr = batch[0].array; | ||
| ArraySpan* out_span = out->array_span_mutable(); | ||
|
|
@@ -91,17 +160,29 @@ Status IsNullExec(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) { | |
| return Status::OK(); | ||
| } | ||
|
|
||
| const auto& options = NanOptionsState::Get(ctx); | ||
| uint8_t* out_bitmap = out_span->buffers[1].data; | ||
| if (arr.GetNullCount() > 0) { | ||
| // Input has nulls => output is the inverted null (validity) bitmap. | ||
| InvertBitmap(arr.buffers[0].data, arr.offset, arr.length, out_bitmap, | ||
| out_span->offset); | ||
| } else { | ||
| // Input has no nulls => output is entirely false. | ||
| // Input has no nulls => output is entirely false... | ||
| bit_util::SetBitsTo(out_bitmap, out_span->offset, out_span->length, false); | ||
| // ...except for some types (e.g. unions types and REE) which do not | ||
| // represent nulls in the validity bitmap. | ||
| const auto t = arr.type->id(); | ||
| if (t == Type::SPARSE_UNION) { | ||
| SetSparseUnionLogicalNullBits(arr, out_bitmap, out_span->offset); | ||
| } else if (t == Type::DENSE_UNION) { | ||
| SetDenseUnionLogicalNullBits(arr, out_bitmap, out_span->offset); | ||
| } else if (t == Type::RUN_END_ENCODED) { | ||
| SetREELogicalNullBits(arr, out_bitmap, out_span->offset); | ||
| } else { | ||
| DCHECK(arrow::internal::HasValidityBitmap(t)); | ||
| } | ||
| } | ||
|
|
||
| const auto& options = NanOptionsState::Get(ctx); | ||
| if (is_floating(arr.type->id()) && options.nan_is_null) { | ||
| switch (arr.type->id()) { | ||
| case Type::FLOAT: | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Would it be worth reusing the
choosekernel for this case?