From 0ded622677360597f83145437ae4714ab53009fd Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 11 Apr 2023 12:37:23 +0200 Subject: [PATCH 01/12] GH-34315: [C++] Correct is_null kernel for Union and RunEndEncoded logical nulls --- cpp/src/arrow/array/data.cc | 8 +++- .../arrow/compute/kernels/scalar_validity.cc | 13 +++++++ cpp/src/arrow/util/ree_util.cc | 33 ++++++++++++++++ cpp/src/arrow/util/ree_util.h | 2 + cpp/src/arrow/util/union_util.cc | 29 ++++++++++++++ cpp/src/arrow/util/union_util.h | 4 ++ python/pyarrow/tests/test_compute.py | 38 +++++++++++++++++++ 7 files changed, 125 insertions(+), 2 deletions(-) diff --git a/cpp/src/arrow/array/data.cc b/cpp/src/arrow/array/data.cc index 18c9eb720a0..5bbb76d9d5f 100644 --- a/cpp/src/arrow/array/data.cc +++ b/cpp/src/arrow/array/data.cc @@ -305,8 +305,12 @@ void ArraySpan::FillFromScalar(const Scalar& value) { Type::type type_id = value.type->id(); // Populate null count and validity bitmap (only for non-union/null types) - this->null_count = value.is_valid ? 0 : 1; - if (!is_union(type_id) && type_id != Type::NA) { + if (type_id == Type::NA) { + this->null_count = value.is_valid ? 0 : 1; + } else if (is_union(type_id)) { + this->null_count = 0; + } else { + this->null_count = value.is_valid ? 0 : 1; this->buffers[0].data = value.is_valid ? &kTrueBit : &kFalseBit; this->buffers[0].size = 1; } diff --git a/cpp/src/arrow/compute/kernels/scalar_validity.cc b/cpp/src/arrow/compute/kernels/scalar_validity.cc index 6b1cec0f5cc..65595824656 100644 --- a/cpp/src/arrow/compute/kernels/scalar_validity.cc +++ b/cpp/src/arrow/compute/kernels/scalar_validity.cc @@ -22,6 +22,8 @@ #include "arrow/util/bit_util.h" #include "arrow/util/bitmap_ops.h" +#include "arrow/util/union_util.h" +#include "arrow/util/ree_util.h" namespace arrow { @@ -102,6 +104,17 @@ Status IsNullExec(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) { bit_util::SetBitsTo(out_bitmap, out_span->offset, out_span->length, false); } + const auto t = arr.type->id(); + if (t == Type::SPARSE_UNION) { + union_util::SetLogicalSparseUnionNullBits(arr, out_bitmap, out_span->offset); + } + else if (t == Type::DENSE_UNION) { + union_util::SetLogicalDenseUnionNullBits(arr, out_bitmap, out_span->offset); + } + else if (t == Type::RUN_END_ENCODED) { + ree_util::SetLogicalNullBits(arr, out_bitmap, out_span->offset); + } + if (is_floating(arr.type->id()) && options.nan_is_null) { switch (arr.type->id()) { case Type::FLOAT: diff --git a/cpp/src/arrow/util/ree_util.cc b/cpp/src/arrow/util/ree_util.cc index 11da64313a8..1d69ecdfc33 100644 --- a/cpp/src/arrow/util/ree_util.cc +++ b/cpp/src/arrow/util/ree_util.cc @@ -47,6 +47,25 @@ int64_t LogicalNullCount(const ArraySpan& span) { return null_count; } +template +void SetLogicalNullBits(const ArraySpan& span, uint8_t* out_bitmap, int64_t out_offset) { + const auto& values = ValuesArray(span); + const auto& values_bitmap = values.buffers[0].data; + + RunEndEncodedArraySpan ree_span(span); + auto end = ree_span.end(); + for (auto it = ree_span.begin(); it != end; ++it) { + const bool is_null = + values_bitmap && + !bit_util::GetBit(values_bitmap, values.offset + it.index_into_array()); + if (is_null) { + for (int64_t i = 0; i < it.run_length(); i++) { + bit_util::SetBit(out_bitmap, it.logical_position() + i + out_offset); + } + } + } +} + } // namespace int64_t LogicalNullCount(const ArraySpan& span) { @@ -61,6 +80,20 @@ int64_t LogicalNullCount(const ArraySpan& span) { return LogicalNullCount(span); } +void SetLogicalNullBits(const ArraySpan& span, uint8_t* out_bitmap, int64_t out_offset) { + const auto type_id = RunEndsArray(span).type->id(); + if (type_id == Type::INT16) { + SetLogicalNullBits(span, out_bitmap, out_offset); + } + else if (type_id == Type::INT32) { + SetLogicalNullBits(span, out_bitmap, out_offset); + } + else { + DCHECK_EQ(type_id, Type::INT64); + SetLogicalNullBits(span, out_bitmap, out_offset); + } +} + int64_t FindPhysicalIndex(const ArraySpan& span, int64_t i, int64_t absolute_offset) { const auto type_id = RunEndsArray(span).type->id(); if (type_id == Type::INT16) { diff --git a/cpp/src/arrow/util/ree_util.h b/cpp/src/arrow/util/ree_util.h index 83d828bbe53..e72752d55c8 100644 --- a/cpp/src/arrow/util/ree_util.h +++ b/cpp/src/arrow/util/ree_util.h @@ -56,6 +56,8 @@ Status ValidateRunEndEncodedChildren(const RunEndEncodedType& type, /// \brief Compute the logical null count of an REE array int64_t LogicalNullCount(const ArraySpan& span); +void SetLogicalNullBits(const ArraySpan& span, uint8_t* out_bitmap, int64_t out_offset); + namespace internal { /// \brief Uses binary-search to find the physical offset given a logical offset diff --git a/cpp/src/arrow/util/union_util.cc b/cpp/src/arrow/util/union_util.cc index ca3ecf28644..771490d4a1b 100644 --- a/cpp/src/arrow/util/union_util.cc +++ b/cpp/src/arrow/util/union_util.cc @@ -55,4 +55,33 @@ int64_t LogicalDenseUnionNullCount(const ArraySpan& span) { return null_count; } +void SetLogicalSparseUnionNullBits(const ArraySpan& span, uint8_t* out_bitmap, int64_t out_offset) { + const auto* sparse_union_type = + internal::checked_cast(span.type); + DCHECK_LE(span.child_data.size(), 128); + + const int8_t* types = span.GetValues(1); // NOLINT + for (int64_t i = 0; i < span.length; i++) { + const int8_t child_id = sparse_union_type->child_ids()[types[span.offset + i]]; + if (span.child_data[child_id].IsNull(i)) { + bit_util::SetBit(out_bitmap, i + out_offset); + } + } +} + +void SetLogicalDenseUnionNullBits(const ArraySpan& span, uint8_t* out_bitmap, int64_t out_offset) { + const auto* dense_union_type = internal::checked_cast(span.type); + DCHECK_LE(span.child_data.size(), 128); + + const int8_t* types = span.GetValues(1); // NOLINT + const int32_t* offsets = span.GetValues(2); // NOLINT + for (int64_t i = 0; i < span.length; i++) { + const int8_t child_id = dense_union_type->child_ids()[types[span.offset + i]]; + const int32_t offset = offsets[span.offset + i]; + if (span.child_data[child_id].IsNull(offset)) { + bit_util::SetBit(out_bitmap, i + out_offset); + } + } +} + } // namespace arrow::union_util diff --git a/cpp/src/arrow/util/union_util.h b/cpp/src/arrow/util/union_util.h index 0f30d5a3278..948ea3fdb2f 100644 --- a/cpp/src/arrow/util/union_util.h +++ b/cpp/src/arrow/util/union_util.h @@ -27,5 +27,9 @@ int64_t LogicalSparseUnionNullCount(const ArraySpan& span); /// \brief Compute the number of of logical nulls in a dense union array int64_t LogicalDenseUnionNullCount(const ArraySpan& span); +void SetLogicalSparseUnionNullBits(const ArraySpan& span, uint8_t* out_bitmap, int64_t out_offset); + +void SetLogicalDenseUnionNullBits(const ArraySpan& span, uint8_t* out_bitmap, int64_t out_offset); + } // namespace union_util } // namespace arrow diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py index 875d0e613b6..173937644b3 100644 --- a/python/pyarrow/tests/test_compute.py +++ b/python/pyarrow/tests/test_compute.py @@ -1637,6 +1637,44 @@ def test_is_null(): assert result.equals(expected) +def test_is_null_union(): + arr = pa.UnionArray.from_sparse( + pa.array([0, 1, 0, 0, 1], type=pa.int8()), + [ + pa.array([0.0, 1.1, None, 3.3, 4.4]), + pa.array([True, None, False, True, False]), + ] + ) + assert arr.to_pylist() == [0.0, None, None, 3.3, False] + result = arr.is_null() + expected = pa.array([False, True, True, False, False]) + assert result.equals(expected) + + arr = pa.UnionArray.from_dense( + pa.array([0, 1, 0, 0, 0, 1, 1], type=pa.int8()), + pa.array([0, 0, 1, 2, 3, 1, 2], type=pa.int32()), + [ + pa.array([0.0, 1.1, None, 3.3]), + pa.array([True, None, False]) + ] + ) + assert arr.to_pylist() == [0.0, True, 1.1, None, 3.3, None, False] + result = arr.is_null() + expected = pa.array([False, False, False, True, False, True, False]) + assert result.equals(expected) + + +@pytest.mark.parametrize("typ", ["int16", "int32", "int64"]) +def test_is_null_run_end_encoded(typ): + decoded = pa.array([1, 1, 1, None, 2, 2, None, None, 1]) + arr = pc.run_end_encode(decoded, run_end_type=typ) + result = arr.is_null() + expected = pa.array([False, False, False, True, False, False, True, True, False]) + assert result.equals(expected) + result = arr.slice(2, 5).is_null() + assert result.equals(expected.slice(2, 5)) + + def test_is_nan(): arr = pa.array([1, 2, 3, None, np.nan]) result = arr.is_nan() From 6fa5eebee91b855952ae6a0b563728a888b7fd0c Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 11 Apr 2023 12:50:22 +0200 Subject: [PATCH 02/12] lint --- cpp/src/arrow/compute/kernels/scalar_validity.cc | 8 +++----- cpp/src/arrow/util/ree_util.cc | 6 ++---- cpp/src/arrow/util/union_util.cc | 6 ++++-- cpp/src/arrow/util/union_util.h | 6 ++++-- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_validity.cc b/cpp/src/arrow/compute/kernels/scalar_validity.cc index 65595824656..c406c620106 100644 --- a/cpp/src/arrow/compute/kernels/scalar_validity.cc +++ b/cpp/src/arrow/compute/kernels/scalar_validity.cc @@ -22,8 +22,8 @@ #include "arrow/util/bit_util.h" #include "arrow/util/bitmap_ops.h" -#include "arrow/util/union_util.h" #include "arrow/util/ree_util.h" +#include "arrow/util/union_util.h" namespace arrow { @@ -107,11 +107,9 @@ Status IsNullExec(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) { const auto t = arr.type->id(); if (t == Type::SPARSE_UNION) { union_util::SetLogicalSparseUnionNullBits(arr, out_bitmap, out_span->offset); - } - else if (t == Type::DENSE_UNION) { + } else if (t == Type::DENSE_UNION) { union_util::SetLogicalDenseUnionNullBits(arr, out_bitmap, out_span->offset); - } - else if (t == Type::RUN_END_ENCODED) { + } else if (t == Type::RUN_END_ENCODED) { ree_util::SetLogicalNullBits(arr, out_bitmap, out_span->offset); } diff --git a/cpp/src/arrow/util/ree_util.cc b/cpp/src/arrow/util/ree_util.cc index 1d69ecdfc33..fbbcfc2ed25 100644 --- a/cpp/src/arrow/util/ree_util.cc +++ b/cpp/src/arrow/util/ree_util.cc @@ -84,11 +84,9 @@ void SetLogicalNullBits(const ArraySpan& span, uint8_t* out_bitmap, int64_t out_ const auto type_id = RunEndsArray(span).type->id(); if (type_id == Type::INT16) { SetLogicalNullBits(span, out_bitmap, out_offset); - } - else if (type_id == Type::INT32) { + } else if (type_id == Type::INT32) { SetLogicalNullBits(span, out_bitmap, out_offset); - } - else { + } else { DCHECK_EQ(type_id, Type::INT64); SetLogicalNullBits(span, out_bitmap, out_offset); } diff --git a/cpp/src/arrow/util/union_util.cc b/cpp/src/arrow/util/union_util.cc index 771490d4a1b..8dbf7a91d44 100644 --- a/cpp/src/arrow/util/union_util.cc +++ b/cpp/src/arrow/util/union_util.cc @@ -55,7 +55,8 @@ int64_t LogicalDenseUnionNullCount(const ArraySpan& span) { return null_count; } -void SetLogicalSparseUnionNullBits(const ArraySpan& span, uint8_t* out_bitmap, int64_t out_offset) { +void SetLogicalSparseUnionNullBits(const ArraySpan& span, uint8_t* out_bitmap, + int64_t out_offset) { const auto* sparse_union_type = internal::checked_cast(span.type); DCHECK_LE(span.child_data.size(), 128); @@ -69,7 +70,8 @@ void SetLogicalSparseUnionNullBits(const ArraySpan& span, uint8_t* out_bitmap, i } } -void SetLogicalDenseUnionNullBits(const ArraySpan& span, uint8_t* out_bitmap, int64_t out_offset) { +void SetLogicalDenseUnionNullBits(const ArraySpan& span, uint8_t* out_bitmap, + int64_t out_offset) { const auto* dense_union_type = internal::checked_cast(span.type); DCHECK_LE(span.child_data.size(), 128); diff --git a/cpp/src/arrow/util/union_util.h b/cpp/src/arrow/util/union_util.h index 948ea3fdb2f..0afd71a2d55 100644 --- a/cpp/src/arrow/util/union_util.h +++ b/cpp/src/arrow/util/union_util.h @@ -27,9 +27,11 @@ int64_t LogicalSparseUnionNullCount(const ArraySpan& span); /// \brief Compute the number of of logical nulls in a dense union array int64_t LogicalDenseUnionNullCount(const ArraySpan& span); -void SetLogicalSparseUnionNullBits(const ArraySpan& span, uint8_t* out_bitmap, int64_t out_offset); +void SetLogicalSparseUnionNullBits(const ArraySpan& span, uint8_t* out_bitmap, + int64_t out_offset); -void SetLogicalDenseUnionNullBits(const ArraySpan& span, uint8_t* out_bitmap, int64_t out_offset); +void SetLogicalDenseUnionNullBits(const ArraySpan& span, uint8_t* out_bitmap, + int64_t out_offset); } // namespace union_util } // namespace arrow From d661a166b4004a562aefc7bd944a654026b14c9a Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 12 Apr 2023 10:59:34 +0200 Subject: [PATCH 03/12] move functions to scalar_validity.cc --- .../arrow/compute/kernels/scalar_validity.cc | 73 ++++++++++++++++++- cpp/src/arrow/util/ree_util.cc | 31 -------- cpp/src/arrow/util/ree_util.h | 2 - cpp/src/arrow/util/union_util.cc | 31 -------- cpp/src/arrow/util/union_util.h | 6 -- 5 files changed, 69 insertions(+), 74 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_validity.cc b/cpp/src/arrow/compute/kernels/scalar_validity.cc index c406c620106..a25729a5e91 100644 --- a/cpp/src/arrow/compute/kernels/scalar_validity.cc +++ b/cpp/src/arrow/compute/kernels/scalar_validity.cc @@ -22,8 +22,8 @@ #include "arrow/util/bit_util.h" #include "arrow/util/bitmap_ops.h" +#include "arrow/util/checked_cast.h" #include "arrow/util/ree_util.h" -#include "arrow/util/union_util.h" namespace arrow { @@ -84,6 +84,71 @@ static void SetNanBits(const ArraySpan& arr, uint8_t* out_bitmap, int64_t out_of } } +static void SetSparseUnionLogicalNullBits(const ArraySpan& span, uint8_t* out_bitmap, + int64_t out_offset) { + const auto* sparse_union_type = + ::arrow::internal::checked_cast(span.type); + DCHECK_LE(span.child_data.size(), 128); + + const int8_t* types = span.GetValues(1); // NOLINT + for (int64_t i = 0; i < span.length; i++) { + const int8_t child_id = sparse_union_type->child_ids()[types[span.offset + i]]; + if (span.child_data[child_id].IsNull(i)) { + bit_util::SetBit(out_bitmap, i + out_offset); + } + } +} + +static void SetDenseUnionLogicalNullBits(const ArraySpan& span, uint8_t* out_bitmap, + int64_t out_offset) { + const auto* dense_union_type = + ::arrow::internal::checked_cast(span.type); + DCHECK_LE(span.child_data.size(), 128); + + const int8_t* types = span.GetValues(1); // NOLINT + const int32_t* offsets = span.GetValues(2); // NOLINT + for (int64_t i = 0; i < span.length; i++) { + const int8_t child_id = dense_union_type->child_ids()[types[span.offset + i]]; + const int32_t offset = offsets[span.offset + i]; + if (span.child_data[child_id].IsNull(offset)) { + bit_util::SetBit(out_bitmap, i + out_offset); + } + } +} + +template +void SetREELogicalNullBits(const ArraySpan& span, uint8_t* out_bitmap, + int64_t out_offset) { + const auto& values = arrow::ree_util::ValuesArray(span); + const auto& values_bitmap = values.buffers[0].data; + + arrow::ree_util::RunEndEncodedArraySpan ree_span(span); + auto end = ree_span.end(); + for (auto it = ree_span.begin(); it != end; ++it) { + const bool is_null = + values_bitmap && + !bit_util::GetBit(values_bitmap, values.offset + it.index_into_array()); + if (is_null) { + for (int64_t i = 0; i < it.run_length(); i++) { + bit_util::SetBit(out_bitmap, it.logical_position() + i + out_offset); + } + } + } +} + +void SetREELogicalNullBits(const ArraySpan& span, uint8_t* out_bitmap, + int64_t out_offset) { + const auto type_id = arrow::ree_util::RunEndsArray(span).type->id(); + if (type_id == Type::INT16) { + SetREELogicalNullBits(span, out_bitmap, out_offset); + } else if (type_id == Type::INT32) { + SetREELogicalNullBits(span, out_bitmap, out_offset); + } else { + DCHECK_EQ(type_id, Type::INT64); + SetREELogicalNullBits(span, out_bitmap, out_offset); + } +} + Status IsNullExec(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) { const ArraySpan& arr = batch[0].array; ArraySpan* out_span = out->array_span_mutable(); @@ -106,11 +171,11 @@ Status IsNullExec(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) { const auto t = arr.type->id(); if (t == Type::SPARSE_UNION) { - union_util::SetLogicalSparseUnionNullBits(arr, out_bitmap, out_span->offset); + SetSparseUnionLogicalNullBits(arr, out_bitmap, out_span->offset); } else if (t == Type::DENSE_UNION) { - union_util::SetLogicalDenseUnionNullBits(arr, out_bitmap, out_span->offset); + SetDenseUnionLogicalNullBits(arr, out_bitmap, out_span->offset); } else if (t == Type::RUN_END_ENCODED) { - ree_util::SetLogicalNullBits(arr, out_bitmap, out_span->offset); + SetREELogicalNullBits(arr, out_bitmap, out_span->offset); } if (is_floating(arr.type->id()) && options.nan_is_null) { diff --git a/cpp/src/arrow/util/ree_util.cc b/cpp/src/arrow/util/ree_util.cc index fbbcfc2ed25..11da64313a8 100644 --- a/cpp/src/arrow/util/ree_util.cc +++ b/cpp/src/arrow/util/ree_util.cc @@ -47,25 +47,6 @@ int64_t LogicalNullCount(const ArraySpan& span) { return null_count; } -template -void SetLogicalNullBits(const ArraySpan& span, uint8_t* out_bitmap, int64_t out_offset) { - const auto& values = ValuesArray(span); - const auto& values_bitmap = values.buffers[0].data; - - RunEndEncodedArraySpan ree_span(span); - auto end = ree_span.end(); - for (auto it = ree_span.begin(); it != end; ++it) { - const bool is_null = - values_bitmap && - !bit_util::GetBit(values_bitmap, values.offset + it.index_into_array()); - if (is_null) { - for (int64_t i = 0; i < it.run_length(); i++) { - bit_util::SetBit(out_bitmap, it.logical_position() + i + out_offset); - } - } - } -} - } // namespace int64_t LogicalNullCount(const ArraySpan& span) { @@ -80,18 +61,6 @@ int64_t LogicalNullCount(const ArraySpan& span) { return LogicalNullCount(span); } -void SetLogicalNullBits(const ArraySpan& span, uint8_t* out_bitmap, int64_t out_offset) { - const auto type_id = RunEndsArray(span).type->id(); - if (type_id == Type::INT16) { - SetLogicalNullBits(span, out_bitmap, out_offset); - } else if (type_id == Type::INT32) { - SetLogicalNullBits(span, out_bitmap, out_offset); - } else { - DCHECK_EQ(type_id, Type::INT64); - SetLogicalNullBits(span, out_bitmap, out_offset); - } -} - int64_t FindPhysicalIndex(const ArraySpan& span, int64_t i, int64_t absolute_offset) { const auto type_id = RunEndsArray(span).type->id(); if (type_id == Type::INT16) { diff --git a/cpp/src/arrow/util/ree_util.h b/cpp/src/arrow/util/ree_util.h index e72752d55c8..83d828bbe53 100644 --- a/cpp/src/arrow/util/ree_util.h +++ b/cpp/src/arrow/util/ree_util.h @@ -56,8 +56,6 @@ Status ValidateRunEndEncodedChildren(const RunEndEncodedType& type, /// \brief Compute the logical null count of an REE array int64_t LogicalNullCount(const ArraySpan& span); -void SetLogicalNullBits(const ArraySpan& span, uint8_t* out_bitmap, int64_t out_offset); - namespace internal { /// \brief Uses binary-search to find the physical offset given a logical offset diff --git a/cpp/src/arrow/util/union_util.cc b/cpp/src/arrow/util/union_util.cc index 8dbf7a91d44..ca3ecf28644 100644 --- a/cpp/src/arrow/util/union_util.cc +++ b/cpp/src/arrow/util/union_util.cc @@ -55,35 +55,4 @@ int64_t LogicalDenseUnionNullCount(const ArraySpan& span) { return null_count; } -void SetLogicalSparseUnionNullBits(const ArraySpan& span, uint8_t* out_bitmap, - int64_t out_offset) { - const auto* sparse_union_type = - internal::checked_cast(span.type); - DCHECK_LE(span.child_data.size(), 128); - - const int8_t* types = span.GetValues(1); // NOLINT - for (int64_t i = 0; i < span.length; i++) { - const int8_t child_id = sparse_union_type->child_ids()[types[span.offset + i]]; - if (span.child_data[child_id].IsNull(i)) { - bit_util::SetBit(out_bitmap, i + out_offset); - } - } -} - -void SetLogicalDenseUnionNullBits(const ArraySpan& span, uint8_t* out_bitmap, - int64_t out_offset) { - const auto* dense_union_type = internal::checked_cast(span.type); - DCHECK_LE(span.child_data.size(), 128); - - const int8_t* types = span.GetValues(1); // NOLINT - const int32_t* offsets = span.GetValues(2); // NOLINT - for (int64_t i = 0; i < span.length; i++) { - const int8_t child_id = dense_union_type->child_ids()[types[span.offset + i]]; - const int32_t offset = offsets[span.offset + i]; - if (span.child_data[child_id].IsNull(offset)) { - bit_util::SetBit(out_bitmap, i + out_offset); - } - } -} - } // namespace arrow::union_util diff --git a/cpp/src/arrow/util/union_util.h b/cpp/src/arrow/util/union_util.h index 0afd71a2d55..0f30d5a3278 100644 --- a/cpp/src/arrow/util/union_util.h +++ b/cpp/src/arrow/util/union_util.h @@ -27,11 +27,5 @@ int64_t LogicalSparseUnionNullCount(const ArraySpan& span); /// \brief Compute the number of of logical nulls in a dense union array int64_t LogicalDenseUnionNullCount(const ArraySpan& span); -void SetLogicalSparseUnionNullBits(const ArraySpan& span, uint8_t* out_bitmap, - int64_t out_offset); - -void SetLogicalDenseUnionNullBits(const ArraySpan& span, uint8_t* out_bitmap, - int64_t out_offset); - } // namespace union_util } // namespace arrow From 197005af11c027cafe00da4aa01d31831bb40530 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 12 Apr 2023 13:48:05 +0200 Subject: [PATCH 04/12] address feedback (early return, use SetBitsTo) --- cpp/src/arrow/compute/kernels/scalar_validity.cc | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_validity.cc b/cpp/src/arrow/compute/kernels/scalar_validity.cc index a25729a5e91..002912b7bba 100644 --- a/cpp/src/arrow/compute/kernels/scalar_validity.cc +++ b/cpp/src/arrow/compute/kernels/scalar_validity.cc @@ -120,18 +120,18 @@ template void SetREELogicalNullBits(const ArraySpan& span, uint8_t* out_bitmap, int64_t out_offset) { const auto& values = arrow::ree_util::ValuesArray(span); - const auto& values_bitmap = values.buffers[0].data; + const auto* values_bitmap = values.MayHaveNulls() ? values.buffers[0].data : NULLPTR; + + if (!values_bitmap) { + return; + } arrow::ree_util::RunEndEncodedArraySpan ree_span(span); auto end = ree_span.end(); for (auto it = ree_span.begin(); it != end; ++it) { - const bool is_null = - values_bitmap && - !bit_util::GetBit(values_bitmap, values.offset + it.index_into_array()); - if (is_null) { - for (int64_t i = 0; i < it.run_length(); i++) { - bit_util::SetBit(out_bitmap, it.logical_position() + i + out_offset); - } + if (!bit_util::GetBit(values_bitmap, values.offset + it.index_into_array())) { + bit_util::SetBitsTo(out_bitmap, it.logical_position() + out_offset, it.run_length(), + true); } } } From 45a1cdb609cbdc642fb4362aa01e9502f0940069 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 13 Apr 2023 15:29:46 +0200 Subject: [PATCH 05/12] add basic c++ tests + fix impl for sliced arrays --- .../arrow/compute/kernels/scalar_validity.cc | 8 +++--- .../compute/kernels/scalar_validity_test.cc | 28 +++++++++++++++++++ python/pyarrow/tests/test_compute.py | 4 +++ 3 files changed, 36 insertions(+), 4 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_validity.cc b/cpp/src/arrow/compute/kernels/scalar_validity.cc index 002912b7bba..82b351ebf34 100644 --- a/cpp/src/arrow/compute/kernels/scalar_validity.cc +++ b/cpp/src/arrow/compute/kernels/scalar_validity.cc @@ -92,8 +92,8 @@ static void SetSparseUnionLogicalNullBits(const ArraySpan& span, uint8_t* out_bi const int8_t* types = span.GetValues(1); // NOLINT for (int64_t i = 0; i < span.length; i++) { - const int8_t child_id = sparse_union_type->child_ids()[types[span.offset + i]]; - if (span.child_data[child_id].IsNull(i)) { + const int8_t child_id = sparse_union_type->child_ids()[types[i]]; + if (span.child_data[child_id].IsNull(i + span.offset)) { bit_util::SetBit(out_bitmap, i + out_offset); } } @@ -108,8 +108,8 @@ static void SetDenseUnionLogicalNullBits(const ArraySpan& span, uint8_t* out_bit const int8_t* types = span.GetValues(1); // NOLINT const int32_t* offsets = span.GetValues(2); // NOLINT for (int64_t i = 0; i < span.length; i++) { - const int8_t child_id = dense_union_type->child_ids()[types[span.offset + i]]; - const int32_t offset = offsets[span.offset + i]; + const int8_t child_id = dense_union_type->child_ids()[types[i]]; + const int32_t offset = offsets[i]; if (span.child_data[child_id].IsNull(offset)) { bit_util::SetBit(out_bitmap, i + out_offset); } diff --git a/cpp/src/arrow/compute/kernels/scalar_validity_test.cc b/cpp/src/arrow/compute/kernels/scalar_validity_test.cc index 94d951c8382..5bef6787043 100644 --- a/cpp/src/arrow/compute/kernels/scalar_validity_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_validity_test.cc @@ -92,6 +92,34 @@ TEST_F(TestBooleanValidityKernels, IsNull) { "[true, false, false, true]", &nan_is_null_options); } +TEST_F(TestBooleanValidityKernels, IsNullUnion) { + auto field_i64 = ArrayFromJSON(int64(), "[null, 127, null, null, null]"); + auto field_str = ArrayFromJSON(utf8(), R"(["abcd", null, null, null, ""])"); + auto type_ids = ArrayFromJSON(int8(), R"([1, 0, 0, 1, 1])"); + ASSERT_OK_AND_ASSIGN(auto arr1, + SparseUnionArray::Make(*type_ids, {field_i64, field_str})); + auto expected = ArrayFromJSON(boolean(), "[false, false, true, true, false]"); + CheckScalarUnary("is_null", arr1, expected); + + auto dense_field_i64 = ArrayFromJSON(int64(), "[127, null]"); + auto dense_field_str = ArrayFromJSON(utf8(), R"(["abcd", null, ""])"); + auto value_offsets = ArrayFromJSON(int32(), R"([0, 0, 1, 1, 2])"); + ASSERT_OK_AND_ASSIGN(auto arr2, + DenseUnionArray::Make(*type_ids, *value_offsets, + {dense_field_i64, dense_field_str})); + CheckScalarUnary("is_null", arr2, expected); +} + +// TEST_F(TestBooleanValidityKernels, IsNullRunEndEncoded) { +// auto run_ends = ArrayFromJSON(int32(), R"([2, 3, 5, 7])"); +// auto values = ArrayFromJSON(int64(), R"([1, 2, null, 3])"); +// ASSERT_OK_AND_ASSIGN(auto ree_array, RunEndEncodedArray::Make(7, run_ends, values)); +// ASSERT_OK(ree_array->ValidateFull()); +// auto expected = +// ArrayFromJSON(boolean(), "[false, false, false, true, true, false, false]"); +// CheckScalarUnary("is_null", ree_array, expected); +// } + TEST(TestValidityKernels, IsFinite) { for (const auto& ty : IntTypes()) { CheckScalar("is_finite", {ArrayFromJSON(ty, "[0, 1, 42, null]")}, diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py index 173937644b3..a625506f305 100644 --- a/python/pyarrow/tests/test_compute.py +++ b/python/pyarrow/tests/test_compute.py @@ -1649,6 +1649,8 @@ def test_is_null_union(): result = arr.is_null() expected = pa.array([False, True, True, False, False]) assert result.equals(expected) + result = arr.slice(1, 2).is_null() + assert result.equals(expected.slice(1, 2)) arr = pa.UnionArray.from_dense( pa.array([0, 1, 0, 0, 0, 1, 1], type=pa.int8()), @@ -1662,6 +1664,8 @@ def test_is_null_union(): result = arr.is_null() expected = pa.array([False, False, False, True, False, True, False]) assert result.equals(expected) + result = arr.slice(1, 3).is_null() + assert result.equals(expected.slice(1, 3)) @pytest.mark.parametrize("typ", ["int16", "int32", "int64"]) From f93ecaed39ecb9788968e45ec1f871ed9dbcce4b Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 13 Apr 2023 16:13:39 +0200 Subject: [PATCH 06/12] support REE in FillFromScalar --- cpp/src/arrow/array/data.cc | 13 ++++++++++--- .../compute/kernels/scalar_validity_test.cc | 18 +++++++++--------- 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/cpp/src/arrow/array/data.cc b/cpp/src/arrow/array/data.cc index 5bbb76d9d5f..097399e6576 100644 --- a/cpp/src/arrow/array/data.cc +++ b/cpp/src/arrow/array/data.cc @@ -304,10 +304,10 @@ void ArraySpan::FillFromScalar(const Scalar& value) { Type::type type_id = value.type->id(); - // Populate null count and validity bitmap (only for non-union/null types) + // Populate null count and validity bitmap (only for non-union/ree/null types) if (type_id == Type::NA) { - this->null_count = value.is_valid ? 0 : 1; - } else if (is_union(type_id)) { + this->null_count = 1; + } else if (is_union(type_id) || type_id == Type::RUN_END_ENCODED) { this->null_count = 0; } else { this->null_count = value.is_valid ? 0 : 1; @@ -426,6 +426,13 @@ void ArraySpan::FillFromScalar(const Scalar& value) { this->child_data[i].FillFromScalar(*scalar.value[i]); } } + } else if (type_id == Type::RUN_END_ENCODED) { + const auto& scalar = checked_cast(value); + this->child_data.resize(2); + auto run_end_type = scalar.run_end_type(); + auto run_end = MakeScalar(run_end_type, 1).ValueOrDie(); + this->child_data[0].FillFromScalar(*run_end); + this->child_data[1].FillFromScalar(*scalar.value); } else if (type_id == Type::EXTENSION) { // Pass through storage const auto& scalar = checked_cast(value); diff --git a/cpp/src/arrow/compute/kernels/scalar_validity_test.cc b/cpp/src/arrow/compute/kernels/scalar_validity_test.cc index 5bef6787043..48353daf92e 100644 --- a/cpp/src/arrow/compute/kernels/scalar_validity_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_validity_test.cc @@ -110,15 +110,15 @@ TEST_F(TestBooleanValidityKernels, IsNullUnion) { CheckScalarUnary("is_null", arr2, expected); } -// TEST_F(TestBooleanValidityKernels, IsNullRunEndEncoded) { -// auto run_ends = ArrayFromJSON(int32(), R"([2, 3, 5, 7])"); -// auto values = ArrayFromJSON(int64(), R"([1, 2, null, 3])"); -// ASSERT_OK_AND_ASSIGN(auto ree_array, RunEndEncodedArray::Make(7, run_ends, values)); -// ASSERT_OK(ree_array->ValidateFull()); -// auto expected = -// ArrayFromJSON(boolean(), "[false, false, false, true, true, false, false]"); -// CheckScalarUnary("is_null", ree_array, expected); -// } +TEST_F(TestBooleanValidityKernels, IsNullRunEndEncoded) { + auto run_ends = ArrayFromJSON(int32(), R"([2, 3, 5, 7])"); + auto values = ArrayFromJSON(int64(), R"([1, 2, null, 3])"); + ASSERT_OK_AND_ASSIGN(auto ree_array, RunEndEncodedArray::Make(7, run_ends, values)); + ASSERT_OK(ree_array->ValidateFull()); + auto expected = + ArrayFromJSON(boolean(), "[false, false, false, true, true, false, false]"); + CheckScalarUnary("is_null", ree_array, expected); +} TEST(TestValidityKernels, IsFinite) { for (const auto& ty : IntTypes()) { From 0cb0d8b2a25e75aaa6c38d8ace61ec90ef434fa7 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 13 Apr 2023 17:04:47 +0200 Subject: [PATCH 07/12] address feedback --- cpp/src/arrow/array/data.cc | 4 ++-- .../arrow/compute/kernels/scalar_validity.cc | 19 ++++++++++--------- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/cpp/src/arrow/array/data.cc b/cpp/src/arrow/array/data.cc index 097399e6576..78e63953837 100644 --- a/cpp/src/arrow/array/data.cc +++ b/cpp/src/arrow/array/data.cc @@ -304,7 +304,7 @@ void ArraySpan::FillFromScalar(const Scalar& value) { Type::type type_id = value.type->id(); - // Populate null count and validity bitmap (only for non-union/ree/null types) + // Populate null count and validity bitmap if (type_id == Type::NA) { this->null_count = 1; } else if (is_union(type_id) || type_id == Type::RUN_END_ENCODED) { @@ -429,7 +429,7 @@ void ArraySpan::FillFromScalar(const Scalar& value) { } else if (type_id == Type::RUN_END_ENCODED) { const auto& scalar = checked_cast(value); this->child_data.resize(2); - auto run_end_type = scalar.run_end_type(); + auto& run_end_type = scalar.run_end_type(); auto run_end = MakeScalar(run_end_type, 1).ValueOrDie(); this->child_data[0].FillFromScalar(*run_end); this->child_data[1].FillFromScalar(*scalar.value); diff --git a/cpp/src/arrow/compute/kernels/scalar_validity.cc b/cpp/src/arrow/compute/kernels/scalar_validity.cc index 82b351ebf34..8df08631d08 100644 --- a/cpp/src/arrow/compute/kernels/scalar_validity.cc +++ b/cpp/src/arrow/compute/kernels/scalar_validity.cc @@ -167,15 +167,16 @@ Status IsNullExec(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) { } else { // Input has no nulls => output is entirely false. bit_util::SetBitsTo(out_bitmap, out_span->offset, out_span->length, false); - } - - const auto t = arr.type->id(); - if (t == Type::SPARSE_UNION) { - SetSparseUnionLogicalNullBits(arr, out_bitmap, out_span->offset); - } else if (t == Type::DENSE_UNION) { - SetDenseUnionLogicalNullBits(arr, out_bitmap, out_span->offset); - } else if (t == Type::RUN_END_ENCODED) { - SetREELogicalNullBits(arr, out_bitmap, out_span->offset); + // Except for union/ree which never has physical nulls, but can have logical + // nulls from the child arrays -> set those bits to true + const auto t = arr.type->id(); + if (t == Type::SPARSE_UNION) { + SetSparseUnionLogicalNullBits(arr, out_bitmap, out_span->offset); + } else if (t == Type::DENSE_UNION) { + SetDenseUnionLogicalNullBits(arr, out_bitmap, out_span->offset); + } else if (t == Type::RUN_END_ENCODED) { + SetREELogicalNullBits(arr, out_bitmap, out_span->offset); + } } if (is_floating(arr.type->id()) && options.nan_is_null) { From 0535a01b17c8695c0320aa30d90a115e55ee39ab Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 21 Apr 2023 12:02:48 +0200 Subject: [PATCH 08/12] Update cpp/src/arrow/compute/kernels/scalar_validity.cc Co-authored-by: Weston Pace --- cpp/src/arrow/compute/kernels/scalar_validity.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/src/arrow/compute/kernels/scalar_validity.cc b/cpp/src/arrow/compute/kernels/scalar_validity.cc index 8df08631d08..d518facf55e 100644 --- a/cpp/src/arrow/compute/kernels/scalar_validity.cc +++ b/cpp/src/arrow/compute/kernels/scalar_validity.cc @@ -119,6 +119,7 @@ static void SetDenseUnionLogicalNullBits(const ArraySpan& span, uint8_t* out_bit template void SetREELogicalNullBits(const ArraySpan& span, uint8_t* out_bitmap, int64_t out_offset) { + DCHECK(!is_nested(span.type->id())); const auto& values = arrow::ree_util::ValuesArray(span); const auto* values_bitmap = values.MayHaveNulls() ? values.buffers[0].data : NULLPTR; From 9a6e4b1b6f64b7aa88e23a31de7a47a9962d5d5d Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 2 May 2023 10:23:39 +0200 Subject: [PATCH 09/12] Update cpp/src/arrow/compute/kernels/scalar_validity.cc Co-authored-by: Weston Pace --- cpp/src/arrow/compute/kernels/scalar_validity.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_validity.cc b/cpp/src/arrow/compute/kernels/scalar_validity.cc index d518facf55e..bedeff1a127 100644 --- a/cpp/src/arrow/compute/kernels/scalar_validity.cc +++ b/cpp/src/arrow/compute/kernels/scalar_validity.cc @@ -119,8 +119,8 @@ static void SetDenseUnionLogicalNullBits(const ArraySpan& span, uint8_t* out_bit template void SetREELogicalNullBits(const ArraySpan& span, uint8_t* out_bitmap, int64_t out_offset) { - DCHECK(!is_nested(span.type->id())); const auto& values = arrow::ree_util::ValuesArray(span); + DCHECK(!is_nested(values.type->id())); const auto* values_bitmap = values.MayHaveNulls() ? values.buffers[0].data : NULLPTR; if (!values_bitmap) { From 9ee9820936630a8c85c7ddc678a08a919d69d940 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 17 May 2023 11:43:46 +0200 Subject: [PATCH 10/12] fix creating run end from scratch space --- cpp/src/arrow/array/data.cc | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/cpp/src/arrow/array/data.cc b/cpp/src/arrow/array/data.cc index a9afef22a30..89c2e1511ba 100644 --- a/cpp/src/arrow/array/data.cc +++ b/cpp/src/arrow/array/data.cc @@ -236,6 +236,19 @@ void SetOffsetsForScalar(ArraySpan* span, offset_type* buffer, int64_t value_siz span->buffers[buffer_index].size = 2 * sizeof(offset_type); } +template +void SetRunForScalar(ArraySpan* span, std::shared_ptr type, + uint64_t* scratch_space) { + // Create a lenght-1 array with the value 1 for a REE scalar + using run_type = typename ArrowType::c_type; + auto buffer = reinterpret_cast(scratch_space); + buffer[0] = static_cast(1); + auto data_buffer = + std::make_shared(reinterpret_cast(buffer), sizeof(run_type)); + auto data = ArrayData::Make(type, 1, {nullptr, data_buffer}, 0); + span->child_data[0].SetMembers(*data); +} + int GetNumBuffers(const DataType& type) { switch (type.id()) { case Type::NA: @@ -430,8 +443,17 @@ void ArraySpan::FillFromScalar(const Scalar& value) { const auto& scalar = checked_cast(value); this->child_data.resize(2); auto& run_end_type = scalar.run_end_type(); - auto run_end = MakeScalar(run_end_type, 1).ValueOrDie(); - this->child_data[0].FillFromScalar(*run_end); + switch (run_end_type->id()) { + case Type::INT16: + SetRunForScalar(this, run_end_type, this->scratch_space); + break; + case Type::INT32: + SetRunForScalar(this, run_end_type, this->scratch_space); + break; + default: + DCHECK_EQ(run_end_type->id(), Type::INT64); + SetRunForScalar(this, run_end_type, this->scratch_space); + } this->child_data[1].FillFromScalar(*scalar.value); } else if (type_id == Type::EXTENSION) { // Pass through storage From bd34e35fb4e0ce5834e163ec91e9106f09d53545 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 17 May 2023 15:19:07 +0200 Subject: [PATCH 11/12] address feedback --- cpp/src/arrow/array/data.cc | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/cpp/src/arrow/array/data.cc b/cpp/src/arrow/array/data.cc index 89c2e1511ba..6a3e780de39 100644 --- a/cpp/src/arrow/array/data.cc +++ b/cpp/src/arrow/array/data.cc @@ -236,17 +236,16 @@ void SetOffsetsForScalar(ArraySpan* span, offset_type* buffer, int64_t value_siz span->buffers[buffer_index].size = 2 * sizeof(offset_type); } -template -void SetRunForScalar(ArraySpan* span, std::shared_ptr type, - uint64_t* scratch_space) { - // Create a lenght-1 array with the value 1 for a REE scalar - using run_type = typename ArrowType::c_type; - auto buffer = reinterpret_cast(scratch_space); - buffer[0] = static_cast(1); +template +void FillRunEndsArrayForScalar(ArraySpan* span, std::shared_ptr run_end_type) { + using RunEndCType = typename RunEndType::c_type; + auto buffer = reinterpret_cast(span->scratch_space); + buffer[0] = static_cast(1); auto data_buffer = - std::make_shared(reinterpret_cast(buffer), sizeof(run_type)); - auto data = ArrayData::Make(type, 1, {nullptr, data_buffer}, 0); - span->child_data[0].SetMembers(*data); + std::make_shared(reinterpret_cast(buffer), sizeof(RunEndCType)); + auto data = + ArrayData::Make(std::move(run_end_type), 1, {nullptr, std::move(data_buffer)}, 0); + span->SetMembers(*data); } int GetNumBuffers(const DataType& type) { @@ -445,14 +444,14 @@ void ArraySpan::FillFromScalar(const Scalar& value) { auto& run_end_type = scalar.run_end_type(); switch (run_end_type->id()) { case Type::INT16: - SetRunForScalar(this, run_end_type, this->scratch_space); + FillRunEndsArrayForScalar(&this->child_data[0], run_end_type); break; case Type::INT32: - SetRunForScalar(this, run_end_type, this->scratch_space); + FillRunEndsArrayForScalar(&this->child_data[0], run_end_type); break; default: DCHECK_EQ(run_end_type->id(), Type::INT64); - SetRunForScalar(this, run_end_type, this->scratch_space); + FillRunEndsArrayForScalar(&this->child_data[0], run_end_type); } this->child_data[1].FillFromScalar(*scalar.value); } else if (type_id == Type::EXTENSION) { From c89f43d4cecfec57aa22fd2a5ddf495347b68e66 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 23 May 2023 10:27:20 +0200 Subject: [PATCH 12/12] set members directly --- cpp/src/arrow/array/data.cc | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/cpp/src/arrow/array/data.cc b/cpp/src/arrow/array/data.cc index 6a3e780de39..77b8209a180 100644 --- a/cpp/src/arrow/array/data.cc +++ b/cpp/src/arrow/array/data.cc @@ -237,15 +237,15 @@ void SetOffsetsForScalar(ArraySpan* span, offset_type* buffer, int64_t value_siz } template -void FillRunEndsArrayForScalar(ArraySpan* span, std::shared_ptr run_end_type) { +void FillRunEndsArrayForScalar(ArraySpan* span, const DataType* run_end_type) { using RunEndCType = typename RunEndType::c_type; auto buffer = reinterpret_cast(span->scratch_space); buffer[0] = static_cast(1); - auto data_buffer = - std::make_shared(reinterpret_cast(buffer), sizeof(RunEndCType)); - auto data = - ArrayData::Make(std::move(run_end_type), 1, {nullptr, std::move(data_buffer)}, 0); - span->SetMembers(*data); + span->type = run_end_type; + span->length = 1; + span->null_count = 0; + span->buffers[1].data = reinterpret_cast(buffer); + span->buffers[1].size = sizeof(RunEndCType); } int GetNumBuffers(const DataType& type) { @@ -444,14 +444,14 @@ void ArraySpan::FillFromScalar(const Scalar& value) { auto& run_end_type = scalar.run_end_type(); switch (run_end_type->id()) { case Type::INT16: - FillRunEndsArrayForScalar(&this->child_data[0], run_end_type); + FillRunEndsArrayForScalar(&this->child_data[0], run_end_type.get()); break; case Type::INT32: - FillRunEndsArrayForScalar(&this->child_data[0], run_end_type); + FillRunEndsArrayForScalar(&this->child_data[0], run_end_type.get()); break; default: DCHECK_EQ(run_end_type->id(), Type::INT64); - FillRunEndsArrayForScalar(&this->child_data[0], run_end_type); + FillRunEndsArrayForScalar(&this->child_data[0], run_end_type.get()); } this->child_data[1].FillFromScalar(*scalar.value); } else if (type_id == Type::EXTENSION) {