diff --git a/cpp/src/arrow/compute/exec_internal.h b/cpp/src/arrow/compute/exec_internal.h index 8beff2a6c63..7e4f364a928 100644 --- a/cpp/src/arrow/compute/exec_internal.h +++ b/cpp/src/arrow/compute/exec_internal.h @@ -46,7 +46,7 @@ class ARROW_EXPORT ExecSpanIterator { public: ExecSpanIterator() = default; - /// \brief Initialize itertor iterator and do basic argument validation + /// \brief Initialize iterator and do basic argument validation /// /// \param[in] batch the input ExecBatch /// \param[in] max_chunksize the maximum length of each ExecSpan. Depending diff --git a/cpp/src/arrow/compute/kernel.h b/cpp/src/arrow/compute/kernel.h index d8960308dff..077bdcb31fa 100644 --- a/cpp/src/arrow/compute/kernel.h +++ b/cpp/src/arrow/compute/kernel.h @@ -538,7 +538,7 @@ struct ScalarKernel : public Kernel { // ---------------------------------------------------------------------- // VectorKernel (for VectorFunction) -/// \brief Kernel data structure for implementations of VectorFunction. In +/// \brief Kernel data structure for implementations of VectorFunction. It /// contains an optional finalizer function, the null handling and memory /// pre-allocation preferences (which have different defaults from /// ScalarKernel), and some other execution-related options. 
diff --git a/cpp/src/arrow/compute/kernels/vector_selection.cc b/cpp/src/arrow/compute/kernels/vector_selection.cc index 5060b06465b..b1f54e68c69 100644 --- a/cpp/src/arrow/compute/kernels/vector_selection.cc +++ b/cpp/src/arrow/compute/kernels/vector_selection.cc @@ -28,6 +28,7 @@ #include "arrow/array/builder_primitive.h" #include "arrow/array/concatenate.h" #include "arrow/buffer_builder.h" +#include "arrow/chunk_resolver.h" #include "arrow/chunked_array.h" #include "arrow/compute/api_vector.h" #include "arrow/compute/kernels/common.h" @@ -43,6 +44,7 @@ #include "arrow/util/bitmap_ops.h" #include "arrow/util/bitmap_reader.h" #include "arrow/util/int_util.h" +#include "arrow/util/vector.h" namespace arrow { @@ -50,8 +52,11 @@ using internal::BinaryBitBlockCounter; using internal::BitBlockCount; using internal::BitBlockCounter; using internal::CheckIndexBounds; +using internal::ChunkLocation; +using internal::ChunkResolver; using internal::CopyBitmap; using internal::CountSetBits; +using internal::MapVector; using internal::OptionalBitBlockCounter; using internal::OptionalBitIndexer; @@ -257,6 +262,155 @@ Status PreallocateData(KernelContext* ctx, int64_t length, int bit_width, return Status::OK(); } +/// \brief Wrapper for getting values from ArraySpan and ChunkedArray. +/// \tparam ArrayKind either ArraySpan or ChunkedArray. +/// \tparam ValueType C types from array values. +template +struct PrimitiveGetter {}; + +template <> +struct PrimitiveGetter { + // For boolean, we can't add offset at beginning because values is a bitmap. 
+ explicit PrimitiveGetter(ArraySpan&& array) + : inner(std::move(array)), values(inner.GetValues(1, 0)) { // `array` is moved-from; read via `inner` + inner.GetNullCount(); + } + + explicit PrimitiveGetter(const ArraySpan& array) + : inner(array), values(array.GetValues(1, 0)) { + inner.GetNullCount(); + } + + bool IsValid(int64_t i) const { + return bit_util::GetBit(inner.buffers[0].data, inner.offset + i); + } + + bool GetValue(int64_t i) const { return bit_util::GetBit(values, inner.offset + i); } + + int64_t null_count() const { return inner.null_count; } + int64_t length() const { return inner.length; } + + ArraySpan inner; + const uint8_t* values; +}; + +template +struct PrimitiveGetter { + explicit PrimitiveGetter(ArraySpan&& array) + : inner(std::move(array)), values(inner.GetValues(1)) { // `array` is moved-from; read via `inner` + inner.GetNullCount(); + } + + explicit PrimitiveGetter(const ArraySpan& array) + : inner(array), values(array.GetValues(1)) { + inner.GetNullCount(); + } + + bool IsValid(int64_t i) const { + return bit_util::GetBit(inner.buffers[0].data, inner.offset + i); + } + + ValueType GetValue(int64_t i) const { return values[i]; } + + int64_t null_count() const { return inner.null_count; } + int64_t length() const { return inner.length; } + + ArraySpan inner; + const ValueType* values; +}; + +template <> +struct PrimitiveGetter { + explicit PrimitiveGetter(const ChunkedArray& array) + : inner(array), + resolver(ChunkResolver(array.chunks())), + null_count_(array.null_count()), + length_(array.length()) { + values_data = MapVector( + [](const auto& x) { return x->data()->template GetValues(1, 0); }, + array.chunks()); + values_is_valid = + MapVector([](const auto& x) { return x->null_bitmap_data(); }, array.chunks()); + values_offset = MapVector([](const auto& x) { return x->offset(); }, array.chunks()); + chunk_lengths = MapVector([](const auto& x) { return x->length(); }, array.chunks()); + } + + bool IsValid(int64_t i) const { + ChunkLocation loc = resolver.Resolve(i); + const uint8_t* validity_bitmap = 
values_is_valid[loc.chunk_index]; + if (validity_bitmap == nullptr) { + return true; + } else { + return bit_util::GetBit(validity_bitmap, + values_offset[loc.chunk_index] + loc.index_in_chunk); + } + } + + bool GetValue(int64_t i) const { + ChunkLocation loc = resolver.Resolve(i); + return bit_util::GetBit(values_data[loc.chunk_index], + values_offset[loc.chunk_index] + loc.index_in_chunk); + } + + int64_t null_count() const { return null_count_; } + int64_t length() const { return length_; } + + const ChunkedArray& inner; + ChunkResolver resolver; + int64_t null_count_; + int64_t length_; + std::vector chunk_lengths; + std::vector values_data; + std::vector values_is_valid; + std::vector values_offset; +}; + +template +struct PrimitiveGetter { + explicit PrimitiveGetter(const ChunkedArray& array) + : inner(array), + resolver(ChunkResolver(array.chunks())), + null_count_(array.null_count()), + length_(array.length()) { + values_data = MapVector( + [](const auto& x) { return x->data()->template GetValues(1); }, + array.chunks()); + values_is_valid = + MapVector([](const auto& x) { return x->null_bitmap_data(); }, array.chunks()); + values_offset = MapVector([](const auto& x) { return x->offset(); }, array.chunks()); + chunk_lengths = MapVector([](const auto& x) { return x->length(); }, array.chunks()); + } + + bool IsValid(int64_t i) const { + ChunkLocation loc = resolver.Resolve(i); + const uint8_t* validity_bitmap = values_is_valid[loc.chunk_index]; + if (validity_bitmap == nullptr) { + return true; + } else { + return bit_util::GetBit(validity_bitmap, + values_offset[loc.chunk_index] + loc.index_in_chunk); + } + } + + ValueType GetValue(int64_t i) const { + ChunkLocation loc = resolver.Resolve(i); + const ValueType* data = values_data[loc.chunk_index]; + return data[loc.index_in_chunk]; + } + + int64_t null_count() const { return null_count_; } + int64_t length() const { return length_; } + + const ChunkedArray& inner; + ChunkResolver resolver; + int64_t 
null_count_; + int64_t length_; + std::vector chunk_lengths; + std::vector values_data; + std::vector values_is_valid; + std::vector values_offset; +}; + // ---------------------------------------------------------------------- // Implement optimized take for primitive types from boolean to 1/2/4/8-byte // C-type based types. Use common implementation for every byte width and only @@ -271,12 +425,9 @@ Status PreallocateData(KernelContext* ctx, int64_t length, int bit_width, /// This function assumes that the indices have been boundschecked. template struct PrimitiveTakeImpl { - static void Exec(const ArraySpan& values, const ArraySpan& indices, - ArrayData* out_arr) { - const ValueCType* values_data = values.GetValues(1); - const uint8_t* values_is_valid = values.buffers[0].data; - auto values_offset = values.offset; - + template + static void ExecImpl(PrimitiveGetter& values, const ArraySpan& indices, + ArrayData* out_arr) { const IndexCType* indices_data = indices.GetValues(1); const uint8_t* indices_is_valid = indices.buffers[0].data; auto indices_offset = indices.offset; @@ -288,7 +439,7 @@ struct PrimitiveTakeImpl { // If either the values or indices have nulls, we preemptively zero out the // out validity bitmap so that we don't have to use ClearBit in each // iteration for nulls. 
- if (values.null_count != 0 || indices.null_count != 0) { + if (values.null_count() != 0 || indices.null_count != 0) { bit_util::SetBitsTo(out_is_valid, out_offset, indices.length, false); } @@ -298,14 +449,14 @@ struct PrimitiveTakeImpl { int64_t valid_count = 0; while (position < indices.length) { BitBlockCount block = indices_bit_counter.NextBlock(); - if (values.null_count == 0) { + if (values.null_count() == 0) { // Values are never null, so things are easier valid_count += block.popcount; if (block.popcount == block.length) { // Fastest path: neither values nor index nulls bit_util::SetBitsTo(out_is_valid, out_offset + position, block.length, true); for (int64_t i = 0; i < block.length; ++i) { - out[position] = values_data[indices_data[position]]; + out[position] = values.GetValue(indices_data[position]); ++position; } } else if (block.popcount > 0) { @@ -314,7 +465,7 @@ struct PrimitiveTakeImpl { if (bit_util::GetBit(indices_is_valid, indices_offset + position)) { // index is not null bit_util::SetBit(out_is_valid, out_offset + position); - out[position] = values_data[indices_data[position]]; + out[position] = values.GetValue(indices_data[position]); } else { out[position] = ValueCType{}; } @@ -329,10 +480,9 @@ struct PrimitiveTakeImpl { if (block.popcount == block.length) { // Faster path: indices are not null but values may be for (int64_t i = 0; i < block.length; ++i) { - if (bit_util::GetBit(values_is_valid, - values_offset + indices_data[position])) { + if (values.IsValid(indices_data[position])) { // value is not null - out[position] = values_data[indices_data[position]]; + out[position] = values.GetValue(indices_data[position]); bit_util::SetBit(out_is_valid, out_offset + position); ++valid_count; } else { @@ -346,10 +496,9 @@ struct PrimitiveTakeImpl { // one. 
for (int64_t i = 0; i < block.length; ++i) { if (bit_util::GetBit(indices_is_valid, indices_offset + position) && - bit_util::GetBit(values_is_valid, - values_offset + indices_data[position])) { + values.IsValid(indices_data[position])) { // index is not null && value is not null - out[position] = values_data[indices_data[position]]; + out[position] = values.GetValue(indices_data[position]); bit_util::SetBit(out_is_valid, out_offset + position); ++valid_count; } else { @@ -365,16 +514,31 @@ struct PrimitiveTakeImpl { } out_arr->null_count = out_arr->length - valid_count; } -}; -template -struct BooleanTakeImpl { static void Exec(const ArraySpan& values, const ArraySpan& indices, ArrayData* out_arr) { - const uint8_t* values_data = values.buffers[1].data; - const uint8_t* values_is_valid = values.buffers[0].data; - auto values_offset = values.offset; + auto getter = PrimitiveGetter(values); + ExecImpl(getter, indices, out_arr); + } + + static void Exec(const ChunkedArray& values, const ChunkedArray& indices_chunked, + ArrayDataVector* out_chunks) { + auto getter = PrimitiveGetter(values); + + for (int i = 0; i < indices_chunked.num_chunks(); ++i) { + ArraySpan indices_chunk(*indices_chunked.chunk(i)->data().get()); + ArrayData* out_arr = (*out_chunks)[i].get(); + + ExecImpl(getter, indices_chunk, out_arr); + } + } +}; +template +struct BooleanTakeImpl { + template + static void ExecImpl(PrimitiveGetter& values, const ArraySpan& indices, + ArrayData* out_arr) { const IndexCType* indices_data = indices.GetValues(1); const uint8_t* indices_is_valid = indices.buffers[0].data; auto indices_offset = indices.offset; @@ -386,15 +550,14 @@ struct BooleanTakeImpl { // If either the values or indices have nulls, we preemptively zero out the // out validity bitmap so that we don't have to use ClearBit in each // iteration for nulls. 
- if (values.null_count != 0 || indices.null_count != 0) { + if (values.null_count() != 0 || indices.null_count != 0) { bit_util::SetBitsTo(out_is_valid, out_offset, indices.length, false); } // Avoid uninitialized data in values array bit_util::SetBitsTo(out, out_offset, indices.length, false); auto PlaceDataBit = [&](int64_t loc, IndexCType index) { - bit_util::SetBitTo(out, out_offset + loc, - bit_util::GetBit(values_data, values_offset + index)); + bit_util::SetBitTo(out, out_offset + loc, values.GetValue(index)); }; OptionalBitBlockCounter indices_bit_counter(indices_is_valid, indices_offset, @@ -403,7 +566,7 @@ struct BooleanTakeImpl { int64_t valid_count = 0; while (position < indices.length) { BitBlockCount block = indices_bit_counter.NextBlock(); - if (values.null_count == 0) { + if (values.null_count() == 0) { // Values are never null, so things are easier valid_count += block.popcount; if (block.popcount == block.length) { @@ -431,8 +594,7 @@ struct BooleanTakeImpl { if (block.popcount == block.length) { // Faster path: indices are not null but values may be for (int64_t i = 0; i < block.length; ++i) { - if (bit_util::GetBit(values_is_valid, - values_offset + indices_data[position])) { + if (values.IsValid(indices_data[position])) { // value is not null bit_util::SetBit(out_is_valid, out_offset + position); PlaceDataBit(position, indices_data[position]); @@ -447,8 +609,7 @@ struct BooleanTakeImpl { for (int64_t i = 0; i < block.length; ++i) { if (bit_util::GetBit(indices_is_valid, indices_offset + position)) { // index is not null - if (bit_util::GetBit(values_is_valid, - values_offset + indices_data[position])) { + if (values.IsValid(indices_data[position])) { // value is not null PlaceDataBit(position, indices_data[position]); bit_util::SetBit(out_is_valid, out_offset + position); @@ -464,6 +625,24 @@ struct BooleanTakeImpl { } out_arr->null_count = out_arr->length - valid_count; } + + static void Exec(const ArraySpan& values, const ArraySpan& indices, 
+ ArrayData* out_arr) { + auto getter = PrimitiveGetter(values); + ExecImpl(getter, indices, out_arr); + } + + static void Exec(const ChunkedArray& values, const ChunkedArray& indices_chunked, + ArrayDataVector* out_chunks) { + auto getter = PrimitiveGetter(values); + + for (int i = 0; i < indices_chunked.num_chunks(); ++i) { + ArraySpan indices_chunk(*indices_chunked.chunk(i)->data().get()); + ArrayData* out_arr = (*out_chunks)[i].get(); + + ExecImpl(getter, indices_chunk, out_arr); + } + } }; template