From eacedf8a5eacbe5701b77b3d79c46e964e1eba5f Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Mon, 13 Jun 2022 11:10:29 -0500 Subject: [PATCH 01/15] Start refactoring --- cpp/src/arrow/compute/api_vector.cc | 9 ++ cpp/src/arrow/compute/kernel.h | 10 +- .../arrow/compute/kernels/codegen_internal.h | 150 ------------------ .../compute/kernels/vector_array_sort.cc | 33 ++-- .../compute/kernels/vector_cumulative_ops.cc | 2 +- cpp/src/arrow/compute/kernels/vector_hash.cc | 52 +++--- .../arrow/compute/kernels/vector_nested.cc | 2 +- 7 files changed, 50 insertions(+), 208 deletions(-) diff --git a/cpp/src/arrow/compute/api_vector.cc b/cpp/src/arrow/compute/api_vector.cc index ad4248fc6c1..d60517dd742 100644 --- a/cpp/src/arrow/compute/api_vector.cc +++ b/cpp/src/arrow/compute/api_vector.cc @@ -298,6 +298,15 @@ Result> SortIndices(const ChunkedArray& chunked_array, const ArraySortOptions& array_options, ExecContext* ctx) { SortOptions options({SortKey("", array_options.order)}, array_options.null_placement); + + uint64_t* out_begin = out_arr->GetValues(1); + uint64_t* out_end = out_begin + out_arr->length; + std::iota(out_begin, out_end, 0); + + return SortChunkedArray(ctx->exec_context(), out_begin, out_end, + *batch[0].chunked_array(), options.order, + options.null_placement); + ARROW_ASSIGN_OR_RAISE( Datum result, CallFunction("sort_indices", {Datum(chunked_array)}, &options, ctx)); return result.make_array(); diff --git a/cpp/src/arrow/compute/kernel.h b/cpp/src/arrow/compute/kernel.h index e115c5194bc..110dd412b56 100644 --- a/cpp/src/arrow/compute/kernel.h +++ b/cpp/src/arrow/compute/kernel.h @@ -548,10 +548,6 @@ struct Kernel { using ArrayKernelExec = std::function; -/// \brief Kernel execution API being phased out per ARROW-16756 -using ArrayKernelExecOld = - std::function; - /// \brief Kernel data structure for implementations of ScalarFunction. In /// addition to the members found in Kernel, contains the null handling /// and memory pre-allocation preferences. @@ -600,13 +596,13 @@ struct VectorKernel : public Kernel { VectorKernel() = default; VectorKernel(std::vector in_types, OutputType out_type, - ArrayKernelExecOld exec, KernelInit init = NULLPTR, + ArrayKernelExec exec, KernelInit init = NULLPTR, FinalizeFunc finalize = NULLPTR) : Kernel(std::move(in_types), std::move(out_type), std::move(init)), exec(std::move(exec)), finalize(std::move(finalize)) {} - VectorKernel(std::shared_ptr sig, ArrayKernelExecOld exec, + VectorKernel(std::shared_ptr sig, ArrayKernelExec exec, KernelInit init = NULLPTR, FinalizeFunc finalize = NULLPTR) : Kernel(std::move(sig), std::move(init)), exec(std::move(exec)), @@ -614,7 +610,7 @@ struct VectorKernel : public Kernel { /// \brief Perform a single invocation of this kernel. Any required state is /// managed through the KernelContext. - ArrayKernelExecOld exec; + ArrayKernelExec exec; /// \brief For VectorKernel, convert intermediate results into finalized /// results. Mutates input argument. Some kernels may accumulate state diff --git a/cpp/src/arrow/compute/kernels/codegen_internal.h b/cpp/src/arrow/compute/kernels/codegen_internal.h index 8c3c7e3d423..2da4ed8afa4 100644 --- a/cpp/src/arrow/compute/kernels/codegen_internal.h +++ b/cpp/src/arrow/compute/kernels/codegen_internal.h @@ -460,7 +460,6 @@ Result ListValuesType(KernelContext*, const std::vector& // Generate an array kernel given template classes Status ExecFail(KernelContext* ctx, const ExecSpan& batch, ExecResult* out); -Status ExecFailOld(KernelContext* ctx, const ExecBatch& batch, Datum* out); ArrayKernelExec MakeFlippedBinaryExec(ArrayKernelExec exec); @@ -1062,38 +1061,6 @@ ArrayKernelExec GenerateNumeric(detail::GetTypeId get_id) { } } -// TODO(wesm): for ARROW-16756, while in transition to a new kernel -// API I duplicated this generator dispatcher to be able to create old -// kernel types -template