diff --git a/cpp/cmake_modules/DefineOptions.cmake b/cpp/cmake_modules/DefineOptions.cmake index 6700a409e1b..1ec1245e7d3 100644 --- a/cpp/cmake_modules/DefineOptions.cmake +++ b/cpp/cmake_modules/DefineOptions.cmake @@ -296,7 +296,7 @@ takes precedence over ccache if a storage backend is configured" ON) define_option(ARROW_BUILD_UTILITIES "Build Arrow commandline utilities" OFF) - define_option(ARROW_COMPUTE "Build the Arrow Compute Modules" OFF) + define_option(ARROW_COMPUTE "Build all Arrow Compute kernels" OFF) define_option(ARROW_CSV "Build the Arrow CSV Parser Module" OFF) @@ -361,7 +361,6 @@ takes precedence over ccache if a storage backend is configured" ON) "Build the Parquet libraries" OFF DEPENDS - ARROW_COMPUTE ARROW_IPC) define_option(ARROW_ORC diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt index e0931c19eff..721812b4c09 100644 --- a/cpp/src/arrow/CMakeLists.txt +++ b/cpp/src/arrow/CMakeLists.txt @@ -375,73 +375,93 @@ if(ARROW_CSV) csv/column_decoder.cc csv/options.cc csv/parser.cc - csv/reader.cc) - if(ARROW_COMPUTE) - list(APPEND ARROW_SRCS csv/writer.cc) - endif() + csv/reader.cc + csv/writer.cc) list(APPEND ARROW_TESTING_SRCS csv/test_common.cc) endif() +# Baseline Compute functionality + scalar casts and a few select kernels +list(APPEND + ARROW_SRCS + compute/api_aggregate.cc + compute/api_scalar.cc + compute/api_vector.cc + compute/cast.cc + compute/exec.cc + compute/exec/groupby.cc + compute/exec/accumulation_queue.cc + compute/exec/aggregate_node.cc + compute/exec/asof_join_node.cc + compute/exec/bloom_filter.cc + compute/exec/exec_plan.cc + compute/exec/expression.cc + compute/exec/fetch_node.cc + compute/exec/filter_node.cc + compute/exec/hash_join.cc + compute/exec/hash_join_dict.cc + compute/exec/hash_join_node.cc + compute/exec/key_hash.cc + compute/exec/key_map.cc + compute/exec/map_node.cc + compute/exec/options.cc + compute/exec/order_by_impl.cc + compute/exec/partition_util.cc + compute/exec/project_node.cc + compute/exec/query_context.cc + compute/exec/sink_node.cc + compute/exec/source_node.cc + compute/exec/swiss_join.cc + compute/exec/task_util.cc + compute/exec/tpch_node.cc + compute/exec/union_node.cc + compute/exec/util.cc + compute/function.cc + compute/function_internal.cc + compute/kernel.cc + compute/light_array.cc + compute/ordering.cc + compute/registry.cc + compute/kernels/codegen_internal.cc + compute/kernels/row_encoder.cc + compute/kernels/scalar_cast_boolean.cc + compute/kernels/scalar_cast_dictionary.cc + compute/kernels/scalar_cast_extension.cc + compute/kernels/scalar_cast_internal.cc + compute/kernels/scalar_cast_nested.cc + compute/kernels/scalar_cast_numeric.cc + compute/kernels/scalar_cast_string.cc + compute/kernels/scalar_cast_temporal.cc + compute/kernels/util_internal.cc + compute/kernels/vector_hash.cc + compute/kernels/vector_selection.cc + compute/row/encode_internal.cc + compute/row/compare_internal.cc + compute/row/grouper.cc + compute/row/row_internal.cc) + +append_avx2_src(compute/exec/bloom_filter_avx2.cc) +append_avx2_src(compute/exec/key_hash_avx2.cc) +append_avx2_src(compute/exec/key_map_avx2.cc) +append_avx2_src(compute/exec/swiss_join_avx2.cc) +append_avx2_src(compute/exec/util_avx2.cc) +append_avx2_src(compute/row/compare_internal_avx2.cc) +append_avx2_src(compute/row/encode_internal_avx2.cc) + +list(APPEND ARROW_TESTING_SRCS compute/exec/test_util.cc) + if(ARROW_COMPUTE) + # Include the remaining kernels list(APPEND ARROW_SRCS - compute/api_aggregate.cc - compute/api_scalar.cc - compute/api_vector.cc - compute/cast.cc - compute/exec.cc - compute/exec/groupby.cc - compute/exec/accumulation_queue.cc - compute/exec/aggregate_node.cc - compute/exec/asof_join_node.cc - compute/exec/bloom_filter.cc - compute/exec/exec_plan.cc - compute/exec/expression.cc - compute/exec/fetch_node.cc - compute/exec/filter_node.cc - compute/exec/hash_join.cc - compute/exec/hash_join_dict.cc - compute/exec/hash_join_node.cc - compute/exec/key_hash.cc - compute/exec/key_map.cc - compute/exec/map_node.cc - compute/exec/options.cc - compute/exec/order_by_impl.cc - compute/exec/partition_util.cc - compute/exec/project_node.cc - compute/exec/query_context.cc - compute/exec/sink_node.cc - compute/exec/source_node.cc - compute/exec/swiss_join.cc - compute/exec/task_util.cc - compute/exec/tpch_node.cc - compute/exec/union_node.cc - compute/exec/util.cc - compute/function.cc - compute/function_internal.cc - compute/kernel.cc - compute/light_array.cc - compute/ordering.cc - compute/registry.cc compute/kernels/aggregate_basic.cc compute/kernels/aggregate_mode.cc compute/kernels/aggregate_quantile.cc compute/kernels/aggregate_tdigest.cc compute/kernels/aggregate_var_std.cc - compute/kernels/codegen_internal.cc compute/kernels/hash_aggregate.cc - compute/kernels/row_encoder.cc compute/kernels/scalar_arithmetic.cc compute/kernels/scalar_boolean.cc - compute/kernels/scalar_cast_boolean.cc - compute/kernels/scalar_cast_dictionary.cc - compute/kernels/scalar_cast_extension.cc - compute/kernels/scalar_cast_internal.cc - compute/kernels/scalar_cast_nested.cc - compute/kernels/scalar_cast_numeric.cc - compute/kernels/scalar_cast_string.cc - compute/kernels/scalar_cast_temporal.cc compute/kernels/scalar_compare.cc compute/kernels/scalar_if_else.cc compute/kernels/scalar_nested.cc @@ -453,33 +473,16 @@ if(ARROW_COMPUTE) compute/kernels/scalar_temporal_binary.cc compute/kernels/scalar_temporal_unary.cc compute/kernels/scalar_validity.cc - compute/kernels/util_internal.cc compute/kernels/vector_array_sort.cc compute/kernels/vector_cumulative_ops.cc - compute/kernels/vector_hash.cc compute/kernels/vector_nested.cc compute/kernels/vector_rank.cc compute/kernels/vector_replace.cc compute/kernels/vector_select_k.cc - compute/kernels/vector_selection.cc - compute/kernels/vector_sort.cc - compute/row/encode_internal.cc - compute/row/compare_internal.cc - compute/row/grouper.cc - compute/row/row_internal.cc) + compute/kernels/vector_sort.cc) append_avx2_src(compute/kernels/aggregate_basic_avx2.cc) append_avx512_src(compute/kernels/aggregate_basic_avx512.cc) - - append_avx2_src(compute/exec/bloom_filter_avx2.cc) - append_avx2_src(compute/exec/key_hash_avx2.cc) - append_avx2_src(compute/exec/key_map_avx2.cc) - append_avx2_src(compute/exec/swiss_join_avx2.cc) - append_avx2_src(compute/exec/util_avx2.cc) - append_avx2_src(compute/row/compare_internal_avx2.cc) - append_avx2_src(compute/row/encode_internal_avx2.cc) - - list(APPEND ARROW_TESTING_SRCS compute/exec/test_util.cc) endif() if(ARROW_FILESYSTEM) @@ -800,12 +803,7 @@ add_arrow_test(table_test add_arrow_test(tensor_test) add_arrow_test(sparse_tensor_test) -set(STL_TEST_SRCS stl_iterator_test.cc) -if(ARROW_COMPUTE) - # This unit test uses compute code - list(APPEND STL_TEST_SRCS stl_test.cc) -endif() -add_arrow_test(stl_test SOURCES ${STL_TEST_SRCS}) +add_arrow_test(stl_test SOURCES stl_iterator_test.cc stl_test.cc) add_arrow_benchmark(builder_benchmark) add_arrow_benchmark(compare_benchmark) @@ -821,6 +819,7 @@ add_subdirectory(testing) add_subdirectory(array) add_subdirectory(c) +add_subdirectory(compute) add_subdirectory(io) add_subdirectory(tensor) add_subdirectory(util) @@ -830,10 +829,6 @@ if(ARROW_CSV) add_subdirectory(csv) endif() -if(ARROW_COMPUTE) - add_subdirectory(compute) -endif() - if(ARROW_SUBSTRAIT) add_subdirectory(engine) endif() diff --git a/cpp/src/arrow/array/CMakeLists.txt b/cpp/src/arrow/array/CMakeLists.txt index c0fc17687db..d8dc83bb71d 100644 --- a/cpp/src/arrow/array/CMakeLists.txt +++ b/cpp/src/arrow/array/CMakeLists.txt @@ -16,11 +16,7 @@ # under the License. add_arrow_test(concatenate_test) - -if(ARROW_COMPUTE) - # This unit test uses compute code - add_arrow_test(diff_test) -endif() +add_arrow_test(diff_test) # Headers: top level arrow_install_all_headers("arrow/array") diff --git a/cpp/src/arrow/compute/CMakeLists.txt b/cpp/src/arrow/compute/CMakeLists.txt index 91fa796f6d4..cdf019b798b 100644 --- a/cpp/src/arrow/compute/CMakeLists.txt +++ b/cpp/src/arrow/compute/CMakeLists.txt @@ -26,8 +26,22 @@ arrow_add_pkg_config("arrow-compute") # Unit tests # +# The following kernels are always present: +# - array_filter +# - array_take +# - cast +# - dictionary_encode +# - drop_null +# - filter +# - indices_nonzero +# - take +# - unique +# - value_counts +# +# Tests that use additional kernels should specify REQUIRE_ALL_KERNELS to avoid +# being included in minimal builds. See: GH-34388 function(ADD_ARROW_COMPUTE_TEST REL_TEST_NAME) - set(options) + set(options REQUIRE_ALL_KERNELS) set(one_value_args PREFIX) set(multi_value_args LABELS) cmake_parse_arguments(ARG @@ -36,6 +50,10 @@ function(ADD_ARROW_COMPUTE_TEST REL_TEST_NAME) "${multi_value_args}" ${ARGN}) + if(ARG_REQUIRE_ALL_KERNELS AND (NOT ARROW_COMPUTE)) + return() + endif() + if(ARG_PREFIX) set(PREFIX ${ARG_PREFIX}) else() diff --git a/cpp/src/arrow/compute/exec/CMakeLists.txt b/cpp/src/arrow/compute/exec/CMakeLists.txt index e20a4021897..ff979be84a7 100644 --- a/cpp/src/arrow/compute/exec/CMakeLists.txt +++ b/cpp/src/arrow/compute/exec/CMakeLists.txt @@ -18,6 +18,7 @@ arrow_install_all_headers("arrow/compute/exec") add_arrow_compute_test(expression_test + REQUIRE_ALL_KERNELS PREFIX "arrow-compute" SOURCES @@ -25,6 +26,7 @@ add_arrow_compute_test(expression_test subtree_test.cc) add_arrow_compute_test(plan_test + REQUIRE_ALL_KERNELS PREFIX "arrow-compute" SOURCES @@ -32,12 +34,14 @@ add_arrow_compute_test(plan_test test_nodes_test.cc test_nodes.cc) add_arrow_compute_test(fetch_node_test + REQUIRE_ALL_KERNELS PREFIX "arrow-compute" SOURCES fetch_node_test.cc test_nodes.cc) add_arrow_compute_test(hash_join_node_test + REQUIRE_ALL_KERNELS PREFIX "arrow-compute" SOURCES @@ -45,6 +49,7 @@ add_arrow_compute_test(hash_join_node_test bloom_filter_test.cc key_hash_test.cc) add_arrow_compute_test(asof_join_node_test + REQUIRE_ALL_KERNELS PREFIX "arrow-compute" SOURCES @@ -52,7 +57,7 @@ add_arrow_compute_test(asof_join_node_test test_nodes.cc) add_arrow_compute_test(tpch_node_test PREFIX "arrow-compute") add_arrow_compute_test(union_node_test PREFIX "arrow-compute") -add_arrow_compute_test(groupby_test PREFIX "arrow-compute") +add_arrow_compute_test(groupby_test REQUIRE_ALL_KERNELS PREFIX "arrow-compute") add_arrow_compute_test(util_test PREFIX "arrow-compute" diff --git a/cpp/src/arrow/compute/kernels/CMakeLists.txt b/cpp/src/arrow/compute/kernels/CMakeLists.txt index a4d0fc8582f..7db97041d7d 100644 --- a/cpp/src/arrow/compute/kernels/CMakeLists.txt +++ b/cpp/src/arrow/compute/kernels/CMakeLists.txt @@ -18,19 +18,30 @@ # ---------------------------------------------------------------------- # Scalar kernels +add_arrow_compute_test(scalar_cast_test SOURCES scalar_cast_test.cc test_util.cc) + add_arrow_compute_test(scalar_type_test + REQUIRE_ALL_KERNELS SOURCES scalar_boolean_test.cc - scalar_cast_test.cc scalar_nested_test.cc scalar_string_test.cc test_util.cc) -add_arrow_compute_test(scalar_if_else_test SOURCES scalar_if_else_test.cc test_util.cc) +add_arrow_compute_test(scalar_if_else_test + REQUIRE_ALL_KERNELS + SOURCES + scalar_if_else_test.cc + test_util.cc) -add_arrow_compute_test(scalar_temporal_test SOURCES scalar_temporal_test.cc test_util.cc) +add_arrow_compute_test(scalar_temporal_test + REQUIRE_ALL_KERNELS + SOURCES + scalar_temporal_test.cc + test_util.cc) add_arrow_compute_test(scalar_math_test + REQUIRE_ALL_KERNELS SOURCES scalar_arithmetic_test.cc scalar_compare_test.cc @@ -38,6 +49,7 @@ add_arrow_compute_test(scalar_math_test test_util.cc) add_arrow_compute_test(scalar_utility_test + REQUIRE_ALL_KERNELS SOURCES scalar_random_test.cc scalar_set_lookup_test.cc @@ -59,6 +71,7 @@ add_arrow_benchmark(scalar_temporal_benchmark PREFIX "arrow-compute") # Vector kernels add_arrow_compute_test(vector_test + REQUIRE_ALL_KERNELS SOURCES vector_cumulative_ops_test.cc vector_hash_test.cc @@ -82,6 +95,7 @@ add_arrow_benchmark(vector_selection_benchmark PREFIX "arrow-compute") # Aggregates add_arrow_compute_test(aggregate_test + REQUIRE_ALL_KERNELS SOURCES aggregate_test.cc hash_aggregate_test.cc diff --git a/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc b/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc index 0021aa11089..249da4758ea 100644 --- a/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc +++ b/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc @@ -761,16 +761,6 @@ struct ArithmeticFloatingPointFunction : public ArithmeticFunction { } }; -// A scalar kernel that ignores (assumed all-null) inputs and returns null. -Status NullToNullExec(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) { - return Status::OK(); -} - -void AddNullExec(ScalarFunction* func) { - std::vector input_types(func->arity().num_args, InputType(Type::NA)); - DCHECK_OK(func->AddKernel(std::move(input_types), OutputType(null()), NullToNullExec)); -} - template std::shared_ptr MakeArithmeticFunction(std::string name, FunctionDoc doc) { diff --git a/cpp/src/arrow/compute/kernels/scalar_round.cc b/cpp/src/arrow/compute/kernels/scalar_round.cc index 41961ad50e5..fc2cb5b8a6e 100644 --- a/cpp/src/arrow/compute/kernels/scalar_round.cc +++ b/cpp/src/arrow/compute/kernels/scalar_round.cc @@ -771,114 +771,6 @@ struct Trunc { } }; -// Generate a kernel given a bitwise arithmetic functor. Assumes the -// functor treats all integer types of equal width identically -template