From 36d22ce440ca61fdf3037ee792186824b053218c Mon Sep 17 00:00:00 2001 From: Li Jin Date: Sun, 2 Apr 2023 12:02:17 -0400 Subject: [PATCH 01/11] GH-34843: Fix windows R build --- r/src/compute-exec.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/r/src/compute-exec.cpp b/r/src/compute-exec.cpp index 7fb1b1394dc..fa2b6911335 100644 --- a/r/src/compute-exec.cpp +++ b/r/src/compute-exec.cpp @@ -30,6 +30,7 @@ #include #include +namespace acero = ::arrow::acero; namespace compute = ::arrow::compute; std::shared_ptr make_compute_options(std::string func_name, From 00ac7c33f21338c430b43d0b7b12118b6a0e5881 Mon Sep 17 00:00:00 2001 From: Li Jin Date: Mon, 3 Apr 2023 08:44:17 -0400 Subject: [PATCH 02/11] Try fixing r/configure.win --- r/configure.win | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/r/configure.win b/r/configure.win index f416e75f3c1..be584f49e62 100755 --- a/r/configure.win +++ b/r/configure.win @@ -59,11 +59,11 @@ function configure_release() { # NOTE: If you make changes to the libraries below, you should also change # ci/scripts/r_windows_build.sh and ci/scripts/PKGBUILD PKG_CFLAGS="-I${RWINLIB}/include -DARROW_STATIC -DPARQUET_STATIC -DARROW_DS_STATIC \ - -DARROW_R_WITH_PARQUET -DARROW_R_WITH_DATASET \ + -DARROW_R_WITH_PARQUET -DARROW_R_WITH_ACERO -DARROW_R_WITH_DATASET \ -DARROW_R_WITH_JSON" PKG_LIBS="-L${RWINLIB}/lib"'$(subst gcc,,$(COMPILED_BY))$(R_ARCH) ' PKG_LIBS="$PKG_LIBS -L${RWINLIB}/lib"'$(R_ARCH)$(CRT) ' - PKG_LIBS="$PKG_LIBS -larrow_dataset -lparquet -larrow -larrow_bundled_dependencies \ + PKG_LIBS="$PKG_LIBS -larrow_acero -larrow_dataset -lparquet -larrow -larrow_bundled_dependencies \ -lutf8proc -lthrift -lsnappy -lz -lzstd -llz4 -lbz2 ${BROTLI_LIBS} -lole32 \ ${MIMALLOC_LIBS} ${OPENSSL_LIBS}" @@ -98,6 +98,11 @@ function configure_dev() { PKG_CONFIG_PACKAGES="$PKG_CONFIG_PACKAGES parquet" fi + if [ $(cmake_option ARROW_ACERO) -eq 1 ]; then + PKG_CFLAGS="$PKG_CFLAGS -DARROW_R_WITH_ACERO" + PKG_CONFIG_PACKAGES="$PKG_CONFIG_PACKAGES arrow-acero" + fi + if [ $(cmake_option ARROW_DATASET) -eq 1 ]; then PKG_CFLAGS="$PKG_CFLAGS -DARROW_R_WITH_DATASET" PKG_CONFIG_PACKAGES="$PKG_CONFIG_PACKAGES arrow-dataset" From 129f2d98e169ba913d9dd0300735fcf4c01d2b1e Mon Sep 17 00:00:00 2001 From: Li Jin Date: Mon, 3 Apr 2023 10:25:25 -0400 Subject: [PATCH 03/11] Try fixing arrow_types.h --- r/src/arrow_types.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/r/src/arrow_types.h b/r/src/arrow_types.h index 819346e2053..66f3a8fdd63 100644 --- a/r/src/arrow_types.h +++ b/r/src/arrow_types.h @@ -30,17 +30,19 @@ #include // forward declaration-only headers -#include #include #include #include +#if defined(ARROW_R_WITH_ACERO) +#include +#include +#endif + #if defined(ARROW_R_WITH_DATASET) #include #endif -#include - #include #include #include From c5da945f090aa63fc5c5db462e95d489f2ac8687 Mon Sep 17 00:00:00 2001 From: Li Jin Date: Mon, 3 Apr 2023 11:26:22 -0400 Subject: [PATCH 04/11] Try fixing homebrew --- dev/tasks/homebrew-formulae/apache-arrow.rb | 1 + dev/tasks/homebrew-formulae/autobrew/apache-arrow-static.rb | 1 + dev/tasks/homebrew-formulae/autobrew/apache-arrow.rb | 1 + 3 files changed, 3 insertions(+) diff --git a/dev/tasks/homebrew-formulae/apache-arrow.rb b/dev/tasks/homebrew-formulae/apache-arrow.rb index fdd77cf21a7..dfdeddb6aa5 100644 --- a/dev/tasks/homebrew-formulae/apache-arrow.rb +++ b/dev/tasks/homebrew-formulae/apache-arrow.rb @@ -72,6 +72,7 @@ def install # link against system libc++ instead of llvm provided libc++ ENV.remove "HOMEBREW_LIBRARY_PATHS", Formula["llvm"].opt_lib args = %W[ + -DARROW_ACERO=ON -DARROW_COMPUTE=ON -DARROW_CSV=ON -DARROW_DATASET=ON diff --git a/dev/tasks/homebrew-formulae/autobrew/apache-arrow-static.rb b/dev/tasks/homebrew-formulae/autobrew/apache-arrow-static.rb index 84206978f35..c0df6a32175 100644 --- a/dev/tasks/homebrew-formulae/autobrew/apache-arrow-static.rb +++ b/dev/tasks/homebrew-formulae/autobrew/apache-arrow-static.rb @@ -53,6 +53,7 @@ def install args = %W[ -DARROW_BUILD_SHARED=OFF -DARROW_BUILD_UTILITIES=ON + -DARROW_ACERO=ON -DARROW_COMPUTE=ON -DARROW_CSV=ON -DARROW_DATASET=ON diff --git a/dev/tasks/homebrew-formulae/autobrew/apache-arrow.rb b/dev/tasks/homebrew-formulae/autobrew/apache-arrow.rb index 90be13add08..c8adfdbaf7d 100644 --- a/dev/tasks/homebrew-formulae/autobrew/apache-arrow.rb +++ b/dev/tasks/homebrew-formulae/autobrew/apache-arrow.rb @@ -44,6 +44,7 @@ def install args = %W[ -DARROW_BUILD_SHARED=OFF -DARROW_BUILD_UTILITIES=ON + -DARROW_ACERO=ON -DARROW_COMPUTE=ON -DARROW_CSV=ON -DARROW_CXXFLAGS="-D_LIBCPP_DISABLE_AVAILABILITY" From 8f56065d2bcfb91a2368784550072d911f99bd04 Mon Sep 17 00:00:00 2001 From: Li Jin Date: Mon, 3 Apr 2023 11:39:13 -0400 Subject: [PATCH 05/11] Try fixing window build --- r/inst/build_arrow_static.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/r/inst/build_arrow_static.sh b/r/inst/build_arrow_static.sh index 800b9cde3f6..1baf011a412 100755 --- a/r/inst/build_arrow_static.sh +++ b/r/inst/build_arrow_static.sh @@ -55,6 +55,7 @@ ${CMAKE} -DARROW_BOOST_USE_SHARED=OFF \ -DARROW_BUILD_TESTS=OFF \ -DARROW_BUILD_SHARED=OFF \ -DARROW_BUILD_STATIC=ON \ + -DARROW_ACERO=${ARROW_ACERO:-ON} \ -DARROW_COMPUTE=ON \ -DARROW_CSV=ON \ -DARROW_DATASET=${ARROW_DATASET:-ON} \ From 71e8584dc9e19cfbb2c9b3239b93090118b55de8 Mon Sep 17 00:00:00 2001 From: Weston Pace Date: Tue, 4 Apr 2023 08:17:31 -0700 Subject: [PATCH 06/11] Some additional changes (#1) * Making Acero off on minimal builds. Adding 'skip if no snappy' to parquet test that needs snappy. * Various changes to allow R to build without acero * Skipping query engine tests if acero not enabled * Disabling example that depended on Acero if Acero is not enabled * Add arrow_with_acero function * Turns out @examplesIf has to replace @examples --- r/NAMESPACE | 1 + r/R/arrow-info.R | 8 + r/R/arrowExports.R | 4 + r/R/type.R | 2 +- r/data-raw/codegen.R | 2 +- r/inst/build_arrow_static.sh | 2 +- r/man/arrow_info.Rd | 3 + r/man/data-type.Rd | 2 + r/src/array.cpp | 1 + r/src/arrowExports.cpp | 141 ++++++++++++++++++ r/src/arrow_types.h | 4 +- r/src/compute-exec.cpp | 38 +++-- r/tests/testthat/test-dplyr-arrange.R | 2 + r/tests/testthat/test-dplyr-collapse.R | 2 + r/tests/testthat/test-dplyr-count.R | 2 + r/tests/testthat/test-dplyr-distinct.R | 2 + r/tests/testthat/test-dplyr-filter.R | 2 + .../testthat/test-dplyr-funcs-conditional.R | 1 + r/tests/testthat/test-dplyr-funcs-datetime.R | 2 + r/tests/testthat/test-dplyr-funcs-math.R | 1 + r/tests/testthat/test-dplyr-funcs-string.R | 1 + r/tests/testthat/test-dplyr-funcs-type.R | 1 + r/tests/testthat/test-dplyr-group-by.R | 2 + r/tests/testthat/test-dplyr-join.R | 2 + r/tests/testthat/test-dplyr-mutate.R | 2 + r/tests/testthat/test-dplyr-query.R | 2 + r/tests/testthat/test-dplyr-select.R | 2 + r/tests/testthat/test-dplyr-slice.R | 2 + r/tests/testthat/test-dplyr-summarize.R | 2 + r/tests/testthat/test-dplyr-union.R | 2 + r/tests/testthat/test-parquet.R | 2 + r/tests/testthat/test-query-engine.R | 2 + r/tests/testthat/test-udf.R | 3 +- 33 files changed, 225 insertions(+), 22 deletions(-) diff --git a/r/NAMESPACE b/r/NAMESPACE index 70b8fc3fc85..7ab8d5c9020 100644 --- a/r/NAMESPACE +++ b/r/NAMESPACE @@ -270,6 +270,7 @@ export(all_of) export(arrow_available) export(arrow_info) export(arrow_table) +export(arrow_with_acero) export(arrow_with_dataset) export(arrow_with_gcs) export(arrow_with_json) diff --git a/r/R/arrow-info.R b/r/R/arrow-info.R index d864f3de9f5..66581b49a49 100644 --- a/r/R/arrow-info.R +++ b/r/R/arrow-info.R @@ -79,6 +79,14 @@ arrow_available <- function() { TRUE } +#' @rdname arrow_info +#' @export +arrow_with_acero <- function() { + tryCatch(.Call(`_acero_available`), error = function(e) { + return(FALSE) + }) +} + #' @rdname arrow_info #' @export arrow_with_dataset <- function() { diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R index a318c7a4f37..fd3b4e0a8b1 100644 --- a/r/R/arrowExports.R +++ b/r/R/arrowExports.R @@ -428,6 +428,10 @@ ExecPlan_create <- function(use_threads) { .Call(`_arrow_ExecPlan_create`, use_threads) } +MakeExecNodeOrStop <- function(factory_name, plan, inputs, options) { + .Call(`_arrow_MakeExecNodeOrStop`, factory_name, plan, inputs, options) +} + ExecPlanReader__batches <- function(reader) { .Call(`_arrow_ExecPlanReader__batches`, reader) } diff --git a/r/R/type.R b/r/R/type.R index bd69311b258..9c9c7fa8705 100644 --- a/r/R/type.R +++ b/r/R/type.R @@ -381,7 +381,7 @@ NestedType <- R6Class("NestedType", inherit = DataType) #' @return An Arrow type object inheriting from [DataType]. #' @export #' @seealso [dictionary()] for creating a dictionary (factor-like) type. -#' @examples +#' @examplesIf arrow_with_acero() #' bool() #' struct(a = int32(), b = double()) #' timestamp("ms", timezone = "CEST") diff --git a/r/data-raw/codegen.R b/r/data-raw/codegen.R index 92a4267153b..e8d53467d45 100644 --- a/r/data-raw/codegen.R +++ b/r/data-raw/codegen.R @@ -30,7 +30,7 @@ # Ensure that all machines are sorting the same way invisible(Sys.setlocale("LC_COLLATE", "C")) -features <- c("dataset", "substrait", "parquet", "s3", "gcs", "json") +features <- c("acero", "dataset", "substrait", "parquet", "s3", "gcs", "json") suppressPackageStartupMessages({ library(decor) diff --git a/r/inst/build_arrow_static.sh b/r/inst/build_arrow_static.sh index 1baf011a412..e5a9f127edb 100755 --- a/r/inst/build_arrow_static.sh +++ b/r/inst/build_arrow_static.sh @@ -55,7 +55,7 @@ ${CMAKE} -DARROW_BOOST_USE_SHARED=OFF \ -DARROW_BUILD_TESTS=OFF \ -DARROW_BUILD_SHARED=OFF \ -DARROW_BUILD_STATIC=ON \ - -DARROW_ACERO=${ARROW_ACERO:-ON} \ + -DARROW_ACERO=${ARROW_ACERO:-$ARROW_DEFAULT_PARAM} \ -DARROW_COMPUTE=ON \ -DARROW_CSV=ON \ -DARROW_DATASET=${ARROW_DATASET:-ON} \ diff --git a/r/man/arrow_info.Rd b/r/man/arrow_info.Rd index e0f16d792f4..a839d3ba8fd 100644 --- a/r/man/arrow_info.Rd +++ b/r/man/arrow_info.Rd @@ -3,6 +3,7 @@ \name{arrow_info} \alias{arrow_info} \alias{arrow_available} +\alias{arrow_with_acero} \alias{arrow_with_dataset} \alias{arrow_with_substrait} \alias{arrow_with_parquet} @@ -15,6 +16,8 @@ arrow_info() arrow_available() +arrow_with_acero() + arrow_with_dataset() arrow_with_substrait() diff --git a/r/man/data-type.Rd b/r/man/data-type.Rd index 9a2ddf5132d..79b09a4f321 100644 --- a/r/man/data-type.Rd +++ b/r/man/data-type.Rd @@ -201,6 +201,7 @@ Use \code{decimal128()} or \code{decimal256()} as the names are more informative \code{decimal()}. } \examples{ +\dontshow{if (arrow_with_acero()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} bool() struct(a = int32(), b = double()) timestamp("ms", timezone = "CEST") @@ -227,6 +228,7 @@ if (requireNamespace("dplyr", quietly = TRUE)) { ) \%>\% compute() } +\dontshow{\}) # examplesIf} } \seealso{ \code{\link[=dictionary]{dictionary()}} for creating a dictionary (factor-like) type. diff --git a/r/src/array.cpp b/r/src/array.cpp index c6c98d75b5f..ae76c01a949 100644 --- a/r/src/array.cpp +++ b/r/src/array.cpp @@ -19,6 +19,7 @@ #include #include +#include #include #include diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp index db6d39dd7b8..89d500c9c22 100644 --- a/r/src/arrowExports.cpp +++ b/r/src/arrowExports.cpp @@ -912,6 +912,7 @@ BEGIN_CPP11 END_CPP11 } // compute-exec.cpp +#if defined(ARROW_R_WITH_ACERO) std::shared_ptr ExecPlan_create(bool use_threads); extern "C" SEXP _arrow_ExecPlan_create(SEXP use_threads_sexp){ BEGIN_CPP11 @@ -919,7 +920,32 @@ BEGIN_CPP11 return cpp11::as_sexp(ExecPlan_create(use_threads)); END_CPP11 } +#else +extern "C" SEXP _arrow_ExecPlan_create(SEXP use_threads_sexp){ + Rf_error("Cannot call ExecPlan_create(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + +// compute-exec.cpp +#if defined(ARROW_R_WITH_ACERO) +std::shared_ptr MakeExecNodeOrStop(const std::string& factory_name, acero::ExecPlan* plan, std::vector inputs, const acero::ExecNodeOptions& options); +extern "C" SEXP _arrow_MakeExecNodeOrStop(SEXP factory_name_sexp, SEXP plan_sexp, SEXP inputs_sexp, SEXP options_sexp){ +BEGIN_CPP11 + arrow::r::Input::type factory_name(factory_name_sexp); + arrow::r::Input::type plan(plan_sexp); + arrow::r::Input>::type inputs(inputs_sexp); + arrow::r::Input::type options(options_sexp); + return cpp11::as_sexp(MakeExecNodeOrStop(factory_name, plan, inputs, options)); +END_CPP11 +} +#else +extern "C" SEXP _arrow_MakeExecNodeOrStop(SEXP factory_name_sexp, SEXP plan_sexp, SEXP inputs_sexp, SEXP options_sexp){ + Rf_error("Cannot call MakeExecNodeOrStop(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // compute-exec.cpp +#if defined(ARROW_R_WITH_ACERO) cpp11::list ExecPlanReader__batches(const std::shared_ptr& reader); extern "C" SEXP _arrow_ExecPlanReader__batches(SEXP reader_sexp){ BEGIN_CPP11 @@ -927,7 +953,14 @@ BEGIN_CPP11 return cpp11::as_sexp(ExecPlanReader__batches(reader)); END_CPP11 } +#else +extern "C" SEXP _arrow_ExecPlanReader__batches(SEXP reader_sexp){ + Rf_error("Cannot call ExecPlanReader__batches(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // compute-exec.cpp +#if defined(ARROW_R_WITH_ACERO) std::shared_ptr Table__from_ExecPlanReader(const std::shared_ptr& reader); extern "C" SEXP _arrow_Table__from_ExecPlanReader(SEXP reader_sexp){ BEGIN_CPP11 @@ -935,7 +968,14 @@ BEGIN_CPP11 return cpp11::as_sexp(Table__from_ExecPlanReader(reader)); END_CPP11 } +#else +extern "C" SEXP _arrow_Table__from_ExecPlanReader(SEXP reader_sexp){ + Rf_error("Cannot call Table__from_ExecPlanReader(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // compute-exec.cpp +#if defined(ARROW_R_WITH_ACERO) std::shared_ptr ExecPlanReader__Plan(const std::shared_ptr& reader); extern "C" SEXP _arrow_ExecPlanReader__Plan(SEXP reader_sexp){ BEGIN_CPP11 @@ -943,7 +983,14 @@ BEGIN_CPP11 return cpp11::as_sexp(ExecPlanReader__Plan(reader)); END_CPP11 } +#else +extern "C" SEXP _arrow_ExecPlanReader__Plan(SEXP reader_sexp){ + Rf_error("Cannot call ExecPlanReader__Plan(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // compute-exec.cpp +#if defined(ARROW_R_WITH_ACERO) std::string ExecPlanReader__PlanStatus(const std::shared_ptr& reader); extern "C" SEXP _arrow_ExecPlanReader__PlanStatus(SEXP reader_sexp){ BEGIN_CPP11 @@ -951,7 +998,14 @@ BEGIN_CPP11 return cpp11::as_sexp(ExecPlanReader__PlanStatus(reader)); END_CPP11 } +#else +extern "C" SEXP _arrow_ExecPlanReader__PlanStatus(SEXP reader_sexp){ + Rf_error("Cannot call ExecPlanReader__PlanStatus(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // compute-exec.cpp +#if defined(ARROW_R_WITH_ACERO) std::shared_ptr ExecPlan_run(const std::shared_ptr& plan, const std::shared_ptr& final_node, cpp11::list sort_options, cpp11::strings metadata, int64_t head); extern "C" SEXP _arrow_ExecPlan_run(SEXP plan_sexp, SEXP final_node_sexp, SEXP sort_options_sexp, SEXP metadata_sexp, SEXP head_sexp){ BEGIN_CPP11 @@ -963,7 +1017,14 @@ BEGIN_CPP11 return cpp11::as_sexp(ExecPlan_run(plan, final_node, sort_options, metadata, head)); END_CPP11 } +#else +extern "C" SEXP _arrow_ExecPlan_run(SEXP plan_sexp, SEXP final_node_sexp, SEXP sort_options_sexp, SEXP metadata_sexp, SEXP head_sexp){ + Rf_error("Cannot call ExecPlan_run(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // compute-exec.cpp +#if defined(ARROW_R_WITH_ACERO) std::string ExecPlan_ToString(const std::shared_ptr& plan); extern "C" SEXP _arrow_ExecPlan_ToString(SEXP plan_sexp){ BEGIN_CPP11 @@ -971,7 +1032,14 @@ BEGIN_CPP11 return cpp11::as_sexp(ExecPlan_ToString(plan)); END_CPP11 } +#else +extern "C" SEXP _arrow_ExecPlan_ToString(SEXP plan_sexp){ + Rf_error("Cannot call ExecPlan_ToString(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // compute-exec.cpp +#if defined(ARROW_R_WITH_ACERO) void ExecPlan_UnsafeDelete(const std::shared_ptr& plan); extern "C" SEXP _arrow_ExecPlan_UnsafeDelete(SEXP plan_sexp){ BEGIN_CPP11 @@ -980,7 +1048,14 @@ BEGIN_CPP11 return R_NilValue; END_CPP11 } +#else +extern "C" SEXP _arrow_ExecPlan_UnsafeDelete(SEXP plan_sexp){ + Rf_error("Cannot call ExecPlan_UnsafeDelete(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // compute-exec.cpp +#if defined(ARROW_R_WITH_ACERO) std::shared_ptr ExecNode_output_schema(const std::shared_ptr& node); extern "C" SEXP _arrow_ExecNode_output_schema(SEXP node_sexp){ BEGIN_CPP11 @@ -988,6 +1063,12 @@ BEGIN_CPP11 return cpp11::as_sexp(ExecNode_output_schema(node)); END_CPP11 } +#else +extern "C" SEXP _arrow_ExecNode_output_schema(SEXP node_sexp){ + Rf_error("Cannot call ExecNode_output_schema(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // compute-exec.cpp #if defined(ARROW_R_WITH_DATASET) std::shared_ptr ExecNode_Scan(const std::shared_ptr& plan, const std::shared_ptr& dataset, const std::shared_ptr& filter, cpp11::list projection); @@ -1036,6 +1117,7 @@ extern "C" SEXP _arrow_ExecPlan_Write(SEXP plan_sexp, SEXP final_node_sexp, SEXP #endif // compute-exec.cpp +#if defined(ARROW_R_WITH_ACERO) std::shared_ptr ExecNode_Filter(const std::shared_ptr& input, const std::shared_ptr& filter); extern "C" SEXP _arrow_ExecNode_Filter(SEXP input_sexp, SEXP filter_sexp){ BEGIN_CPP11 @@ -1044,7 +1126,14 @@ BEGIN_CPP11 return cpp11::as_sexp(ExecNode_Filter(input, filter)); END_CPP11 } +#else +extern "C" SEXP _arrow_ExecNode_Filter(SEXP input_sexp, SEXP filter_sexp){ + Rf_error("Cannot call ExecNode_Filter(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // compute-exec.cpp +#if defined(ARROW_R_WITH_ACERO) std::shared_ptr ExecNode_Project(const std::shared_ptr& input, const std::vector>& exprs, std::vector names); extern "C" SEXP _arrow_ExecNode_Project(SEXP input_sexp, SEXP exprs_sexp, SEXP names_sexp){ BEGIN_CPP11 @@ -1054,7 +1143,14 @@ BEGIN_CPP11 return cpp11::as_sexp(ExecNode_Project(input, exprs, names)); END_CPP11 } +#else +extern "C" SEXP _arrow_ExecNode_Project(SEXP input_sexp, SEXP exprs_sexp, SEXP names_sexp){ + Rf_error("Cannot call ExecNode_Project(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // compute-exec.cpp +#if defined(ARROW_R_WITH_ACERO) std::shared_ptr ExecNode_Aggregate(const std::shared_ptr& input, cpp11::list options, std::vector key_names); extern "C" SEXP _arrow_ExecNode_Aggregate(SEXP input_sexp, SEXP options_sexp, SEXP key_names_sexp){ BEGIN_CPP11 @@ -1064,7 +1160,14 @@ BEGIN_CPP11 return cpp11::as_sexp(ExecNode_Aggregate(input, options, key_names)); END_CPP11 } +#else +extern "C" SEXP _arrow_ExecNode_Aggregate(SEXP input_sexp, SEXP options_sexp, SEXP key_names_sexp){ + Rf_error("Cannot call ExecNode_Aggregate(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // compute-exec.cpp +#if defined(ARROW_R_WITH_ACERO) std::shared_ptr ExecNode_Join(const std::shared_ptr& input, acero::JoinType join_type, const std::shared_ptr& right_data, std::vector left_keys, std::vector right_keys, std::vector left_output, std::vector right_output, std::string output_suffix_for_left, std::string output_suffix_for_right); extern "C" SEXP _arrow_ExecNode_Join(SEXP input_sexp, SEXP join_type_sexp, SEXP right_data_sexp, SEXP left_keys_sexp, SEXP right_keys_sexp, SEXP left_output_sexp, SEXP right_output_sexp, SEXP output_suffix_for_left_sexp, SEXP output_suffix_for_right_sexp){ BEGIN_CPP11 @@ -1080,7 +1183,14 @@ BEGIN_CPP11 return cpp11::as_sexp(ExecNode_Join(input, join_type, right_data, left_keys, right_keys, left_output, right_output, output_suffix_for_left, output_suffix_for_right)); END_CPP11 } +#else +extern "C" SEXP _arrow_ExecNode_Join(SEXP input_sexp, SEXP join_type_sexp, SEXP right_data_sexp, SEXP left_keys_sexp, SEXP right_keys_sexp, SEXP left_output_sexp, SEXP right_output_sexp, SEXP output_suffix_for_left_sexp, SEXP output_suffix_for_right_sexp){ + Rf_error("Cannot call ExecNode_Join(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // compute-exec.cpp +#if defined(ARROW_R_WITH_ACERO) std::shared_ptr ExecNode_Union(const std::shared_ptr& input, const std::shared_ptr& right_data); extern "C" SEXP _arrow_ExecNode_Union(SEXP input_sexp, SEXP right_data_sexp){ BEGIN_CPP11 @@ -1089,7 +1199,14 @@ BEGIN_CPP11 return cpp11::as_sexp(ExecNode_Union(input, right_data)); END_CPP11 } +#else +extern "C" SEXP _arrow_ExecNode_Union(SEXP input_sexp, SEXP right_data_sexp){ + Rf_error("Cannot call ExecNode_Union(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // compute-exec.cpp +#if defined(ARROW_R_WITH_ACERO) std::shared_ptr ExecNode_SourceNode(const std::shared_ptr& plan, const std::shared_ptr& reader); extern "C" SEXP _arrow_ExecNode_SourceNode(SEXP plan_sexp, SEXP reader_sexp){ BEGIN_CPP11 @@ -1098,7 +1215,14 @@ BEGIN_CPP11 return cpp11::as_sexp(ExecNode_SourceNode(plan, reader)); END_CPP11 } +#else +extern "C" SEXP _arrow_ExecNode_SourceNode(SEXP plan_sexp, SEXP reader_sexp){ + Rf_error("Cannot call ExecNode_SourceNode(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // compute-exec.cpp +#if defined(ARROW_R_WITH_ACERO) std::shared_ptr ExecNode_TableSourceNode(const std::shared_ptr& plan, const std::shared_ptr& table); extern "C" SEXP _arrow_ExecNode_TableSourceNode(SEXP plan_sexp, SEXP table_sexp){ BEGIN_CPP11 @@ -1107,6 +1231,12 @@ BEGIN_CPP11 return cpp11::as_sexp(ExecNode_TableSourceNode(plan, table)); END_CPP11 } +#else +extern "C" SEXP _arrow_ExecNode_TableSourceNode(SEXP plan_sexp, SEXP table_sexp){ + Rf_error("Cannot call ExecNode_TableSourceNode(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // compute-exec.cpp #if defined(ARROW_R_WITH_SUBSTRAIT) std::string substrait__internal__SubstraitToJSON(const std::shared_ptr& serialized_plan); @@ -5261,6 +5391,15 @@ BEGIN_CPP11 return cpp11::as_sexp(Array__infer_type(x)); END_CPP11 } +extern "C" SEXP _acero_available() { +return Rf_ScalarLogical( +#if defined(ARROW_R_WITH_ACERO) + TRUE +#else + FALSE +#endif +); +} extern "C" SEXP _dataset_available() { return Rf_ScalarLogical( #if defined(ARROW_R_WITH_DATASET) @@ -5316,6 +5455,7 @@ return Rf_ScalarLogical( ); } static const R_CallMethodDef CallEntries[] = { + { "_acero_available", (DL_FUNC)& _acero_available, 0 }, { "_dataset_available", (DL_FUNC)& _dataset_available, 0 }, { "_substrait_available", (DL_FUNC)& _substrait_available, 0 }, { "_parquet_available", (DL_FUNC)& _parquet_available, 0 }, @@ -5429,6 +5569,7 @@ static const R_CallMethodDef CallEntries[] = { { "_arrow_io___CompressedOutputStream__Make", (DL_FUNC) &_arrow_io___CompressedOutputStream__Make, 2}, { "_arrow_io___CompressedInputStream__Make", (DL_FUNC) &_arrow_io___CompressedInputStream__Make, 2}, { "_arrow_ExecPlan_create", (DL_FUNC) &_arrow_ExecPlan_create, 1}, + { "_arrow_MakeExecNodeOrStop", (DL_FUNC) &_arrow_MakeExecNodeOrStop, 4}, { "_arrow_ExecPlanReader__batches", (DL_FUNC) &_arrow_ExecPlanReader__batches, 1}, { "_arrow_Table__from_ExecPlanReader", (DL_FUNC) &_arrow_Table__from_ExecPlanReader, 1}, { "_arrow_ExecPlanReader__Plan", (DL_FUNC) &_arrow_ExecPlanReader__Plan, 1}, diff --git a/r/src/arrow_types.h b/r/src/arrow_types.h index 66f3a8fdd63..5f82275fe9c 100644 --- a/r/src/arrow_types.h +++ b/r/src/arrow_types.h @@ -35,8 +35,9 @@ #include #if defined(ARROW_R_WITH_ACERO) -#include #include +#include +namespace acero = ::arrow::acero; #endif #if defined(ARROW_R_WITH_DATASET) @@ -65,7 +66,6 @@ namespace ds = ::arrow::dataset; #endif namespace compute = ::arrow::compute; -namespace acero = ::arrow::acero; namespace fs = ::arrow::fs; std::shared_ptr RecordBatch__from_arrays(SEXP, SEXP); diff --git a/r/src/compute-exec.cpp b/r/src/compute-exec.cpp index fa2b6911335..796f31bf316 100644 --- a/r/src/compute-exec.cpp +++ b/r/src/compute-exec.cpp @@ -16,6 +16,9 @@ // under the License. #include "./arrow_types.h" + +#if defined(ARROW_R_WITH_ACERO) + #include "./safe-call-into-r.h" #include @@ -38,7 +41,7 @@ std::shared_ptr make_compute_options(std::string func_ std::shared_ptr strings_to_kvm(cpp11::strings metadata); -// [[arrow::export]] +// [[acero::export]] std::shared_ptr ExecPlan_create(bool use_threads) { static compute::ExecContext threaded_context{gc_memory_pool(), arrow::internal::GetCpuThreadPool()}; @@ -51,6 +54,7 @@ std::shared_ptr ExecPlan_create(bool use_threads) { return plan; } +// [[acero::export]] std::shared_ptr MakeExecNodeOrStop( const std::string& factory_name, acero::ExecPlan* plan, std::vector inputs, const acero::ExecNodeOptions& options) { @@ -186,7 +190,7 @@ class ExecPlanReader : public arrow::RecordBatchReader { } }; -// [[arrow::export]] +// [[acero::export]] cpp11::list ExecPlanReader__batches( const std::shared_ptr& reader) { auto result = RunWithCapturedRIfPossible( @@ -194,7 +198,7 @@ cpp11::list ExecPlanReader__batches( return arrow::r::to_r_list(ValueOrStop(result)); } -// [[arrow::export]] +// [[acero::export]] std::shared_ptr Table__from_ExecPlanReader( const std::shared_ptr& reader) { auto result = RunWithCapturedRIfPossible>( @@ -203,7 +207,7 @@ std::shared_ptr Table__from_ExecPlanReader( return ValueOrStop(result); } -// [[arrow::export]] +// [[acero::export]] std::shared_ptr ExecPlanReader__Plan( const std::shared_ptr& reader) { if (reader->PlanStatus() == "PLAN_FINISHED") { @@ -213,12 +217,12 @@ std::shared_ptr ExecPlanReader__Plan( return reader->Plan(); } -// [[arrow::export]] +// [[acero::export]] std::string ExecPlanReader__PlanStatus(const std::shared_ptr& reader) { return reader->PlanStatus(); } -// [[arrow::export]] +// [[acero::export]] std::shared_ptr ExecPlan_run( const std::shared_ptr& plan, const std::shared_ptr& final_node, cpp11::list sort_options, @@ -263,18 +267,18 @@ std::shared_ptr ExecPlan_run( return std::make_shared(plan, out_schema, sink_gen); } -// [[arrow::export]] +// [[acero::export]] std::string ExecPlan_ToString(const std::shared_ptr& plan) { return plan->ToString(); } -// [[arrow::export]] +// [[acero::export]] void ExecPlan_UnsafeDelete(const std::shared_ptr& plan) { auto& plan_unsafe = const_cast&>(plan); plan_unsafe.reset(); } -// [[arrow::export]] +// [[acero::export]] std::shared_ptr ExecNode_output_schema( const std::shared_ptr& node) { return node->output_schema(); @@ -363,7 +367,7 @@ void ExecPlan_Write( #endif -// [[arrow::export]] +// [[acero::export]] std::shared_ptr ExecNode_Filter( const std::shared_ptr& input, const std::shared_ptr& filter) { @@ -371,7 +375,7 @@ std::shared_ptr ExecNode_Filter( acero::FilterNodeOptions{*filter}); } -// [[arrow::export]] +// [[acero::export]] std::shared_ptr ExecNode_Project( const std::shared_ptr& input, const std::vector>& exprs, @@ -386,7 +390,7 @@ std::shared_ptr ExecNode_Project( acero::ProjectNodeOptions{std::move(expressions), std::move(names)}); } -// [[arrow::export]] +// [[acero::export]] std::shared_ptr ExecNode_Aggregate( const std::shared_ptr& input, cpp11::list options, std::vector key_names) { @@ -415,7 +419,7 @@ std::shared_ptr ExecNode_Aggregate( acero::AggregateNodeOptions{std::move(aggregates), std::move(keys)}); } -// [[arrow::export]] +// [[acero::export]] std::shared_ptr ExecNode_Join( const std::shared_ptr& input, acero::JoinType join_type, const std::shared_ptr& right_data, @@ -450,14 +454,14 @@ std::shared_ptr ExecNode_Join( std::move(output_suffix_for_left), std::move(output_suffix_for_right)}); } -// [[arrow::export]] +// [[acero::export]] std::shared_ptr ExecNode_Union( const std::shared_ptr& input, const std::shared_ptr& right_data) { return MakeExecNodeOrStop("union", input->plan(), {input.get(), right_data.get()}, {}); } -// [[arrow::export]] +// [[acero::export]] std::shared_ptr ExecNode_SourceNode( const std::shared_ptr& plan, const std::shared_ptr& reader) { @@ -465,7 +469,7 @@ std::shared_ptr ExecNode_SourceNode( return MakeExecNodeOrStop("record_batch_reader_source", plan.get(), {}, options); } -// [[arrow::export]] +// [[acero::export]] std::shared_ptr ExecNode_TableSourceNode( const std::shared_ptr& plan, const std::shared_ptr& table) { @@ -476,6 +480,8 @@ std::shared_ptr ExecNode_TableSourceNode( return MakeExecNodeOrStop("table_source", plan.get(), {}, options); } +#endif + #if defined(ARROW_R_WITH_SUBSTRAIT) #include diff --git a/r/tests/testthat/test-dplyr-arrange.R b/r/tests/testthat/test-dplyr-arrange.R index 3444e3ace5f..85c71fb7c9a 100644 --- a/r/tests/testthat/test-dplyr-arrange.R +++ b/r/tests/testthat/test-dplyr-arrange.R @@ -17,6 +17,8 @@ library(dplyr, warn.conflicts = FALSE) +skip_if_not_available("acero") + # randomize order of rows in test data tbl <- slice_sample(example_data_for_sorting, prop = 1L) diff --git a/r/tests/testthat/test-dplyr-collapse.R b/r/tests/testthat/test-dplyr-collapse.R index cca8412178e..198827e235b 100644 --- a/r/tests/testthat/test-dplyr-collapse.R +++ b/r/tests/testthat/test-dplyr-collapse.R @@ -20,6 +20,8 @@ withr::local_options(list(arrow.summarise.sort = TRUE)) library(dplyr, warn.conflicts = FALSE) library(stringr) +skip_if_not_available("acero") + tbl <- example_data # Add some better string data tbl$verses <- verses[[1]] diff --git a/r/tests/testthat/test-dplyr-count.R b/r/tests/testthat/test-dplyr-count.R index 333ac9ff336..6c45ded31d4 100644 --- a/r/tests/testthat/test-dplyr-count.R +++ b/r/tests/testthat/test-dplyr-count.R @@ -17,6 +17,8 @@ library(dplyr, warn.conflicts = FALSE) +skip_if_not_available("acero") + tbl <- example_data tbl$some_grouping <- rep(c(1, 2), 5) tbl$another_grouping <- rep(c(1, 2), 5) diff --git a/r/tests/testthat/test-dplyr-distinct.R b/r/tests/testthat/test-dplyr-distinct.R index 09a8d5f8f55..4c7f8894cd4 100644 --- a/r/tests/testthat/test-dplyr-distinct.R +++ b/r/tests/testthat/test-dplyr-distinct.R @@ -17,6 +17,8 @@ library(dplyr, warn.conflicts = FALSE) +skip_if_not_available("acero") + tbl <- example_data tbl$some_grouping <- rep(c(1, 2), 5) diff --git a/r/tests/testthat/test-dplyr-filter.R b/r/tests/testthat/test-dplyr-filter.R index 24754afcf84..8b144f47852 100644 --- a/r/tests/testthat/test-dplyr-filter.R +++ b/r/tests/testthat/test-dplyr-filter.R @@ -18,6 +18,8 @@ library(dplyr, warn.conflicts = FALSE) library(stringr) +skip_if_not_available("acero") + tbl <- example_data # Add some better string data tbl$verses <- verses[[1]] diff --git a/r/tests/testthat/test-dplyr-funcs-conditional.R b/r/tests/testthat/test-dplyr-funcs-conditional.R index e1dcd7bb091..85d21b73226 100644 --- a/r/tests/testthat/test-dplyr-funcs-conditional.R +++ b/r/tests/testthat/test-dplyr-funcs-conditional.R @@ -18,6 +18,7 @@ library(dplyr, warn.conflicts = FALSE) suppressPackageStartupMessages(library(bit64)) +skip_if_not_available("acero") tbl <- example_data tbl$verses <- verses[[1]] diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R index 059764861e9..d47f9232111 100644 --- a/r/tests/testthat/test-dplyr-funcs-datetime.R +++ b/r/tests/testthat/test-dplyr-funcs-datetime.R @@ -21,6 +21,8 @@ skip_on_r_older_than("3.5") library(lubridate, warn.conflicts = FALSE) library(dplyr, warn.conflicts = FALSE) +skip_if_not_available("acero") + # base::strptime() defaults to local timezone # but arrow's strptime defaults to UTC. # So that tests are consistent, set the local timezone to UTC diff --git a/r/tests/testthat/test-dplyr-funcs-math.R b/r/tests/testthat/test-dplyr-funcs-math.R index 66b3a510f9c..733a7c6ea06 100644 --- a/r/tests/testthat/test-dplyr-funcs-math.R +++ b/r/tests/testthat/test-dplyr-funcs-math.R @@ -17,6 +17,7 @@ library(dplyr, warn.conflicts = FALSE) +skip_if_not_available("acero") test_that("abs()", { df <- tibble(x = c(-127, -10, -1, -0, 0, 1, 10, 127, NA)) diff --git a/r/tests/testthat/test-dplyr-funcs-string.R b/r/tests/testthat/test-dplyr-funcs-string.R index bb60a79daa1..0dc834dbfea 100644 --- a/r/tests/testthat/test-dplyr-funcs-string.R +++ b/r/tests/testthat/test-dplyr-funcs-string.R @@ -16,6 +16,7 @@ # under the License. skip_if_not_available("utf8proc") +skip_if_not_available("acero") library(dplyr, warn.conflicts = FALSE) library(lubridate) diff --git a/r/tests/testthat/test-dplyr-funcs-type.R b/r/tests/testthat/test-dplyr-funcs-type.R index ccf16dd4db4..435fa5fcb87 100644 --- a/r/tests/testthat/test-dplyr-funcs-type.R +++ b/r/tests/testthat/test-dplyr-funcs-type.R @@ -19,6 +19,7 @@ library(dplyr, warn.conflicts = FALSE) suppressPackageStartupMessages(library(bit64)) suppressPackageStartupMessages(library(lubridate)) +skip_if_not_available("acero") tbl <- example_data diff --git a/r/tests/testthat/test-dplyr-group-by.R b/r/tests/testthat/test-dplyr-group-by.R index 3c5d174b0c7..5847e11e0f9 100644 --- a/r/tests/testthat/test-dplyr-group-by.R +++ b/r/tests/testthat/test-dplyr-group-by.R @@ -18,6 +18,8 @@ library(dplyr, warn.conflicts = FALSE) library(stringr) +skip_if_not_available("acero") + tbl <- example_data test_that("group_by groupings are recorded", { diff --git a/r/tests/testthat/test-dplyr-join.R b/r/tests/testthat/test-dplyr-join.R index 2520d561cfd..e3e1e98cfca 100644 --- a/r/tests/testthat/test-dplyr-join.R +++ b/r/tests/testthat/test-dplyr-join.R @@ -17,6 +17,8 @@ library(dplyr, warn.conflicts = FALSE) +skip_if_not_available("acero") + left <- example_data left$some_grouping <- rep(c(1, 2), 5) diff --git a/r/tests/testthat/test-dplyr-mutate.R b/r/tests/testthat/test-dplyr-mutate.R index 5d431089ce7..ab37747458c 100644 --- a/r/tests/testthat/test-dplyr-mutate.R +++ b/r/tests/testthat/test-dplyr-mutate.R @@ -18,6 +18,8 @@ library(dplyr, warn.conflicts = FALSE) library(stringr) +skip_if_not_available("acero") + tbl <- example_data # Add some better string data tbl$verses <- verses[[1]] diff --git a/r/tests/testthat/test-dplyr-query.R b/r/tests/testthat/test-dplyr-query.R index 0b2b23ec860..5dbdb0e522b 100644 --- a/r/tests/testthat/test-dplyr-query.R +++ b/r/tests/testthat/test-dplyr-query.R @@ -18,6 +18,8 @@ library(dplyr, warn.conflicts = FALSE) library(stringr) +skip_if_not_available("acero") + tbl <- example_data # Add some better string data tbl$verses <- verses[[1]] diff --git a/r/tests/testthat/test-dplyr-select.R b/r/tests/testthat/test-dplyr-select.R index f71c4000442..dff73c063be 100644 --- a/r/tests/testthat/test-dplyr-select.R +++ b/r/tests/testthat/test-dplyr-select.R @@ -18,6 +18,8 @@ library(dplyr, warn.conflicts = FALSE) library(stringr) +skip_if_not_available("acero") + tbl <- example_data test_that("Empty select returns no columns", { diff --git a/r/tests/testthat/test-dplyr-slice.R b/r/tests/testthat/test-dplyr-slice.R index 9cef51d4f7a..6d0711589c2 100644 --- a/r/tests/testthat/test-dplyr-slice.R +++ b/r/tests/testthat/test-dplyr-slice.R @@ -17,6 +17,8 @@ library(dplyr, warn.conflicts = FALSE) +skip_if_not_available("acero") + tbl <- example_data test_that("slice_head/tail, ungrouped", { diff --git a/r/tests/testthat/test-dplyr-summarize.R b/r/tests/testthat/test-dplyr-summarize.R index 12ccec21ee1..1b834df19ff 100644 --- a/r/tests/testthat/test-dplyr-summarize.R +++ b/r/tests/testthat/test-dplyr-summarize.R @@ -26,6 +26,8 @@ withr::local_options(list( library(dplyr, warn.conflicts = FALSE) library(stringr) +skip_if_not_available("acero") + tbl <- example_data # Add some better string data tbl$verses <- verses[[1]] diff --git a/r/tests/testthat/test-dplyr-union.R b/r/tests/testthat/test-dplyr-union.R index 1bf8610c560..9774c6c0c0e 100644 --- a/r/tests/testthat/test-dplyr-union.R +++ b/r/tests/testthat/test-dplyr-union.R @@ -16,6 +16,8 @@ library(dplyr, warn.conflicts = FALSE) +skip_if_not_available("acero") + withr::local_options(list(arrow.summarise.sort = FALSE)) test_that("union_all", { diff --git a/r/tests/testthat/test-parquet.R b/r/tests/testthat/test-parquet.R index 12711521cc0..70e2b6b7c87 100644 --- a/r/tests/testthat/test-parquet.R +++ b/r/tests/testthat/test-parquet.R @@ -466,6 +466,8 @@ test_that("Can read parquet with nested lists and maps", { parquet_test_data <- file.path(base_path, "cpp", "submodules", "parquet-testing", "data") skip_if_not(dir.exists(parquet_test_data) | force_tests(), "Parquet test data missing") + skip_if_not_available("snappy") + pq <- read_parquet(paste0(parquet_test_data, "/nested_lists.snappy.parquet"), as_data_frame = FALSE) expect_type_equal(pq$a, list_of(field("element", list_of(field("element", list_of(field("element", utf8()))))))) diff --git a/r/tests/testthat/test-query-engine.R b/r/tests/testthat/test-query-engine.R index 1d8d876bf53..a33f004fc57 100644 --- a/r/tests/testthat/test-query-engine.R +++ b/r/tests/testthat/test-query-engine.R @@ -17,6 +17,8 @@ library(dplyr, warn.conflicts = FALSE) +skip_if_not_available("acero") + test_that("ExecPlanReader does not start evaluating a query", { skip_if_not(CanRunWithCapturedR()) diff --git a/r/tests/testthat/test-udf.R b/r/tests/testthat/test-udf.R index 7836255e863..0eb75b1dde6 100644 --- a/r/tests/testthat/test-udf.R +++ b/r/tests/testthat/test-udf.R @@ -20,7 +20,6 @@ test_that("list_compute_functions() works", { expect_true(all(!grepl("^hash_", list_compute_functions()))) }) - test_that("arrow_scalar_function() works", { # check in/out type as schema/data type fun <- arrow_scalar_function( @@ -106,6 +105,8 @@ test_that("register_scalar_function() adds a compute function to the registry", Scalar$create(32L, float64()) ) + skip_if_not_available("acero") + expect_identical( record_batch(a = 1L) %>% dplyr::mutate(b = times_32(a)) %>% From 26c73f2d56deef7879e1c824ee0060b6812c9c09 Mon Sep 17 00:00:00 2001 From: Weston Pace Date: Tue, 4 Apr 2023 09:13:19 -0700 Subject: [PATCH 07/11] Removed incorrectly added [[acero::export]] --- r/R/arrowExports.R | 4 ---- r/src/arrowExports.cpp | 19 ------------------- r/src/compute-exec.cpp | 1 - 3 files changed, 24 deletions(-) diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R index fd3b4e0a8b1..a318c7a4f37 100644 --- a/r/R/arrowExports.R +++ b/r/R/arrowExports.R @@ -428,10 +428,6 @@ ExecPlan_create <- function(use_threads) { .Call(`_arrow_ExecPlan_create`, use_threads) } -MakeExecNodeOrStop <- function(factory_name, plan, inputs, options) { - .Call(`_arrow_MakeExecNodeOrStop`, factory_name, plan, inputs, options) -} - ExecPlanReader__batches <- function(reader) { .Call(`_arrow_ExecPlanReader__batches`, reader) } diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp index 89d500c9c22..dc4d0e9c709 100644 --- a/r/src/arrowExports.cpp +++ b/r/src/arrowExports.cpp @@ -926,24 +926,6 @@ extern "C" SEXP _arrow_ExecPlan_create(SEXP use_threads_sexp){ } #endif -// compute-exec.cpp -#if defined(ARROW_R_WITH_ACERO) -std::shared_ptr MakeExecNodeOrStop(const std::string& factory_name, acero::ExecPlan* plan, std::vector inputs, const acero::ExecNodeOptions& options); -extern "C" SEXP _arrow_MakeExecNodeOrStop(SEXP factory_name_sexp, SEXP plan_sexp, SEXP inputs_sexp, SEXP options_sexp){ -BEGIN_CPP11 - arrow::r::Input::type factory_name(factory_name_sexp); - arrow::r::Input::type plan(plan_sexp); - arrow::r::Input>::type inputs(inputs_sexp); - arrow::r::Input::type options(options_sexp); - return cpp11::as_sexp(MakeExecNodeOrStop(factory_name, plan, inputs, options)); -END_CPP11 -} -#else -extern "C" SEXP _arrow_MakeExecNodeOrStop(SEXP factory_name_sexp, SEXP plan_sexp, SEXP inputs_sexp, SEXP options_sexp){ - Rf_error("Cannot call MakeExecNodeOrStop(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); -} -#endif - // compute-exec.cpp #if defined(ARROW_R_WITH_ACERO) cpp11::list ExecPlanReader__batches(const std::shared_ptr& reader); @@ -5569,7 +5551,6 @@ static const R_CallMethodDef CallEntries[] = { { "_arrow_io___CompressedOutputStream__Make", (DL_FUNC) &_arrow_io___CompressedOutputStream__Make, 2}, { "_arrow_io___CompressedInputStream__Make", (DL_FUNC) &_arrow_io___CompressedInputStream__Make, 2}, { "_arrow_ExecPlan_create", (DL_FUNC) &_arrow_ExecPlan_create, 1}, - { "_arrow_MakeExecNodeOrStop", (DL_FUNC) &_arrow_MakeExecNodeOrStop, 4}, { "_arrow_ExecPlanReader__batches", (DL_FUNC) &_arrow_ExecPlanReader__batches, 1}, { "_arrow_Table__from_ExecPlanReader", (DL_FUNC) &_arrow_Table__from_ExecPlanReader, 1}, { "_arrow_ExecPlanReader__Plan", (DL_FUNC) &_arrow_ExecPlanReader__Plan, 1}, diff --git a/r/src/compute-exec.cpp b/r/src/compute-exec.cpp index 796f31bf316..9c7de915fa2 100644 --- a/r/src/compute-exec.cpp +++ b/r/src/compute-exec.cpp @@ -54,7 +54,6 @@ std::shared_ptr ExecPlan_create(bool use_threads) { return plan; } -// [[acero::export]] std::shared_ptr MakeExecNodeOrStop( const std::string& factory_name, acero::ExecPlan* plan, std::vector inputs, const acero::ExecNodeOptions& options) { From 946c701f0c2b76d301eb7b5572ee4841065de605 Mon Sep 17 00:00:00 2001 From: Weston Pace Date: Tue, 4 Apr 2023 10:11:11 -0700 Subject: [PATCH 08/11] Change acero/dataset linker order since dataset depends on acero --- r/configure | 12 ++++++------ r/configure.win | 2 +- r/tools/autobrew | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/r/configure b/r/configure index 3099e84b537..65528bdc38e 100755 --- a/r/configure +++ b/r/configure @@ -265,18 +265,18 @@ if [ $? -eq 0 ]; then # NOTE: parquet is assumed to have the same -L flag as arrow # so there is no need to add its location to PKG_DIRS fi - if arrow_built_with ARROW_ACERO; then - PKG_CFLAGS="$PKG_CFLAGS -DARROW_R_WITH_ACERO" - PKG_LIBS="-larrow_acero $PKG_LIBS" - # NOTE: arrow-acero is assumed to have the same -L flag as arrow - # so there is no need to add its location to PKG_DIRS - fi if arrow_built_with ARROW_DATASET; then PKG_CFLAGS="$PKG_CFLAGS -DARROW_R_WITH_DATASET" PKG_LIBS="-larrow_dataset $PKG_LIBS" # NOTE: arrow-dataset is assumed to have the same -L flag as arrow # so there is no need to add its location to PKG_DIRS fi + if arrow_built_with ARROW_ACERO; then + PKG_CFLAGS="$PKG_CFLAGS -DARROW_R_WITH_ACERO" + PKG_LIBS="-larrow_acero $PKG_LIBS" + # NOTE: arrow-acero is assumed to have the same -L flag as arrow + # so there is no need to add its location to PKG_DIRS + fi if arrow_built_with ARROW_SUBSTRAIT; then PKG_CFLAGS="$PKG_CFLAGS -DARROW_R_WITH_SUBSTRAIT" PKG_LIBS="-larrow_substrait $PKG_LIBS" diff --git a/r/configure.win b/r/configure.win index be584f49e62..15eb1bf3dcf 100755 --- a/r/configure.win +++ b/r/configure.win @@ -63,7 +63,7 @@ function configure_release() { -DARROW_R_WITH_JSON" PKG_LIBS="-L${RWINLIB}/lib"'$(subst gcc,,$(COMPILED_BY))$(R_ARCH) ' PKG_LIBS="$PKG_LIBS -L${RWINLIB}/lib"'$(R_ARCH)$(CRT) ' - PKG_LIBS="$PKG_LIBS -larrow_acero -larrow_dataset -lparquet -larrow -larrow_bundled_dependencies \ + PKG_LIBS="$PKG_LIBS -larrow_dataset -larrow_acero -lparquet -larrow -larrow_bundled_dependencies \ -lutf8proc -lthrift -lsnappy -lz -lzstd -llz4 -lbz2 ${BROTLI_LIBS} -lole32 \ ${MIMALLOC_LIBS} ${OPENSSL_LIBS}" diff --git a/r/tools/autobrew b/r/tools/autobrew index 73e6e11a161..3e94010a71e 100644 --- a/r/tools/autobrew +++ b/r/tools/autobrew @@ -62,7 +62,7 @@ fi # Hardcode this for my custom autobrew build rm -f $BREWDIR/lib/*.dylib AWS_LIBS="-laws-cpp-sdk-config -laws-cpp-sdk-transfer -laws-cpp-sdk-identity-management -laws-cpp-sdk-cognito-identity -laws-cpp-sdk-sts -laws-cpp-sdk-s3 -laws-cpp-sdk-core -laws-c-event-stream -laws-checksums -laws-c-common -laws-crt-cpp -laws-c-io -laws-c-s3 -laws-c-auth -laws-c-http -laws-c-cal -laws-c-compression -laws-c-mqtt -lpthread -lcurl" -PKG_LIBS="-lparquet -larrow_dataset -larrow -larrow_bundled_dependencies -lthrift -lbrotlienc-static -lbrotlidec-static -lbrotlicommon-static -llz4 -lsnappy -lzstd $AWS_LIBS" +PKG_LIBS="-lparquet -larrow_dataset -larrow_acero -larrow -larrow_bundled_dependencies -lthrift -lbrotlienc-static -lbrotlidec-static -lbrotlicommon-static -llz4 -lsnappy -lzstd $AWS_LIBS" PKG_DIRS="-L$BREWDIR/lib" # Prevent CRAN builder from linking against old libs in /usr/local/lib From 8bfa93ea819c8c18d360f01ddf0457729632a636 Mon Sep 17 00:00:00 2001 From: Weston Pace Date: Tue, 4 Apr 2023 16:21:16 -0700 Subject: [PATCH 09/11] Make sure that Acero doesn't mark symbols with dllimport since that confuses mingw when statically linking --- r/configure.win | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/r/configure.win b/r/configure.win index 15eb1bf3dcf..d9daffb5d98 100755 --- a/r/configure.win +++ b/r/configure.win @@ -59,8 +59,8 @@ function configure_release() { # NOTE: If you make changes to the libraries below, you should also change # ci/scripts/r_windows_build.sh and ci/scripts/PKGBUILD PKG_CFLAGS="-I${RWINLIB}/include -DARROW_STATIC -DPARQUET_STATIC -DARROW_DS_STATIC \ - -DARROW_R_WITH_PARQUET -DARROW_R_WITH_ACERO -DARROW_R_WITH_DATASET \ - -DARROW_R_WITH_JSON" + -DARROW_ACERO_STATIC -DARROW_R_WITH_PARQUET -DARROW_R_WITH_ACERO \ + -DARROW_R_WITH_DATASET -DARROW_R_WITH_JSON" PKG_LIBS="-L${RWINLIB}/lib"'$(subst gcc,,$(COMPILED_BY))$(R_ARCH) ' PKG_LIBS="$PKG_LIBS -L${RWINLIB}/lib"'$(R_ARCH)$(CRT) ' PKG_LIBS="$PKG_LIBS -larrow_dataset -larrow_acero -lparquet -larrow -larrow_bundled_dependencies \ From 0f8daeac95b4bedffc0bea900601426e5c41a074 Mon Sep 17 00:00:00 2001 From: Weston Pace Date: Tue, 4 Apr 2023 21:38:07 -0700 Subject: [PATCH 10/11] One more spot that needed -DARROW_R_WITH_ACERO --- r/tools/autobrew | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/tools/autobrew b/r/tools/autobrew index 3e94010a71e..f1813098921 100644 --- a/r/tools/autobrew +++ b/r/tools/autobrew @@ -74,7 +74,7 @@ for FILE in $BREWDIR/Cellar/*/*/lib/*.a; do PKG_LIBS=`echo $PKG_LIBS | sed "s/-l$LIBNAME/-lbrew$LIBNAME/g"` done -PKG_CFLAGS="-I$BREWDIR/opt/$PKG_BREW_NAME/include -DARROW_R_WITH_PARQUET -DARROW_R_WITH_DATASET -DARROW_R_WITH_JSON -DARROW_R_WITH_S3 -DARROW_R_WITH_GCS -D_LIBCPP_DISABLE_AVAILABILITY" +PKG_CFLAGS="-I$BREWDIR/opt/$PKG_BREW_NAME/include -DARROW_R_WITH_PARQUET -DARROW_R_WITH_ACERO -DARROW_R_WITH_DATASET -DARROW_R_WITH_JSON -DARROW_R_WITH_S3 -DARROW_R_WITH_GCS -D_LIBCPP_DISABLE_AVAILABILITY" unset HOMEBREW_NO_ANALYTICS unset HOMEBREW_NO_AUTO_UPDATE From fdbc370863c311fae21df82987b1702cbc87665a Mon Sep 17 00:00:00 2001 From: Weston Pace Date: Wed, 5 Apr 2023 07:06:57 -0700 Subject: [PATCH 11/11] Removed excess blank line per lint. --- r/tests/testthat/test-io.R | 1 - 1 file changed, 1 deletion(-) diff --git a/r/tests/testthat/test-io.R b/r/tests/testthat/test-io.R index 8698250d47a..b4780af75dd 100644 --- a/r/tests/testthat/test-io.R +++ b/r/tests/testthat/test-io.R @@ -244,4 +244,3 @@ test_that("reencoding input stream errors for invalid characters", { unlink(temp_utf8) }) -