From 4ce0d93c2c542db8a95b5bb7ce66af366ca5166a Mon Sep 17 00:00:00 2001 From: Ian Cook Date: Mon, 1 Mar 2021 17:52:00 -0500 Subject: [PATCH 01/28] Make dataset and parquet optional --- r/NAMESPACE | 2 + r/R/arrow-package.R | 25 +- r/R/parquet.R | 44 +-- r/configure | 49 +++- r/data-raw/codegen.R | 8 +- r/man/ParquetFileReader.Rd | 16 +- r/man/arrow_available.Rd | 19 +- r/man/read_parquet.Rd | 12 +- r/man/write_parquet.Rd | 16 +- r/src/arrowExports.cpp | 561 +++++++++++++++++++++++++++++++++++++ r/src/arrow_types.h | 16 ++ r/src/dataset.cpp | 90 +++--- r/src/expression.cpp | 12 +- r/src/parquet.cpp | 62 ++-- r/tools/autobrew | 3 +- 15 files changed, 795 insertions(+), 140 deletions(-) diff --git a/r/NAMESPACE b/r/NAMESPACE index 54061128ac7..f208d104480 100644 --- a/r/NAMESPACE +++ b/r/NAMESPACE @@ -167,6 +167,8 @@ export(Type) export(UnionDataset) export(arrow_available) export(arrow_info) +export(arrow_with_dataset) +export(arrow_with_parquet) export(arrow_with_s3) export(binary) export(bool) diff --git a/r/R/arrow-package.R b/r/R/arrow-package.R index 818d85c8580..4132964eb49 100644 --- a/r/R/arrow-package.R +++ b/r/R/arrow-package.R @@ -53,19 +53,36 @@ #' You won't generally need to call these function, but they're made available #' for diagnostic purposes. #' @return `TRUE` or `FALSE` depending on whether the package was installed -#' with the Arrow C++ library (check with `arrow_available()`) or with S3 -#' support enabled (check with `arrow_with_s3()`). +#' with: +#' * The Arrow C++ library (check with `arrow_available()`) +#' * Arrow Dataset support enabled (check with `arrow_with_dataset()`) +#' * Parquet support enabled (check with `arrow_with_parquet()`) +#' * Amazon S3 support enabled (check with `arrow_with_s3()`) #' @export #' @examples #' arrow_available() +#' arrow_with_dataset() +#' arrow_with_parquet() #' arrow_with_s3() -#' @seealso If either of these are `FALSE`, see +#' @seealso If any of these are `FALSE`, see #' `vignette("install", package = "arrow")` for guidance on reinstalling the #' package. arrow_available <- function() { .Call(`_arrow_available`) } +#' @rdname arrow_available +#' @export +arrow_with_dataset <- function() { + .Call(`_dataset_available`) +} + +#' @rdname arrow_available +#' @export +arrow_with_parquet <- function() { + .Call(`_parquet_available`) +} + #' @rdname arrow_available #' @export arrow_with_s3 <- function() { @@ -95,6 +112,8 @@ arrow_info <- function() { pool <- default_memory_pool() out <- c(out, list( capabilities = c( + dataset = arrow_with_dataset(), + parquet = arrow_with_parquet(), s3 = arrow_with_s3(), vapply(tolower(names(CompressionType)[-1]), codec_is_available, logical(1)) ), diff --git a/r/R/parquet.R b/r/R/parquet.R index 4fe321666af..237aebb880f 100644 --- a/r/R/parquet.R +++ b/r/R/parquet.R @@ -28,11 +28,13 @@ #' `TRUE` (the default). 
#' @examples #' \donttest{ -#' tf <- tempfile() -#' on.exit(unlink(tf)) -#' write_parquet(mtcars, tf) -#' df <- read_parquet(tf, col_select = starts_with("d")) -#' head(df) +#' if (arrow_with_parquet()) { +#' tf <- tempfile() +#' on.exit(unlink(tf)) +#' write_parquet(mtcars, tf) +#' df <- read_parquet(tf, col_select = starts_with("d")) +#' head(df) +#' } #' } #' @export read_parquet <- function(file, @@ -123,13 +125,15 @@ read_parquet <- function(file, #' #' @examples #' \donttest{ -#' tf1 <- tempfile(fileext = ".parquet") -#' write_parquet(data.frame(x = 1:5), tf1) -#' -#' # using compression -#' if (codec_is_available("gzip")) { -#' tf2 <- tempfile(fileext = ".gz.parquet") -#' write_parquet(data.frame(x = 1:5), tf2, compression = "gzip", compression_level = 5) +#' if (arrow_with_parquet()) { +#' tf1 <- tempfile(fileext = ".parquet") +#' write_parquet(data.frame(x = 1:5), tf1) +#' +#' # using compression +#' if (codec_is_available("gzip")) { +#' tf2 <- tempfile(fileext = ".gz.parquet") +#' write_parquet(data.frame(x = 1:5), tf2, compression = "gzip", compression_level = 5) +#' } #' } #' } #' @export @@ -450,13 +454,15 @@ ParquetFileWriter$create <- function(schema, #' @export #' @examples #' \donttest{ -#' f <- system.file("v0.7.1.parquet", package="arrow") -#' pq <- ParquetFileReader$create(f) -#' pq$GetSchema() -#' if (codec_is_available("snappy")) { -#' # This file has compressed data columns -#' tab <- pq$ReadTable() -#' tab$schema +#' if (arrow_with_parquet()) { +#' f <- system.file("v0.7.1.parquet", package="arrow") +#' pq <- ParquetFileReader$create(f) +#' pq$GetSchema() +#' if (codec_is_available("snappy")) { +#' # This file has compressed data columns +#' tab <- pq$ReadTable() +#' tab$schema +#' } #' } #' } #' @include arrow-package.R diff --git a/r/configure b/r/configure index d3b9d1dcffb..68f2b7fcb93 100755 --- a/r/configure +++ b/r/configure @@ -26,13 +26,14 @@ # R CMD INSTALL --configure-vars='INCLUDE_DIR=/.../include LIB_DIR=/.../lib' # Library settings -PKG_CONFIG_NAME="arrow parquet arrow-dataset" +PKG_CONFIG_NAME="arrow" PKG_DEB_NAME="(unsuppored)" PKG_RPM_NAME="(unsuppored)" PKG_BREW_NAME="apache-arrow" PKG_TEST_HEADER="" -# These must be the same order as $(pkg-config --libs arrow-dataset) -PKG_LIBS="-larrow_dataset -lparquet -larrow" +PKG_LIBS="-larrow" +PKG_DIRS="" +BUNDLED_LIBS="" # Make some env vars case-insensitive ARROW_R_DEV=`echo $ARROW_R_DEV | tr '[:upper:]' '[:lower:]'` @@ -69,19 +70,22 @@ fi if [ "$INCLUDE_DIR" ] || [ "$LIB_DIR" ]; then echo "*** Using INCLUDE_DIR/LIB_DIR" PKG_CFLAGS="-I$INCLUDE_DIR $PKG_CFLAGS" - PKG_LIBS="-L$LIB_DIR $PKG_LIBS" + PKG_DIRS="-L$LIB_DIR $PKG_DIRS" else # Use pkg-config if available and allowed pkg-config --version >/dev/null 2>&1 if [ "$ARROW_USE_PKG_CONFIG" != "false" ] && [ $? -eq 0 ]; then PKGCONFIG_CFLAGS=`pkg-config --cflags --silence-errors ${PKG_CONFIG_NAME}` - PKGCONFIG_LIBS=`pkg-config --libs --silence-errors ${PKG_CONFIG_NAME}` + PKGCONFIG_LIBS=`pkg-config --libs-only-l --silence-errors ${PKG_CONFIG_NAME}` + PKGCONFIG_DIRS=`pkg-config --libs-only-L --silence-errors ${PKG_CONFIG_NAME}` + # TODO: what about --libs-only-other? 
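+    # For illustration only (actual values vary by system): with
+    # PKG_CONFIG_NAME="arrow", the two queries above typically return
+    #   pkg-config --libs-only-l arrow  ->  -larrow
+    #   pkg-config --libs-only-L arrow  ->  -L/usr/local/lib
+    # Keeping -L flags in PKG_DIRS and -l flags in PKG_LIBS lets the
+    # feature checks below prepend -lparquet and -larrow_dataset to
+    # PKG_LIBS in correct link order before everything is assembled as
+    # PKG_LIBS="$PKG_DIRS $PKG_LIBS $BUNDLED_LIBS".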
fi if [ "$PKGCONFIG_CFLAGS" ] || [ "$PKGCONFIG_LIBS" ]; then echo "*** Arrow C++ libraries found via pkg-config" PKG_CFLAGS="$PKGCONFIG_CFLAGS" PKG_LIBS=${PKGCONFIG_LIBS} + PKG_DIRS=${PKGCONFIG_DIRS} # Check for version mismatch PC_LIB_VERSION=`pkg-config --modversion arrow` @@ -98,7 +102,8 @@ else if [ "$FORCE_AUTOBREW" != "true" ] && [ "`command -v brew`" ] && [ "`brew ls --versions ${PKG_BREW_NAME}`" != "" ]; then echo "*** Using Homebrew ${PKG_BREW_NAME}" BREWDIR=`brew --prefix` - PKG_LIBS="-L$BREWDIR/opt/$PKG_BREW_NAME/lib $PKG_LIBS -larrow_bundled_dependencies" + PKG_LIBS="$PKG_LIBS -larrow_bundled_dependencies" + PKG_DIRS="-L$BREWDIR/opt/$PKG_BREW_NAME/lib $PKG_DIRS" PKG_CFLAGS="-I$BREWDIR/opt/$PKG_BREW_NAME/include" else echo "*** Downloading ${PKG_BREW_NAME}" @@ -114,7 +119,7 @@ else if [ $? -ne 0 ]; then echo "Failed to retrieve binary for ${PKG_BREW_NAME}" fi - # autobrew sets `PKG_LIBS` and `PKG_CFLAGS` + # autobrew sets `PKG_LIBS`, `PKG_DIRS`, and `PKG_CFLAGS` fi else # Set some default values/backwards compatibility @@ -135,8 +140,8 @@ else LIB_DIR="libarrow/arrow-${VERSION}/lib" if [ -d "$LIB_DIR" ]; then - # Enumerate the static libs and add to PKG_LIBS - # (technically repeating arrow libs so they're in the right order) + # Enumerate the static libs, add them to BUNDLED_LIBS, + # and prepend the location to PKG_DIRS # # If tools/linuxlibs.R fails to produce libs, this dir won't exist # so don't try (the error message from `ls` would be misleading) @@ -145,7 +150,7 @@ else # TODO: what about non-bundled deps? BUNDLED_LIBS=`cd $LIB_DIR && ls *.a` BUNDLED_LIBS=`echo $BUNDLED_LIBS | sed -E "s/lib(.*)\.a/-l\1/" | sed -e "s/\\.a lib/ -l/g"` - PKG_LIBS="-L$(pwd)/$LIB_DIR $PKG_LIBS $BUNDLED_LIBS" + PKG_DIRS="-L$(pwd)/$LIB_DIR $PKG_DIRS" fi fi fi @@ -182,15 +187,33 @@ if [ $? -eq 0 ] || [ "$UNAME" = "Darwin" ]; then # Always build with arrow on macOS PKG_CFLAGS="$PKG_CFLAGS -DARROW_R_WITH_ARROW" # Check for features - LIB_DIR=`echo $PKG_LIBS | sed -e 's/ -l.*//' | sed -e 's/^-L//'` - grep 'set(ARROW_S3 "ON")' $LIB_DIR/cmake/arrow/ArrowOptions.cmake >/dev/null 2>&1 + LIB_DIR=`echo $PKG_DIRS | sed -e 's/^-L//'` + ARROW_OPTS_CMAKE="$LIB_DIR/cmake/arrow/ArrowOptions.cmake" + # Check for Arrow Dataset subcomponent + grep 'set(ARROW_DATASET "ON")' $ARROW_OPTS_CMAKE >/dev/null 2>&1 + if [ $? -eq 0 ]; then + PKG_CFLAGS="$PKG_CFLAGS -DARROW_R_WITH_DATASET" + PKG_LIBS="-larrow_dataset $PKG_LIBS" + # TODO: what if arrow-dataset has a different -L location than arrow? + fi + # Check for Parquet + grep 'set(ARROW_PARQUET "ON")' $ARROW_OPTS_CMAKE >/dev/null 2>&1 + if [ $? -eq 0 ]; then + PKG_CFLAGS="$PKG_CFLAGS -DARROW_R_WITH_PARQUET" + PKG_LIBS="-lparquet $PKG_LIBS" + # TODO: what if parquet has a different -L location than arrow? + fi + # Check for S3 + grep 'set(ARROW_S3 "ON")' $ARROW_OPTS_CMAKE >/dev/null 2>&1 if [ $? 
-eq 0 ]; then PKG_CFLAGS="$PKG_CFLAGS -DARROW_R_WITH_S3" if [ "$BUNDLED_LIBS" != "" ]; then # We're depending on openssl/curl from the system, so they're not in the bundled deps - PKG_LIBS="$PKG_LIBS -lssl -lcrypto -lcurl" + BUNDLED_LIBS="$BUNDLED_LIBS -lssl -lcrypto -lcurl" fi fi + # prepend PKG_DIRS and append BUNDLED_LIBS to PKG_LIBS + PKG_LIBS="$PKG_DIRS $PKG_LIBS $BUNDLED_LIBS" echo "PKG_CFLAGS=$PKG_CFLAGS" echo "PKG_LIBS=$PKG_LIBS" else diff --git a/r/data-raw/codegen.R b/r/data-raw/codegen.R index a287569a53b..528756fc398 100644 --- a/r/data-raw/codegen.R +++ b/r/data-raw/codegen.R @@ -41,7 +41,7 @@ # #if defined(ARROW_R_WITH_FEATURE) # and each feature is written to its own set of export files. -features <- c("arrow", "s3") +features <- c("arrow", "dataset", "parquet", "s3") suppressPackageStartupMessages({ library(decor) @@ -194,10 +194,16 @@ arrow_exports_cpp <- glue::glue(' {feature_available("arrow")} +{feature_available("dataset")} + +{feature_available("parquet")} + {feature_available("s3")} static const R_CallMethodDef CallEntries[] = {{ \t\t{{ "_arrow_available", (DL_FUNC)& _arrow_available, 0 }}, +\t\t{{ "_dataset_available", (DL_FUNC)& _dataset_available, 0 }}, +\t\t{{ "_parquet_available", (DL_FUNC)& _parquet_available, 0 }}, \t\t{{ "_s3_available", (DL_FUNC)& _s3_available, 0 }}, {cpp_functions_registration} {classes_finalizers_registration} diff --git a/r/man/ParquetFileReader.Rd b/r/man/ParquetFileReader.Rd index 13682a7ee35..24918263395 100644 --- a/r/man/ParquetFileReader.Rd +++ b/r/man/ParquetFileReader.Rd @@ -46,13 +46,15 @@ The optional \verb{column_indices=} argument is a 0-based integer vector indicat \examples{ \donttest{ -f <- system.file("v0.7.1.parquet", package="arrow") -pq <- ParquetFileReader$create(f) -pq$GetSchema() -if (codec_is_available("snappy")) { - # This file has compressed data columns - tab <- pq$ReadTable() - tab$schema +if (arrow_with_parquet()) { + f <- system.file("v0.7.1.parquet", package="arrow") + pq <- ParquetFileReader$create(f) + pq$GetSchema() + if (codec_is_available("snappy")) { + # This file has compressed data columns + tab <- pq$ReadTable() + tab$schema + } } } } diff --git a/r/man/arrow_available.Rd b/r/man/arrow_available.Rd index bca7e684654..fdb23dfba30 100644 --- a/r/man/arrow_available.Rd +++ b/r/man/arrow_available.Rd @@ -2,17 +2,28 @@ % Please edit documentation in R/arrow-package.R \name{arrow_available} \alias{arrow_available} +\alias{arrow_with_dataset} +\alias{arrow_with_parquet} \alias{arrow_with_s3} \title{Is the C++ Arrow library available?} \usage{ arrow_available() +arrow_with_dataset() + +arrow_with_parquet() + arrow_with_s3() } \value{ \code{TRUE} or \code{FALSE} depending on whether the package was installed -with the Arrow C++ library (check with \code{arrow_available()}) or with S3 -support enabled (check with \code{arrow_with_s3()}). +with: +\itemize{ +\item The Arrow C++ library (check with \code{arrow_available()}) +\item Arrow Dataset support enabled (check with \code{arrow_with_dataset()}) +\item Parquet support enabled (check with \code{arrow_with_parquet()}) +\item Amazon S3 support enabled (check with \code{arrow_with_s3()}) +} } \description{ You won't generally need to call these function, but they're made available @@ -20,10 +31,12 @@ for diagnostic purposes. 
} \examples{ arrow_available() +arrow_with_dataset() +arrow_with_parquet() arrow_with_s3() } \seealso{ -If either of these are \code{FALSE}, see +If any of these are \code{FALSE}, see \code{vignette("install", package = "arrow")} for guidance on reinstalling the package. } diff --git a/r/man/read_parquet.Rd b/r/man/read_parquet.Rd index 4e9b526b369..5558d811a31 100644 --- a/r/man/read_parquet.Rd +++ b/r/man/read_parquet.Rd @@ -41,10 +41,12 @@ This function enables you to read Parquet files into R. } \examples{ \donttest{ -tf <- tempfile() -on.exit(unlink(tf)) -write_parquet(mtcars, tf) -df <- read_parquet(tf, col_select = starts_with("d")) -head(df) +if (arrow_with_parquet()) { + tf <- tempfile() + on.exit(unlink(tf)) + write_parquet(mtcars, tf) + df <- read_parquet(tf, col_select = starts_with("d")) + head(df) +} } } diff --git a/r/man/write_parquet.Rd b/r/man/write_parquet.Rd index d0e4f24dc46..63f3ab126bd 100644 --- a/r/man/write_parquet.Rd +++ b/r/man/write_parquet.Rd @@ -96,13 +96,15 @@ Note that "uncompressed" columns may still have dictionary encoding. } \examples{ \donttest{ -tf1 <- tempfile(fileext = ".parquet") -write_parquet(data.frame(x = 1:5), tf1) - -# using compression -if (codec_is_available("gzip")) { - tf2 <- tempfile(fileext = ".gz.parquet") - write_parquet(data.frame(x = 1:5), tf2, compression = "gzip", compression_level = 5) +if (arrow_with_parquet()) { + tf1 <- tempfile(fileext = ".parquet") + write_parquet(data.frame(x = 1:5), tf1) + + # using compression + if (codec_is_available("gzip")) { + tf2 <- tempfile(fileext = ".gz.parquet") + write_parquet(data.frame(x = 1:5), tf2, compression = "gzip", compression_level = 5) + } } } } diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp index 5cbebdf4562..aa1f34ee228 100644 --- a/r/src/arrowExports.cpp +++ b/r/src/arrowExports.cpp @@ -705,6 +705,7 @@ BEGIN_CPP11 END_CPP11 } // dataset.cpp +#if defined(ARROW_R_WITH_DATASET) std::shared_ptr dataset___Dataset__NewScan(const std::shared_ptr& ds); extern "C" SEXP _arrow_dataset___Dataset__NewScan(SEXP ds_sexp){ BEGIN_CPP11 @@ -712,7 +713,14 @@ BEGIN_CPP11 return cpp11::as_sexp(dataset___Dataset__NewScan(ds)); END_CPP11 } +#else +extern "C" SEXP _arrow_dataset___Dataset__NewScan(SEXP ds_sexp){ + Rf_error("Cannot call dataset___Dataset__NewScan(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // dataset.cpp +#if defined(ARROW_R_WITH_DATASET) std::shared_ptr dataset___Dataset__schema(const std::shared_ptr& dataset); extern "C" SEXP _arrow_dataset___Dataset__schema(SEXP dataset_sexp){ BEGIN_CPP11 @@ -720,7 +728,14 @@ BEGIN_CPP11 return cpp11::as_sexp(dataset___Dataset__schema(dataset)); END_CPP11 } +#else +extern "C" SEXP _arrow_dataset___Dataset__schema(SEXP dataset_sexp){ + Rf_error("Cannot call dataset___Dataset__schema(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // dataset.cpp +#if defined(ARROW_R_WITH_DATASET) std::string dataset___Dataset__type_name(const std::shared_ptr& dataset); extern "C" SEXP _arrow_dataset___Dataset__type_name(SEXP dataset_sexp){ BEGIN_CPP11 @@ -728,7 +743,14 @@ BEGIN_CPP11 return cpp11::as_sexp(dataset___Dataset__type_name(dataset)); END_CPP11 } +#else +extern "C" SEXP _arrow_dataset___Dataset__type_name(SEXP dataset_sexp){ + Rf_error("Cannot call dataset___Dataset__type_name(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. 
"); +} +#endif + // dataset.cpp +#if defined(ARROW_R_WITH_DATASET) std::shared_ptr dataset___Dataset__ReplaceSchema(const std::shared_ptr& dataset, const std::shared_ptr& schm); extern "C" SEXP _arrow_dataset___Dataset__ReplaceSchema(SEXP dataset_sexp, SEXP schm_sexp){ BEGIN_CPP11 @@ -737,7 +759,14 @@ BEGIN_CPP11 return cpp11::as_sexp(dataset___Dataset__ReplaceSchema(dataset, schm)); END_CPP11 } +#else +extern "C" SEXP _arrow_dataset___Dataset__ReplaceSchema(SEXP dataset_sexp, SEXP schm_sexp){ + Rf_error("Cannot call dataset___Dataset__ReplaceSchema(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // dataset.cpp +#if defined(ARROW_R_WITH_DATASET) std::shared_ptr dataset___UnionDataset__create(const ds::DatasetVector& datasets, const std::shared_ptr& schm); extern "C" SEXP _arrow_dataset___UnionDataset__create(SEXP datasets_sexp, SEXP schm_sexp){ BEGIN_CPP11 @@ -746,7 +775,14 @@ BEGIN_CPP11 return cpp11::as_sexp(dataset___UnionDataset__create(datasets, schm)); END_CPP11 } +#else +extern "C" SEXP _arrow_dataset___UnionDataset__create(SEXP datasets_sexp, SEXP schm_sexp){ + Rf_error("Cannot call dataset___UnionDataset__create(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // dataset.cpp +#if defined(ARROW_R_WITH_DATASET) std::shared_ptr dataset___InMemoryDataset__create(const std::shared_ptr& table); extern "C" SEXP _arrow_dataset___InMemoryDataset__create(SEXP table_sexp){ BEGIN_CPP11 @@ -754,7 +790,14 @@ BEGIN_CPP11 return cpp11::as_sexp(dataset___InMemoryDataset__create(table)); END_CPP11 } +#else +extern "C" SEXP _arrow_dataset___InMemoryDataset__create(SEXP table_sexp){ + Rf_error("Cannot call dataset___InMemoryDataset__create(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // dataset.cpp +#if defined(ARROW_R_WITH_DATASET) cpp11::list dataset___UnionDataset__children(const std::shared_ptr& ds); extern "C" SEXP _arrow_dataset___UnionDataset__children(SEXP ds_sexp){ BEGIN_CPP11 @@ -762,7 +805,14 @@ BEGIN_CPP11 return cpp11::as_sexp(dataset___UnionDataset__children(ds)); END_CPP11 } +#else +extern "C" SEXP _arrow_dataset___UnionDataset__children(SEXP ds_sexp){ + Rf_error("Cannot call dataset___UnionDataset__children(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // dataset.cpp +#if defined(ARROW_R_WITH_DATASET) std::shared_ptr dataset___FileSystemDataset__format(const std::shared_ptr& dataset); extern "C" SEXP _arrow_dataset___FileSystemDataset__format(SEXP dataset_sexp){ BEGIN_CPP11 @@ -770,7 +820,14 @@ BEGIN_CPP11 return cpp11::as_sexp(dataset___FileSystemDataset__format(dataset)); END_CPP11 } +#else +extern "C" SEXP _arrow_dataset___FileSystemDataset__format(SEXP dataset_sexp){ + Rf_error("Cannot call dataset___FileSystemDataset__format(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. 
"); +} +#endif + // dataset.cpp +#if defined(ARROW_R_WITH_DATASET) std::shared_ptr dataset___FileSystemDataset__filesystem(const std::shared_ptr& dataset); extern "C" SEXP _arrow_dataset___FileSystemDataset__filesystem(SEXP dataset_sexp){ BEGIN_CPP11 @@ -778,7 +835,14 @@ BEGIN_CPP11 return cpp11::as_sexp(dataset___FileSystemDataset__filesystem(dataset)); END_CPP11 } +#else +extern "C" SEXP _arrow_dataset___FileSystemDataset__filesystem(SEXP dataset_sexp){ + Rf_error("Cannot call dataset___FileSystemDataset__filesystem(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // dataset.cpp +#if defined(ARROW_R_WITH_DATASET) std::vector dataset___FileSystemDataset__files(const std::shared_ptr& dataset); extern "C" SEXP _arrow_dataset___FileSystemDataset__files(SEXP dataset_sexp){ BEGIN_CPP11 @@ -786,7 +850,14 @@ BEGIN_CPP11 return cpp11::as_sexp(dataset___FileSystemDataset__files(dataset)); END_CPP11 } +#else +extern "C" SEXP _arrow_dataset___FileSystemDataset__files(SEXP dataset_sexp){ + Rf_error("Cannot call dataset___FileSystemDataset__files(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // dataset.cpp +#if defined(ARROW_R_WITH_DATASET) std::shared_ptr dataset___DatasetFactory__Finish1(const std::shared_ptr& factory, bool unify_schemas); extern "C" SEXP _arrow_dataset___DatasetFactory__Finish1(SEXP factory_sexp, SEXP unify_schemas_sexp){ BEGIN_CPP11 @@ -795,7 +866,14 @@ BEGIN_CPP11 return cpp11::as_sexp(dataset___DatasetFactory__Finish1(factory, unify_schemas)); END_CPP11 } +#else +extern "C" SEXP _arrow_dataset___DatasetFactory__Finish1(SEXP factory_sexp, SEXP unify_schemas_sexp){ + Rf_error("Cannot call dataset___DatasetFactory__Finish1(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // dataset.cpp +#if defined(ARROW_R_WITH_DATASET) std::shared_ptr dataset___DatasetFactory__Finish2(const std::shared_ptr& factory, const std::shared_ptr& schema); extern "C" SEXP _arrow_dataset___DatasetFactory__Finish2(SEXP factory_sexp, SEXP schema_sexp){ BEGIN_CPP11 @@ -804,7 +882,14 @@ BEGIN_CPP11 return cpp11::as_sexp(dataset___DatasetFactory__Finish2(factory, schema)); END_CPP11 } +#else +extern "C" SEXP _arrow_dataset___DatasetFactory__Finish2(SEXP factory_sexp, SEXP schema_sexp){ + Rf_error("Cannot call dataset___DatasetFactory__Finish2(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // dataset.cpp +#if defined(ARROW_R_WITH_DATASET) std::shared_ptr dataset___DatasetFactory__Inspect(const std::shared_ptr& factory, bool unify_schemas); extern "C" SEXP _arrow_dataset___DatasetFactory__Inspect(SEXP factory_sexp, SEXP unify_schemas_sexp){ BEGIN_CPP11 @@ -813,7 +898,14 @@ BEGIN_CPP11 return cpp11::as_sexp(dataset___DatasetFactory__Inspect(factory, unify_schemas)); END_CPP11 } +#else +extern "C" SEXP _arrow_dataset___DatasetFactory__Inspect(SEXP factory_sexp, SEXP unify_schemas_sexp){ + Rf_error("Cannot call dataset___DatasetFactory__Inspect(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. 
"); +} +#endif + // dataset.cpp +#if defined(ARROW_R_WITH_DATASET) std::shared_ptr dataset___UnionDatasetFactory__Make(const std::vector>& children); extern "C" SEXP _arrow_dataset___UnionDatasetFactory__Make(SEXP children_sexp){ BEGIN_CPP11 @@ -821,7 +913,14 @@ BEGIN_CPP11 return cpp11::as_sexp(dataset___UnionDatasetFactory__Make(children)); END_CPP11 } +#else +extern "C" SEXP _arrow_dataset___UnionDatasetFactory__Make(SEXP children_sexp){ + Rf_error("Cannot call dataset___UnionDatasetFactory__Make(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // dataset.cpp +#if defined(ARROW_R_WITH_DATASET) std::shared_ptr dataset___FileSystemDatasetFactory__Make2(const std::shared_ptr& fs, const std::shared_ptr& selector, const std::shared_ptr& format, const std::shared_ptr& partitioning); extern "C" SEXP _arrow_dataset___FileSystemDatasetFactory__Make2(SEXP fs_sexp, SEXP selector_sexp, SEXP format_sexp, SEXP partitioning_sexp){ BEGIN_CPP11 @@ -832,7 +931,14 @@ BEGIN_CPP11 return cpp11::as_sexp(dataset___FileSystemDatasetFactory__Make2(fs, selector, format, partitioning)); END_CPP11 } +#else +extern "C" SEXP _arrow_dataset___FileSystemDatasetFactory__Make2(SEXP fs_sexp, SEXP selector_sexp, SEXP format_sexp, SEXP partitioning_sexp){ + Rf_error("Cannot call dataset___FileSystemDatasetFactory__Make2(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // dataset.cpp +#if defined(ARROW_R_WITH_DATASET) std::shared_ptr dataset___FileSystemDatasetFactory__Make1(const std::shared_ptr& fs, const std::shared_ptr& selector, const std::shared_ptr& format); extern "C" SEXP _arrow_dataset___FileSystemDatasetFactory__Make1(SEXP fs_sexp, SEXP selector_sexp, SEXP format_sexp){ BEGIN_CPP11 @@ -842,7 +948,14 @@ BEGIN_CPP11 return cpp11::as_sexp(dataset___FileSystemDatasetFactory__Make1(fs, selector, format)); END_CPP11 } +#else +extern "C" SEXP _arrow_dataset___FileSystemDatasetFactory__Make1(SEXP fs_sexp, SEXP selector_sexp, SEXP format_sexp){ + Rf_error("Cannot call dataset___FileSystemDatasetFactory__Make1(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // dataset.cpp +#if defined(ARROW_R_WITH_DATASET) std::shared_ptr dataset___FileSystemDatasetFactory__Make3(const std::shared_ptr& fs, const std::shared_ptr& selector, const std::shared_ptr& format, const std::shared_ptr& factory); extern "C" SEXP _arrow_dataset___FileSystemDatasetFactory__Make3(SEXP fs_sexp, SEXP selector_sexp, SEXP format_sexp, SEXP factory_sexp){ BEGIN_CPP11 @@ -853,7 +966,14 @@ BEGIN_CPP11 return cpp11::as_sexp(dataset___FileSystemDatasetFactory__Make3(fs, selector, format, factory)); END_CPP11 } +#else +extern "C" SEXP _arrow_dataset___FileSystemDatasetFactory__Make3(SEXP fs_sexp, SEXP selector_sexp, SEXP format_sexp, SEXP factory_sexp){ + Rf_error("Cannot call dataset___FileSystemDatasetFactory__Make3(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. 
"); +} +#endif + // dataset.cpp +#if defined(ARROW_R_WITH_DATASET) std::string dataset___FileFormat__type_name(const std::shared_ptr& format); extern "C" SEXP _arrow_dataset___FileFormat__type_name(SEXP format_sexp){ BEGIN_CPP11 @@ -861,7 +981,14 @@ BEGIN_CPP11 return cpp11::as_sexp(dataset___FileFormat__type_name(format)); END_CPP11 } +#else +extern "C" SEXP _arrow_dataset___FileFormat__type_name(SEXP format_sexp){ + Rf_error("Cannot call dataset___FileFormat__type_name(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // dataset.cpp +#if defined(ARROW_R_WITH_DATASET) std::shared_ptr dataset___FileFormat__DefaultWriteOptions(const std::shared_ptr& fmt); extern "C" SEXP _arrow_dataset___FileFormat__DefaultWriteOptions(SEXP fmt_sexp){ BEGIN_CPP11 @@ -869,7 +996,14 @@ BEGIN_CPP11 return cpp11::as_sexp(dataset___FileFormat__DefaultWriteOptions(fmt)); END_CPP11 } +#else +extern "C" SEXP _arrow_dataset___FileFormat__DefaultWriteOptions(SEXP fmt_sexp){ + Rf_error("Cannot call dataset___FileFormat__DefaultWriteOptions(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // dataset.cpp +#if defined(ARROW_R_WITH_DATASET) std::shared_ptr dataset___ParquetFileFormat__Make(bool use_buffered_stream, int64_t buffer_size, cpp11::strings dict_columns); extern "C" SEXP _arrow_dataset___ParquetFileFormat__Make(SEXP use_buffered_stream_sexp, SEXP buffer_size_sexp, SEXP dict_columns_sexp){ BEGIN_CPP11 @@ -879,7 +1013,14 @@ BEGIN_CPP11 return cpp11::as_sexp(dataset___ParquetFileFormat__Make(use_buffered_stream, buffer_size, dict_columns)); END_CPP11 } +#else +extern "C" SEXP _arrow_dataset___ParquetFileFormat__Make(SEXP use_buffered_stream_sexp, SEXP buffer_size_sexp, SEXP dict_columns_sexp){ + Rf_error("Cannot call dataset___ParquetFileFormat__Make(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // dataset.cpp +#if defined(ARROW_R_WITH_DATASET) std::string dataset___FileWriteOptions__type_name(const std::shared_ptr& options); extern "C" SEXP _arrow_dataset___FileWriteOptions__type_name(SEXP options_sexp){ BEGIN_CPP11 @@ -887,7 +1028,14 @@ BEGIN_CPP11 return cpp11::as_sexp(dataset___FileWriteOptions__type_name(options)); END_CPP11 } +#else +extern "C" SEXP _arrow_dataset___FileWriteOptions__type_name(SEXP options_sexp){ + Rf_error("Cannot call dataset___FileWriteOptions__type_name(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // dataset.cpp +#if defined(ARROW_R_WITH_DATASET) void dataset___ParquetFileWriteOptions__update(const std::shared_ptr& options, const std::shared_ptr& writer_props, const std::shared_ptr& arrow_writer_props); extern "C" SEXP _arrow_dataset___ParquetFileWriteOptions__update(SEXP options_sexp, SEXP writer_props_sexp, SEXP arrow_writer_props_sexp){ BEGIN_CPP11 @@ -898,7 +1046,14 @@ BEGIN_CPP11 return R_NilValue; END_CPP11 } +#else +extern "C" SEXP _arrow_dataset___ParquetFileWriteOptions__update(SEXP options_sexp, SEXP writer_props_sexp, SEXP arrow_writer_props_sexp){ + Rf_error("Cannot call dataset___ParquetFileWriteOptions__update(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. 
"); +} +#endif + // dataset.cpp +#if defined(ARROW_R_WITH_DATASET) void dataset___IpcFileWriteOptions__update2(const std::shared_ptr& ipc_options, bool use_legacy_format, const std::shared_ptr& codec, arrow::ipc::MetadataVersion metadata_version); extern "C" SEXP _arrow_dataset___IpcFileWriteOptions__update2(SEXP ipc_options_sexp, SEXP use_legacy_format_sexp, SEXP codec_sexp, SEXP metadata_version_sexp){ BEGIN_CPP11 @@ -910,7 +1065,14 @@ BEGIN_CPP11 return R_NilValue; END_CPP11 } +#else +extern "C" SEXP _arrow_dataset___IpcFileWriteOptions__update2(SEXP ipc_options_sexp, SEXP use_legacy_format_sexp, SEXP codec_sexp, SEXP metadata_version_sexp){ + Rf_error("Cannot call dataset___IpcFileWriteOptions__update2(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // dataset.cpp +#if defined(ARROW_R_WITH_DATASET) void dataset___IpcFileWriteOptions__update1(const std::shared_ptr& ipc_options, bool use_legacy_format, arrow::ipc::MetadataVersion metadata_version); extern "C" SEXP _arrow_dataset___IpcFileWriteOptions__update1(SEXP ipc_options_sexp, SEXP use_legacy_format_sexp, SEXP metadata_version_sexp){ BEGIN_CPP11 @@ -921,14 +1083,28 @@ BEGIN_CPP11 return R_NilValue; END_CPP11 } +#else +extern "C" SEXP _arrow_dataset___IpcFileWriteOptions__update1(SEXP ipc_options_sexp, SEXP use_legacy_format_sexp, SEXP metadata_version_sexp){ + Rf_error("Cannot call dataset___IpcFileWriteOptions__update1(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // dataset.cpp +#if defined(ARROW_R_WITH_DATASET) std::shared_ptr dataset___IpcFileFormat__Make(); extern "C" SEXP _arrow_dataset___IpcFileFormat__Make(){ BEGIN_CPP11 return cpp11::as_sexp(dataset___IpcFileFormat__Make()); END_CPP11 } +#else +extern "C" SEXP _arrow_dataset___IpcFileFormat__Make(){ + Rf_error("Cannot call dataset___IpcFileFormat__Make(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // dataset.cpp +#if defined(ARROW_R_WITH_DATASET) std::shared_ptr dataset___CsvFileFormat__Make(const std::shared_ptr& parse_options); extern "C" SEXP _arrow_dataset___CsvFileFormat__Make(SEXP parse_options_sexp){ BEGIN_CPP11 @@ -936,7 +1112,14 @@ BEGIN_CPP11 return cpp11::as_sexp(dataset___CsvFileFormat__Make(parse_options)); END_CPP11 } +#else +extern "C" SEXP _arrow_dataset___CsvFileFormat__Make(SEXP parse_options_sexp){ + Rf_error("Cannot call dataset___CsvFileFormat__Make(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // dataset.cpp +#if defined(ARROW_R_WITH_DATASET) std::shared_ptr dataset___DirectoryPartitioning(const std::shared_ptr& schm); extern "C" SEXP _arrow_dataset___DirectoryPartitioning(SEXP schm_sexp){ BEGIN_CPP11 @@ -944,7 +1127,14 @@ BEGIN_CPP11 return cpp11::as_sexp(dataset___DirectoryPartitioning(schm)); END_CPP11 } +#else +extern "C" SEXP _arrow_dataset___DirectoryPartitioning(SEXP schm_sexp){ + Rf_error("Cannot call dataset___DirectoryPartitioning(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. 
"); +} +#endif + // dataset.cpp +#if defined(ARROW_R_WITH_DATASET) std::shared_ptr dataset___DirectoryPartitioning__MakeFactory(const std::vector& field_names); extern "C" SEXP _arrow_dataset___DirectoryPartitioning__MakeFactory(SEXP field_names_sexp){ BEGIN_CPP11 @@ -952,7 +1142,14 @@ BEGIN_CPP11 return cpp11::as_sexp(dataset___DirectoryPartitioning__MakeFactory(field_names)); END_CPP11 } +#else +extern "C" SEXP _arrow_dataset___DirectoryPartitioning__MakeFactory(SEXP field_names_sexp){ + Rf_error("Cannot call dataset___DirectoryPartitioning__MakeFactory(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // dataset.cpp +#if defined(ARROW_R_WITH_DATASET) std::shared_ptr dataset___HivePartitioning(const std::shared_ptr& schm, const std::string& null_fallback); extern "C" SEXP _arrow_dataset___HivePartitioning(SEXP schm_sexp, SEXP null_fallback_sexp){ BEGIN_CPP11 @@ -961,7 +1158,14 @@ BEGIN_CPP11 return cpp11::as_sexp(dataset___HivePartitioning(schm, null_fallback)); END_CPP11 } +#else +extern "C" SEXP _arrow_dataset___HivePartitioning(SEXP schm_sexp){ + Rf_error("Cannot call dataset___HivePartitioning(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // dataset.cpp +#if defined(ARROW_R_WITH_DATASET) std::shared_ptr dataset___HivePartitioning__MakeFactory(const std::string& null_fallback); extern "C" SEXP _arrow_dataset___HivePartitioning__MakeFactory(SEXP null_fallback_sexp){ BEGIN_CPP11 @@ -969,7 +1173,14 @@ BEGIN_CPP11 return cpp11::as_sexp(dataset___HivePartitioning__MakeFactory(null_fallback)); END_CPP11 } +#else +extern "C" SEXP _arrow_dataset___HivePartitioning__MakeFactory(){ + Rf_error("Cannot call dataset___HivePartitioning__MakeFactory(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // dataset.cpp +#if defined(ARROW_R_WITH_DATASET) void dataset___ScannerBuilder__ProjectNames(const std::shared_ptr& sb, const std::vector& cols); extern "C" SEXP _arrow_dataset___ScannerBuilder__ProjectNames(SEXP sb_sexp, SEXP cols_sexp){ BEGIN_CPP11 @@ -990,7 +1201,14 @@ BEGIN_CPP11 return R_NilValue; END_CPP11 } +#else +extern "C" SEXP _arrow_dataset___ScannerBuilder__Project(SEXP sb_sexp, SEXP cols_sexp){ + Rf_error("Cannot call dataset___ScannerBuilder__Project(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // dataset.cpp +#if defined(ARROW_R_WITH_DATASET) void dataset___ScannerBuilder__Filter(const std::shared_ptr& sb, const std::shared_ptr& expr); extern "C" SEXP _arrow_dataset___ScannerBuilder__Filter(SEXP sb_sexp, SEXP expr_sexp){ BEGIN_CPP11 @@ -1000,7 +1218,14 @@ BEGIN_CPP11 return R_NilValue; END_CPP11 } +#else +extern "C" SEXP _arrow_dataset___ScannerBuilder__Filter(SEXP sb_sexp, SEXP expr_sexp){ + Rf_error("Cannot call dataset___ScannerBuilder__Filter(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. 
"); +} +#endif + // dataset.cpp +#if defined(ARROW_R_WITH_DATASET) void dataset___ScannerBuilder__UseThreads(const std::shared_ptr& sb, bool threads); extern "C" SEXP _arrow_dataset___ScannerBuilder__UseThreads(SEXP sb_sexp, SEXP threads_sexp){ BEGIN_CPP11 @@ -1010,7 +1235,14 @@ BEGIN_CPP11 return R_NilValue; END_CPP11 } +#else +extern "C" SEXP _arrow_dataset___ScannerBuilder__UseThreads(SEXP sb_sexp, SEXP threads_sexp){ + Rf_error("Cannot call dataset___ScannerBuilder__UseThreads(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // dataset.cpp +#if defined(ARROW_R_WITH_DATASET) void dataset___ScannerBuilder__BatchSize(const std::shared_ptr& sb, int64_t batch_size); extern "C" SEXP _arrow_dataset___ScannerBuilder__BatchSize(SEXP sb_sexp, SEXP batch_size_sexp){ BEGIN_CPP11 @@ -1020,7 +1252,14 @@ BEGIN_CPP11 return R_NilValue; END_CPP11 } +#else +extern "C" SEXP _arrow_dataset___ScannerBuilder__BatchSize(SEXP sb_sexp, SEXP batch_size_sexp){ + Rf_error("Cannot call dataset___ScannerBuilder__BatchSize(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // dataset.cpp +#if defined(ARROW_R_WITH_DATASET) std::shared_ptr dataset___ScannerBuilder__schema(const std::shared_ptr& sb); extern "C" SEXP _arrow_dataset___ScannerBuilder__schema(SEXP sb_sexp){ BEGIN_CPP11 @@ -1028,7 +1267,14 @@ BEGIN_CPP11 return cpp11::as_sexp(dataset___ScannerBuilder__schema(sb)); END_CPP11 } +#else +extern "C" SEXP _arrow_dataset___ScannerBuilder__schema(SEXP sb_sexp){ + Rf_error("Cannot call dataset___ScannerBuilder__schema(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // dataset.cpp +#if defined(ARROW_R_WITH_DATASET) std::shared_ptr dataset___ScannerBuilder__Finish(const std::shared_ptr& sb); extern "C" SEXP _arrow_dataset___ScannerBuilder__Finish(SEXP sb_sexp){ BEGIN_CPP11 @@ -1036,7 +1282,14 @@ BEGIN_CPP11 return cpp11::as_sexp(dataset___ScannerBuilder__Finish(sb)); END_CPP11 } +#else +extern "C" SEXP _arrow_dataset___ScannerBuilder__Finish(SEXP sb_sexp){ + Rf_error("Cannot call dataset___ScannerBuilder__Finish(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // dataset.cpp +#if defined(ARROW_R_WITH_DATASET) std::shared_ptr dataset___Scanner__ToTable(const std::shared_ptr& scanner); extern "C" SEXP _arrow_dataset___Scanner__ToTable(SEXP scanner_sexp){ BEGIN_CPP11 @@ -1044,7 +1297,14 @@ BEGIN_CPP11 return cpp11::as_sexp(dataset___Scanner__ToTable(scanner)); END_CPP11 } +#else +extern "C" SEXP _arrow_dataset___Scanner__ToTable(SEXP scanner_sexp){ + Rf_error("Cannot call dataset___Scanner__ToTable(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // dataset.cpp +#if defined(ARROW_R_WITH_DATASET) std::shared_ptr dataset___Scanner__head(const std::shared_ptr& scanner, int n); extern "C" SEXP _arrow_dataset___Scanner__head(SEXP scanner_sexp, SEXP n_sexp){ BEGIN_CPP11 @@ -1053,7 +1313,14 @@ BEGIN_CPP11 return cpp11::as_sexp(dataset___Scanner__head(scanner, n)); END_CPP11 } +#else +extern "C" SEXP _arrow_dataset___Scanner__head(SEXP scanner_sexp, SEXP n_sexp){ + Rf_error("Cannot call dataset___Scanner__head(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. 
"); +} +#endif + // dataset.cpp +#if defined(ARROW_R_WITH_DATASET) cpp11::list dataset___Scanner__Scan(const std::shared_ptr& scanner); extern "C" SEXP _arrow_dataset___Scanner__Scan(SEXP scanner_sexp){ BEGIN_CPP11 @@ -1061,7 +1328,14 @@ BEGIN_CPP11 return cpp11::as_sexp(dataset___Scanner__Scan(scanner)); END_CPP11 } +#else +extern "C" SEXP _arrow_dataset___Scanner__Scan(SEXP scanner_sexp){ + Rf_error("Cannot call dataset___Scanner__Scan(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // dataset.cpp +#if defined(ARROW_R_WITH_DATASET) std::shared_ptr dataset___Scanner__schema(const std::shared_ptr& sc); extern "C" SEXP _arrow_dataset___Scanner__schema(SEXP sc_sexp){ BEGIN_CPP11 @@ -1069,7 +1343,14 @@ BEGIN_CPP11 return cpp11::as_sexp(dataset___Scanner__schema(sc)); END_CPP11 } +#else +extern "C" SEXP _arrow_dataset___Scanner__schema(SEXP sc_sexp){ + Rf_error("Cannot call dataset___Scanner__schema(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // dataset.cpp +#if defined(ARROW_R_WITH_DATASET) cpp11::list dataset___ScanTask__get_batches(const std::shared_ptr& scan_task); extern "C" SEXP _arrow_dataset___ScanTask__get_batches(SEXP scan_task_sexp){ BEGIN_CPP11 @@ -1077,7 +1358,14 @@ BEGIN_CPP11 return cpp11::as_sexp(dataset___ScanTask__get_batches(scan_task)); END_CPP11 } +#else +extern "C" SEXP _arrow_dataset___ScanTask__get_batches(SEXP scan_task_sexp){ + Rf_error("Cannot call dataset___ScanTask__get_batches(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // dataset.cpp +#if defined(ARROW_R_WITH_DATASET) void dataset___Dataset__Write(const std::shared_ptr& file_write_options, const std::shared_ptr& filesystem, std::string base_dir, const std::shared_ptr& partitioning, std::string basename_template, const std::shared_ptr& scanner); extern "C" SEXP _arrow_dataset___Dataset__Write(SEXP file_write_options_sexp, SEXP filesystem_sexp, SEXP base_dir_sexp, SEXP partitioning_sexp, SEXP basename_template_sexp, SEXP scanner_sexp){ BEGIN_CPP11 @@ -1091,6 +1379,12 @@ BEGIN_CPP11 return R_NilValue; END_CPP11 } +#else +extern "C" SEXP _arrow_dataset___Dataset__Write(SEXP file_write_options_sexp, SEXP filesystem_sexp, SEXP base_dir_sexp, SEXP partitioning_sexp, SEXP basename_template_sexp, SEXP scanner_sexp){ + Rf_error("Cannot call dataset___Dataset__Write(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // datatype.cpp std::shared_ptr Int8__initialize(); extern "C" SEXP _arrow_Int8__initialize(){ @@ -1537,6 +1831,7 @@ BEGIN_CPP11 END_CPP11 } // expression.cpp +#if defined(ARROW_R_WITH_DATASET) std::shared_ptr dataset___expr__call(std::string func_name, cpp11::list argument_list, cpp11::list options); extern "C" SEXP _arrow_dataset___expr__call(SEXP func_name_sexp, SEXP argument_list_sexp, SEXP options_sexp){ BEGIN_CPP11 @@ -1546,7 +1841,14 @@ BEGIN_CPP11 return cpp11::as_sexp(dataset___expr__call(func_name, argument_list, options)); END_CPP11 } +#else +extern "C" SEXP _arrow_dataset___expr__call(SEXP func_name_sexp, SEXP argument_list_sexp, SEXP options_sexp){ + Rf_error("Cannot call dataset___expr__call(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. 
"); +} +#endif + // expression.cpp +#if defined(ARROW_R_WITH_DATASET) std::shared_ptr dataset___expr__field_ref(std::string name); extern "C" SEXP _arrow_dataset___expr__field_ref(SEXP name_sexp){ BEGIN_CPP11 @@ -1554,7 +1856,14 @@ BEGIN_CPP11 return cpp11::as_sexp(dataset___expr__field_ref(name)); END_CPP11 } +#else +extern "C" SEXP _arrow_dataset___expr__field_ref(SEXP name_sexp){ + Rf_error("Cannot call dataset___expr__field_ref(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // expression.cpp +#if defined(ARROW_R_WITH_DATASET) std::string dataset___expr__get_field_ref_name(const std::shared_ptr& ref); extern "C" SEXP _arrow_dataset___expr__get_field_ref_name(SEXP ref_sexp){ BEGIN_CPP11 @@ -1562,7 +1871,14 @@ BEGIN_CPP11 return cpp11::as_sexp(dataset___expr__get_field_ref_name(ref)); END_CPP11 } +#else +extern "C" SEXP _arrow_dataset___expr__get_field_ref_name(SEXP ref_sexp){ + Rf_error("Cannot call dataset___expr__get_field_ref_name(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // expression.cpp +#if defined(ARROW_R_WITH_DATASET) std::shared_ptr dataset___expr__scalar(const std::shared_ptr& x); extern "C" SEXP _arrow_dataset___expr__scalar(SEXP x_sexp){ BEGIN_CPP11 @@ -1570,7 +1886,14 @@ BEGIN_CPP11 return cpp11::as_sexp(dataset___expr__scalar(x)); END_CPP11 } +#else +extern "C" SEXP _arrow_dataset___expr__scalar(SEXP x_sexp){ + Rf_error("Cannot call dataset___expr__scalar(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // expression.cpp +#if defined(ARROW_R_WITH_DATASET) std::string dataset___expr__ToString(const std::shared_ptr& x); extern "C" SEXP _arrow_dataset___expr__ToString(SEXP x_sexp){ BEGIN_CPP11 @@ -1578,6 +1901,12 @@ BEGIN_CPP11 return cpp11::as_sexp(dataset___expr__ToString(x)); END_CPP11 } +#else +extern "C" SEXP _arrow_dataset___expr__ToString(SEXP x_sexp){ + Rf_error("Cannot call dataset___expr__ToString(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // feather.cpp void ipc___WriteFeather__Table(const std::shared_ptr& stream, const std::shared_ptr& table, int version, int chunk_size, arrow::Compression::type compression, int compression_level); extern "C" SEXP _arrow_ipc___WriteFeather__Table(SEXP stream_sexp, SEXP table_sexp, SEXP version_sexp, SEXP chunk_size_sexp, SEXP compression_sexp, SEXP compression_level_sexp){ @@ -2391,6 +2720,7 @@ BEGIN_CPP11 END_CPP11 } // parquet.cpp +#if defined(ARROW_R_WITH_PARQUET) std::shared_ptr parquet___arrow___ArrowReaderProperties__Make(bool use_threads); extern "C" SEXP _arrow_parquet___arrow___ArrowReaderProperties__Make(SEXP use_threads_sexp){ BEGIN_CPP11 @@ -2398,7 +2728,14 @@ BEGIN_CPP11 return cpp11::as_sexp(parquet___arrow___ArrowReaderProperties__Make(use_threads)); END_CPP11 } +#else +extern "C" SEXP _arrow_parquet___arrow___ArrowReaderProperties__Make(SEXP use_threads_sexp){ + Rf_error("Cannot call parquet___arrow___ArrowReaderProperties__Make(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. 
"); +} +#endif + // parquet.cpp +#if defined(ARROW_R_WITH_PARQUET) void parquet___arrow___ArrowReaderProperties__set_use_threads(const std::shared_ptr& properties, bool use_threads); extern "C" SEXP _arrow_parquet___arrow___ArrowReaderProperties__set_use_threads(SEXP properties_sexp, SEXP use_threads_sexp){ BEGIN_CPP11 @@ -2408,7 +2745,14 @@ BEGIN_CPP11 return R_NilValue; END_CPP11 } +#else +extern "C" SEXP _arrow_parquet___arrow___ArrowReaderProperties__set_use_threads(SEXP properties_sexp, SEXP use_threads_sexp){ + Rf_error("Cannot call parquet___arrow___ArrowReaderProperties__set_use_threads(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // parquet.cpp +#if defined(ARROW_R_WITH_PARQUET) bool parquet___arrow___ArrowReaderProperties__get_use_threads(const std::shared_ptr& properties, bool use_threads); extern "C" SEXP _arrow_parquet___arrow___ArrowReaderProperties__get_use_threads(SEXP properties_sexp, SEXP use_threads_sexp){ BEGIN_CPP11 @@ -2417,7 +2761,14 @@ BEGIN_CPP11 return cpp11::as_sexp(parquet___arrow___ArrowReaderProperties__get_use_threads(properties, use_threads)); END_CPP11 } +#else +extern "C" SEXP _arrow_parquet___arrow___ArrowReaderProperties__get_use_threads(SEXP properties_sexp, SEXP use_threads_sexp){ + Rf_error("Cannot call parquet___arrow___ArrowReaderProperties__get_use_threads(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // parquet.cpp +#if defined(ARROW_R_WITH_PARQUET) bool parquet___arrow___ArrowReaderProperties__get_read_dictionary(const std::shared_ptr& properties, int column_index); extern "C" SEXP _arrow_parquet___arrow___ArrowReaderProperties__get_read_dictionary(SEXP properties_sexp, SEXP column_index_sexp){ BEGIN_CPP11 @@ -2426,7 +2777,14 @@ BEGIN_CPP11 return cpp11::as_sexp(parquet___arrow___ArrowReaderProperties__get_read_dictionary(properties, column_index)); END_CPP11 } +#else +extern "C" SEXP _arrow_parquet___arrow___ArrowReaderProperties__get_read_dictionary(SEXP properties_sexp, SEXP column_index_sexp){ + Rf_error("Cannot call parquet___arrow___ArrowReaderProperties__get_read_dictionary(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // parquet.cpp +#if defined(ARROW_R_WITH_PARQUET) void parquet___arrow___ArrowReaderProperties__set_read_dictionary(const std::shared_ptr& properties, int column_index, bool read_dict); extern "C" SEXP _arrow_parquet___arrow___ArrowReaderProperties__set_read_dictionary(SEXP properties_sexp, SEXP column_index_sexp, SEXP read_dict_sexp){ BEGIN_CPP11 @@ -2437,7 +2795,14 @@ BEGIN_CPP11 return R_NilValue; END_CPP11 } +#else +extern "C" SEXP _arrow_parquet___arrow___ArrowReaderProperties__set_read_dictionary(SEXP properties_sexp, SEXP column_index_sexp, SEXP read_dict_sexp){ + Rf_error("Cannot call parquet___arrow___ArrowReaderProperties__set_read_dictionary(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. 
"); +} +#endif + // parquet.cpp +#if defined(ARROW_R_WITH_PARQUET) std::shared_ptr parquet___arrow___FileReader__OpenFile(const std::shared_ptr& file, const std::shared_ptr& props); extern "C" SEXP _arrow_parquet___arrow___FileReader__OpenFile(SEXP file_sexp, SEXP props_sexp){ BEGIN_CPP11 @@ -2446,7 +2811,14 @@ BEGIN_CPP11 return cpp11::as_sexp(parquet___arrow___FileReader__OpenFile(file, props)); END_CPP11 } +#else +extern "C" SEXP _arrow_parquet___arrow___FileReader__OpenFile(SEXP file_sexp, SEXP props_sexp){ + Rf_error("Cannot call parquet___arrow___FileReader__OpenFile(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // parquet.cpp +#if defined(ARROW_R_WITH_PARQUET) std::shared_ptr parquet___arrow___FileReader__ReadTable1(const std::shared_ptr& reader); extern "C" SEXP _arrow_parquet___arrow___FileReader__ReadTable1(SEXP reader_sexp){ BEGIN_CPP11 @@ -2454,7 +2826,14 @@ BEGIN_CPP11 return cpp11::as_sexp(parquet___arrow___FileReader__ReadTable1(reader)); END_CPP11 } +#else +extern "C" SEXP _arrow_parquet___arrow___FileReader__ReadTable1(SEXP reader_sexp){ + Rf_error("Cannot call parquet___arrow___FileReader__ReadTable1(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // parquet.cpp +#if defined(ARROW_R_WITH_PARQUET) std::shared_ptr parquet___arrow___FileReader__ReadTable2(const std::shared_ptr& reader, const std::vector& column_indices); extern "C" SEXP _arrow_parquet___arrow___FileReader__ReadTable2(SEXP reader_sexp, SEXP column_indices_sexp){ BEGIN_CPP11 @@ -2463,7 +2842,14 @@ BEGIN_CPP11 return cpp11::as_sexp(parquet___arrow___FileReader__ReadTable2(reader, column_indices)); END_CPP11 } +#else +extern "C" SEXP _arrow_parquet___arrow___FileReader__ReadTable2(SEXP reader_sexp, SEXP column_indices_sexp){ + Rf_error("Cannot call parquet___arrow___FileReader__ReadTable2(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // parquet.cpp +#if defined(ARROW_R_WITH_PARQUET) std::shared_ptr parquet___arrow___FileReader__ReadRowGroup1(const std::shared_ptr& reader, int i); extern "C" SEXP _arrow_parquet___arrow___FileReader__ReadRowGroup1(SEXP reader_sexp, SEXP i_sexp){ BEGIN_CPP11 @@ -2472,7 +2858,14 @@ BEGIN_CPP11 return cpp11::as_sexp(parquet___arrow___FileReader__ReadRowGroup1(reader, i)); END_CPP11 } +#else +extern "C" SEXP _arrow_parquet___arrow___FileReader__ReadRowGroup1(SEXP reader_sexp, SEXP i_sexp){ + Rf_error("Cannot call parquet___arrow___FileReader__ReadRowGroup1(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // parquet.cpp +#if defined(ARROW_R_WITH_PARQUET) std::shared_ptr parquet___arrow___FileReader__ReadRowGroup2(const std::shared_ptr& reader, int i, const std::vector& column_indices); extern "C" SEXP _arrow_parquet___arrow___FileReader__ReadRowGroup2(SEXP reader_sexp, SEXP i_sexp, SEXP column_indices_sexp){ BEGIN_CPP11 @@ -2482,7 +2875,14 @@ BEGIN_CPP11 return cpp11::as_sexp(parquet___arrow___FileReader__ReadRowGroup2(reader, i, column_indices)); END_CPP11 } +#else +extern "C" SEXP _arrow_parquet___arrow___FileReader__ReadRowGroup2(SEXP reader_sexp, SEXP i_sexp, SEXP column_indices_sexp){ + Rf_error("Cannot call parquet___arrow___FileReader__ReadRowGroup2(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. 
"); +} +#endif + // parquet.cpp +#if defined(ARROW_R_WITH_PARQUET) std::shared_ptr parquet___arrow___FileReader__ReadRowGroups1(const std::shared_ptr& reader, const std::vector& row_groups); extern "C" SEXP _arrow_parquet___arrow___FileReader__ReadRowGroups1(SEXP reader_sexp, SEXP row_groups_sexp){ BEGIN_CPP11 @@ -2491,7 +2891,14 @@ BEGIN_CPP11 return cpp11::as_sexp(parquet___arrow___FileReader__ReadRowGroups1(reader, row_groups)); END_CPP11 } +#else +extern "C" SEXP _arrow_parquet___arrow___FileReader__ReadRowGroups1(SEXP reader_sexp, SEXP row_groups_sexp){ + Rf_error("Cannot call parquet___arrow___FileReader__ReadRowGroups1(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // parquet.cpp +#if defined(ARROW_R_WITH_PARQUET) std::shared_ptr parquet___arrow___FileReader__ReadRowGroups2(const std::shared_ptr& reader, const std::vector& row_groups, const std::vector& column_indices); extern "C" SEXP _arrow_parquet___arrow___FileReader__ReadRowGroups2(SEXP reader_sexp, SEXP row_groups_sexp, SEXP column_indices_sexp){ BEGIN_CPP11 @@ -2501,7 +2908,14 @@ BEGIN_CPP11 return cpp11::as_sexp(parquet___arrow___FileReader__ReadRowGroups2(reader, row_groups, column_indices)); END_CPP11 } +#else +extern "C" SEXP _arrow_parquet___arrow___FileReader__ReadRowGroups2(SEXP reader_sexp, SEXP row_groups_sexp, SEXP column_indices_sexp){ + Rf_error("Cannot call parquet___arrow___FileReader__ReadRowGroups2(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // parquet.cpp +#if defined(ARROW_R_WITH_PARQUET) int64_t parquet___arrow___FileReader__num_rows(const std::shared_ptr& reader); extern "C" SEXP _arrow_parquet___arrow___FileReader__num_rows(SEXP reader_sexp){ BEGIN_CPP11 @@ -2509,7 +2923,14 @@ BEGIN_CPP11 return cpp11::as_sexp(parquet___arrow___FileReader__num_rows(reader)); END_CPP11 } +#else +extern "C" SEXP _arrow_parquet___arrow___FileReader__num_rows(SEXP reader_sexp){ + Rf_error("Cannot call parquet___arrow___FileReader__num_rows(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // parquet.cpp +#if defined(ARROW_R_WITH_PARQUET) int parquet___arrow___FileReader__num_columns(const std::shared_ptr& reader); extern "C" SEXP _arrow_parquet___arrow___FileReader__num_columns(SEXP reader_sexp){ BEGIN_CPP11 @@ -2517,7 +2938,14 @@ BEGIN_CPP11 return cpp11::as_sexp(parquet___arrow___FileReader__num_columns(reader)); END_CPP11 } +#else +extern "C" SEXP _arrow_parquet___arrow___FileReader__num_columns(SEXP reader_sexp){ + Rf_error("Cannot call parquet___arrow___FileReader__num_columns(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // parquet.cpp +#if defined(ARROW_R_WITH_PARQUET) int parquet___arrow___FileReader__num_row_groups(const std::shared_ptr& reader); extern "C" SEXP _arrow_parquet___arrow___FileReader__num_row_groups(SEXP reader_sexp){ BEGIN_CPP11 @@ -2525,7 +2953,14 @@ BEGIN_CPP11 return cpp11::as_sexp(parquet___arrow___FileReader__num_row_groups(reader)); END_CPP11 } +#else +extern "C" SEXP _arrow_parquet___arrow___FileReader__num_row_groups(SEXP reader_sexp){ + Rf_error("Cannot call parquet___arrow___FileReader__num_row_groups(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. 
"); +} +#endif + // parquet.cpp +#if defined(ARROW_R_WITH_PARQUET) std::shared_ptr parquet___arrow___FileReader__ReadColumn(const std::shared_ptr& reader, int i); extern "C" SEXP _arrow_parquet___arrow___FileReader__ReadColumn(SEXP reader_sexp, SEXP i_sexp){ BEGIN_CPP11 @@ -2534,7 +2969,14 @@ BEGIN_CPP11 return cpp11::as_sexp(parquet___arrow___FileReader__ReadColumn(reader, i)); END_CPP11 } +#else +extern "C" SEXP _arrow_parquet___arrow___FileReader__ReadColumn(SEXP reader_sexp, SEXP i_sexp){ + Rf_error("Cannot call parquet___arrow___FileReader__ReadColumn(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // parquet.cpp +#if defined(ARROW_R_WITH_PARQUET) std::shared_ptr parquet___ArrowWriterProperties___create(bool allow_truncated_timestamps, bool use_deprecated_int96_timestamps, int timestamp_unit); extern "C" SEXP _arrow_parquet___ArrowWriterProperties___create(SEXP allow_truncated_timestamps_sexp, SEXP use_deprecated_int96_timestamps_sexp, SEXP timestamp_unit_sexp){ BEGIN_CPP11 @@ -2544,14 +2986,28 @@ BEGIN_CPP11 return cpp11::as_sexp(parquet___ArrowWriterProperties___create(allow_truncated_timestamps, use_deprecated_int96_timestamps, timestamp_unit)); END_CPP11 } +#else +extern "C" SEXP _arrow_parquet___ArrowWriterProperties___create(SEXP allow_truncated_timestamps_sexp, SEXP use_deprecated_int96_timestamps_sexp, SEXP timestamp_unit_sexp){ + Rf_error("Cannot call parquet___ArrowWriterProperties___create(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // parquet.cpp +#if defined(ARROW_R_WITH_PARQUET) std::shared_ptr parquet___WriterProperties___Builder__create(); extern "C" SEXP _arrow_parquet___WriterProperties___Builder__create(){ BEGIN_CPP11 return cpp11::as_sexp(parquet___WriterProperties___Builder__create()); END_CPP11 } +#else +extern "C" SEXP _arrow_parquet___WriterProperties___Builder__create(){ + Rf_error("Cannot call parquet___WriterProperties___Builder__create(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // parquet.cpp +#if defined(ARROW_R_WITH_PARQUET) void parquet___WriterProperties___Builder__version(const std::shared_ptr& builder, const parquet::ParquetVersion::type& version); extern "C" SEXP _arrow_parquet___WriterProperties___Builder__version(SEXP builder_sexp, SEXP version_sexp){ BEGIN_CPP11 @@ -2561,7 +3017,14 @@ BEGIN_CPP11 return R_NilValue; END_CPP11 } +#else +extern "C" SEXP _arrow_parquet___WriterProperties___Builder__version(SEXP builder_sexp, SEXP version_sexp){ + Rf_error("Cannot call parquet___WriterProperties___Builder__version(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // parquet.cpp +#if defined(ARROW_R_WITH_PARQUET) void parquet___ArrowWriterProperties___Builder__set_compressions(const std::shared_ptr& builder, const std::vector& paths, cpp11::integers types); extern "C" SEXP _arrow_parquet___ArrowWriterProperties___Builder__set_compressions(SEXP builder_sexp, SEXP paths_sexp, SEXP types_sexp){ BEGIN_CPP11 @@ -2572,7 +3035,14 @@ BEGIN_CPP11 return R_NilValue; END_CPP11 } +#else +extern "C" SEXP _arrow_parquet___ArrowWriterProperties___Builder__set_compressions(SEXP builder_sexp, SEXP paths_sexp, SEXP types_sexp){ + Rf_error("Cannot call parquet___ArrowWriterProperties___Builder__set_compressions(). 
See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // parquet.cpp +#if defined(ARROW_R_WITH_PARQUET) void parquet___ArrowWriterProperties___Builder__set_compression_levels(const std::shared_ptr& builder, const std::vector& paths, cpp11::integers levels); extern "C" SEXP _arrow_parquet___ArrowWriterProperties___Builder__set_compression_levels(SEXP builder_sexp, SEXP paths_sexp, SEXP levels_sexp){ BEGIN_CPP11 @@ -2583,7 +3053,14 @@ BEGIN_CPP11 return R_NilValue; END_CPP11 } +#else +extern "C" SEXP _arrow_parquet___ArrowWriterProperties___Builder__set_compression_levels(SEXP builder_sexp, SEXP paths_sexp, SEXP levels_sexp){ + Rf_error("Cannot call parquet___ArrowWriterProperties___Builder__set_compression_levels(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // parquet.cpp +#if defined(ARROW_R_WITH_PARQUET) void parquet___ArrowWriterProperties___Builder__set_use_dictionary(const std::shared_ptr& builder, const std::vector& paths, cpp11::logicals use_dictionary); extern "C" SEXP _arrow_parquet___ArrowWriterProperties___Builder__set_use_dictionary(SEXP builder_sexp, SEXP paths_sexp, SEXP use_dictionary_sexp){ BEGIN_CPP11 @@ -2594,7 +3071,14 @@ BEGIN_CPP11 return R_NilValue; END_CPP11 } +#else +extern "C" SEXP _arrow_parquet___ArrowWriterProperties___Builder__set_use_dictionary(SEXP builder_sexp, SEXP paths_sexp, SEXP use_dictionary_sexp){ + Rf_error("Cannot call parquet___ArrowWriterProperties___Builder__set_use_dictionary(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // parquet.cpp +#if defined(ARROW_R_WITH_PARQUET) void parquet___ArrowWriterProperties___Builder__set_write_statistics(const std::shared_ptr& builder, const std::vector& paths, cpp11::logicals write_statistics); extern "C" SEXP _arrow_parquet___ArrowWriterProperties___Builder__set_write_statistics(SEXP builder_sexp, SEXP paths_sexp, SEXP write_statistics_sexp){ BEGIN_CPP11 @@ -2605,7 +3089,14 @@ BEGIN_CPP11 return R_NilValue; END_CPP11 } +#else +extern "C" SEXP _arrow_parquet___ArrowWriterProperties___Builder__set_write_statistics(SEXP builder_sexp, SEXP paths_sexp, SEXP write_statistics_sexp){ + Rf_error("Cannot call parquet___ArrowWriterProperties___Builder__set_write_statistics(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // parquet.cpp +#if defined(ARROW_R_WITH_PARQUET) void parquet___ArrowWriterProperties___Builder__data_page_size(const std::shared_ptr& builder, int64_t data_page_size); extern "C" SEXP _arrow_parquet___ArrowWriterProperties___Builder__data_page_size(SEXP builder_sexp, SEXP data_page_size_sexp){ BEGIN_CPP11 @@ -2615,7 +3106,14 @@ BEGIN_CPP11 return R_NilValue; END_CPP11 } +#else +extern "C" SEXP _arrow_parquet___ArrowWriterProperties___Builder__data_page_size(SEXP builder_sexp, SEXP data_page_size_sexp){ + Rf_error("Cannot call parquet___ArrowWriterProperties___Builder__data_page_size(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. 
"); +} +#endif + // parquet.cpp +#if defined(ARROW_R_WITH_PARQUET) std::shared_ptr parquet___WriterProperties___Builder__build(const std::shared_ptr& builder); extern "C" SEXP _arrow_parquet___WriterProperties___Builder__build(SEXP builder_sexp){ BEGIN_CPP11 @@ -2623,7 +3121,14 @@ BEGIN_CPP11 return cpp11::as_sexp(parquet___WriterProperties___Builder__build(builder)); END_CPP11 } +#else +extern "C" SEXP _arrow_parquet___WriterProperties___Builder__build(SEXP builder_sexp){ + Rf_error("Cannot call parquet___WriterProperties___Builder__build(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // parquet.cpp +#if defined(ARROW_R_WITH_PARQUET) std::shared_ptr parquet___arrow___ParquetFileWriter__Open(const std::shared_ptr& schema, const std::shared_ptr& sink, const std::shared_ptr& properties, const std::shared_ptr& arrow_properties); extern "C" SEXP _arrow_parquet___arrow___ParquetFileWriter__Open(SEXP schema_sexp, SEXP sink_sexp, SEXP properties_sexp, SEXP arrow_properties_sexp){ BEGIN_CPP11 @@ -2634,7 +3139,14 @@ BEGIN_CPP11 return cpp11::as_sexp(parquet___arrow___ParquetFileWriter__Open(schema, sink, properties, arrow_properties)); END_CPP11 } +#else +extern "C" SEXP _arrow_parquet___arrow___ParquetFileWriter__Open(SEXP schema_sexp, SEXP sink_sexp, SEXP properties_sexp, SEXP arrow_properties_sexp){ + Rf_error("Cannot call parquet___arrow___ParquetFileWriter__Open(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // parquet.cpp +#if defined(ARROW_R_WITH_PARQUET) void parquet___arrow___FileWriter__WriteTable(const std::shared_ptr& writer, const std::shared_ptr& table, int64_t chunk_size); extern "C" SEXP _arrow_parquet___arrow___FileWriter__WriteTable(SEXP writer_sexp, SEXP table_sexp, SEXP chunk_size_sexp){ BEGIN_CPP11 @@ -2645,7 +3157,14 @@ BEGIN_CPP11 return R_NilValue; END_CPP11 } +#else +extern "C" SEXP _arrow_parquet___arrow___FileWriter__WriteTable(SEXP writer_sexp, SEXP table_sexp, SEXP chunk_size_sexp){ + Rf_error("Cannot call parquet___arrow___FileWriter__WriteTable(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // parquet.cpp +#if defined(ARROW_R_WITH_PARQUET) void parquet___arrow___FileWriter__Close(const std::shared_ptr& writer); extern "C" SEXP _arrow_parquet___arrow___FileWriter__Close(SEXP writer_sexp){ BEGIN_CPP11 @@ -2654,7 +3173,14 @@ BEGIN_CPP11 return R_NilValue; END_CPP11 } +#else +extern "C" SEXP _arrow_parquet___arrow___FileWriter__Close(SEXP writer_sexp){ + Rf_error("Cannot call parquet___arrow___FileWriter__Close(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // parquet.cpp +#if defined(ARROW_R_WITH_PARQUET) void parquet___arrow___WriteTable(const std::shared_ptr& table, const std::shared_ptr& sink, const std::shared_ptr& properties, const std::shared_ptr& arrow_properties); extern "C" SEXP _arrow_parquet___arrow___WriteTable(SEXP table_sexp, SEXP sink_sexp, SEXP properties_sexp, SEXP arrow_properties_sexp){ BEGIN_CPP11 @@ -2666,7 +3192,14 @@ BEGIN_CPP11 return R_NilValue; END_CPP11 } +#else +extern "C" SEXP _arrow_parquet___arrow___WriteTable(SEXP table_sexp, SEXP sink_sexp, SEXP properties_sexp, SEXP arrow_properties_sexp){ + Rf_error("Cannot call parquet___arrow___WriteTable(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. 
"); +} +#endif + // parquet.cpp +#if defined(ARROW_R_WITH_PARQUET) std::shared_ptr parquet___arrow___FileReader__GetSchema(const std::shared_ptr& reader); extern "C" SEXP _arrow_parquet___arrow___FileReader__GetSchema(SEXP reader_sexp){ BEGIN_CPP11 @@ -2674,6 +3207,12 @@ BEGIN_CPP11 return cpp11::as_sexp(parquet___arrow___FileReader__GetSchema(reader)); END_CPP11 } +#else +extern "C" SEXP _arrow_parquet___arrow___FileReader__GetSchema(SEXP reader_sexp){ + Rf_error("Cannot call parquet___arrow___FileReader__GetSchema(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // py-to-r.cpp std::shared_ptr ImportArray(arrow::r::Pointer array, arrow::r::Pointer schema); extern "C" SEXP _arrow_ImportArray(SEXP array_sexp, SEXP schema_sexp){ @@ -3524,6 +4063,26 @@ return Rf_ScalarLogical( ); } +extern "C" SEXP _dataset_available() { +return Rf_ScalarLogical( +#if defined(ARROW_R_WITH_DATASET) + TRUE +#else + FALSE +#endif +); +} + +extern "C" SEXP _parquet_available() { +return Rf_ScalarLogical( +#if defined(ARROW_R_WITH_PARQUET) + TRUE +#else + FALSE +#endif +); +} + extern "C" SEXP _s3_available() { return Rf_ScalarLogical( #if defined(ARROW_R_WITH_S3) @@ -3536,6 +4095,8 @@ return Rf_ScalarLogical( static const R_CallMethodDef CallEntries[] = { { "_arrow_available", (DL_FUNC)& _arrow_available, 0 }, + { "_dataset_available", (DL_FUNC)& _dataset_available, 0 }, + { "_parquet_available", (DL_FUNC)& _parquet_available, 0 }, { "_s3_available", (DL_FUNC)& _s3_available, 0 }, { "_arrow_Array__Slice1", (DL_FUNC) &_arrow_Array__Slice1, 2}, { "_arrow_Array__Slice2", (DL_FUNC) &_arrow_Array__Slice2, 3}, diff --git a/r/src/arrow_types.h b/r/src/arrow_types.h index b37c01c7621..b94ab764729 100644 --- a/r/src/arrow_types.h +++ b/r/src/arrow_types.h @@ -35,16 +35,26 @@ #include #include #include + +#if defined(ARROW_R_WITH_DATASET) #include +#endif + #include #include #include #include #include #include + +#if defined(ARROW_R_WITH_PARQUET) #include +#endif +#if defined(ARROW_R_WITH_DATASET) namespace ds = ::arrow::dataset; +#endif + namespace fs = ::arrow::fs; SEXP ChunkedArray__as_vector(const std::shared_ptr& chunked_array); @@ -169,12 +179,14 @@ R6_CLASS_NAME(arrow::csv::ParseOptions, "CsvParseOptions"); R6_CLASS_NAME(arrow::csv::ConvertOptions, "CsvConvertOptions"); R6_CLASS_NAME(arrow::csv::TableReader, "CsvTableReader"); +#if defined(ARROW_R_WITH_PARQUET) R6_CLASS_NAME(parquet::ArrowReaderProperties, "ParquetArrowReaderProperties"); R6_CLASS_NAME(parquet::ArrowWriterProperties, "ParquetArrowWriterProperties"); R6_CLASS_NAME(parquet::WriterProperties, "ParquetWriterProperties"); R6_CLASS_NAME(parquet::arrow::FileReader, "ParquetFileReader"); R6_CLASS_NAME(parquet::WriterPropertiesBuilder, "ParquetWriterPropertiesBuilder"); R6_CLASS_NAME(parquet::arrow::FileWriter, "ParquetFileWriter"); +#endif R6_CLASS_NAME(arrow::ipc::feather::Reader, "FeatherReader"); @@ -206,6 +218,8 @@ struct r6_class_name { static const char* get(const std::shared_ptr&); }; +#if defined(ARROW_R_WITH_DATASET) + template <> struct r6_class_name { static const char* get(const std::shared_ptr&); @@ -216,6 +230,8 @@ struct r6_class_name { static const char* get(const std::shared_ptr&); }; +#endif + } // namespace cpp11 #endif diff --git a/r/src/dataset.cpp b/r/src/dataset.cpp index f8c24217ce3..001cd9da0f6 100644 --- a/r/src/dataset.cpp +++ b/r/src/dataset.cpp @@ -17,7 +17,7 @@ #include "./arrow_types.h" -#if defined(ARROW_R_WITH_ARROW) +#if defined(ARROW_R_WITH_DATASET) 
#include #include @@ -63,7 +63,7 @@ const char* r6_class_name::get( // Dataset, UnionDataset, FileSystemDataset -// [[arrow::export]] +// [[dataset::export]] std::shared_ptr dataset___Dataset__NewScan( const std::shared_ptr& ds) { auto context = std::make_shared(); @@ -71,55 +71,55 @@ std::shared_ptr dataset___Dataset__NewScan( return ValueOrStop(ds->NewScan(std::move(context))); } -// [[arrow::export]] +// [[dataset::export]] std::shared_ptr dataset___Dataset__schema( const std::shared_ptr& dataset) { return dataset->schema(); } -// [[arrow::export]] +// [[dataset::export]] std::string dataset___Dataset__type_name(const std::shared_ptr& dataset) { return dataset->type_name(); } -// [[arrow::export]] +// [[dataset::export]] std::shared_ptr dataset___Dataset__ReplaceSchema( const std::shared_ptr& dataset, const std::shared_ptr& schm) { return ValueOrStop(dataset->ReplaceSchema(schm)); } -// [[arrow::export]] +// [[dataset::export]] std::shared_ptr dataset___UnionDataset__create( const ds::DatasetVector& datasets, const std::shared_ptr& schm) { return ValueOrStop(ds::UnionDataset::Make(schm, datasets)); } -// [[arrow::export]] +// [[dataset::export]] std::shared_ptr dataset___InMemoryDataset__create( const std::shared_ptr& table) { return std::make_shared(table); } -// [[arrow::export]] +// [[dataset::export]] cpp11::list dataset___UnionDataset__children( const std::shared_ptr& ds) { return arrow::r::to_r_list(ds->children()); } -// [[arrow::export]] +// [[dataset::export]] std::shared_ptr dataset___FileSystemDataset__format( const std::shared_ptr& dataset) { return dataset->format(); } -// [[arrow::export]] +// [[dataset::export]] std::shared_ptr dataset___FileSystemDataset__filesystem( const std::shared_ptr& dataset) { return dataset->filesystem(); } -// [[arrow::export]] +// [[dataset::export]] std::vector dataset___FileSystemDataset__files( const std::shared_ptr& dataset) { return dataset->files(); @@ -127,7 +127,7 @@ std::vector dataset___FileSystemDataset__files( // DatasetFactory, UnionDatasetFactory, FileSystemDatasetFactory -// [[arrow::export]] +// [[dataset::export]] std::shared_ptr dataset___DatasetFactory__Finish1( const std::shared_ptr& factory, bool unify_schemas) { ds::FinishOptions opts; @@ -137,14 +137,14 @@ std::shared_ptr dataset___DatasetFactory__Finish1( return ValueOrStop(factory->Finish(opts)); } -// [[arrow::export]] +// [[dataset::export]] std::shared_ptr dataset___DatasetFactory__Finish2( const std::shared_ptr& factory, const std::shared_ptr& schema) { return ValueOrStop(factory->Finish(schema)); } -// [[arrow::export]] +// [[dataset::export]] std::shared_ptr dataset___DatasetFactory__Inspect( const std::shared_ptr& factory, bool unify_schemas) { ds::InspectOptions opts; @@ -154,13 +154,13 @@ std::shared_ptr dataset___DatasetFactory__Inspect( return ValueOrStop(factory->Inspect(opts)); } -// [[arrow::export]] +// [[dataset::export]] std::shared_ptr dataset___UnionDatasetFactory__Make( const std::vector>& children) { return ValueOrStop(ds::UnionDatasetFactory::Make(children)); } -// [[arrow::export]] +// [[dataset::export]] std::shared_ptr dataset___FileSystemDatasetFactory__Make2( const std::shared_ptr& fs, const std::shared_ptr& selector, @@ -176,7 +176,7 @@ std::shared_ptr dataset___FileSystemDatasetFactory ValueOrStop(ds::FileSystemDatasetFactory::Make(fs, *selector, format, options))); } -// [[arrow::export]] +// [[dataset::export]] std::shared_ptr dataset___FileSystemDatasetFactory__Make1( const std::shared_ptr& fs, const std::shared_ptr& selector, @@ -184,7 
+184,7 @@ std::shared_ptr dataset___FileSystemDatasetFactory return dataset___FileSystemDatasetFactory__Make2(fs, selector, format, nullptr); } -// [[arrow::export]] +// [[dataset::export]] std::shared_ptr dataset___FileSystemDatasetFactory__Make3( const std::shared_ptr& fs, const std::shared_ptr& selector, @@ -202,19 +202,19 @@ std::shared_ptr dataset___FileSystemDatasetFactory // FileFormat, ParquetFileFormat, IpcFileFormat -// [[arrow::export]] +// [[dataset::export]] std::string dataset___FileFormat__type_name( const std::shared_ptr& format) { return format->type_name(); } -// [[arrow::export]] +// [[dataset::export]] std::shared_ptr dataset___FileFormat__DefaultWriteOptions( const std::shared_ptr& fmt) { return fmt->DefaultWriteOptions(); } -// [[arrow::export]] +// [[dataset::export]] std::shared_ptr dataset___ParquetFileFormat__Make( bool use_buffered_stream, int64_t buffer_size, cpp11::strings dict_columns) { auto fmt = std::make_shared(); @@ -230,13 +230,14 @@ std::shared_ptr dataset___ParquetFileFormat__Make( return fmt; } -// [[arrow::export]] +// [[dataset::export]] std::string dataset___FileWriteOptions__type_name( const std::shared_ptr& options) { return options->type_name(); } -// [[arrow::export]] +#if defined(ARROW_R_WITH_PARQUET) +// [[dataset::export]] void dataset___ParquetFileWriteOptions__update( const std::shared_ptr& options, const std::shared_ptr& writer_props, @@ -244,8 +245,9 @@ void dataset___ParquetFileWriteOptions__update( options->writer_properties = writer_props; options->arrow_writer_properties = arrow_writer_props; } +#endif -// [[arrow::export]] +// [[dataset::export]] void dataset___IpcFileWriteOptions__update2( const std::shared_ptr& ipc_options, bool use_legacy_format, const std::shared_ptr& codec, @@ -255,7 +257,7 @@ void dataset___IpcFileWriteOptions__update2( ipc_options->options->metadata_version = metadata_version; } -// [[arrow::export]] +// [[dataset::export]] void dataset___IpcFileWriteOptions__update1( const std::shared_ptr& ipc_options, bool use_legacy_format, arrow::ipc::MetadataVersion metadata_version) { @@ -263,12 +265,12 @@ void dataset___IpcFileWriteOptions__update1( ipc_options->options->metadata_version = metadata_version; } -// [[arrow::export]] +// [[dataset::export]] std::shared_ptr dataset___IpcFileFormat__Make() { return std::make_shared(); } -// [[arrow::export]] +// [[dataset::export]] std::shared_ptr dataset___CsvFileFormat__Make( const std::shared_ptr& parse_options) { auto format = std::make_shared(); @@ -278,26 +280,26 @@ std::shared_ptr dataset___CsvFileFormat__Make( // DirectoryPartitioning, HivePartitioning -// [[arrow::export]] +// [[dataset::export]] std::shared_ptr dataset___DirectoryPartitioning( const std::shared_ptr& schm) { return std::make_shared(schm); } -// [[arrow::export]] +// [[dataset::export]] std::shared_ptr dataset___DirectoryPartitioning__MakeFactory( const std::vector& field_names) { return ds::DirectoryPartitioning::MakeFactory(field_names); } -// [[arrow::export]] +// [[dataset::export]] std::shared_ptr dataset___HivePartitioning( const std::shared_ptr& schm, const std::string& null_fallback) { std::vector> dictionaries; return std::make_shared(schm, dictionaries, null_fallback); } -// [[arrow::export]] +// [[dataset::export]] std::shared_ptr dataset___HivePartitioning__MakeFactory( const std::string& null_fallback) { ds::HivePartitioningFactoryOptions options; @@ -307,13 +309,13 @@ std::shared_ptr dataset___HivePartitioning__MakeFactory // ScannerBuilder, Scanner -// [[arrow::export]] +// 
[[dataset::export]] void dataset___ScannerBuilder__ProjectNames(const std::shared_ptr& sb, const std::vector& cols) { StopIfNotOk(sb->Project(cols)); } -// [[arrow::export]] +// [[dataset::export]] void dataset___ScannerBuilder__ProjectExprs( const std::shared_ptr& sb, const std::vector>& exprs, @@ -326,43 +328,43 @@ void dataset___ScannerBuilder__ProjectExprs( StopIfNotOk(sb->Project(expressions, names)); } -// [[arrow::export]] +// [[dataset::export]] void dataset___ScannerBuilder__Filter(const std::shared_ptr& sb, const std::shared_ptr& expr) { StopIfNotOk(sb->Filter(*expr)); } -// [[arrow::export]] +// [[dataset::export]] void dataset___ScannerBuilder__UseThreads(const std::shared_ptr& sb, bool threads) { StopIfNotOk(sb->UseThreads(threads)); } -// [[arrow::export]] +// [[dataset::export]] void dataset___ScannerBuilder__BatchSize(const std::shared_ptr& sb, int64_t batch_size) { StopIfNotOk(sb->BatchSize(batch_size)); } -// [[arrow::export]] +// [[dataset::export]] std::shared_ptr dataset___ScannerBuilder__schema( const std::shared_ptr& sb) { return sb->schema(); } -// [[arrow::export]] +// [[dataset::export]] std::shared_ptr dataset___ScannerBuilder__Finish( const std::shared_ptr& sb) { return ValueOrStop(sb->Finish()); } -// [[arrow::export]] +// [[dataset::export]] std::shared_ptr dataset___Scanner__ToTable( const std::shared_ptr& scanner) { return ValueOrStop(scanner->ToTable()); } -// [[arrow::export]] +// [[dataset::export]] std::shared_ptr dataset___Scanner__head( const std::shared_ptr& scanner, int n) { // TODO: make this a full Slice with offset > 0 @@ -381,7 +383,7 @@ std::shared_ptr dataset___Scanner__head( return ValueOrStop(arrow::Table::FromRecordBatches(std::move(batches))); } -// [[arrow::export]] +// [[dataset::export]] cpp11::list dataset___Scanner__Scan(const std::shared_ptr& scanner) { auto it = ValueOrStop(scanner->Scan()); std::vector> out; @@ -395,13 +397,13 @@ cpp11::list dataset___Scanner__Scan(const std::shared_ptr& scanner) return arrow::r::to_r_list(out); } -// [[arrow::export]] +// [[dataset::export]] std::shared_ptr dataset___Scanner__schema( const std::shared_ptr& sc) { return sc->schema(); } -// [[arrow::export]] +// [[dataset::export]] cpp11::list dataset___ScanTask__get_batches( const std::shared_ptr& scan_task) { arrow::RecordBatchIterator rbi; @@ -415,7 +417,7 @@ cpp11::list dataset___ScanTask__get_batches( return arrow::r::to_r_list(out); } -// [[arrow::export]] +// [[dataset::export]] void dataset___Dataset__Write( const std::shared_ptr& file_write_options, const std::shared_ptr& filesystem, std::string base_dir, diff --git a/r/src/expression.cpp b/r/src/expression.cpp index e7c6dd4e0c0..0e8fd52034d 100644 --- a/r/src/expression.cpp +++ b/r/src/expression.cpp @@ -17,7 +17,7 @@ #include "./arrow_types.h" -#if defined(ARROW_R_WITH_ARROW) +#if defined(ARROW_R_WITH_DATASET) #include #include @@ -26,7 +26,7 @@ namespace ds = ::arrow::dataset; std::shared_ptr make_compute_options( std::string func_name, cpp11::list options); -// [[arrow::export]] +// [[dataset::export]] std::shared_ptr dataset___expr__call(std::string func_name, cpp11::list argument_list, cpp11::list options) { @@ -42,12 +42,12 @@ std::shared_ptr dataset___expr__call(std::string func_name, ds::call(std::move(func_name), std::move(arguments), std::move(options_ptr))); } -// [[arrow::export]] +// [[dataset::export]] std::shared_ptr dataset___expr__field_ref(std::string name) { return std::make_shared(ds::field_ref(std::move(name))); } -// [[arrow::export]] +// [[dataset::export]] 
std::string dataset___expr__get_field_ref_name( const std::shared_ptr& ref) { auto field_ref = ref->field_ref(); @@ -57,13 +57,13 @@ std::string dataset___expr__get_field_ref_name( return *field_ref->name(); } -// [[arrow::export]] +// [[dataset::export]] std::shared_ptr dataset___expr__scalar( const std::shared_ptr& x) { return std::make_shared(ds::literal(std::move(x))); } -// [[arrow::export]] +// [[dataset::export]] std::string dataset___expr__ToString(const std::shared_ptr& x) { return x->ToString(); } diff --git a/r/src/parquet.cpp b/r/src/parquet.cpp index 1b0bc41b833..5de7ca8fac4 100644 --- a/r/src/parquet.cpp +++ b/r/src/parquet.cpp @@ -17,7 +17,7 @@ #include "./arrow_types.h" -#if defined(ARROW_R_WITH_ARROW) +#if defined(ARROW_R_WITH_PARQUET) #include #include @@ -38,38 +38,38 @@ class ArrowWriterPropertiesBuilder : public ArrowWriterProperties::Builder { } // namespace parquet -// [[arrow::export]] +// [[parquet::export]] std::shared_ptr parquet___arrow___ArrowReaderProperties__Make(bool use_threads) { return std::make_shared(use_threads); } -// [[arrow::export]] +// [[parquet::export]] void parquet___arrow___ArrowReaderProperties__set_use_threads( const std::shared_ptr& properties, bool use_threads) { properties->set_use_threads(use_threads); } -// [[arrow::export]] +// [[parquet::export]] bool parquet___arrow___ArrowReaderProperties__get_use_threads( const std::shared_ptr& properties, bool use_threads) { return properties->use_threads(); } -// [[arrow::export]] +// [[parquet::export]] bool parquet___arrow___ArrowReaderProperties__get_read_dictionary( const std::shared_ptr& properties, int column_index) { return properties->read_dictionary(column_index); } -// [[arrow::export]] +// [[parquet::export]] void parquet___arrow___ArrowReaderProperties__set_read_dictionary( const std::shared_ptr& properties, int column_index, bool read_dict) { properties->set_read_dictionary(column_index, read_dict); } -// [[arrow::export]] +// [[parquet::export]] std::shared_ptr parquet___arrow___FileReader__OpenFile( const std::shared_ptr& file, const std::shared_ptr& props) { @@ -81,7 +81,7 @@ std::shared_ptr parquet___arrow___FileReader__OpenFi return std::move(reader); } -// [[arrow::export]] +// [[parquet::export]] std::shared_ptr parquet___arrow___FileReader__ReadTable1( const std::shared_ptr& reader) { std::shared_ptr table; @@ -89,7 +89,7 @@ std::shared_ptr parquet___arrow___FileReader__ReadTable1( return table; } -// [[arrow::export]] +// [[parquet::export]] std::shared_ptr parquet___arrow___FileReader__ReadTable2( const std::shared_ptr& reader, const std::vector& column_indices) { @@ -98,7 +98,7 @@ std::shared_ptr parquet___arrow___FileReader__ReadTable2( return table; } -// [[arrow::export]] +// [[parquet::export]] std::shared_ptr parquet___arrow___FileReader__ReadRowGroup1( const std::shared_ptr& reader, int i) { std::shared_ptr table; @@ -106,7 +106,7 @@ std::shared_ptr parquet___arrow___FileReader__ReadRowGroup1( return table; } -// [[arrow::export]] +// [[parquet::export]] std::shared_ptr parquet___arrow___FileReader__ReadRowGroup2( const std::shared_ptr& reader, int i, const std::vector& column_indices) { @@ -115,7 +115,7 @@ std::shared_ptr parquet___arrow___FileReader__ReadRowGroup2( return table; } -// [[arrow::export]] +// [[parquet::export]] std::shared_ptr parquet___arrow___FileReader__ReadRowGroups1( const std::shared_ptr& reader, const std::vector& row_groups) { @@ -124,7 +124,7 @@ std::shared_ptr parquet___arrow___FileReader__ReadRowGroups1( return table; } -// 
[[arrow::export]] +// [[parquet::export]] std::shared_ptr parquet___arrow___FileReader__ReadRowGroups2( const std::shared_ptr& reader, const std::vector& row_groups, const std::vector& column_indices) { @@ -133,25 +133,25 @@ std::shared_ptr parquet___arrow___FileReader__ReadRowGroups2( return table; } -// [[arrow::export]] +// [[parquet::export]] int64_t parquet___arrow___FileReader__num_rows( const std::shared_ptr& reader) { return reader->parquet_reader()->metadata()->num_rows(); } -// [[arrow::export]] +// [[parquet::export]] int parquet___arrow___FileReader__num_columns( const std::shared_ptr& reader) { return reader->parquet_reader()->metadata()->num_columns(); } -// [[arrow::export]] +// [[parquet::export]] int parquet___arrow___FileReader__num_row_groups( const std::shared_ptr& reader) { return reader->num_row_groups(); } -// [[arrow::export]] +// [[parquet::export]] std::shared_ptr parquet___arrow___FileReader__ReadColumn( const std::shared_ptr& reader, int i) { std::shared_ptr array; @@ -159,7 +159,7 @@ std::shared_ptr parquet___arrow___FileReader__ReadColumn( return array; } -// [[arrow::export]] +// [[parquet::export]] std::shared_ptr parquet___ArrowWriterProperties___create( bool allow_truncated_timestamps, bool use_deprecated_int96_timestamps, int timestamp_unit) { @@ -180,20 +180,20 @@ std::shared_ptr parquet___ArrowWriterProperties_ return builder->build(); } -// [[arrow::export]] +// [[parquet::export]] std::shared_ptr parquet___WriterProperties___Builder__create() { return std::make_shared(); } -// [[arrow::export]] +// [[parquet::export]] void parquet___WriterProperties___Builder__version( const std::shared_ptr& builder, const parquet::ParquetVersion::type& version) { builder->version(version); } -// [[arrow::export]] +// [[parquet::export]] void parquet___ArrowWriterProperties___Builder__set_compressions( const std::shared_ptr& builder, const std::vector& paths, cpp11::integers types) { @@ -207,7 +207,7 @@ void parquet___ArrowWriterProperties___Builder__set_compressions( } } -// [[arrow::export]] +// [[parquet::export]] void parquet___ArrowWriterProperties___Builder__set_compression_levels( const std::shared_ptr& builder, const std::vector& paths, cpp11::integers levels) { @@ -221,7 +221,7 @@ void parquet___ArrowWriterProperties___Builder__set_compression_levels( } } -// [[arrow::export]] +// [[parquet::export]] void parquet___ArrowWriterProperties___Builder__set_use_dictionary( const std::shared_ptr& builder, const std::vector& paths, cpp11::logicals use_dictionary) { @@ -244,7 +244,7 @@ void parquet___ArrowWriterProperties___Builder__set_use_dictionary( } } -// [[arrow::export]] +// [[parquet::export]] void parquet___ArrowWriterProperties___Builder__set_write_statistics( const std::shared_ptr& builder, const std::vector& paths, cpp11::logicals write_statistics) { @@ -267,20 +267,20 @@ void parquet___ArrowWriterProperties___Builder__set_write_statistics( } } -// [[arrow::export]] +// [[parquet::export]] void parquet___ArrowWriterProperties___Builder__data_page_size( const std::shared_ptr& builder, int64_t data_page_size) { builder->data_pagesize(data_page_size); } -// [[arrow::export]] +// [[parquet::export]] std::shared_ptr parquet___WriterProperties___Builder__build( const std::shared_ptr& builder) { return builder->build(); } -// [[arrow::export]] +// [[parquet::export]] std::shared_ptr parquet___arrow___ParquetFileWriter__Open( const std::shared_ptr& schema, const std::shared_ptr& sink, @@ -292,20 +292,20 @@ std::shared_ptr parquet___arrow___ParquetFileWriter_ 
return std::move(writer); } -// [[arrow::export]] +// [[parquet::export]] void parquet___arrow___FileWriter__WriteTable( const std::shared_ptr& writer, const std::shared_ptr& table, int64_t chunk_size) { PARQUET_THROW_NOT_OK(writer->WriteTable(*table, chunk_size)); } -// [[arrow::export]] +// [[parquet::export]] void parquet___arrow___FileWriter__Close( const std::shared_ptr& writer) { PARQUET_THROW_NOT_OK(writer->Close()); } -// [[arrow::export]] +// [[parquet::export]] void parquet___arrow___WriteTable( const std::shared_ptr& table, const std::shared_ptr& sink, @@ -315,7 +315,7 @@ void parquet___arrow___WriteTable( *table, gc_memory_pool(), sink, table->num_rows(), properties, arrow_properties)); } -// [[arrow::export]] +// [[parquet::export]] std::shared_ptr parquet___arrow___FileReader__GetSchema( const std::shared_ptr& reader) { std::shared_ptr schema; diff --git a/r/tools/autobrew b/r/tools/autobrew index 07a68c50c7f..0288a6eacd3 100644 --- a/r/tools/autobrew +++ b/r/tools/autobrew @@ -48,7 +48,8 @@ fi # Hardcode this for my custom autobrew build rm -f $BREWDIR/lib/*.dylib AWS_LIBS="-laws-cpp-sdk-config -laws-cpp-sdk-transfer -laws-cpp-sdk-identity-management -laws-cpp-sdk-cognito-identity -laws-cpp-sdk-sts -laws-cpp-sdk-s3 -laws-cpp-sdk-core -laws-c-event-stream -laws-checksums -laws-c-common -lpthread -lcurl" -PKG_LIBS="-L$BREWDIR/lib -lparquet -larrow_dataset -larrow -larrow_bundled_dependencies -lthrift -llz4 -lsnappy -lzstd $AWS_LIBS" +PKG_LIBS="-lparquet -larrow_dataset -larrow -larrow_bundled_dependencies -lthrift -llz4 -lsnappy -lzstd $AWS_LIBS" +PKG_DIRS="-L$BREWDIR/lib" # Prevent CRAN builder from linking against old libs in /usr/local/lib for FILE in $BREWDIR/Cellar/*/*/lib/*.a; do From cc681dabf14c8f5061adc2fef8a02a4da15ee514 Mon Sep 17 00:00:00 2001 From: Ian Cook Date: Mon, 1 Mar 2021 17:53:12 -0500 Subject: [PATCH 02/28] Update tests and add missing example wrapper --- r/R/dataset-partition.R | 4 +- r/man/hive_partition.Rd | 4 +- r/tests/testthat/helper-skip.R | 6 +- r/tests/testthat/latin1.R | 10 +- .../testthat/test-backwards-compatibility.R | 6 + r/tests/testthat/test-dataset.R | 72 +++++++-- r/tests/testthat/test-dplyr-mutate.R | 5 +- r/tests/testthat/test-expression.R | 1 + r/tests/testthat/test-metadata.R | 1 + r/tests/testthat/test-parquet.R | 2 + r/tests/testthat/test-python-flight.R | 2 + r/tests/testthat/test-python.R | 2 + r/tests/testthat/test-s3-minio.R | 139 +++++++++--------- r/tests/testthat/test-s3.R | 1 + 14 files changed, 164 insertions(+), 91 deletions(-) diff --git a/r/R/dataset-partition.R b/r/R/dataset-partition.R index cefcea592f3..2d00ac0b815 100644 --- a/r/R/dataset-partition.R +++ b/r/R/dataset-partition.R @@ -90,7 +90,9 @@ HivePartitioning$create <- function(schm, null_fallback = NULL) { #' calling `hive_partition()` with no arguments. 
#' @examples #' \donttest{ -#' hive_partition(year = int16(), month = int8()) +#' if (arrow_with_dataset()) { +#' hive_partition(year = int16(), month = int8()) +#' } #' } #' @export hive_partition <- function(..., null_fallback = NULL) { diff --git a/r/man/hive_partition.Rd b/r/man/hive_partition.Rd index 7c25ef16e2c..057447c3307 100644 --- a/r/man/hive_partition.Rd +++ b/r/man/hive_partition.Rd @@ -27,6 +27,8 @@ Because fields are named in the path segments, order of fields passed to } \examples{ \donttest{ -hive_partition(year = int16(), month = int8()) +if (arrow_with_dataset()) { + hive_partition(year = int16(), month = int8()) +} } } diff --git a/r/tests/testthat/helper-skip.R b/r/tests/testthat/helper-skip.R index 366cc75fb27..37ba8be2b2c 100644 --- a/r/tests/testthat/helper-skip.R +++ b/r/tests/testthat/helper-skip.R @@ -16,7 +16,11 @@ # under the License. skip_if_not_available <- function(feature) { - if (feature == "s3") { + if (feature == "dataset") { + skip_if_not(arrow_with_dataset()) + } else if (feature == "parquet") { + skip_if_not(arrow_with_parquet()) + } else if (feature == "s3") { skip_if_not(arrow_with_s3()) } else if (!codec_is_available(feature)) { skip(paste("Arrow C++ not built with support for", feature)) diff --git a/r/tests/testthat/latin1.R b/r/tests/testthat/latin1.R index 0339cb7d5b4..a7419c92bd4 100644 --- a/r/tests/testthat/latin1.R +++ b/r/tests/testthat/latin1.R @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -x <- iconv("Veitingastaðir", to = "latin1") +x <- iconv("Veitingasta�ir", to = "latin1") df <- tibble::tibble( chr = x, fct = as.factor(x) @@ -69,6 +69,8 @@ feather_file <- tempfile() write_feather(df_struct, feather_file) expect_identical(read_feather(feather_file), df_struct) -parquet_file <- tempfile() -write_parquet(df, parquet_file) # Parquet doesn't yet support nested types -expect_identical(read_parquet(parquet_file), df) +if (arrow_with_parquet()) { + parquet_file <- tempfile() + write_parquet(df, parquet_file) # Parquet doesn't yet support nested types + expect_identical(read_parquet(parquet_file), df) +} diff --git a/r/tests/testthat/test-backwards-compatibility.R b/r/tests/testthat/test-backwards-compatibility.R index c6bd51498cf..3cf5d91ee5f 100644 --- a/r/tests/testthat/test-backwards-compatibility.R +++ b/r/tests/testthat/test-backwards-compatibility.R @@ -46,6 +46,7 @@ expect_identical_with_metadata <- function(object, expected, ..., top_level = TR } test_that("reading a known Parquet file to dataframe with 3.0.0", { + skip_if_not_available("parquet") skip_if_not_available("snappy") pq_file <- test_path("golden-files/data-arrow-extra-meta_3.0.0.parquet") @@ -55,6 +56,7 @@ test_that("reading a known Parquet file to dataframe with 3.0.0", { }) test_that("reading a known Parquet file to dataframe with 2.0.0", { + skip_if_not_available("parquet") skip_if_not_available("snappy") pq_file <- test_path("golden-files/data-arrow_2.0.0.parquet") @@ -64,6 +66,7 @@ test_that("reading a known Parquet file to dataframe with 2.0.0", { }) test_that("reading a known Parquet file to dataframe with 1.0.1", { + skip_if_not_available("parquet") skip_if_not_available("snappy") pq_file <- test_path("golden-files/data-arrow_1.0.1.parquet") @@ -77,6 +80,7 @@ for (comp in c("lz4", "uncompressed", "zstd")) { # write_feather(example_with_metadata, test_path("golden-files/data-arrow_2.0.0_uncompressed.feather"), compression = "uncompressed") # write_feather(example_with_metadata, 
test_path("golden-files/data-arrow_2.0.0_zstd.feather"), compression = "zstd") test_that("reading a known Feather file to dataframe with 2.0.0", { + skip_if_not_available("parquet") skip_if_not_available(comp) feather_file <- test_path(paste0("golden-files/data-arrow_2.0.0_", comp,".feather")) @@ -85,6 +89,7 @@ for (comp in c("lz4", "uncompressed", "zstd")) { }) test_that("reading a known Feather file to dataframe with 1.0.1", { + skip_if_not_available("parquet") skip_if_not_available(comp) feather_file <- test_path(paste0("golden-files/data-arrow_1.0.1_", comp,".feather")) @@ -94,6 +99,7 @@ for (comp in c("lz4", "uncompressed", "zstd")) { }) test_that("reading a known Feather file to dataframe with 0.17.0", { + skip_if_not_available("parquet") skip_if_not_available(comp) feather_file <- test_path(paste0("golden-files/data-arrow_0.17.0_", comp,".feather")) diff --git a/r/tests/testthat/test-dataset.R b/r/tests/testthat/test-dataset.R index 502282c4e29..140e8721a05 100644 --- a/r/tests/testthat/test-dataset.R +++ b/r/tests/testthat/test-dataset.R @@ -15,6 +15,8 @@ # specific language governing permissions and limitations # under the License. +skip_if_not_available("dataset") + context("Dataset") library(dplyr) @@ -52,17 +54,19 @@ df2 <- tibble( ) test_that("Setup (putting data in the dir)", { - dir.create(file.path(dataset_dir, 1)) - dir.create(file.path(dataset_dir, 2)) - write_parquet(df1, file.path(dataset_dir, 1, "file1.parquet")) - write_parquet(df2, file.path(dataset_dir, 2, "file2.parquet")) - expect_length(dir(dataset_dir, recursive = TRUE), 2) - - dir.create(file.path(hive_dir, "subdir", "group=1", "other=xxx"), recursive = TRUE) - dir.create(file.path(hive_dir, "subdir", "group=2", "other=yyy"), recursive = TRUE) - write_parquet(df1, file.path(hive_dir, "subdir", "group=1", "other=xxx", "file1.parquet")) - write_parquet(df2, file.path(hive_dir, "subdir", "group=2", "other=yyy", "file2.parquet")) - expect_length(dir(hive_dir, recursive = TRUE), 2) + if (arrow_with_parquet()) { + dir.create(file.path(dataset_dir, 1)) + dir.create(file.path(dataset_dir, 2)) + write_parquet(df1, file.path(dataset_dir, 1, "file1.parquet")) + write_parquet(df2, file.path(dataset_dir, 2, "file2.parquet")) + expect_length(dir(dataset_dir, recursive = TRUE), 2) + + dir.create(file.path(hive_dir, "subdir", "group=1", "other=xxx"), recursive = TRUE) + dir.create(file.path(hive_dir, "subdir", "group=2", "other=yyy"), recursive = TRUE) + write_parquet(df1, file.path(hive_dir, "subdir", "group=1", "other=xxx", "file1.parquet")) + write_parquet(df2, file.path(hive_dir, "subdir", "group=2", "other=yyy", "file2.parquet")) + expect_length(dir(hive_dir, recursive = TRUE), 2) + } # Now, an IPC format dataset dir.create(file.path(ipc_dir, 3)) @@ -87,6 +91,7 @@ test_that("Setup (putting data in the dir)", { }) test_that("Simple interface for datasets", { + skip_if_not_available("parquet") ds <- open_dataset(dataset_dir, partitioning = schema(part = uint8())) expect_is(ds$format, "ParquetFileFormat") expect_is(ds$filesystem, "LocalFileSystem") @@ -123,12 +128,14 @@ test_that("Simple interface for datasets", { }) test_that("dim method returns the correct number of rows and columns", { + skip_if_not_available("parquet") ds <- open_dataset(dataset_dir, partitioning = schema(part = uint8())) expect_identical(dim(ds), c(20L, 7L)) }) test_that("dim() correctly determine numbers of rows and columns on arrow_dplyr_query object", { + skip_if_not_available("parquet") ds <- open_dataset(dataset_dir, partitioning = schema(part = 
uint8())) expect_warning( @@ -158,6 +165,7 @@ test_that("dim() correctly determine numbers of rows and columns on arrow_dplyr_ test_that("dataset from URI", { skip_on_os("windows") + skip_if_not_available("parquet") uri <- paste0("file://", dataset_dir) ds <- open_dataset(uri, partitioning = schema(part = uint8())) expect_is(ds, "Dataset") @@ -175,12 +183,14 @@ test_that("dataset from URI", { }) test_that("Simple interface for datasets (custom ParquetFileFormat)", { + skip_if_not_available("parquet") ds <- open_dataset(dataset_dir, partitioning = schema(part = uint8()), format = FileFormat$create("parquet", dict_columns = c("chr"))) expect_type_equal(ds$schema$GetFieldByName("chr")$type, dictionary()) }) test_that("Hive partitioning", { + skip_if_not_available("parquet") ds <- open_dataset(hive_dir, partitioning = hive_partition(other = utf8(), group = uint8())) expect_is(ds, "Dataset") expect_equivalent( @@ -195,12 +205,14 @@ test_that("Hive partitioning", { }) test_that("input validation", { + skip_if_not_available("parquet") expect_error( open_dataset(hive_dir, hive_partition(other = utf8(), group = uint8())) ) }) test_that("Partitioning inference", { + skip_if_not_available("parquet") # These are the same tests as above, just using the *PartitioningFactory ds1 <- open_dataset(dataset_dir, partitioning = "part") expect_identical(names(ds1), c(names(df1), "part")) @@ -381,6 +393,7 @@ test_that("readr parse options", { test_that("Dataset with multiple file formats", { skip("https://issues.apache.org/jira/browse/ARROW-7653") + skip_if_not_available("parquet") ds <- open_dataset(list( open_dataset(dataset_dir, format = "parquet", partitioning = "part"), open_dataset(ipc_dir, format = "arrow", partitioning = "part") @@ -399,6 +412,7 @@ test_that("Dataset with multiple file formats", { }) test_that("Creating UnionDataset", { + skip_if_not_available("parquet") ds1 <- open_dataset(file.path(dataset_dir, 1)) ds2 <- open_dataset(file.path(dataset_dir, 2)) union1 <- open_dataset(list(ds1, ds2)) @@ -451,6 +465,7 @@ test_that("InMemoryDataset", { }) test_that("map_batches", { + skip_if_not_available("parquet") ds <- open_dataset(dataset_dir, partitioning = "part") expect_equivalent( ds %>% @@ -462,11 +477,13 @@ test_that("map_batches", { }) test_that("partitioning = NULL to ignore partition information (but why?)", { + skip_if_not_available("parquet") ds <- open_dataset(hive_dir, partitioning = NULL) expect_identical(names(ds), names(df1)) # i.e. 
not c(names(df1), "group", "other") }) test_that("filter() with is.na()", { + skip_if_not_available("parquet") ds <- open_dataset(dataset_dir, partitioning = schema(part = uint8())) expect_equivalent( ds %>% @@ -478,6 +495,7 @@ test_that("filter() with is.na()", { }) test_that("filter() with is.nan()", { + skip_if_not_available("parquet") ds <- open_dataset(dataset_dir, partitioning = schema(part = uint8())) expect_equivalent( ds %>% @@ -489,6 +507,7 @@ test_that("filter() with is.nan()", { }) test_that("filter() with %in%", { + skip_if_not_available("parquet") ds <- open_dataset(dataset_dir, partitioning = schema(part = uint8())) expect_equivalent( ds %>% @@ -510,6 +529,7 @@ test_that("filter() with %in%", { }) test_that("filter() with strings", { + skip_if_not_available("parquet") ds <- open_dataset(dataset_dir, partitioning = schema(part = uint8())) expect_equivalent( ds %>% @@ -529,6 +549,7 @@ test_that("filter() with strings", { }) test_that("filter() with .data", { + skip_if_not_available("parquet") ds <- open_dataset(dataset_dir, partitioning = schema(part = uint8())) expect_equivalent( ds %>% @@ -558,6 +579,7 @@ test_that("filter() with .data", { }) test_that("filter() on timestamp columns", { + skip_if_not_available("parquet") ds <- open_dataset(dataset_dir, partitioning = schema(part = uint8())) expect_equivalent( ds %>% @@ -591,6 +613,7 @@ test_that("filter() on timestamp columns", { }) test_that("filter() on date32 columns", { + skip_if_not_available("parquet") tmp <- tempfile() dir.create(tmp) df <- data.frame(date = as.Date(c("2020-02-02", "2020-02-03"))) @@ -615,6 +638,7 @@ test_that("filter() on date32 columns", { }) test_that("filter() with expressions", { + skip_if_not_available("parquet") ds <- open_dataset(dataset_dir, partitioning = schema(part = uint8())) expect_is(ds$format, "ParquetFileFormat") expect_is(ds$filesystem, "LocalFileSystem") @@ -817,6 +841,7 @@ test_that("filter scalar validation doesn't crash (ARROW-7772)", { }) test_that("collect() on Dataset works (if fits in memory)", { + skip_if_not_available("parquet") expect_equal( collect(open_dataset(dataset_dir)), rbind(df1, df2) @@ -824,6 +849,7 @@ test_that("collect() on Dataset works (if fits in memory)", { }) test_that("count()", { + skip_if_not_available("parquet") skip("count() is not a generic so we have to get here through summarize()") ds <- open_dataset(dataset_dir) df <- rbind(df1, df2) @@ -838,6 +864,7 @@ test_that("count()", { }) test_that("head/tail", { + skip_if_not_available("parquet") ds <- open_dataset(dataset_dir) expect_equal(as.data.frame(head(ds)), head(df1)) expect_equal( @@ -867,6 +894,7 @@ test_that("head/tail", { }) test_that("Dataset [ (take by index)", { + skip_if_not_available("parquet") ds <- open_dataset(dataset_dir) # Taking only from one file expect_equal( @@ -904,6 +932,7 @@ test_that("Dataset [ (take by index)", { }) test_that("dplyr method not implemented messages", { + skip_if_not_available("parquet") ds <- open_dataset(dataset_dir) # This one is more nuanced expect_error( @@ -926,6 +955,7 @@ test_that("dplyr method not implemented messages", { }) test_that("Dataset and query print methods", { + skip_if_not_available("parquet") ds <- open_dataset(hive_dir) expect_output( print(ds), @@ -994,12 +1024,15 @@ expect_scan_result <- function(ds, schm) { ) } -files <- c( - file.path(dataset_dir, 1, "file1.parquet", fsep = "/"), - file.path(dataset_dir, 2, "file2.parquet", fsep = "/") -) +if(arrow_with_parquet) { + files <- c( + file.path(dataset_dir, 1, "file1.parquet", fsep = 
"/"), + file.path(dataset_dir, 2, "file2.parquet", fsep = "/") + ) +} test_that("Assembling a Dataset manually and getting a Table", { + skip_if_not_available("parquet") fs <- LocalFileSystem$create() selector <- FileSelector$create(dataset_dir, recursive = TRUE) partitioning <- DirectoryPartitioning$create(schema(part = double())) @@ -1027,6 +1060,7 @@ test_that("Assembling a Dataset manually and getting a Table", { }) test_that("Assembling multiple DatasetFactories with DatasetFactory", { + skip_if_not_available("parquet") factory1 <- dataset_factory(file.path(dataset_dir, 1), format = "parquet") expect_is(factory1, "FileSystemDatasetFactory") factory2 <- dataset_factory(file.path(dataset_dir, 2), format = "parquet") @@ -1082,6 +1116,7 @@ test_that("Writing a dataset: CSV->IPC", { }) test_that("Writing a dataset: Parquet->IPC", { + skip_if_not_available("parquet") skip_on_os("windows") # https://issues.apache.org/jira/browse/ARROW-9651 ds <- open_dataset(hive_dir) dst_dir <- make_temp_dir() @@ -1105,6 +1140,7 @@ test_that("Writing a dataset: Parquet->IPC", { }) test_that("Writing a dataset: CSV->Parquet", { + skip_if_not_available("parquet") skip_on_os("windows") # https://issues.apache.org/jira/browse/ARROW-9651 ds <- open_dataset(csv_dir, partitioning = "part", format = "csv") dst_dir <- make_temp_dir() @@ -1128,6 +1164,7 @@ test_that("Writing a dataset: CSV->Parquet", { }) test_that("Writing a dataset: Parquet->Parquet (default)", { + skip_if_not_available("parquet") skip_on_os("windows") # https://issues.apache.org/jira/browse/ARROW-9651 ds <- open_dataset(hive_dir) dst_dir <- make_temp_dir() @@ -1169,6 +1206,7 @@ test_that("Writing a dataset: no format specified", { }) test_that("Dataset writing: dplyr methods", { + skip_if_not_available("parquet") skip_on_os("windows") # https://issues.apache.org/jira/browse/ARROW-9651 ds <- open_dataset(hive_dir) dst_dir <- tempfile() @@ -1207,6 +1245,7 @@ test_that("Dataset writing: dplyr methods", { test_that("Dataset writing: non-hive", { skip_on_os("windows") # https://issues.apache.org/jira/browse/ARROW-9651 + skip_if_not_available("parquet") ds <- open_dataset(hive_dir) dst_dir <- tempfile() write_dataset(ds, dst_dir, format = "feather", partitioning = "int", hive_style = FALSE) @@ -1216,6 +1255,7 @@ test_that("Dataset writing: non-hive", { test_that("Dataset writing: no partitioning", { skip_on_os("windows") # https://issues.apache.org/jira/browse/ARROW-9651 + skip_if_not_available("parquet") ds <- open_dataset(hive_dir) dst_dir <- tempfile() write_dataset(ds, dst_dir, format = "feather", partitioning = NULL) @@ -1332,6 +1372,7 @@ test_that("Writing a dataset: Ipc format options & compression", { test_that("Writing a dataset: Parquet format options", { skip_on_os("windows") # https://issues.apache.org/jira/browse/ARROW-9651 + skip_if_not_available("parquet") ds <- open_dataset(csv_dir, partitioning = "part", format = "csv") dst_dir <- make_temp_dir() dst_dir_no_truncated_timestamps <- make_temp_dir() @@ -1373,6 +1414,7 @@ test_that("Writing a dataset: Parquet format options", { }) test_that("Dataset writing: unsupported features/input validation", { + skip_if_not_available("parquet") expect_error(write_dataset(4), 'dataset must be a "Dataset"') ds <- open_dataset(hive_dir) diff --git a/r/tests/testthat/test-dplyr-mutate.R b/r/tests/testthat/test-dplyr-mutate.R index 56d7e368520..a67095cc6fd 100644 --- a/r/tests/testthat/test-dplyr-mutate.R +++ b/r/tests/testthat/test-dplyr-mutate.R @@ -131,7 +131,7 @@ test_that("dplyr::mutate's examples", { 
mass2_squared = mass2 * mass2 ) %>% collect(), - starwars # this is a test dataset that ships with dplyr + starwars # this is a test tibble that ships with dplyr ) # As well as adding new variables, you can use mutate() to @@ -272,7 +272,7 @@ test_that("handle bad expressions", { ) }) -test_that("print a mutated dataset", { +test_that("print a mutated table", { expect_output( Table$create(tbl) %>% select(int) %>% @@ -300,6 +300,7 @@ See $.data for the source Arrow object', }) test_that("mutate and write_dataset", { + skip_if_not_available("dataset") # See related test in test-dataset.R skip_on_os("windows") # https://issues.apache.org/jira/browse/ARROW-9651 diff --git a/r/tests/testthat/test-expression.R b/r/tests/testthat/test-expression.R index 3df7270f4c5..d7eb6df63e3 100644 --- a/r/tests/testthat/test-expression.R +++ b/r/tests/testthat/test-expression.R @@ -46,6 +46,7 @@ test_that("array_refs", { }) test_that("C++ expressions", { + skip_if_not_available("dataset") f <- Expression$field_ref("f") expect_identical(f$field_name, "f") g <- Expression$field_ref("g") diff --git a/r/tests/testthat/test-metadata.R b/r/tests/testthat/test-metadata.R index 5abd3d298c4..4e1895e82ec 100644 --- a/r/tests/testthat/test-metadata.R +++ b/r/tests/testthat/test-metadata.R @@ -141,6 +141,7 @@ test_that("RecordBatch R metadata", { }) test_that("R metadata roundtrip via parquet", { + skip_if_not_available("parquet") tf <- tempfile() on.exit(unlink(tf)) diff --git a/r/tests/testthat/test-parquet.R b/r/tests/testthat/test-parquet.R index 3e8d8e08d0e..4ac356f004d 100644 --- a/r/tests/testthat/test-parquet.R +++ b/r/tests/testthat/test-parquet.R @@ -15,6 +15,8 @@ # specific language governing permissions and limitations # under the License. +skip_if_not_available("parquet") + context("Parquet file reading/writing") pq_file <- system.file("v0.7.1.parquet", package = "arrow") diff --git a/r/tests/testthat/test-python-flight.R b/r/tests/testthat/test-python-flight.R index dbd2ba9a8b2..94f4f5fefd3 100644 --- a/r/tests/testthat/test-python-flight.R +++ b/r/tests/testthat/test-python-flight.R @@ -15,6 +15,8 @@ # specific language governing permissions and limitations # under the License. +skip("Temporarily skip this") + # Assumes: # * We've already done arrow::install_pyarrow() # * R -e 'arrow::load_flight_server("demo_flight_server")$DemoFlightServer(port = 8089)$serve()' diff --git a/r/tests/testthat/test-python.R b/r/tests/testthat/test-python.R index 821e14a493b..978a7585092 100644 --- a/r/tests/testthat/test-python.R +++ b/r/tests/testthat/test-python.R @@ -15,6 +15,8 @@ # specific language governing permissions and limitations # under the License. +skip("Temporarily skip this") + context("To/from Python") test_that("install_pyarrow", { diff --git a/r/tests/testthat/test-s3-minio.R b/r/tests/testthat/test-s3-minio.R index 01d2d057e28..629ec886e72 100644 --- a/r/tests/testthat/test-s3-minio.R +++ b/r/tests/testthat/test-s3-minio.R @@ -69,84 +69,89 @@ if (arrow_with_s3() && process_is_running("minio server")) { }) test_that("read/write Parquet on minio", { + skip_if_not_available("parquet") write_parquet(example_data, fs$path(minio_uri("test.parquet"))) expect_identical(read_parquet(minio_uri("test.parquet")), example_data) }) - # Dataset test setup, cf. 
test-dataset.R - library(dplyr) - first_date <- lubridate::ymd_hms("2015-04-29 03:12:39") - df1 <- tibble( - int = 1:10, - dbl = as.numeric(1:10), - lgl = rep(c(TRUE, FALSE, NA, TRUE, FALSE), 2), - chr = letters[1:10], - fct = factor(LETTERS[1:10]), - ts = first_date + lubridate::days(1:10) - ) - - second_date <- lubridate::ymd_hms("2017-03-09 07:01:02") - df2 <- tibble( - int = 101:110, - dbl = as.numeric(51:60), - lgl = rep(c(TRUE, FALSE, NA, TRUE, FALSE), 2), - chr = letters[10:1], - fct = factor(LETTERS[10:1]), - ts = second_date + lubridate::days(10:1) - ) - - # This is also to set up the dataset tests - test_that("write_parquet with filesystem arg", { - fs$CreateDir(minio_path("hive_dir", "group=1", "other=xxx")) - fs$CreateDir(minio_path("hive_dir", "group=2", "other=yyy")) - expect_length(fs$ls(minio_path("hive_dir")), 2) - write_parquet(df1, fs$path(minio_path("hive_dir", "group=1", "other=xxx", "file1.parquet"))) - write_parquet(df2, fs$path(minio_path("hive_dir", "group=2", "other=yyy", "file2.parquet"))) - expect_identical( - read_parquet(fs$path(minio_path("hive_dir", "group=1", "other=xxx", "file1.parquet"))), - df1 + if (arrow_with_dataset()) { + + # Dataset test setup, cf. test-dataset.R + library(dplyr) + first_date <- lubridate::ymd_hms("2015-04-29 03:12:39") + df1 <- tibble( + int = 1:10, + dbl = as.numeric(1:10), + lgl = rep(c(TRUE, FALSE, NA, TRUE, FALSE), 2), + chr = letters[1:10], + fct = factor(LETTERS[1:10]), + ts = first_date + lubridate::days(1:10) ) - }) - test_that("open_dataset with fs", { - ds <- open_dataset(fs$path(minio_path("hive_dir"))) - expect_identical( - ds %>% select(dbl, lgl) %>% collect(), - rbind(df1[, c("dbl", "lgl")], df2[, c("dbl", "lgl")]) + second_date <- lubridate::ymd_hms("2017-03-09 07:01:02") + df2 <- tibble( + int = 101:110, + dbl = as.numeric(51:60), + lgl = rep(c(TRUE, FALSE, NA, TRUE, FALSE), 2), + chr = letters[10:1], + fct = factor(LETTERS[10:1]), + ts = second_date + lubridate::days(10:1) ) - }) - test_that("write_dataset with fs", { - ds <- open_dataset(fs$path(minio_path("hive_dir"))) - write_dataset(ds, fs$path(minio_path("new_dataset_dir"))) - expect_length(fs$ls(minio_path("new_dataset_dir")), 1) - }) - - make_temp_dir <- function() { - path <- tempfile() - dir.create(path) - normalizePath(path, winslash = "/") - } + # This is also to set up the dataset tests + test_that("write_parquet with filesystem arg", { + skip_if_not_available("parquet") + fs$CreateDir(minio_path("hive_dir", "group=1", "other=xxx")) + fs$CreateDir(minio_path("hive_dir", "group=2", "other=yyy")) + expect_length(fs$ls(minio_path("hive_dir")), 2) + write_parquet(df1, fs$path(minio_path("hive_dir", "group=1", "other=xxx", "file1.parquet"))) + write_parquet(df2, fs$path(minio_path("hive_dir", "group=2", "other=yyy", "file2.parquet"))) + expect_identical( + read_parquet(fs$path(minio_path("hive_dir", "group=1", "other=xxx", "file1.parquet"))), + df1 + ) + }) - test_that("Let's test copy_files too", { - td <- make_temp_dir() - copy_files(minio_uri("hive_dir"), td) - expect_length(dir(td), 2) - ds <- open_dataset(td) - expect_identical( - ds %>% select(dbl, lgl) %>% collect(), - rbind(df1[, c("dbl", "lgl")], df2[, c("dbl", "lgl")]) - ) + test_that("open_dataset with fs", { + ds <- open_dataset(fs$path(minio_path("hive_dir"))) + expect_identical( + ds %>% select(dbl, lgl) %>% collect(), + rbind(df1[, c("dbl", "lgl")], df2[, c("dbl", "lgl")]) + ) + }) + + test_that("write_dataset with fs", { + ds <- open_dataset(fs$path(minio_path("hive_dir"))) + 
write_dataset(ds, fs$path(minio_path("new_dataset_dir"))) + expect_length(fs$ls(minio_path("new_dataset_dir")), 1) + }) + + make_temp_dir <- function() { + path <- tempfile() + dir.create(path) + normalizePath(path, winslash = "/") + } + + test_that("Let's test copy_files too", { + td <- make_temp_dir() + copy_files(minio_uri("hive_dir"), td) + expect_length(dir(td), 2) + ds <- open_dataset(td) + expect_identical( + ds %>% select(dbl, lgl) %>% collect(), + rbind(df1[, c("dbl", "lgl")], df2[, c("dbl", "lgl")]) + ) - # Let's copy the other way and use a SubTreeFileSystem rather than URI - copy_files(td, fs$path(minio_path("hive_dir2"))) - ds2 <- open_dataset(fs$path(minio_path("hive_dir2"))) - expect_identical( - ds2 %>% select(dbl, lgl) %>% collect(), - rbind(df1[, c("dbl", "lgl")], df2[, c("dbl", "lgl")]) - ) - }) + # Let's copy the other way and use a SubTreeFileSystem rather than URI + copy_files(td, fs$path(minio_path("hive_dir2"))) + ds2 <- open_dataset(fs$path(minio_path("hive_dir2"))) + expect_identical( + ds2 %>% select(dbl, lgl) %>% collect(), + rbind(df1[, c("dbl", "lgl")], df2[, c("dbl", "lgl")]) + ) + }) + } test_that("S3FileSystem input validation", { expect_error( diff --git a/r/tests/testthat/test-s3.R b/r/tests/testthat/test-s3.R index 33c249547a6..938e0c6fdb2 100644 --- a/r/tests/testthat/test-s3.R +++ b/r/tests/testthat/test-s3.R @@ -46,6 +46,7 @@ if (run_these) { }) test_that("read/write Parquet on S3", { + skip_if_not_available("parquet") write_parquet(example_data, bucket_uri(now, "test.parquet")) expect_identical(read_parquet(bucket_uri(now, "test.parquet")), example_data) }) From 837f936d914f8cf70a054c71276d1546a9a3833f Mon Sep 17 00:00:00 2001 From: Ian Cook Date: Mon, 1 Mar 2021 18:49:42 -0500 Subject: [PATCH 03/28] Add missing parens --- r/tests/testthat/test-dataset.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/tests/testthat/test-dataset.R b/r/tests/testthat/test-dataset.R index 140e8721a05..37efaf9d853 100644 --- a/r/tests/testthat/test-dataset.R +++ b/r/tests/testthat/test-dataset.R @@ -1024,7 +1024,7 @@ expect_scan_result <- function(ds, schm) { ) } -if(arrow_with_parquet) { +if(arrow_with_parquet()) { files <- c( file.path(dataset_dir, 1, "file1.parquet", fsep = "/"), file.path(dataset_dir, 2, "file2.parquet", fsep = "/") From efe258be9fbb7e90628c2dce7712bca176fbcd33 Mon Sep 17 00:00:00 2001 From: Ian Cook Date: Mon, 1 Mar 2021 19:20:18 -0500 Subject: [PATCH 04/28] Fix simple mistakes --- r/tests/testthat/latin1.R | 2 +- r/tests/testthat/test-python-flight.R | 2 -- r/tests/testthat/test-python.R | 2 -- 3 files changed, 1 insertion(+), 5 deletions(-) diff --git a/r/tests/testthat/latin1.R b/r/tests/testthat/latin1.R index a7419c92bd4..6c0be621cdb 100644 --- a/r/tests/testthat/latin1.R +++ b/r/tests/testthat/latin1.R @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -x <- iconv("Veitingasta�ir", to = "latin1") +x <- iconv("Veitingastaðir", to = "latin1") df <- tibble::tibble( chr = x, fct = as.factor(x) diff --git a/r/tests/testthat/test-python-flight.R b/r/tests/testthat/test-python-flight.R index 94f4f5fefd3..dbd2ba9a8b2 100644 --- a/r/tests/testthat/test-python-flight.R +++ b/r/tests/testthat/test-python-flight.R @@ -15,8 +15,6 @@ # specific language governing permissions and limitations # under the License. 
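# A minimal sketch (an illustration, not a test from this suite) of how the
# helpers added to helper-skip.R compose inside a test file; the test name
# "parquet round trip" and its body are hypothetical:
#
#   test_that("parquet round trip", {
#     skip_if_not_available("parquet")  # no-op when parquet support is compiled in
#     tf <- tempfile(fileext = ".parquet")
#     write_parquet(data.frame(x = 1:3), tf)
#     expect_identical(read_parquet(tf)$x, 1:3)
#   })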
-skip("Temporarily skip this") - # Assumes: # * We've already done arrow::install_pyarrow() # * R -e 'arrow::load_flight_server("demo_flight_server")$DemoFlightServer(port = 8089)$serve()' diff --git a/r/tests/testthat/test-python.R b/r/tests/testthat/test-python.R index 978a7585092..821e14a493b 100644 --- a/r/tests/testthat/test-python.R +++ b/r/tests/testthat/test-python.R @@ -15,8 +15,6 @@ # specific language governing permissions and limitations # under the License. -skip("Temporarily skip this") - context("To/from Python") test_that("install_pyarrow", { From 38871a7012fc5b0b03bace521175164351a1b34f Mon Sep 17 00:00:00 2001 From: Ian Cook Date: Tue, 2 Mar 2021 09:24:09 -0500 Subject: [PATCH 05/28] Revert changes to latin1.R --- r/tests/testthat/latin1.R | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/r/tests/testthat/latin1.R b/r/tests/testthat/latin1.R index 6c0be621cdb..0339cb7d5b4 100644 --- a/r/tests/testthat/latin1.R +++ b/r/tests/testthat/latin1.R @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -x <- iconv("Veitingastaðir", to = "latin1") +x <- iconv("Veitingastaðir", to = "latin1") df <- tibble::tibble( chr = x, fct = as.factor(x) @@ -69,8 +69,6 @@ feather_file <- tempfile() write_feather(df_struct, feather_file) expect_identical(read_feather(feather_file), df_struct) -if (arrow_with_parquet()) { - parquet_file <- tempfile() - write_parquet(df, parquet_file) # Parquet doesn't yet support nested types - expect_identical(read_parquet(parquet_file), df) -} +parquet_file <- tempfile() +write_parquet(df, parquet_file) # Parquet doesn't yet support nested types +expect_identical(read_parquet(parquet_file), df) From ff9ea2782d880826058dd9658523f4996ca9ac01 Mon Sep 17 00:00:00 2001 From: Ian Cook Date: Tue, 2 Mar 2021 09:29:25 -0500 Subject: [PATCH 06/28] Redo latin1.R with ISO-8859-1 encoding --- r/tests/testthat/latin1.R | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/r/tests/testthat/latin1.R b/r/tests/testthat/latin1.R index 0339cb7d5b4..150192d3147 100644 --- a/r/tests/testthat/latin1.R +++ b/r/tests/testthat/latin1.R @@ -69,6 +69,8 @@ feather_file <- tempfile() write_feather(df_struct, feather_file) expect_identical(read_feather(feather_file), df_struct) -parquet_file <- tempfile() -write_parquet(df, parquet_file) # Parquet doesn't yet support nested types -expect_identical(read_parquet(parquet_file), df) +if (arrow_with_parquet()) { + parquet_file <- tempfile() + write_parquet(df, parquet_file) # Parquet doesn't yet support nested types + expect_identical(read_parquet(parquet_file), df) +} From 1b1757bb4bbd4d034cf4150e13a02f3ed012c4d8 Mon Sep 17 00:00:00 2001 From: Ian Cook Date: Wed, 3 Mar 2021 11:22:51 -0500 Subject: [PATCH 07/28] Iterate over features in arrow_exports_cpp --- r/data-raw/codegen.R | 34 ++++++++++++++++------------------ 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/r/data-raw/codegen.R b/r/data-raw/codegen.R index 528756fc398..b9781c8fe6f 100644 --- a/r/data-raw/codegen.R +++ b/r/data-raw/codegen.R @@ -187,33 +187,31 @@ cpp_file_header <- '// Generated by using data-raw/codegen.R -> do not edit by h #include "./arrow_types.h" ' -arrow_exports_cpp <- glue::glue(' +arrow_exports_cpp <- paste0( +glue::glue(' {cpp_file_header} {cpp_functions_definitions} {cpp_classes_finalizers} - -{feature_available("arrow")} - -{feature_available("dataset")} - -{feature_available("parquet")} - -{feature_available("s3")} - -static const R_CallMethodDef 
CallEntries[] = {{ -\t\t{{ "_arrow_available", (DL_FUNC)& _arrow_available, 0 }}, -\t\t{{ "_dataset_available", (DL_FUNC)& _dataset_available, 0 }}, -\t\t{{ "_parquet_available", (DL_FUNC)& _parquet_available, 0 }}, -\t\t{{ "_s3_available", (DL_FUNC)& _s3_available, 0 }}, +\n'), +glue::glue_collapse(glue::glue(' +{feature_available({features})} +'), sep = '\n'), +' +static const R_CallMethodDef CallEntries[] = { +', +glue::glue_collapse(glue::glue(' +\t\t{{ "_{features}_available", (DL_FUNC)& _{features}_available, 0 }}, +'), sep = '\n'), +glue::glue('\n {cpp_functions_registration} {classes_finalizers_registration} \t\t{{NULL, NULL, 0}} }}; - -extern "C" void R_init_arrow(DllInfo* dll){{ +\n'), +'extern "C" void R_init_arrow(DllInfo* dll){ R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); R_useDynamicSymbols(dll, FALSE); -}} +} \n') write_if_modified(arrow_exports_cpp, "src/arrowExports.cpp") From e6e566244115fa303fc6b5495715732453596a30 Mon Sep 17 00:00:00 2001 From: Ian Cook Date: Wed, 3 Mar 2021 15:22:42 -0500 Subject: [PATCH 08/28] Fix order in which -l flags are prepended --- r/configure | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/r/configure b/r/configure index 68f2b7fcb93..5b8f1331718 100755 --- a/r/configure +++ b/r/configure @@ -189,13 +189,6 @@ if [ $? -eq 0 ] || [ "$UNAME" = "Darwin" ]; then # Check for features LIB_DIR=`echo $PKG_DIRS | sed -e 's/^-L//'` ARROW_OPTS_CMAKE="$LIB_DIR/cmake/arrow/ArrowOptions.cmake" - # Check for Arrow Dataset subcomponent - grep 'set(ARROW_DATASET "ON")' $ARROW_OPTS_CMAKE >/dev/null 2>&1 - if [ $? -eq 0 ]; then - PKG_CFLAGS="$PKG_CFLAGS -DARROW_R_WITH_DATASET" - PKG_LIBS="-larrow_dataset $PKG_LIBS" - # TODO: what if arrow-dataset has a different -L location than arrow? - fi # Check for Parquet grep 'set(ARROW_PARQUET "ON")' $ARROW_OPTS_CMAKE >/dev/null 2>&1 if [ $? -eq 0 ]; then @@ -203,6 +196,13 @@ if [ $? -eq 0 ] || [ "$UNAME" = "Darwin" ]; then PKG_LIBS="-lparquet $PKG_LIBS" # TODO: what if parquet has a different -L location than arrow? fi + # Check for Arrow Dataset subcomponent + grep 'set(ARROW_DATASET "ON")' $ARROW_OPTS_CMAKE >/dev/null 2>&1 + if [ $? -eq 0 ]; then + PKG_CFLAGS="$PKG_CFLAGS -DARROW_R_WITH_DATASET" + PKG_LIBS="-larrow_dataset $PKG_LIBS" + # TODO: what if arrow-dataset has a different -L location than arrow? + fi # Check for S3 grep 'set(ARROW_S3 "ON")' $ARROW_OPTS_CMAKE >/dev/null 2>&1 if [ $? -eq 0 ]; then From 3075ed4937ca515a7c311b41590f6e1240bdbcc8 Mon Sep 17 00:00:00 2001 From: Ian Cook Date: Wed, 3 Mar 2021 15:30:31 -0500 Subject: [PATCH 09/28] Note assumption that arrow, parquet, arrow-dataset have same -L flag --- r/configure | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/r/configure b/r/configure index 5b8f1331718..c8ecb6efe32 100755 --- a/r/configure +++ b/r/configure @@ -194,14 +194,16 @@ if [ $? -eq 0 ] || [ "$UNAME" = "Darwin" ]; then if [ $? -eq 0 ]; then PKG_CFLAGS="$PKG_CFLAGS -DARROW_R_WITH_PARQUET" PKG_LIBS="-lparquet $PKG_LIBS" - # TODO: what if parquet has a different -L location than arrow? + # NOTE: parquet is assumed to have the same -L flag as arrow + # so there is no need to add its location to PKG_DIRS fi # Check for Arrow Dataset subcomponent grep 'set(ARROW_DATASET "ON")' $ARROW_OPTS_CMAKE >/dev/null 2>&1 if [ $? -eq 0 ]; then PKG_CFLAGS="$PKG_CFLAGS -DARROW_R_WITH_DATASET" PKG_LIBS="-larrow_dataset $PKG_LIBS" - # TODO: what if arrow-dataset has a different -L location than arrow? 
+ # NOTE: arrow-dataset is assumed to have the same -L flag as arrow + # so there is no need to add its location to PKG_DIRS fi # Check for S3 grep 'set(ARROW_S3 "ON")' $ARROW_OPTS_CMAKE >/dev/null 2>&1 From 8b18f6e87122326461b8b02d90ba8e7de97e0054 Mon Sep 17 00:00:00 2001 From: Ian Cook Date: Wed, 3 Mar 2021 15:37:15 -0500 Subject: [PATCH 10/28] Remove set empty PKG_DIRS at top --- r/configure | 1 - 1 file changed, 1 deletion(-) diff --git a/r/configure b/r/configure index c8ecb6efe32..98e2df015f2 100755 --- a/r/configure +++ b/r/configure @@ -32,7 +32,6 @@ PKG_RPM_NAME="(unsuppored)" PKG_BREW_NAME="apache-arrow" PKG_TEST_HEADER="" PKG_LIBS="-larrow" -PKG_DIRS="" BUNDLED_LIBS="" # Make some env vars case-insensitive From b824af6067980acade3a04b2afca4eec56075acc Mon Sep 17 00:00:00 2001 From: Ian Cook Date: Wed, 3 Mar 2021 15:38:07 -0500 Subject: [PATCH 11/28] Do not append empty PKG_DIRS Co-authored-by: Neal Richardson --- r/configure | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/configure b/r/configure index 98e2df015f2..51fba728fe8 100755 --- a/r/configure +++ b/r/configure @@ -69,7 +69,7 @@ fi if [ "$INCLUDE_DIR" ] || [ "$LIB_DIR" ]; then echo "*** Using INCLUDE_DIR/LIB_DIR" PKG_CFLAGS="-I$INCLUDE_DIR $PKG_CFLAGS" - PKG_DIRS="-L$LIB_DIR $PKG_DIRS" + PKG_DIRS="-L$LIB_DIR" else # Use pkg-config if available and allowed pkg-config --version >/dev/null 2>&1 From bb132135069d2f741ec99d37f6d8afc43563c851 Mon Sep 17 00:00:00 2001 From: Ian Cook Date: Wed, 3 Mar 2021 16:11:51 -0500 Subject: [PATCH 12/28] Do not append empty PKG_DIRS in static libs case --- r/configure | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/r/configure b/r/configure index 51fba728fe8..4758d7cc675 100755 --- a/r/configure +++ b/r/configure @@ -139,8 +139,8 @@ else LIB_DIR="libarrow/arrow-${VERSION}/lib" if [ -d "$LIB_DIR" ]; then - # Enumerate the static libs, add them to BUNDLED_LIBS, - # and prepend the location to PKG_DIRS + # Enumerate the static libs, put their -l flags in BUNDLED_LIBS, + # and put their -L location in PKG_DIRS # # If tools/linuxlibs.R fails to produce libs, this dir won't exist # so don't try (the error message from `ls` would be misleading) @@ -149,7 +149,7 @@ else # TODO: what about non-bundled deps? BUNDLED_LIBS=`cd $LIB_DIR && ls *.a` BUNDLED_LIBS=`echo $BUNDLED_LIBS | sed -E "s/lib(.*)\.a/-l\1/" | sed -e "s/\\.a lib/ -l/g"` - PKG_DIRS="-L$(pwd)/$LIB_DIR $PKG_DIRS" + PKG_DIRS="-L$(pwd)/$LIB_DIR" fi fi fi From eb312446e2bcf6c1f223bb290223aedda4753832 Mon Sep 17 00:00:00 2001 From: Ian Cook Date: Thu, 4 Mar 2021 09:52:55 -0500 Subject: [PATCH 13/28] Remove conditional around hive_partition() example and use \dontrun --- r/R/dataset-partition.R | 6 ++---- r/man/hive_partition.Rd | 6 ++---- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/r/R/dataset-partition.R b/r/R/dataset-partition.R index 2d00ac0b815..e40427a9f18 100644 --- a/r/R/dataset-partition.R +++ b/r/R/dataset-partition.R @@ -89,10 +89,8 @@ HivePartitioning$create <- function(schm, null_fallback = NULL) { #' @return A [HivePartitioning][Partitioning], or a `HivePartitioningFactory` if #' calling `hive_partition()` with no arguments. 
#' @examples -#' \donttest{ -#' if (arrow_with_dataset()) { -#' hive_partition(year = int16(), month = int8()) -#' } +#' \dontrun{ +#' hive_partition(year = int16(), month = int8()) #' } #' @export hive_partition <- function(..., null_fallback = NULL) { diff --git a/r/man/hive_partition.Rd b/r/man/hive_partition.Rd index 057447c3307..ab427f746b3 100644 --- a/r/man/hive_partition.Rd +++ b/r/man/hive_partition.Rd @@ -26,9 +26,7 @@ Because fields are named in the path segments, order of fields passed to \code{hive_partition()} does not matter. } \examples{ -\donttest{ -if (arrow_with_dataset()) { - hive_partition(year = int16(), month = int8()) -} +\dontrun{ +hive_partition(year = int16(), month = int8()) } } From f3f207e9acf1930559e7158ab64e2d66e0946d17 Mon Sep 17 00:00:00 2001 From: Ian Cook Date: Thu, 4 Mar 2021 10:00:46 -0500 Subject: [PATCH 14/28] Remove set empty BUNDLED_LIBS at top --- r/configure | 1 - 1 file changed, 1 deletion(-) diff --git a/r/configure b/r/configure index 4758d7cc675..e8eef21dea7 100755 --- a/r/configure +++ b/r/configure @@ -32,7 +32,6 @@ PKG_RPM_NAME="(unsuppored)" PKG_BREW_NAME="apache-arrow" PKG_TEST_HEADER="" PKG_LIBS="-larrow" -BUNDLED_LIBS="" # Make some env vars case-insensitive ARROW_R_DEV=`echo $ARROW_R_DEV | tr '[:upper:]' '[:lower:]'` From 21ce0a5c94fb529bdd53d2bd820944018650a3fc Mon Sep 17 00:00:00 2001 From: Ian Cook Date: Thu, 4 Mar 2021 10:16:35 -0500 Subject: [PATCH 15/28] Respect ARROW_DATASET and ARROW_PARQUET env vars --- r/inst/build_arrow_static.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/r/inst/build_arrow_static.sh b/r/inst/build_arrow_static.sh index 61dd5930de0..00054911492 100755 --- a/r/inst/build_arrow_static.sh +++ b/r/inst/build_arrow_static.sh @@ -50,13 +50,13 @@ ${CMAKE} -DARROW_BOOST_USE_SHARED=OFF \ -DARROW_BUILD_STATIC=ON \ -DARROW_COMPUTE=ON \ -DARROW_CSV=ON \ - -DARROW_DATASET=ON \ + -DARROW_DATASET=${ARROW_DATASET:-ON} \ -DARROW_DEPENDENCY_SOURCE=BUNDLED \ -DARROW_FILESYSTEM=ON \ -DARROW_JEMALLOC=${ARROW_JEMALLOC:-ON} \ -DARROW_MIMALLOC=${ARROW_MIMALLOC:-$ARROW_DEFAULT_PARAM} \ -DARROW_JSON=ON \ - -DARROW_PARQUET=ON \ + -DARROW_PARQUET=${ARROW_PARQUET:-ON} \ -DARROW_S3=${ARROW_S3:-$ARROW_DEFAULT_PARAM} \ -DARROW_WITH_BROTLI=${ARROW_WITH_BROTLI:-$ARROW_DEFAULT_PARAM} \ -DARROW_WITH_BZ2=${ARROW_WITH_BZ2:-$ARROW_DEFAULT_PARAM} \ From 0d92497de194df5a5d393cf73b59feca6e4c13de Mon Sep 17 00:00:00 2001 From: Ian Cook Date: Thu, 4 Mar 2021 14:31:47 -0500 Subject: [PATCH 16/28] Remove arrow_with_parquet() condition around examples and use \dontrun --- r/R/parquet.R | 50 +++++++++++++++++--------------------- r/man/ParquetFileReader.Rd | 18 ++++++-------- r/man/read_parquet.Rd | 14 +++++------ r/man/write_parquet.Rd | 18 ++++++-------- 4 files changed, 44 insertions(+), 56 deletions(-) diff --git a/r/R/parquet.R b/r/R/parquet.R index 237aebb880f..45751b16170 100644 --- a/r/R/parquet.R +++ b/r/R/parquet.R @@ -27,14 +27,12 @@ #' @return A [arrow::Table][Table], or a `data.frame` if `as_data_frame` is #' `TRUE` (the default). 
#' @examples -#' \donttest{ -#' if (arrow_with_parquet()) { -#' tf <- tempfile() -#' on.exit(unlink(tf)) -#' write_parquet(mtcars, tf) -#' df <- read_parquet(tf, col_select = starts_with("d")) -#' head(df) -#' } +#' \dontrun{ +#' tf <- tempfile() +#' on.exit(unlink(tf)) +#' write_parquet(mtcars, tf) +#' df <- read_parquet(tf, col_select = starts_with("d")) +#' head(df) #' } #' @export read_parquet <- function(file, @@ -124,16 +122,14 @@ read_parquet <- function(file, #' @return the input `x` invisibly. #' #' @examples -#' \donttest{ -#' if (arrow_with_parquet()) { -#' tf1 <- tempfile(fileext = ".parquet") -#' write_parquet(data.frame(x = 1:5), tf1) -#' -#' # using compression -#' if (codec_is_available("gzip")) { -#' tf2 <- tempfile(fileext = ".gz.parquet") -#' write_parquet(data.frame(x = 1:5), tf2, compression = "gzip", compression_level = 5) -#' } +#' \dontrun{ +#' tf1 <- tempfile(fileext = ".parquet") +#' write_parquet(data.frame(x = 1:5), tf1) +#' +#' # using compression +#' if (codec_is_available("gzip")) { +#' tf2 <- tempfile(fileext = ".gz.parquet") +#' write_parquet(data.frame(x = 1:5), tf2, compression = "gzip", compression_level = 5) #' } #' } #' @export @@ -453,16 +449,14 @@ ParquetFileWriter$create <- function(schema, #' #' @export #' @examples -#' \donttest{ -#' if (arrow_with_parquet()) { -#' f <- system.file("v0.7.1.parquet", package="arrow") -#' pq <- ParquetFileReader$create(f) -#' pq$GetSchema() -#' if (codec_is_available("snappy")) { -#' # This file has compressed data columns -#' tab <- pq$ReadTable() -#' tab$schema -#' } +#' \dontrun{ +#' f <- system.file("v0.7.1.parquet", package="arrow") +#' pq <- ParquetFileReader$create(f) +#' pq$GetSchema() +#' if (codec_is_available("snappy")) { +#' # This file has compressed data columns +#' tab <- pq$ReadTable() +#' tab$schema #' } #' } #' @include arrow-package.R diff --git a/r/man/ParquetFileReader.Rd b/r/man/ParquetFileReader.Rd index 24918263395..9885802011d 100644 --- a/r/man/ParquetFileReader.Rd +++ b/r/man/ParquetFileReader.Rd @@ -45,16 +45,14 @@ The optional \verb{column_indices=} argument is a 0-based integer vector indicat } \examples{ -\donttest{ -if (arrow_with_parquet()) { - f <- system.file("v0.7.1.parquet", package="arrow") - pq <- ParquetFileReader$create(f) - pq$GetSchema() - if (codec_is_available("snappy")) { - # This file has compressed data columns - tab <- pq$ReadTable() - tab$schema - } +\dontrun{ +f <- system.file("v0.7.1.parquet", package="arrow") +pq <- ParquetFileReader$create(f) +pq$GetSchema() +if (codec_is_available("snappy")) { + # This file has compressed data columns + tab <- pq$ReadTable() + tab$schema } } } diff --git a/r/man/read_parquet.Rd b/r/man/read_parquet.Rd index 5558d811a31..5d6e2e2d5b3 100644 --- a/r/man/read_parquet.Rd +++ b/r/man/read_parquet.Rd @@ -40,13 +40,11 @@ A \link[=Table]{arrow::Table}, or a \code{data.frame} if \code{as_data_frame} is This function enables you to read Parquet files into R. } \examples{ -\donttest{ -if (arrow_with_parquet()) { - tf <- tempfile() - on.exit(unlink(tf)) - write_parquet(mtcars, tf) - df <- read_parquet(tf, col_select = starts_with("d")) - head(df) -} +\dontrun{ +tf <- tempfile() +on.exit(unlink(tf)) +write_parquet(mtcars, tf) +df <- read_parquet(tf, col_select = starts_with("d")) +head(df) } } diff --git a/r/man/write_parquet.Rd b/r/man/write_parquet.Rd index 63f3ab126bd..c89c709dfb0 100644 --- a/r/man/write_parquet.Rd +++ b/r/man/write_parquet.Rd @@ -95,16 +95,14 @@ disable compression, set \code{compression = "uncompressed"}. 
Note that "uncompressed" columns may still have dictionary encoding. } \examples{ -\donttest{ -if (arrow_with_parquet()) { - tf1 <- tempfile(fileext = ".parquet") - write_parquet(data.frame(x = 1:5), tf1) - - # using compression - if (codec_is_available("gzip")) { - tf2 <- tempfile(fileext = ".gz.parquet") - write_parquet(data.frame(x = 1:5), tf2, compression = "gzip", compression_level = 5) - } +\dontrun{ +tf1 <- tempfile(fileext = ".parquet") +write_parquet(data.frame(x = 1:5), tf1) + +# using compression +if (codec_is_available("gzip")) { + tf2 <- tempfile(fileext = ".gz.parquet") + write_parquet(data.frame(x = 1:5), tf2, compression = "gzip", compression_level = 5) } } } From 1de7958988942e63382039748e5d5e0436284653 Mon Sep 17 00:00:00 2001 From: Ian Cook Date: Thu, 4 Mar 2021 15:37:19 -0500 Subject: [PATCH 17/28] Add nightly test of minimal build --- dev/tasks/r/azure.linux.yml | 3 +++ dev/tasks/tasks.yml | 12 ++++++++++++ 2 files changed, 15 insertions(+) diff --git a/dev/tasks/r/azure.linux.yml b/dev/tasks/r/azure.linux.yml index 7ffe8c581cc..11ce81e6d75 100644 --- a/dev/tasks/r/azure.linux.yml +++ b/dev/tasks/r/azure.linux.yml @@ -47,6 +47,9 @@ jobs: export R_ORG={{ r_org }} export R_IMAGE={{ r_image }} export R_TAG={{ r_tag }} + export ARROW_DATASET={{ arrow_dataset|default("ON") }} + export ARROW_PARQUET={{ arrow_parquet|default("ON") }} + export ARROW_S3={{ arrow_s3|default("ON") }} # we have to export this (right?) because we need it in the build env export ARROW_R_DEV={{ not_cran }} # Note that ci/scripts/r_test.sh sets NOT_CRAN=true if ARROW_R_DEV=TRUE diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml index 6c3df7c724f..989c280fb7e 100644 --- a/dev/tasks/tasks.yml +++ b/dev/tasks/tasks.yml @@ -1789,6 +1789,18 @@ tasks: r_image: r-base r_tag: 3.6-opensuse42 not_cran: "TRUE" + + test-r-minimal-build: + ci: azure + template: r/azure.linux.yml + params: + r_org: rocker + r_image: r-base + r_tag: latest + not_cran: "TRUE" + arrow_dataset: "OFF" + arrow_parquet: "OFF" + arrow_s3: "OFF" test-ubuntu-18.04-r-sanitizer: ci: azure From 411cb70a1ab82c231877b01bd266b0e2983d0d5b Mon Sep 17 00:00:00 2001 From: Ian Cook Date: Thu, 4 Mar 2021 18:46:27 -0500 Subject: [PATCH 18/28] Add ARROW_ env vars to .env --- dev/tasks/r/azure.linux.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/dev/tasks/r/azure.linux.yml b/dev/tasks/r/azure.linux.yml index 11ce81e6d75..eea99e17d7e 100644 --- a/dev/tasks/r/azure.linux.yml +++ b/dev/tasks/r/azure.linux.yml @@ -53,6 +53,7 @@ jobs: # we have to export this (right?) because we need it in the build env export ARROW_R_DEV={{ not_cran }} # Note that ci/scripts/r_test.sh sets NOT_CRAN=true if ARROW_R_DEV=TRUE + printenv | grep '^ARROW_' >.env docker-compose run r displayName: Docker run From 1de8e1ef5c59b97030d4a204e84e3af59688cd43 Mon Sep 17 00:00:00 2001 From: Ian Cook Date: Thu, 4 Mar 2021 18:48:40 -0500 Subject: [PATCH 19/28] Append don't overwrite .env --- dev/tasks/r/azure.linux.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/tasks/r/azure.linux.yml b/dev/tasks/r/azure.linux.yml index eea99e17d7e..b76a61804f1 100644 --- a/dev/tasks/r/azure.linux.yml +++ b/dev/tasks/r/azure.linux.yml @@ -53,7 +53,7 @@ jobs: # we have to export this (right?) 
because we need it in the build env export ARROW_R_DEV={{ not_cran }} # Note that ci/scripts/r_test.sh sets NOT_CRAN=true if ARROW_R_DEV=TRUE - printenv | grep '^ARROW_' >.env + printenv | grep '^ARROW_' >>.env docker-compose run r displayName: Docker run From 726295bd04b3103f2795d449ee8c2862181c6eda Mon Sep 17 00:00:00 2001 From: Ian Cook Date: Thu, 4 Mar 2021 21:16:16 -0500 Subject: [PATCH 20/28] Fix pipes and use a temporary .env file --- dev/tasks/r/azure.linux.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/dev/tasks/r/azure.linux.yml b/dev/tasks/r/azure.linux.yml index b76a61804f1..434ba9abd2e 100644 --- a/dev/tasks/r/azure.linux.yml +++ b/dev/tasks/r/azure.linux.yml @@ -53,8 +53,10 @@ jobs: # we have to export this (right?) because we need it in the build env export ARROW_R_DEV={{ not_cran }} # Note that ci/scripts/r_test.sh sets NOT_CRAN=true if ARROW_R_DEV=TRUE - printenv | grep '^ARROW_' >>.env - docker-compose run r + cp .env temp.env + $(printenv | grep '^ARROW_' >>temp.env) + docker-compose --env-file temp.env run r + rm temp.env displayName: Docker run - script: | From 6262778e831e3cf43b22c958fb668562cb11a897 Mon Sep 17 00:00:00 2001 From: Ian Cook Date: Thu, 4 Mar 2021 21:38:14 -0500 Subject: [PATCH 21/28] Look ma no pipes --- dev/tasks/r/azure.linux.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/tasks/r/azure.linux.yml b/dev/tasks/r/azure.linux.yml index 434ba9abd2e..9558ba44165 100644 --- a/dev/tasks/r/azure.linux.yml +++ b/dev/tasks/r/azure.linux.yml @@ -54,7 +54,7 @@ jobs: export ARROW_R_DEV={{ not_cran }} # Note that ci/scripts/r_test.sh sets NOT_CRAN=true if ARROW_R_DEV=TRUE cp .env temp.env - $(printenv | grep '^ARROW_' >>temp.env) + grep '^ARROW_' <(printenv) >>temp.env docker-compose --env-file temp.env run r rm temp.env displayName: Docker run From 2bb70e8aceb0e53acb3c120c52b74069ec029f90 Mon Sep 17 00:00:00 2001 From: Ian Cook Date: Thu, 4 Mar 2021 22:04:28 -0500 Subject: [PATCH 22/28] Use -e to pass vars into container --- dev/tasks/r/azure.linux.yml | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/dev/tasks/r/azure.linux.yml b/dev/tasks/r/azure.linux.yml index 9558ba44165..09d7773f458 100644 --- a/dev/tasks/r/azure.linux.yml +++ b/dev/tasks/r/azure.linux.yml @@ -47,16 +47,14 @@ jobs: export R_ORG={{ r_org }} export R_IMAGE={{ r_image }} export R_TAG={{ r_tag }} - export ARROW_DATASET={{ arrow_dataset|default("ON") }} - export ARROW_PARQUET={{ arrow_parquet|default("ON") }} - export ARROW_S3={{ arrow_s3|default("ON") }} # we have to export this (right?) 
because we need it in the build env export ARROW_R_DEV={{ not_cran }} # Note that ci/scripts/r_test.sh sets NOT_CRAN=true if ARROW_R_DEV=TRUE - cp .env temp.env - grep '^ARROW_' <(printenv) >>temp.env - docker-compose --env-file temp.env run r - rm temp.env + docker-compose run \ + -e ARROW_DATASET={{ arrow_dataset|default("ON") }} \ + -e ARROW_PARQUET={{ arrow_parquet|default("ON") }} \ + -e ARROW_S3={{ arrow_s3|default("ON") }} \ + r displayName: Docker run - script: | From 815d45a8510cef19dd64c3cf85db1999af31db64 Mon Sep 17 00:00:00 2001 From: Ian Cook Date: Thu, 4 Mar 2021 23:23:47 -0500 Subject: [PATCH 23/28] Name the job something without "minimal" --- dev/tasks/tasks.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml index 989c280fb7e..46718766eb3 100644 --- a/dev/tasks/tasks.yml +++ b/dev/tasks/tasks.yml @@ -1790,7 +1790,7 @@ tasks: r_tag: 3.6-opensuse42 not_cran: "TRUE" - test-r-minimal-build: + test-r-without-dataset-parquet-s3: ci: azure template: r/azure.linux.yml params: From f81acbea1fd5787137ad703f5d7c438b016f1141 Mon Sep 17 00:00:00 2001 From: Ian Cook Date: Fri, 5 Mar 2021 15:52:34 -0500 Subject: [PATCH 24/28] Rename test to "minimal" and set LIBARROW_MINIMAL=true --- dev/tasks/r/azure.linux.yml | 1 + dev/tasks/tasks.yml | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/dev/tasks/r/azure.linux.yml b/dev/tasks/r/azure.linux.yml index 09d7773f458..1e357b0d869 100644 --- a/dev/tasks/r/azure.linux.yml +++ b/dev/tasks/r/azure.linux.yml @@ -54,6 +54,7 @@ jobs: -e ARROW_DATASET={{ arrow_dataset|default("ON") }} \ -e ARROW_PARQUET={{ arrow_parquet|default("ON") }} \ -e ARROW_S3={{ arrow_s3|default("ON") }} \ + -e LIBARROW_MINIMAL={{ libarrow_minimal|default("") }} \ r displayName: Docker run diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml index 46718766eb3..970f730c334 100644 --- a/dev/tasks/tasks.yml +++ b/dev/tasks/tasks.yml @@ -1790,7 +1790,7 @@ tasks: r_tag: 3.6-opensuse42 not_cran: "TRUE" - test-r-without-dataset-parquet-s3: + test-r-minimal-build: ci: azure template: r/azure.linux.yml params: @@ -1801,6 +1801,7 @@ tasks: arrow_dataset: "OFF" arrow_parquet: "OFF" arrow_s3: "OFF" + libarrow_minimal: "TRUE" test-ubuntu-18.04-r-sanitizer: ci: azure From cbc7377be6a35cd2549835208787865280fca0fe Mon Sep 17 00:00:00 2001 From: Ian Cook Date: Fri, 5 Mar 2021 16:10:10 -0500 Subject: [PATCH 25/28] Use ARROW_R_WITH_PARQUET and ARROW_R_WITH_DATASET on Windows --- r/configure.win | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/configure.win b/r/configure.win index 80529e702ac..88ac0e125e1 100644 --- a/r/configure.win +++ b/r/configure.win @@ -49,7 +49,7 @@ AWS_LIBS="-laws-cpp-sdk-config -laws-cpp-sdk-transfer -laws-cpp-sdk-identity-man # NOTE: If you make changes to the libraries below, you should also change # ci/scripts/r_windows_build.sh and ci/scripts/PKGBUILD -PKG_CFLAGS="-I${RWINLIB}/include -DARROW_STATIC -DPARQUET_STATIC -DARROW_DS_STATIC -DARROW_R_WITH_ARROW" +PKG_CFLAGS="-I${RWINLIB}/include -DARROW_STATIC -DPARQUET_STATIC -DARROW_DS_STATIC -DARROW_R_WITH_ARROW -DARROW_R_WITH_PARQUET -DARROW_R_WITH_DATASET" PKG_LIBS="-L${RWINLIB}/lib"'$(subst gcc,,$(COMPILED_BY))$(R_ARCH) '"-L${RWINLIB}/lib"'$(R_ARCH) '"-lparquet -larrow_dataset -larrow -larrow_bundled_dependencies -lutf8proc -lre2 -lthrift -lsnappy -lz -lzstd -llz4 ${MIMALLOC_LIBS} ${OPENSSL_LIBS}" # S3 support only for Rtools40 (i.e. 
R >= 4.0) From b5ed1c0dace3dfa93b17d8545fd8a93a96dc4153 Mon Sep 17 00:00:00 2001 From: Ian Cook Date: Fri, 5 Mar 2021 16:16:35 -0500 Subject: [PATCH 26/28] Use ARROW_R_WITH_PARQUET and ARROW_R_WITH_DATASET in Windows conda recipe --- dev/tasks/conda-recipes/r-arrow/configure.win | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/tasks/conda-recipes/r-arrow/configure.win b/dev/tasks/conda-recipes/r-arrow/configure.win index 3a6ea90f27e..82fa1795699 100755 --- a/dev/tasks/conda-recipes/r-arrow/configure.win +++ b/dev/tasks/conda-recipes/r-arrow/configure.win @@ -2,7 +2,7 @@ set -euxo pipefail -echo "PKG_CPPFLAGS=-DNDEBUG -I\"${LIBRARY_PREFIX}/include\" -I\"${PREFIX}/include\" -DARROW_R_WITH_ARROW -DARROW_R_WITH_S3" > src/Makevars.win +echo "PKG_CPPFLAGS=-DNDEBUG -I\"${LIBRARY_PREFIX}/include\" -I\"${PREFIX}/include\" -DARROW_R_WITH_ARROW -DARROW_R_WITH_PARQUET -DARROW_R_WITH_DATASET -DARROW_R_WITH_S3" > src/Makevars.win echo "PKG_CXXFLAGS=\$(CXX_VISIBILITY)" >> src/Makevars.win echo 'CXX_STD=CXX11' >> src/Makevars.win echo "PKG_LIBS=-L\"${LIBRARY_PREFIX}/lib\" -larrow_dataset -lparquet -larrow" >> src/Makevars.win From 96e91046ebcc2cd83289c63fe4f78c8e7841d40f Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Fri, 5 Mar 2021 19:13:44 -0800 Subject: [PATCH 27/28] Regenerate arrowExports.cpp --- r/src/arrowExports.cpp | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp index aa1f34ee228..cf00c195bf6 100644 --- a/r/src/arrowExports.cpp +++ b/r/src/arrowExports.cpp @@ -1159,7 +1159,7 @@ BEGIN_CPP11 END_CPP11 } #else -extern "C" SEXP _arrow_dataset___HivePartitioning(SEXP schm_sexp){ +extern "C" SEXP _arrow_dataset___HivePartitioning(SEXP schm_sexp, SEXP null_fallback_sexp){ Rf_error("Cannot call dataset___HivePartitioning(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); } #endif @@ -1174,7 +1174,7 @@ BEGIN_CPP11 END_CPP11 } #else -extern "C" SEXP _arrow_dataset___HivePartitioning__MakeFactory(){ +extern "C" SEXP _arrow_dataset___HivePartitioning__MakeFactory(SEXP null_fallback_sexp){ Rf_error("Cannot call dataset___HivePartitioning__MakeFactory(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); } #endif @@ -1190,7 +1190,14 @@ BEGIN_CPP11 return R_NilValue; END_CPP11 } +#else +extern "C" SEXP _arrow_dataset___ScannerBuilder__ProjectNames(SEXP sb_sexp, SEXP cols_sexp){ + Rf_error("Cannot call dataset___ScannerBuilder__ProjectNames(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // dataset.cpp +#if defined(ARROW_R_WITH_DATASET) void dataset___ScannerBuilder__ProjectExprs(const std::shared_ptr& sb, const std::vector>& exprs, const std::vector& names); extern "C" SEXP _arrow_dataset___ScannerBuilder__ProjectExprs(SEXP sb_sexp, SEXP exprs_sexp, SEXP names_sexp){ BEGIN_CPP11 @@ -1202,8 +1209,8 @@ BEGIN_CPP11 END_CPP11 } #else -extern "C" SEXP _arrow_dataset___ScannerBuilder__Project(SEXP sb_sexp, SEXP cols_sexp){ - Rf_error("Cannot call dataset___ScannerBuilder__Project(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +extern "C" SEXP _arrow_dataset___ScannerBuilder__ProjectExprs(SEXP sb_sexp, SEXP exprs_sexp, SEXP names_sexp){ + Rf_error("Cannot call dataset___ScannerBuilder__ProjectExprs(). 
See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); } #endif @@ -4052,7 +4059,6 @@ arrow::r::r6_reset_pointer(r6); END_CPP11 return R_NilValue; } - extern "C" SEXP _arrow_available() { return Rf_ScalarLogical( #if defined(ARROW_R_WITH_ARROW) @@ -4062,7 +4068,6 @@ return Rf_ScalarLogical( #endif ); } - extern "C" SEXP _dataset_available() { return Rf_ScalarLogical( #if defined(ARROW_R_WITH_DATASET) @@ -4072,7 +4077,6 @@ return Rf_ScalarLogical( #endif ); } - extern "C" SEXP _parquet_available() { return Rf_ScalarLogical( #if defined(ARROW_R_WITH_PARQUET) @@ -4082,7 +4086,6 @@ return Rf_ScalarLogical( #endif ); } - extern "C" SEXP _s3_available() { return Rf_ScalarLogical( #if defined(ARROW_R_WITH_S3) @@ -4092,7 +4095,6 @@ return Rf_ScalarLogical( #endif ); } - static const R_CallMethodDef CallEntries[] = { { "_arrow_available", (DL_FUNC)& _arrow_available, 0 }, { "_dataset_available", (DL_FUNC)& _dataset_available, 0 }, @@ -4503,9 +4505,9 @@ static const R_CallMethodDef CallEntries[] = { { "_arrow_RecordBatch__Reset", (DL_FUNC) &_arrow_RecordBatch__Reset, 1}, {NULL, NULL, 0} }; - extern "C" void R_init_arrow(DllInfo* dll){ R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); R_useDynamicSymbols(dll, FALSE); } + From 1b1680f2e8b1cc05f19ed460905db2b349d6d1e3 Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Fri, 5 Mar 2021 19:37:36 -0800 Subject: [PATCH 28/28] Update dev/tasks/r/azure.linux.yml --- dev/tasks/r/azure.linux.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dev/tasks/r/azure.linux.yml b/dev/tasks/r/azure.linux.yml index 1e357b0d869..c9563d82077 100644 --- a/dev/tasks/r/azure.linux.yml +++ b/dev/tasks/r/azure.linux.yml @@ -51,9 +51,9 @@ jobs: export ARROW_R_DEV={{ not_cran }} # Note that ci/scripts/r_test.sh sets NOT_CRAN=true if ARROW_R_DEV=TRUE docker-compose run \ - -e ARROW_DATASET={{ arrow_dataset|default("ON") }} \ - -e ARROW_PARQUET={{ arrow_parquet|default("ON") }} \ - -e ARROW_S3={{ arrow_s3|default("ON") }} \ + -e ARROW_DATASET={{ arrow_dataset|default("") }} \ + -e ARROW_PARQUET={{ arrow_parquet|default("") }} \ + -e ARROW_S3={{ arrow_s3|default("") }} \ -e LIBARROW_MINIMAL={{ libarrow_minimal|default("") }} \ r displayName: Docker run
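
A minimal sketch of how downstream R code might use the capability flags introduced in this series — hypothetical usage, not part of any patch above; the scratch-file name and message text are illustrative only:

    library(arrow)

    # The C++ stubs generated by data-raw/codegen.R raise an error
    # ("Cannot call ... See https://arrow.apache.org/docs/r/articles/install.html")
    # when a feature was compiled out, so guard optional code paths first.
    if (arrow_with_parquet()) {
      tf <- tempfile(fileext = ".parquet")  # hypothetical scratch file
      write_parquet(data.frame(x = 1:5), tf)
      print(read_parquet(tf))
    } else {
      message("arrow was built without Parquet support; ",
              "see vignette('install', package = 'arrow')")
    }

    if (arrow_with_dataset()) {
      # open_dataset()/write_dataset() are only safe to call here
    }

Inside the package's own test suite, the equivalent guard is skip_if_not_available("parquet"), as added to test-s3.R above.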