From 3c62f882ab9bdd670aa18a14c590390b669ed828 Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Wed, 19 Jun 2024 15:02:03 -0500 Subject: [PATCH 1/6] Turn S3 on by default for macos --- r/tools/nixlibs.R | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/r/tools/nixlibs.R b/r/tools/nixlibs.R index def4d35f825..7eeb9c9e1e4 100644 --- a/r/tools/nixlibs.R +++ b/r/tools/nixlibs.R @@ -536,7 +536,7 @@ build_libarrow <- function(src_dir, dst_dir) { } cleanup(build_dir) - env_var_list <- c( + env_var_list <- list( SOURCE_DIR = src_dir, BUILD_DIR = build_dir, DEST_DIR = dst_dir, @@ -574,6 +574,8 @@ build_libarrow <- function(src_dir, dst_dir) { env_var_list <- c(env_var_list, setNames("BUNDLED", env_var)) } } + # We also _do_ want to enable S3 by default + env_var_list <- c(env_var_list, c(ARROW_S3 = "ON")) } env_var_list <- with_cloud_support(env_var_list) @@ -814,8 +816,14 @@ set_thirdparty_urls <- function(env_var_list) { env_var_list } -is_feature_requested <- function(env_varname, default = env_is("LIBARROW_MINIMAL", "false")) { - env_value <- tolower(Sys.getenv(env_varname)) +is_feature_requested <- function(env_varname, env_var_list, default = env_is("LIBARROW_MINIMAL", "false")) { + # look in our env_var_list first, if it's not found there go to + # the actual environment + env_value <- tolower(env_var_list[[env_varname]]) + if (is.null(env_value)) { + env_value <- tolower(Sys.getenv(env_varname)) + } + if (identical(env_value, "off")) { # If e.g. ARROW_MIMALLOC=OFF explicitly, override default requested <- FALSE @@ -828,8 +836,8 @@ is_feature_requested <- function(env_varname, default = env_is("LIBARROW_MINIMAL } with_cloud_support <- function(env_var_list) { - arrow_s3 <- is_feature_requested("ARROW_S3") - arrow_gcs <- is_feature_requested("ARROW_GCS") + arrow_s3 <- is_feature_requested("ARROW_S3", env_var_list) + arrow_gcs <- is_feature_requested("ARROW_GCS", env_var_list) if (arrow_s3 || arrow_gcs) { # User wants S3 or GCS support. From d32e1fad36852a2622e5f8d82c0d43e50008318f Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Wed, 19 Jun 2024 15:41:23 -0500 Subject: [PATCH 2/6] Also GCS and ZSTD so we avoid the message about lacking --- r/tools/nixlibs.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/r/tools/nixlibs.R b/r/tools/nixlibs.R index 7eeb9c9e1e4..f2febcbf37c 100644 --- a/r/tools/nixlibs.R +++ b/r/tools/nixlibs.R @@ -574,8 +574,8 @@ build_libarrow <- function(src_dir, dst_dir) { env_var_list <- c(env_var_list, setNames("BUNDLED", env_var)) } } - # We also _do_ want to enable S3 by default - env_var_list <- c(env_var_list, c(ARROW_S3 = "ON")) + # We also _do_ want to enable S3, GCS, and ZSTD by default + env_var_list <- c(env_var_list, c(ARROW_S3 = "ON", ARROW_GCS = "ON", ARROW_WITH_ZSTD = "ON")) } env_var_list <- with_cloud_support(env_var_list) From 46e33dbb59c944084bfa2c5b60bd080d1a9e9b5f Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Thu, 20 Jun 2024 07:38:16 -0500 Subject: [PATCH 3/6] Update r/tools/nixlibs.R Co-authored-by: Neal Richardson --- r/tools/nixlibs.R | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/r/tools/nixlibs.R b/r/tools/nixlibs.R index f2febcbf37c..2d6f5ca4a5f 100644 --- a/r/tools/nixlibs.R +++ b/r/tools/nixlibs.R @@ -575,7 +575,13 @@ build_libarrow <- function(src_dir, dst_dir) { } } # We also _do_ want to enable S3, GCS, and ZSTD by default - env_var_list <- c(env_var_list, c(ARROW_S3 = "ON", ARROW_GCS = "ON", ARROW_WITH_ZSTD = "ON")) + # so that binaries built on CRAN from source are fully featured + env_var_list <- c( + env_var_list, + ARROW_S3 = Sys.getenv("ARROW_S3", "ON"), + ARROW_GCS = Sys.getenv("ARROW_GCS", "ON"), + ARROW_WITH_ZSTD = Sys.getenv("ARROW_WITH_ZSTD", "ON") + ) } env_var_list <- with_cloud_support(env_var_list) From 548b6b2961ac4d3adb7e254ac7e9da0f1280b69d Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Thu, 20 Jun 2024 17:42:07 -0500 Subject: [PATCH 4/6] Remove GCS to lessen attack surface --- r/R/arrow-info.R | 2 +- r/tools/nixlibs.R | 16 +++++++--------- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/r/R/arrow-info.R b/r/R/arrow-info.R index 916b6683fbc..ddeb0f04efa 100644 --- a/r/R/arrow-info.R +++ b/r/R/arrow-info.R @@ -140,7 +140,7 @@ some_features_are_off <- function(features) { # `features` is a named logical vector (as in arrow_info()$capabilities) # Let's exclude some less relevant ones # jemalloc is only included because it is sometimes disabled in our build process - blocklist <- c("lzo", "bz2", "brotli", "substrait", "jemalloc") + blocklist <- c("lzo", "bz2", "brotli", "substrait", "jemalloc", "gcs") # Return TRUE if any of the other features are FALSE !all(features[setdiff(names(features), blocklist)]) } diff --git a/r/tools/nixlibs.R b/r/tools/nixlibs.R index 2d6f5ca4a5f..51f8834599a 100644 --- a/r/tools/nixlibs.R +++ b/r/tools/nixlibs.R @@ -574,12 +574,12 @@ build_libarrow <- function(src_dir, dst_dir) { env_var_list <- c(env_var_list, setNames("BUNDLED", env_var)) } } - # We also _do_ want to enable S3, GCS, and ZSTD by default + # We also _do_ want to enable S3 and ZSTD by default # so that binaries built on CRAN from source are fully featured + # but defer to the env vars if those are set env_var_list <- c( env_var_list, ARROW_S3 = Sys.getenv("ARROW_S3", "ON"), - ARROW_GCS = Sys.getenv("ARROW_GCS", "ON"), ARROW_WITH_ZSTD = Sys.getenv("ARROW_WITH_ZSTD", "ON") ) } @@ -822,14 +822,12 @@ set_thirdparty_urls <- function(env_var_list) { env_var_list } +# this is generally about features that people asked for via environment variables, but +# for some cases (like S3 when we override it in this script) we might find those in +# env_var_list is_feature_requested <- function(env_varname, env_var_list, default = env_is("LIBARROW_MINIMAL", "false")) { - # look in our env_var_list first, if it's not found there go to - # the actual environment - env_value <- tolower(env_var_list[[env_varname]]) - if (is.null(env_value)) { - env_value <- tolower(Sys.getenv(env_varname)) - } - + # look in the environment first, but then use the env_var_list if nothing is found + env_value <- tolower(Sys.getenv(env_varname, env_var_list[[env_varname]])) if (identical(env_value, "off")) { # If e.g. ARROW_MIMALLOC=OFF explicitly, override default requested <- FALSE From b23878ae76139230da0042204f293d148f53f25d Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Thu, 20 Jun 2024 18:47:08 -0500 Subject: [PATCH 5/6] Oops, can't use NULL in unset --- r/tools/nixlibs.R | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/r/tools/nixlibs.R b/r/tools/nixlibs.R index 51f8834599a..fc79e063280 100644 --- a/r/tools/nixlibs.R +++ b/r/tools/nixlibs.R @@ -827,7 +827,11 @@ set_thirdparty_urls <- function(env_var_list) { # env_var_list is_feature_requested <- function(env_varname, env_var_list, default = env_is("LIBARROW_MINIMAL", "false")) { # look in the environment first, but then use the env_var_list if nothing is found - env_value <- tolower(Sys.getenv(env_varname, env_var_list[[env_varname]])) + env_var_list_value <- env_var_list[[env_varname]] + if (is.null(env_var_list_value)) { + env_var_list_value <- "" + } + env_value <- tolower(Sys.getenv(env_varname, env_var_list_value)) if (identical(env_value, "off")) { # If e.g. ARROW_MIMALLOC=OFF explicitly, override default requested <- FALSE From 82b64d2c79ae3cec084b899019b7a5a9c97a3f27 Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Fri, 21 Jun 2024 16:29:19 -0500 Subject: [PATCH 6/6] turn off shared snappy --- r/inst/build_arrow_static.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/r/inst/build_arrow_static.sh b/r/inst/build_arrow_static.sh index 825a230e78e..fc3a173294b 100755 --- a/r/inst/build_arrow_static.sh +++ b/r/inst/build_arrow_static.sh @@ -65,6 +65,7 @@ esac mkdir -p "${BUILD_DIR}" pushd "${BUILD_DIR}" ${CMAKE} -DARROW_BOOST_USE_SHARED=OFF \ + -DARROW_SNAPPY_USE_SHARED=OFF \ -DARROW_BUILD_TESTS=OFF \ -DARROW_BUILD_SHARED=OFF \ -DARROW_BUILD_STATIC=ON \