From 3c4dfb0e005eb13b9e1d6809818bf15d8eb79374 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Tue, 31 Oct 2023 14:21:18 -0400 Subject: [PATCH 1/8] maybe clean up find_latest_nightly() --- r/tools/nixlibs.R | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/r/tools/nixlibs.R b/r/tools/nixlibs.R index 63c185ce545..59f5227dd30 100644 --- a/r/tools/nixlibs.R +++ b/r/tools/nixlibs.R @@ -35,7 +35,8 @@ exit <- function(..., .status = 1) { # checks the nightly repo for the latest nightly version X.Y.Z.100 -find_latest_nightly <- function(description_version) { +find_latest_nightly <- function(description_version, + list_uri = "https://nightlies.apache.org/arrow/r/src/contrib") { if (!startsWith(arrow_repo, "https://nightlies.apache.org/arrow/r")) { lg("Detected non standard dev repo: %s, not checking latest nightly version.", arrow_repo) return(description_version) @@ -44,17 +45,27 @@ find_latest_nightly <- function(description_version) { res <- try( { # Binaries are only uploaded if all jobs pass so can just look at the source versions. - urls <- readLines("https://nightlies.apache.org/arrow/r/src/contrib") + urls <- readLines(list_uri) versions <- grep("arrow_.*\\.tar\\.gz", urls, value = TRUE) versions <- sub(".*arrow_(.*)\\.tar\\.gz.*", "\\1", x = versions) - versions <- sapply(versions, package_version) + versions <- lapply(versions, package_version) versions <- data.frame(do.call(rbind, versions)) - matching_major <- versions[versions$X1 == description_version[1, 1], ] - latest <- matching_major[which.max(matching_major$X4), ] + matching_major <- versions[versions$X1 == description_version[1, 1], , drop = FALSE] + if (nrow(matching_major) == 0) { + lg( + "No nightly binaries were found for version %s: falling back to libarrow build from source", + description_version + ) + + return(description_version) + } + + latest <- matching_major[which.max(matching_major$X4), , drop = TRUE] package_version(paste0(latest, collapse = ".")) }, silent = quietly ) + if (inherits(res, "try-error")) { lg("Failed to find latest nightly for %s", description_version) latest <- description_version From 8218475c3af1bc891092c18d2598f5d92a54ab33 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Thu, 2 Nov 2023 09:24:15 -0400 Subject: [PATCH 2/8] slightly better nightly testing --- r/tools/nixlibs.R | 24 ++++++++++------ r/tools/test-nixlibs.R | 64 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 79 insertions(+), 9 deletions(-) diff --git a/r/tools/nixlibs.R b/r/tools/nixlibs.R index 59f5227dd30..2ee2ab34376 100644 --- a/r/tools/nixlibs.R +++ b/r/tools/nixlibs.R @@ -36,7 +36,7 @@ exit <- function(..., .status = 1) { # checks the nightly repo for the latest nightly version X.Y.Z.100 find_latest_nightly <- function(description_version, - list_uri = "https://nightlies.apache.org/arrow/r/src/contrib") { + list_uri = "https://nightlies.apache.org/arrow/r/src/contrib/PACKAGES") { if (!startsWith(arrow_repo, "https://nightlies.apache.org/arrow/r")) { lg("Detected non standard dev repo: %s, not checking latest nightly version.", arrow_repo) return(description_version) @@ -46,12 +46,18 @@ find_latest_nightly <- function(description_version, { # Binaries are only uploaded if all jobs pass so can just look at the source versions. urls <- readLines(list_uri) - versions <- grep("arrow_.*\\.tar\\.gz", urls, value = TRUE) - versions <- sub(".*arrow_(.*)\\.tar\\.gz.*", "\\1", x = versions) - versions <- lapply(versions, package_version) - versions <- data.frame(do.call(rbind, versions)) - matching_major <- versions[versions$X1 == description_version[1, 1], , drop = FALSE] - if (nrow(matching_major) == 0) { + versions <- grep("Version:\\s*.*?", urls, value = TRUE) + versions <- sort(package_version(sub("Version:\\s*", "\\1", versions))) + major_versions <- vapply( + versions, + function(x) as.integer(x[[c(1, 1)]]), + integer(1) + ) + + description_version_major <- as.integer(description_version[1, 1]) + matching_major <- major_versions == description_version_major + if (!any(matching_major)) { + warning("just for the stack trace") lg( "No nightly binaries were found for version %s: falling back to libarrow build from source", description_version @@ -60,8 +66,8 @@ find_latest_nightly <- function(description_version, return(description_version) } - latest <- matching_major[which.max(matching_major$X4), , drop = TRUE] - package_version(paste0(latest, collapse = ".")) + versions <- versions[matching_major] + versions[[length(versions)]] }, silent = quietly ) diff --git a/r/tools/test-nixlibs.R b/r/tools/test-nixlibs.R index c9571b58b7b..a217f463c60 100644 --- a/r/tools/test-nixlibs.R +++ b/r/tools/test-nixlibs.R @@ -155,3 +155,67 @@ test_that("check_allowlist", { expect_true(check_allowlist("redhat", tempfile())) # remote allowlist doesn't exist, so we fall back to the default list, which contains redhat expect_false(check_allowlist("debian", tempfile())) }) + +test_that("find_latest_nightly()", { + tf <- tempfile() + tf_uri <- paste0("file://", tf) + on.exit(unlink(tf)) + + writeLines( + c( + "Version: 13.0.0.100000333", + "Version: 13.0.0.100000334", + "Version: 13.0.0.100000335", + "Version: 14.0.0.100000001" + ), + tf + ) + + expect_output( + expect_identical( + find_latest_nightly(package_version("13.0.1.9000"), list_uri = tf_uri), + package_version("13.0.0.100000335") + ), + "Found latest nightly" + ) + + expect_output( + expect_identical( + find_latest_nightly(package_version("14.0.0.9000"), list_uri = tf_uri), + package_version("14.0.0.100000001") + ), + "Found latest nightly" + ) + + expect_output( + expect_identical( + find_latest_nightly(package_version("15.0.0.9000"), list_uri = tf_uri), + package_version("15.0.0.9000") + ), + "No nightly binaries were found for version" + ) + + # Check empty input + writeLines(character(), tf) + expect_output( + expect_identical( + find_latest_nightly(package_version("15.0.0.9000"), list_uri = tf_uri), + package_version("15.0.0.9000") + ), + "No nightly binaries were found for version" + ) + + # Check input that will throw an error + expect_output( + expect_identical( + suppressWarnings( + find_latest_nightly( + package_version("15.0.0.9000"), + list_uri = "this is not a URI" + ) + ), + package_version("15.0.0.9000") + ), + "Failed to find latest nightly" + ) +}) From d04504397fc5eea055d50fec12a34ae8d300435c Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Fri, 3 Nov 2023 08:28:59 -0400 Subject: [PATCH 3/8] maybe default LIBARROW_BINARY to false --- ci/docker/linux-r.dockerfile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ci/docker/linux-r.dockerfile b/ci/docker/linux-r.dockerfile index d368a6629c5..4acb704f02f 100644 --- a/ci/docker/linux-r.dockerfile +++ b/ci/docker/linux-r.dockerfile @@ -36,6 +36,9 @@ ENV R_PRUNE_DEPS=${r_prune_deps} ARG r_custom_ccache=false ENV R_CUSTOM_CCACHE=${r_custom_ccache} +ARG r_libarrow_binary=FALSE +ENV LIBARROW_BINARY=${r_libarrow_binary} + ARG tz="UTC" ENV TZ=${tz} From 0709f307db7f35a564d3d36edb5be1e100bb8158 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Fri, 3 Nov 2023 11:01:14 -0400 Subject: [PATCH 4/8] remove a debug --- r/tools/nixlibs.R | 1 - 1 file changed, 1 deletion(-) diff --git a/r/tools/nixlibs.R b/r/tools/nixlibs.R index 2ee2ab34376..21163c7faad 100644 --- a/r/tools/nixlibs.R +++ b/r/tools/nixlibs.R @@ -57,7 +57,6 @@ find_latest_nightly <- function(description_version, description_version_major <- as.integer(description_version[1, 1]) matching_major <- major_versions == description_version_major if (!any(matching_major)) { - warning("just for the stack trace") lg( "No nightly binaries were found for version %s: falling back to libarrow build from source", description_version From ff398edc951caffa7cc73cc68e40f435587011ea Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Fri, 3 Nov 2023 11:08:24 -0400 Subject: [PATCH 5/8] maybe fix logic --- ci/docker/linux-r.dockerfile | 3 --- r/tools/nixlibs.R | 4 +++- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/ci/docker/linux-r.dockerfile b/ci/docker/linux-r.dockerfile index 4acb704f02f..d368a6629c5 100644 --- a/ci/docker/linux-r.dockerfile +++ b/ci/docker/linux-r.dockerfile @@ -36,9 +36,6 @@ ENV R_PRUNE_DEPS=${r_prune_deps} ARG r_custom_ccache=false ENV R_CUSTOM_CCACHE=${r_custom_ccache} -ARG r_libarrow_binary=FALSE -ENV LIBARROW_BINARY=${r_libarrow_binary} - ARG tz="UTC" ENV TZ=${tz} diff --git a/r/tools/nixlibs.R b/r/tools/nixlibs.R index 21163c7faad..f4492fc4405 100644 --- a/r/tools/nixlibs.R +++ b/r/tools/nixlibs.R @@ -883,6 +883,8 @@ build_ok <- !env_is("LIBARROW_BUILD", "false") # https://arrow.apache.org/docs/developers/cpp/building.html#offline-builds) download_ok <- !test_mode && !env_is("TEST_OFFLINE_BUILD", "true") +download_libarrow_ok <- download_ok && !env_is("LIBARROW_DOWNLOAD", "false") + # This "tools/thirdparty_dependencies" path, within the tar file, might exist if # create_package_with_all_dependencies() was run, or if someone has created it # manually before running make build. @@ -920,7 +922,7 @@ if (!test_mode && !file.exists(api_h)) { lg("File not found: %s ($ARROW_DOWNLOADED_BINARIES)", bin_zip) bin_file <- NULL } - } else if (download_ok) { + } else if (download_libarrow_ok) { binary_flavor <- identify_binary() if (!is.null(binary_flavor)) { # The env vars say we can, and we've determined a lib that should work From 0f2783f2d168597e9dcce4a2ff2c0ba33c0517f4 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Tue, 7 Nov 2023 22:10:54 -0400 Subject: [PATCH 6/8] ensure quiet nixlibs.R tests --- r/tools/nixlibs.R | 14 +++++++++----- r/tools/test-nixlibs.R | 3 ++- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/r/tools/nixlibs.R b/r/tools/nixlibs.R index f4492fc4405..0005de0116c 100644 --- a/r/tools/nixlibs.R +++ b/r/tools/nixlibs.R @@ -36,7 +36,8 @@ exit <- function(..., .status = 1) { # checks the nightly repo for the latest nightly version X.Y.Z.100 find_latest_nightly <- function(description_version, - list_uri = "https://nightlies.apache.org/arrow/r/src/contrib/PACKAGES") { + list_uri = "https://nightlies.apache.org/arrow/r/src/contrib/PACKAGES", + hush = quietly) { if (!startsWith(arrow_repo, "https://nightlies.apache.org/arrow/r")) { lg("Detected non standard dev repo: %s, not checking latest nightly version.", arrow_repo) return(description_version) @@ -68,7 +69,7 @@ find_latest_nightly <- function(description_version, versions <- versions[matching_major] versions[[length(versions)]] }, - silent = quietly + silent = hush ) if (inherits(res, "try-error")) { @@ -848,16 +849,19 @@ quietly <- !env_is("ARROW_R_DEV", "true") not_cran <- env_is("NOT_CRAN", "true") -if (is_release & !test_mode) { +if (is_release) { VERSION <- VERSION[1, 1:3] arrow_repo <- paste0(getOption("arrow.repo", sprintf("https://apache.jfrog.io/artifactory/arrow/r/%s", VERSION)), "/libarrow/") -} else if(!test_mode) { +} else { not_cran <- TRUE arrow_repo <- paste0(getOption("arrow.dev_repo", "https://nightlies.apache.org/arrow/r"), "/libarrow/") +} + +if (!is_release && !test_mode) { VERSION <- find_latest_nightly(VERSION) } -# To collect dirs to rm on exit, use del() to add dirs +# To collect dirs to rm on exit, use cleanup() to add dirs # we reset it to avoid errors on reruns in the same session. options(.arrow.cleanup = character()) on.exit(unlink(getOption(".arrow.cleanup"), recursive = TRUE), add = TRUE) diff --git a/r/tools/test-nixlibs.R b/r/tools/test-nixlibs.R index a217f463c60..f97a80ccc29 100644 --- a/r/tools/test-nixlibs.R +++ b/r/tools/test-nixlibs.R @@ -211,7 +211,8 @@ test_that("find_latest_nightly()", { suppressWarnings( find_latest_nightly( package_version("15.0.0.9000"), - list_uri = "this is not a URI" + list_uri = "this is not a URI", + hush = TRUE ) ), package_version("15.0.0.9000") From 1361154d6a73d0b2ed45741a985629d97188138a Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Thu, 9 Nov 2023 15:34:02 -0400 Subject: [PATCH 7/8] Apply suggestions from code review Co-authored-by: Jonathan Keane --- r/tools/nixlibs.R | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/r/tools/nixlibs.R b/r/tools/nixlibs.R index 0005de0116c..436eefcfcfe 100644 --- a/r/tools/nixlibs.R +++ b/r/tools/nixlibs.R @@ -49,11 +49,7 @@ find_latest_nightly <- function(description_version, urls <- readLines(list_uri) versions <- grep("Version:\\s*.*?", urls, value = TRUE) versions <- sort(package_version(sub("Version:\\s*", "\\1", versions))) - major_versions <- vapply( - versions, - function(x) as.integer(x[[c(1, 1)]]), - integer(1) - ) + major_versions <- versions$major description_version_major <- as.integer(description_version[1, 1]) matching_major <- major_versions == description_version_major @@ -67,7 +63,7 @@ find_latest_nightly <- function(description_version, } versions <- versions[matching_major] - versions[[length(versions)]] + max(versions) }, silent = hush ) From 371e003ba2aba3cdf4a82b967d439722758a6e00 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Thu, 9 Nov 2023 15:39:36 -0400 Subject: [PATCH 8/8] document LIBARROW_DOWNLOAD --- r/vignettes/install.Rmd | 1 + 1 file changed, 1 insertion(+) diff --git a/r/vignettes/install.Rmd b/r/vignettes/install.Rmd index 20fe4ed8961..10155e3a8cd 100644 --- a/r/vignettes/install.Rmd +++ b/r/vignettes/install.Rmd @@ -283,6 +283,7 @@ the bundled build script. All boolean variables are case-insensitive. | --- | --- | :-: | | `LIBARROW_BUILD` | Allow building from source | `true` | | `LIBARROW_BINARY` | Try to install `libarrow` binary instead of building from source | (unset) | +| `LIBARROW_DOWNLOAD` | Set to `false` to explicitly forbid fetching a `libarrow` binary | (unset) | | `LIBARROW_MINIMAL` | Build with minimal features enabled | (unset) | | `NOT_CRAN` | Set `LIBARROW_BINARY=true` and `LIBARROW_MINIMAL=false` | `false` | | `ARROW_R_DEV` | More verbose messaging and regenerates some code | `false` |