From 8974e197594eb28cbd6b1003c31c455c4a3659d0 Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Fri, 6 Oct 2023 05:54:12 +0200 Subject: [PATCH 01/30] add script to fetch checksum files from artifactory --- r/.gitignore | 4 +++ r/tools/update-checksum.R | 63 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+) create mode 100644 r/tools/update-checksum.R diff --git a/r/.gitignore b/r/.gitignore index e607d2662f2..d680fb42ddd 100644 --- a/r/.gitignore +++ b/r/.gitignore @@ -20,6 +20,10 @@ arrow_*.tgz extra-tests/files .deps +# Checksums for the precompiled binaries will be added just before CRAN submission +# use `tools/update-checksums.R` to download them. +/tools/checksums/ + # C++ sources for an offline build. They're copied from the ../cpp directory, so ignore them here. /tools/cpp/ # cmake expects dotenv, NOTICE.txt, and LICENSE.txt to be available one level up diff --git a/r/tools/update-checksum.R b/r/tools/update-checksum.R new file mode 100644 index 00000000000..4be07ab06fd --- /dev/null +++ b/r/tools/update-checksum.R @@ -0,0 +1,63 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Run this script AFTER the release was voted and the artifacts +# are moved into the final dir. 
This script will download the checksum +# files and save them to the tools/checksums directory mirroring the +# artifactory layout. *libs.R uses these files to validate the downloaded +# binaries when installing the package. +# +# Run this script from the r/ directory of the arrow repo with the version +# as the first argument: $ Rscript tools/update-checksum.R 14.0.0 + +args <- commandArgs(TRUE) +VERSION <- args[1] +tools_root <- "" + +if (length(args) != 1) { + stop("Usage: Rscript tools/update-checksum.R ") +} + +tasks_yml <- "../dev/tasks/tasks.yml" + +if (!file.exists(tasks_yml)) { + stop("Run this script from the r/ directory of the arrow repo") +} + +# Get the libarrow binary paths from the tasks.yml file +binary_paths <- readLines(tasks_yml) |> + grep("r-lib__libarrow", x = _, value = TRUE) |> + sub(".+r-lib__libarrow__bin__(.+\\.zip)", "\\1", x = _) |> + sub("{no_rc_r_version}", VERSION, fixed = TRUE, x = _) |> + sub("__", "/", x = _) |> + sub("\\.zip", ".zip", fixed = TRUE, x = _) + +artifactory_root <- "https://apache.jfrog.io/artifactory/arrow/r/%s/libarrow/bin/%s" + +# Get the checksum file from the artifactory +for (path in binary_paths) { + sha_path <- paste0(path, ".sha512") + file <- file.path("tools/checksums", sha_path) + dirname(file) |> dir.create(path = _, recursive = TRUE, showWarnings = FALSE) + + url <- sprintf(artifactory_root, VERSION, sha_path) + download.file(url, file, quiet = TRUE, cacheOK = FALSE) + + if (!file.exists(file)) { + stop(sprintf("Failed to download %s", url)) + } +} From 357f62aaf1d3ca8a67e09f44bfd279c46bf3e0f5 Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Fri, 6 Oct 2023 05:53:52 +0200 Subject: [PATCH 02/30] remove rwinlib --- r/tools/winlibs.R | 5 ----- 1 file changed, 5 deletions(-) diff --git a/r/tools/winlibs.R b/r/tools/winlibs.R index b554770e40c..178497d00b3 100644 --- a/r/tools/winlibs.R +++ b/r/tools/winlibs.R @@ -48,18 +48,13 @@ if (!file.exists(sprintf("windows/arrow-%s/include/arrow/api.h", 
VERSION))) { getOption("arrow.repo", "https://apache.jfrog.io/artifactory/arrow/r/%1$s"), "/libarrow/bin/windows/arrow-%1$s.zip" ) - rwinlib <- "https://github.com/rwinlib/arrow/archive/v%s.zip" dev_version <- package_version(VERSION)[1, 4] # Small dev versions are added for R-only changes during CRAN submission. if (is.na(dev_version) || dev_version < "100") { VERSION <- package_version(VERSION)[1, 1:3] - get_file(rwinlib, VERSION) - # If not found, fall back to apache artifactory - if (!file.exists("lib.zip")) { - get_file(artifactory, VERSION) } } else { get_file(nightly, VERSION) From b80ee6d1f3cc87225347755fda1508d3c444dd46 Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Fri, 6 Oct 2023 05:54:37 +0200 Subject: [PATCH 03/30] validate windows binary checksum --- r/tools/winlibs.R | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/r/tools/winlibs.R b/r/tools/winlibs.R index 178497d00b3..16da393d086 100644 --- a/r/tools/winlibs.R +++ b/r/tools/winlibs.R @@ -54,7 +54,15 @@ if (!file.exists(sprintf("windows/arrow-%s/include/arrow/api.h", VERSION))) { # Small dev versions are added for R-only changes during CRAN submission. 
if (is.na(dev_version) || dev_version < "100") { VERSION <- package_version(VERSION)[1, 1:3] + get_file(artifactory, VERSION) + checksum <- sprintf("tools/checksums/windows/arrow-%s.zip.sha512", VERSION) + checksum_ok <- system2("shasum", args = c( + "-a", "512", "-c", checksum + )) + + if (checksum_ok != 0) { + stop("*** Checksum validation failed for libarrow binary!") } } else { get_file(nightly, VERSION) From f0cfd4339f38502f57b5ee885d28d127c8a4a772 Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Sat, 7 Oct 2023 01:18:38 +0200 Subject: [PATCH 04/30] fix error with lineendings on windows --- r/tools/update-checksum.R | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/r/tools/update-checksum.R b/r/tools/update-checksum.R index 4be07ab06fd..eb241d57b6b 100644 --- a/r/tools/update-checksum.R +++ b/r/tools/update-checksum.R @@ -57,7 +57,11 @@ for (path in binary_paths) { url <- sprintf(artifactory_root, VERSION, sha_path) download.file(url, file, quiet = TRUE, cacheOK = FALSE) - if (!file.exists(file)) { - stop(sprintf("Failed to download %s", url)) + if (grepl("windows", path)) { + # Carriage returns (Windows-style line endings) cause errors with msys2 sha512sum + sed_status <- system2("sed", args = c("-i", "s/\\r//", file)) + if (sed_status != 0) { + stop("Failed to remove \\r from windows checksum file. 
Exit code: ", sed_status) + } } } From eceadb4e51a97d3aa390bac17a2e33eb743c2923 Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Sat, 7 Oct 2023 01:26:53 +0200 Subject: [PATCH 05/30] use versioned filename to match checksum file --- r/tools/winlibs.R | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/r/tools/winlibs.R b/r/tools/winlibs.R index 16da393d086..fc07740194b 100644 --- a/r/tools/winlibs.R +++ b/r/tools/winlibs.R @@ -32,7 +32,7 @@ if (!file.exists(sprintf("windows/arrow-%s/include/arrow/api.h", VERSION))) { get_file <- function(template, version) { try( suppressWarnings( - download.file(sprintf(template, version), "lib.zip", quiet = quietly) + download.file(sprintf(template, version), zip_file, quiet = quietly) ), silent = quietly ) @@ -50,10 +50,13 @@ if (!file.exists(sprintf("windows/arrow-%s/include/arrow/api.h", VERSION))) { ) dev_version <- package_version(VERSION)[1, 4] + zip_file <- sprintf("arrow-%s.zip", VERSION) # Small dev versions are added for R-only changes during CRAN submission. 
if (is.na(dev_version) || dev_version < "100") { VERSION <- package_version(VERSION)[1, 1:3] + zip_file <- sprintf("arrow-%s.zip", VERSION) + get_file(artifactory, VERSION) checksum <- sprintf("tools/checksums/windows/arrow-%s.zip.sha512", VERSION) @@ -62,13 +65,13 @@ if (!file.exists(sprintf("windows/arrow-%s/include/arrow/api.h", VERSION))) { )) if (checksum_ok != 0) { - stop("*** Checksum validation failed for libarrow binary!") + stop("*** Checksum validation failed for libarrow binary: ", zip_file) } } else { get_file(nightly, VERSION) } } dir.create("windows", showWarnings = FALSE) - unzip("lib.zip", exdir = "windows") - unlink("lib.zip") + unzip(zip_file, exdir = "windows") + unlink(zip_file) } From f1219f2506bc76875309b267abb11539b66824be Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Sat, 7 Oct 2023 01:53:47 +0200 Subject: [PATCH 06/30] add checksum validation for nixlibs.R --- r/tools/nixlibs.R | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/r/tools/nixlibs.R b/r/tools/nixlibs.R index 60deca05cd1..33009c1050b 100644 --- a/r/tools/nixlibs.R +++ b/r/tools/nixlibs.R @@ -28,11 +28,12 @@ if (test_mode && is.na(VERSION)) { } dev_version <- package_version(VERSION)[1, 4] +is_release <- is.na(dev_version) || dev_version < "100" on_macos <- tolower(Sys.info()[["sysname"]]) == "darwin" # Small dev versions are added for R-only changes during CRAN submission. 
-if (is.na(dev_version) || dev_version < "100") { +if (is_release) { VERSION <- package_version(VERSION)[1, 1:3] arrow_repo <- paste0(getOption("arrow.repo", sprintf("https://apache.jfrog.io/artifactory/arrow/r/%s", VERSION)), "/libarrow/") } else { @@ -103,6 +104,25 @@ download_binary <- function(lib) { } libfile <- NULL } + + # validate binary checksum for CRAN release only + if (dir.exists("tools/checksums") && is_release || + env_is("ARROW_R_ENFORCE_CHECKSUM", "true")) { + checksum_file <- sub(".+/bin/(.+\\.zip)", "\\1", binary_url) + checksum_file <- file.path("tools/checksums", checksum_file) + + # shasum -a is more portable than sha512sum + checksum_ok <- system2("shasum", args = c( + "-a", "512", "--status", "-c", checksum + )) + + if (checksum_ok != 0) { + cat("*** Checksum validation failed for libarrow binary: ", zip_file) + libfile <- NULL + } + cat("*** Checksum validated successfully for libarrow binary: ", zip_file) + } + libfile } From cc0d87650ebb988e3c479f19387da7664c81a5ce Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Sat, 7 Oct 2023 02:06:12 +0200 Subject: [PATCH 07/30] use same logic for checksum for *libs.R --- r/tools/winlibs.R | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/r/tools/winlibs.R b/r/tools/winlibs.R index fc07740194b..48089195390 100644 --- a/r/tools/winlibs.R +++ b/r/tools/winlibs.R @@ -17,6 +17,10 @@ args <- commandArgs(TRUE) VERSION <- args[1] +dev_version <- package_version(VERSION)[1, 4] +is_release <- is.na(dev_version) || dev_version < "100" +env_is <- function(var, value) identical(tolower(Sys.getenv(var)), value) + if (!file.exists(sprintf("windows/arrow-%s/include/arrow/api.h", VERSION))) { if (length(args) > 1) { # Arg 2 would be the path/to/lib.zip @@ -49,26 +53,29 @@ if (!file.exists(sprintf("windows/arrow-%s/include/arrow/api.h", VERSION))) { "/libarrow/bin/windows/arrow-%1$s.zip" ) - dev_version <- package_version(VERSION)[1, 4] zip_file <- 
sprintf("arrow-%s.zip", VERSION) # Small dev versions are added for R-only changes during CRAN submission. - if (is.na(dev_version) || dev_version < "100") { + if (is_release) { VERSION <- package_version(VERSION)[1, 1:3] zip_file <- sprintf("arrow-%s.zip", VERSION) get_file(artifactory, VERSION) + } else { + get_file(nightly, VERSION) + } + # validate binary checksum for CRAN release only + if (dir.exists("tools/checksums") && is_release || + env_is("ARROW_R_ENFORCE_CHECKSUM", "true")) { checksum <- sprintf("tools/checksums/windows/arrow-%s.zip.sha512", VERSION) - checksum_ok <- system2("shasum", args = c( - "-a", "512", "-c", checksum - )) + # rtools does not have shasum with default config + checksum_ok <- system2("sha512sum", args = c("--status", "-c", checksum)) if (checksum_ok != 0) { - stop("*** Checksum validation failed for libarrow binary: ", zip_file) + cat("*** Checksum validation failed for libarrow binary: ", zip_file) } - } else { - get_file(nightly, VERSION) + cat("*** Checksum validated successfully for libarrow binary: ", zip_file) } } dir.create("windows", showWarnings = FALSE) From 0beda0eae1c381863457869b69777c3fab8a645f Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Sat, 7 Oct 2023 02:52:50 +0200 Subject: [PATCH 08/30] allow setting checksum path via envvar --- r/tools/nixlibs.R | 6 +++--- r/tools/winlibs.R | 5 +++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/r/tools/nixlibs.R b/r/tools/nixlibs.R index 33009c1050b..57de45d5aef 100644 --- a/r/tools/nixlibs.R +++ b/r/tools/nixlibs.R @@ -30,7 +30,7 @@ if (test_mode && is.na(VERSION)) { dev_version <- package_version(VERSION)[1, 4] is_release <- is.na(dev_version) || dev_version < "100" on_macos <- tolower(Sys.info()[["sysname"]]) == "darwin" - +checksum_path <- Sys.getenv("ARROW_R_CHECKSUM_PATH", "tools/checksums") # Small dev versions are added for R-only changes during CRAN submission. 
if (is_release) { @@ -106,10 +106,10 @@ download_binary <- function(lib) { } # validate binary checksum for CRAN release only - if (dir.exists("tools/checksums") && is_release || + if (dir.exists(checksum_path) && is_release || env_is("ARROW_R_ENFORCE_CHECKSUM", "true")) { checksum_file <- sub(".+/bin/(.+\\.zip)", "\\1", binary_url) - checksum_file <- file.path("tools/checksums", checksum_file) + checksum_file <- file.path(checksum_path, checksum_file) # shasum -a is more portable than sha512sum checksum_ok <- system2("shasum", args = c( diff --git a/r/tools/winlibs.R b/r/tools/winlibs.R index 48089195390..efa02c87390 100644 --- a/r/tools/winlibs.R +++ b/r/tools/winlibs.R @@ -20,6 +20,7 @@ VERSION <- args[1] dev_version <- package_version(VERSION)[1, 4] is_release <- is.na(dev_version) || dev_version < "100" env_is <- function(var, value) identical(tolower(Sys.getenv(var)), value) +checksum_path <- Sys.getenv("ARROW_R_CHECKSUM_PATH", "tools/checksums") if (!file.exists(sprintf("windows/arrow-%s/include/arrow/api.h", VERSION))) { if (length(args) > 1) { @@ -66,9 +67,9 @@ if (!file.exists(sprintf("windows/arrow-%s/include/arrow/api.h", VERSION))) { } # validate binary checksum for CRAN release only - if (dir.exists("tools/checksums") && is_release || + if (dir.exists(checksum_path) && is_release || env_is("ARROW_R_ENFORCE_CHECKSUM", "true")) { - checksum <- sprintf("tools/checksums/windows/arrow-%s.zip.sha512", VERSION) + checksum <- sprintf("%s/windows/arrow-%s.zip.sha512", checksum_path, VERSION) # rtools does not have shasum with default config checksum_ok <- system2("sha512sum", args = c("--status", "-c", checksum)) From 16ae2618d7976fccae8b663d7be5df2c366d20d7 Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Sat, 7 Oct 2023 02:53:01 +0200 Subject: [PATCH 09/30] add checksum validation to nightly builds --- dev/tasks/r/github.packages.yml | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git 
a/dev/tasks/r/github.packages.yml b/dev/tasks/r/github.packages.yml index 760e3b6da44..4af8562ba80 100644 --- a/dev/tasks/r/github.packages.yml +++ b/dev/tasks/r/github.packages.yml @@ -102,12 +102,16 @@ jobs: run: | cd arrow/r/libarrow/dist zip -r $PKG_FILE lib/ include/ - + - name: Create Checksum + shell: bash + run: | + cd arrow/r/libarrow/dist + shasum -a 512 arrow-*.zip > arrow-{{ '${{ needs.source.outputs.pkg_version }}' }}.zip.sha512 - name: Upload binary artifact uses: actions/upload-artifact@v3 with: name: r-lib__libarrow__bin__darwin-{{ '${{ matrix.platform.arch }}' }}-openssl-{{ '${{ matrix.openssl }}' }} - path: arrow/r/libarrow/dist/arrow-*.zip + path: arrow/r/libarrow/dist/arrow-*.zip* linux-cpp: name: C++ Binary Linux OpenSSL {{ '${{ matrix.openssl }}' }} @@ -152,12 +156,16 @@ jobs: cd arrow/r/libarrow/dist # These files were created by the docker user so we have to sudo to get them sudo -E zip -r $PKG_FILE lib/ include/ - + - name: Create Checksum + shell: bash + run: | + cd arrow/r/libarrow/dist + shasum -a 512 arrow-*.zip > arrow-{{ '${{ needs.source.outputs.pkg_version }}' }}.zip.sha512 - name: Upload binary artifact uses: actions/upload-artifact@v3 with: name: r-lib__libarrow__bin__linux-openssl-{{ '${{ matrix.openssl }}' }} - path: arrow/r/libarrow/dist/arrow-*.zip + path: arrow/r/libarrow/dist/arrow-*.zip* windows-cpp: name: C++ Binary Windows RTools (40 only) @@ -181,11 +189,16 @@ jobs: ARROW_HOME: "arrow" {{ macros.github_set_sccache_envvars()|indent(8) }} run: arrow/ci/scripts/r_windows_build.sh + - name: Create Checksum + shell: bash + run: | + cd build + sha512sum arrow-*.zip > arrow-{{ '${{ needs.source.outputs.pkg_version }}' }}.zip.sha512 - name: Upload binary artifact uses: actions/upload-artifact@v3 with: name: r-lib__libarrow__bin__windows - path: build/arrow-*.zip + path: build/arrow-*.zip* r-packages: needs: [source, windows-cpp, macos-cpp] @@ -222,7 +235,6 @@ jobs: rig system add-pak {{ macros.github_setup_local_r_repo(false, 
true, true)|indent }} - name: Prepare Dependency Installation - shell: bash run: | tar -xzf repo/src/contrib/arrow_*.tar.gz arrow/DESCRIPTION @@ -244,6 +256,8 @@ jobs: NOT_CRAN: "false" # actions/setup-r sets this implicitly ARROW_R_DEV: "true" LIBARROW_BINARY: "true" # has to be set as long as allowlist not updated + ARROW_R_ENFORCE_CHECKSUM: "true" + ARROW_R_CHECKSUM_path: "repo/libarrow/bin" run: | on_windows <- tolower(Sys.info()[["sysname"]]) == "windows" From 9fbd82a698dca8b4ccdccd75c7f4d0a413b5a76d Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Sat, 7 Oct 2023 03:18:32 +0200 Subject: [PATCH 10/30] add sudo to overcome docker permission --- dev/tasks/r/github.packages.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/tasks/r/github.packages.yml b/dev/tasks/r/github.packages.yml index 4af8562ba80..8d2f1508ae1 100644 --- a/dev/tasks/r/github.packages.yml +++ b/dev/tasks/r/github.packages.yml @@ -160,7 +160,7 @@ jobs: shell: bash run: | cd arrow/r/libarrow/dist - shasum -a 512 arrow-*.zip > arrow-{{ '${{ needs.source.outputs.pkg_version }}' }}.zip.sha512 + sudo shasum -a 512 arrow-*.zip > arrow-{{ '${{ needs.source.outputs.pkg_version }}' }}.zip.sha512 - name: Upload binary artifact uses: actions/upload-artifact@v3 with: From 271574cbdd6c689fdf2fb288005e503ca48e90ea Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Sat, 7 Oct 2023 03:23:51 +0200 Subject: [PATCH 11/30] fix typo --- dev/tasks/r/github.packages.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/tasks/r/github.packages.yml b/dev/tasks/r/github.packages.yml index 8d2f1508ae1..0bdcc50064a 100644 --- a/dev/tasks/r/github.packages.yml +++ b/dev/tasks/r/github.packages.yml @@ -257,7 +257,7 @@ jobs: ARROW_R_DEV: "true" LIBARROW_BINARY: "true" # has to be set as long as allowlist not updated ARROW_R_ENFORCE_CHECKSUM: "true" - ARROW_R_CHECKSUM_path: "repo/libarrow/bin" + ARROW_R_CHECKSUM_PATH: "repo/libarrow/bin" run: | on_windows <- 
tolower(Sys.info()[["sysname"]]) == "windows" From b4e324f4b1501e40c99fbec5d7263410fd8e3ee6 Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Sat, 7 Oct 2023 04:04:17 +0200 Subject: [PATCH 12/30] chown docker build files to prevent permission issues --- dev/tasks/r/github.packages.yml | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/dev/tasks/r/github.packages.yml b/dev/tasks/r/github.packages.yml index 0bdcc50064a..cf267ad3a30 100644 --- a/dev/tasks/r/github.packages.yml +++ b/dev/tasks/r/github.packages.yml @@ -153,14 +153,16 @@ jobs: PKG_FILE: arrow-{{ '${{ needs.source.outputs.pkg_version }}' }}.zip VERSION: {{ '${{ needs.source.outputs.pkg_version }}' }} run: | + # These files were created by the docker user so we have to chown them + chown -R $USER:$USER arrow/r/libarrow + cd arrow/r/libarrow/dist - # These files were created by the docker user so we have to sudo to get them - sudo -E zip -r $PKG_FILE lib/ include/ + zip -r $PKG_FILE lib/ include/ - name: Create Checksum shell: bash run: | cd arrow/r/libarrow/dist - sudo shasum -a 512 arrow-*.zip > arrow-{{ '${{ needs.source.outputs.pkg_version }}' }}.zip.sha512 + shasum -a 512 arrow-*.zip > arrow-{{ '${{ needs.source.outputs.pkg_version }}' }}.zip.sha512 - name: Upload binary artifact uses: actions/upload-artifact@v3 with: From 0428ad90c68f6512ecc8bdc88288daeec3f19744 Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Sat, 7 Oct 2023 04:21:33 +0200 Subject: [PATCH 13/30] use absolute checksum path --- dev/tasks/r/github.packages.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/tasks/r/github.packages.yml b/dev/tasks/r/github.packages.yml index cf267ad3a30..069fdc57223 100644 --- a/dev/tasks/r/github.packages.yml +++ b/dev/tasks/r/github.packages.yml @@ -259,7 +259,7 @@ jobs: ARROW_R_DEV: "true" LIBARROW_BINARY: "true" # has to be set as long as allowlist not updated ARROW_R_ENFORCE_CHECKSUM: "true" - ARROW_R_CHECKSUM_PATH: "repo/libarrow/bin" + 
ARROW_R_CHECKSUM_PATH: "{{ '${{ github.workspace }}' }}/repo/libarrow/bin" run: | on_windows <- tolower(Sys.info()[["sysname"]]) == "windows" From 28ce9e70dbd9dbc112458d6640580cabb2101011 Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Sat, 7 Oct 2023 04:22:02 +0200 Subject: [PATCH 14/30] fix log message and checksum command --- r/tools/nixlibs.R | 6 +++--- r/tools/winlibs.R | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/r/tools/nixlibs.R b/r/tools/nixlibs.R index 57de45d5aef..025269ad6b0 100644 --- a/r/tools/nixlibs.R +++ b/r/tools/nixlibs.R @@ -113,14 +113,14 @@ download_binary <- function(lib) { # shasum -a is more portable than sha512sum checksum_ok <- system2("shasum", args = c( - "-a", "512", "--status", "-c", checksum + "-a", "512", "--status", "-c", checksum_file )) if (checksum_ok != 0) { - cat("*** Checksum validation failed for libarrow binary: ", zip_file) + cat("*** Checksum validation failed for libarrow binary: ", zip_file, "\n") libfile <- NULL } - cat("*** Checksum validated successfully for libarrow binary: ", zip_file) + cat("*** Checksum validated successfully for libarrow binary: ", zip_file, "\n") } libfile diff --git a/r/tools/winlibs.R b/r/tools/winlibs.R index efa02c87390..298a82dc2a9 100644 --- a/r/tools/winlibs.R +++ b/r/tools/winlibs.R @@ -74,9 +74,9 @@ if (!file.exists(sprintf("windows/arrow-%s/include/arrow/api.h", VERSION))) { checksum_ok <- system2("sha512sum", args = c("--status", "-c", checksum)) if (checksum_ok != 0) { - cat("*** Checksum validation failed for libarrow binary: ", zip_file) + stop("*** Checksum validation failed for libarrow binary: ", zip_file) } - cat("*** Checksum validated successfully for libarrow binary: ", zip_file) + cat("*** Checksum validated successfully for libarrow binary: ", zip_file, "\n") } } dir.create("windows", showWarnings = FALSE) From 5e01e62a2fed00062f3534e41bb03dbe50c0210e Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Sat, 7 Oct 2023 04:35:42 +0200 
Subject: [PATCH 15/30] chown needs sudo --- dev/tasks/r/github.packages.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/tasks/r/github.packages.yml b/dev/tasks/r/github.packages.yml index 069fdc57223..bf3440f73e7 100644 --- a/dev/tasks/r/github.packages.yml +++ b/dev/tasks/r/github.packages.yml @@ -154,7 +154,7 @@ jobs: VERSION: {{ '${{ needs.source.outputs.pkg_version }}' }} run: | # These files were created by the docker user so we have to chown them - chown -R $USER:$USER arrow/r/libarrow + sudo chown -R $USER:$USER arrow/r/libarrow cd arrow/r/libarrow/dist zip -r $PKG_FILE lib/ include/ From ed758f6a9b8e0fc30c9907a0666e96f2d25a531b Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Sat, 7 Oct 2023 04:43:22 +0200 Subject: [PATCH 16/30] validate checksum with linux binary --- dev/tasks/r/github.packages.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dev/tasks/r/github.packages.yml b/dev/tasks/r/github.packages.yml index bf3440f73e7..9970640a41c 100644 --- a/dev/tasks/r/github.packages.yml +++ b/dev/tasks/r/github.packages.yml @@ -435,6 +435,8 @@ jobs: LIBARROW_BINARY: "FALSE" ARROW_R_DEV: "TRUE" CMAKE_FIND_DEBUG_MODE: "ON" + ARROW_R_ENFORCE_CHECKSUM: "true" + ARROW_R_CHECKSUM_PATH: "{{ '${{ github.workspace }}' }}/repo/libarrow/bin" {{ macros.github_set_sccache_envvars()|indent(8) }} shell: Rscript {0} run: | From 6ef1495c888bfbd976110b69f2ffc130cabe4234 Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Sat, 7 Oct 2023 04:56:58 +0200 Subject: [PATCH 17/30] fix log msg --- r/tools/nixlibs.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/r/tools/nixlibs.R b/r/tools/nixlibs.R index 025269ad6b0..ba434d2cc6a 100644 --- a/r/tools/nixlibs.R +++ b/r/tools/nixlibs.R @@ -117,10 +117,11 @@ download_binary <- function(lib) { )) if (checksum_ok != 0) { - cat("*** Checksum validation failed for libarrow binary: ", zip_file, "\n") + cat("*** Checksum validation failed for libarrow binary: ", libfile, "\n") 
libfile <- NULL + } else { + cat("*** Checksum validated successfully for libarrow binary: ", libfile, "\n") } - cat("*** Checksum validated successfully for libarrow binary: ", zip_file, "\n") } libfile From 3aad00200f3df4c6061d66f7f5ffa5ab8466ab8c Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Sat, 7 Oct 2023 05:00:03 +0200 Subject: [PATCH 18/30] fix file name --- r/tools/nixlibs.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/tools/nixlibs.R b/r/tools/nixlibs.R index ba434d2cc6a..170987e52ba 100644 --- a/r/tools/nixlibs.R +++ b/r/tools/nixlibs.R @@ -108,7 +108,7 @@ download_binary <- function(lib) { # validate binary checksum for CRAN release only if (dir.exists(checksum_path) && is_release || env_is("ARROW_R_ENFORCE_CHECKSUM", "true")) { - checksum_file <- sub(".+/bin/(.+\\.zip)", "\\1", binary_url) + checksum_file <- sub(".+/bin/(.+\\.zip)", "\\1\\.sha512", binary_url) checksum_file <- file.path(checksum_path, checksum_file) # shasum -a is more portable than sha512sum From cec5269d39426f1375808d2c56cf2f1c90397f4f Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Sat, 7 Oct 2023 05:22:48 +0200 Subject: [PATCH 19/30] actually test checksum with linux binary --- dev/tasks/r/github.packages.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dev/tasks/r/github.packages.yml b/dev/tasks/r/github.packages.yml index 9970640a41c..89526bccdf5 100644 --- a/dev/tasks/r/github.packages.yml +++ b/dev/tasks/r/github.packages.yml @@ -351,6 +351,8 @@ jobs: ARROW_R_DEV: "TRUE" LIBARROW_BUILD: "FALSE" LIBARROW_BINARY: {{ '${{ matrix.config.libarrow_binary }}' }} + ARROW_R_ENFORCE_CHECKSUM: "true" + ARROW_R_CHECKSUM_PATH: "{{ '${{ github.workspace }}' }}/repo/libarrow/bin" shell: bash run: | Rscript -e ' @@ -435,8 +437,6 @@ jobs: LIBARROW_BINARY: "FALSE" ARROW_R_DEV: "TRUE" CMAKE_FIND_DEBUG_MODE: "ON" - ARROW_R_ENFORCE_CHECKSUM: "true" - ARROW_R_CHECKSUM_PATH: "{{ '${{ github.workspace }}' }}/repo/libarrow/bin" {{ 
macros.github_set_sccache_envvars()|indent(8) }} shell: Rscript {0} run: | From 88dde984514318dfa89e302299724550d85743d0 Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Sat, 7 Oct 2023 05:54:16 +0200 Subject: [PATCH 20/30] use versioned filename to match checksum in nixlibs --- r/tools/nixlibs.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/r/tools/nixlibs.R b/r/tools/nixlibs.R index 170987e52ba..33061aabed0 100644 --- a/r/tools/nixlibs.R +++ b/r/tools/nixlibs.R @@ -89,7 +89,7 @@ thirdparty_dependency_dir <- Sys.getenv("ARROW_THIRDPARTY_DEPENDENCY_DIR", "tool download_binary <- function(lib) { - libfile <- tempfile() + libfile <- paste0("arrow-", VERSION, ".zip") binary_url <- paste0(arrow_repo, "bin/", lib, "/arrow-", VERSION, ".zip") if (try_download(binary_url, libfile)) { if (!quietly) { @@ -118,6 +118,7 @@ download_binary <- function(lib) { if (checksum_ok != 0) { cat("*** Checksum validation failed for libarrow binary: ", libfile, "\n") + unlink(libfile) libfile <- NULL } else { cat("*** Checksum validated successfully for libarrow binary: ", libfile, "\n") From 3aaecf6ae983d03e9dc1d9ca9921389a2ac2c0a2 Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Sat, 7 Oct 2023 06:35:02 +0200 Subject: [PATCH 21/30] fall back to sha512sum if shasum not available --- r/tools/nixlibs.R | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/r/tools/nixlibs.R b/r/tools/nixlibs.R index 33061aabed0..51329acb90e 100644 --- a/r/tools/nixlibs.R +++ b/r/tools/nixlibs.R @@ -110,11 +110,23 @@ download_binary <- function(lib) { env_is("ARROW_R_ENFORCE_CHECKSUM", "true")) { checksum_file <- sub(".+/bin/(.+\\.zip)", "\\1\\.sha512", binary_url) checksum_file <- file.path(checksum_path, checksum_file) + checksum_cmd <- "shasum" + checksum_args <- c("--status", "-a", "512", "-c", checksum_file) + + # shasum is not available on all linux versions + status_shasum <- try( + suppressWarnings( + system2("shasum", args = 
c("--help"), stdout = FALSE, stderr = FALSE) + ), + silent = TRUE + ) - # shasum -a is more portable than sha512sum - checksum_ok <- system2("shasum", args = c( - "-a", "512", "--status", "-c", checksum_file - )) + if (inherits(status_shasum, "try-error") || is.integer(status_shasum) && status_shasum != 0) { + checksum_cmd <- "sha512sum" + checksum_args <- c("--status", "-c", checksum_file) + } + + checksum_ok <- system2(check_cmd, args = checksum_args) if (checksum_ok != 0) { cat("*** Checksum validation failed for libarrow binary: ", libfile, "\n") From 924286e2ba25072a0b1f211cac79688c0b0b3a3b Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Sat, 7 Oct 2023 07:08:07 +0200 Subject: [PATCH 22/30] fix typo --- r/tools/nixlibs.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/tools/nixlibs.R b/r/tools/nixlibs.R index 51329acb90e..94169cc1ef0 100644 --- a/r/tools/nixlibs.R +++ b/r/tools/nixlibs.R @@ -126,7 +126,7 @@ download_binary <- function(lib) { checksum_args <- c("--status", "-c", checksum_file) } - checksum_ok <- system2(check_cmd, args = checksum_args) + checksum_ok <- system2(checksum_cmd, args = checksum_args) if (checksum_ok != 0) { cat("*** Checksum validation failed for libarrow binary: ", libfile, "\n") From 7477b23326f9478690b5e5e49e461b6f1e0393be Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Sun, 8 Oct 2023 02:36:06 +0200 Subject: [PATCH 23/30] fix use of RWINLIB_LOCAL --- r/tools/winlibs.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/r/tools/winlibs.R b/r/tools/winlibs.R index 298a82dc2a9..34e62d66c9c 100644 --- a/r/tools/winlibs.R +++ b/r/tools/winlibs.R @@ -30,7 +30,8 @@ if (!file.exists(sprintf("windows/arrow-%s/include/arrow/api.h", VERSION))) { if (!file.exists(localfile)) { cat(sprintf("*** %s does not exist; build will fail\n", localfile)) } - file.copy(localfile, "lib.zip") + zip_file <- "lib.zip" + file.copy(localfile, zip_file) } else { # Download static arrow from the apache 
artifactory quietly <- !identical(tolower(Sys.getenv("ARROW_R_DEV")), "true") From add36edfd951f1d23f3e4d7cf5278384096d555f Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Sun, 8 Oct 2023 02:36:48 +0200 Subject: [PATCH 24/30] don't rely on env, use args explicitly --- r/tools/winlibs.R | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/r/tools/winlibs.R b/r/tools/winlibs.R index 34e62d66c9c..3caf3ff0629 100644 --- a/r/tools/winlibs.R +++ b/r/tools/winlibs.R @@ -18,9 +18,9 @@ args <- commandArgs(TRUE) VERSION <- args[1] dev_version <- package_version(VERSION)[1, 4] +# Small dev versions are added for R-only changes during CRAN submission. is_release <- is.na(dev_version) || dev_version < "100" env_is <- function(var, value) identical(tolower(Sys.getenv(var)), value) -checksum_path <- Sys.getenv("ARROW_R_CHECKSUM_PATH", "tools/checksums") if (!file.exists(sprintf("windows/arrow-%s/include/arrow/api.h", VERSION))) { if (length(args) > 1) { @@ -35,10 +35,10 @@ if (!file.exists(sprintf("windows/arrow-%s/include/arrow/api.h", VERSION))) { } else { # Download static arrow from the apache artifactory quietly <- !identical(tolower(Sys.getenv("ARROW_R_DEV")), "true") - get_file <- function(template, version) { + get_file <- function(template, version, dest_file) { try( suppressWarnings( - download.file(sprintf(template, version), zip_file, quiet = quietly) + download.file(sprintf(template, version), dest_file, quiet = quietly) ), silent = quietly ) @@ -57,22 +57,22 @@ if (!file.exists(sprintf("windows/arrow-%s/include/arrow/api.h", VERSION))) { zip_file <- sprintf("arrow-%s.zip", VERSION) - # Small dev versions are added for R-only changes during CRAN submission. 
if (is_release) { VERSION <- package_version(VERSION)[1, 1:3] zip_file <- sprintf("arrow-%s.zip", VERSION) - get_file(artifactory, VERSION) + get_file(artifactory, VERSION, zip_file) } else { - get_file(nightly, VERSION) + get_file(nightly, VERSION, zip_file) } + checksum_path <- Sys.getenv("ARROW_R_CHECKSUM_PATH", "tools/checksums") # validate binary checksum for CRAN release only if (dir.exists(checksum_path) && is_release || env_is("ARROW_R_ENFORCE_CHECKSUM", "true")) { - checksum <- sprintf("%s/windows/arrow-%s.zip.sha512", checksum_path, VERSION) + checksum_file <- sprintf("%s/windows/arrow-%s.zip.sha512", checksum_path, VERSION) # rtools does not have shasum with default config - checksum_ok <- system2("sha512sum", args = c("--status", "-c", checksum)) + checksum_ok <- system2("sha512sum", args = c("--status", "-c", checksum_file)) if (checksum_ok != 0) { stop("*** Checksum validation failed for libarrow binary: ", zip_file) From 4a9619ea90029baf0a966c57cab3cb27aeb2b373 Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Sun, 8 Oct 2023 03:23:23 +0200 Subject: [PATCH 25/30] unnest & refactor --- r/tools/winlibs.R | 118 +++++++++++++++++++++++++--------------------- 1 file changed, 64 insertions(+), 54 deletions(-) diff --git a/r/tools/winlibs.R b/r/tools/winlibs.R index 3caf3ff0629..9bab2cf8a88 100644 --- a/r/tools/winlibs.R +++ b/r/tools/winlibs.R @@ -18,69 +18,79 @@ args <- commandArgs(TRUE) VERSION <- args[1] dev_version <- package_version(VERSION)[1, 4] -# Small dev versions are added for R-only changes during CRAN submission. +# Small dev versions are added for R-only changes during CRAN submission is_release <- is.na(dev_version) || dev_version < "100" env_is <- function(var, value) identical(tolower(Sys.getenv(var)), value) +# We want to log the message in the style of the configure script +# not as an R error but still stop evaluation of this script. +lg <- function(...) { + cat("*** ", sprintf(...), "\n") +} +exit <- function(...) { + lg(...) 
+ return() +} -if (!file.exists(sprintf("windows/arrow-%s/include/arrow/api.h", VERSION))) { - if (length(args) > 1) { - # Arg 2 would be the path/to/lib.zip - localfile <- args[2] - cat(sprintf("*** Using RWINLIB_LOCAL %s\n", localfile)) - if (!file.exists(localfile)) { - cat(sprintf("*** %s does not exist; build will fail\n", localfile)) - } - zip_file <- "lib.zip" - file.copy(localfile, zip_file) - } else { - # Download static arrow from the apache artifactory - quietly <- !identical(tolower(Sys.getenv("ARROW_R_DEV")), "true") - get_file <- function(template, version, dest_file) { - try( - suppressWarnings( - download.file(sprintf(template, version), dest_file, quiet = quietly) - ), - silent = quietly - ) - } +if (is_release) { + # This is a release version, so we need to use the major.minor.patch version without + # the CRAN suffix/dev_version + VERSION <- package_version(VERSION)[1, 1:3] + # %1$s uses the first variable for both substitutions + url_template <- paste0( + getOption("arrow.repo", "https://apache.jfrog.io/artifactory/arrow/r/%1$s"), + "/libarrow/bin/windows/arrow-%1$s.zip" + ) +} else { + url_template <- paste0( + getOption("arrow.dev_repo", "https://nightlies.apache.org/arrow/r"), + "/libarrow/bin/windows/arrow-%s.zip" + ) +} - # URL templates - nightly <- paste0( - getOption("arrow.dev_repo", "https://nightlies.apache.org/arrow/r"), - "/libarrow/bin/windows/arrow-%s.zip" - ) - # %1$s uses the first variable for both substitutions - artifactory <- paste0( - getOption("arrow.repo", "https://apache.jfrog.io/artifactory/arrow/r/%1$s"), - "/libarrow/bin/windows/arrow-%1$s.zip" - ) +if (file.exists(sprintf("windows/arrow-%s/include/arrow/api.h", VERSION))) { + exit("Found local Arrow %s!", VERSION) +} - zip_file <- sprintf("arrow-%s.zip", VERSION) +zip_file <- sprintf("arrow-%s.zip", VERSION) - if (is_release) { - VERSION <- package_version(VERSION)[1, 1:3] - zip_file <- sprintf("arrow-%s.zip", VERSION) +if (length(args) > 1) { + # Arg 2 would be the 
path/to/lib.zip + localfile <- args[2] + if (!file.exists(localfile)) { + exit("RWINLIB_LOCAL '%s' does not exist. Build will fail.", localfile) + } else { + lg("Using RWINLIB_LOCAL %s", localfile) + } + file.copy(localfile, zip_file) +} else { + quietly <- !identical(tolower(Sys.getenv("ARROW_R_DEV")), "true") + binary_url <- sprintf(url_template, VERSION) + try( + suppressWarnings( + download.file(binary_url, zip_file, quiet = quietly) + ), + silent = quietly + ) - get_file(artifactory, VERSION, zip_file) - } else { - get_file(nightly, VERSION, zip_file) - } + if (!file.exists(zip_file) || file.size(zip_file) == 0) { + exit("Failed to download libarrow binary from %s. Build will fail.", binary_url) + } - checksum_path <- Sys.getenv("ARROW_R_CHECKSUM_PATH", "tools/checksums") - # validate binary checksum for CRAN release only - if (dir.exists(checksum_path) && is_release || - env_is("ARROW_R_ENFORCE_CHECKSUM", "true")) { - checksum_file <- sprintf("%s/windows/arrow-%s.zip.sha512", checksum_path, VERSION) - # rtools does not have shasum with default config - checksum_ok <- system2("sha512sum", args = c("--status", "-c", checksum_file)) + checksum_path <- Sys.getenv("ARROW_R_CHECKSUM_PATH", "tools/checksums") + # validate binary checksum for CRAN release only + if (dir.exists(checksum_path) && is_release || + env_is("ARROW_R_ENFORCE_CHECKSUM", "true")) { + checksum_file <- sprintf("%s/windows/arrow-%s.zip.sha512", checksum_path, VERSION) + # rtools does not have shasum with default config + checksum_ok <- system2("sha512sum", args = c("--status", "-c", checksum_file)) - if (checksum_ok != 0) { - stop("*** Checksum validation failed for libarrow binary: ", zip_file) - } - cat("*** Checksum validated successfully for libarrow binary: ", zip_file, "\n") + if (checksum_ok != 0) { + exit("Checksum validation failed for libarrow binary: %s", zip_file) } + lg("Checksum validated successfully for libarrow binary: %s", zip_file) } - dir.create("windows", showWarnings = 
FALSE) - unzip(zip_file, exdir = "windows") - unlink(zip_file) } + +dir.create("windows", showWarnings = FALSE) +unzip(zip_file, exdir = "windows") +unlink(zip_file) From 4dae43c38ba7d64c9cb189ed2ff63852079ea90c Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Sun, 8 Oct 2023 03:44:17 +0200 Subject: [PATCH 26/30] fix return --- r/tools/winlibs.R | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/r/tools/winlibs.R b/r/tools/winlibs.R index 9bab2cf8a88..8ddc2284ecc 100644 --- a/r/tools/winlibs.R +++ b/r/tools/winlibs.R @@ -22,14 +22,10 @@ dev_version <- package_version(VERSION)[1, 4] is_release <- is.na(dev_version) || dev_version < "100" env_is <- function(var, value) identical(tolower(Sys.getenv(var)), value) # We want to log the message in the style of the configure script -# not as an R error but still stop evaluation of this script. +# not as an R error. Use `return` to exit the script after logging. lg <- function(...) { cat("*** ", sprintf(...), "\n") } -exit <- function(...) { - lg(...) - return() -} if (is_release) { # This is a release version, so we need to use the major.minor.patch version without @@ -48,7 +44,8 @@ if (is_release) { } if (file.exists(sprintf("windows/arrow-%s/include/arrow/api.h", VERSION))) { - exit("Found local Arrow %s!", VERSION) + lg("Found local Arrow %s!", VERSION) + return() } zip_file <- sprintf("arrow-%s.zip", VERSION) @@ -57,7 +54,8 @@ if (length(args) > 1) { # Arg 2 would be the path/to/lib.zip localfile <- args[2] if (!file.exists(localfile)) { - exit("RWINLIB_LOCAL '%s' does not exist. Build will fail.", localfile) + lg("RWINLIB_LOCAL '%s' does not exist. Build will fail.", localfile) + return() } else { lg("Using RWINLIB_LOCAL %s", localfile) } @@ -73,7 +71,8 @@ if (length(args) > 1) { ) if (!file.exists(zip_file) || file.size(zip_file) == 0) { - exit("Failed to download libarrow binary from %s. Build will fail.", binary_url) + lg("Failed to download libarrow binary from %s. 
Build will fail.", binary_url) + return() } checksum_path <- Sys.getenv("ARROW_R_CHECKSUM_PATH", "tools/checksums") @@ -85,7 +84,8 @@ if (length(args) > 1) { checksum_ok <- system2("sha512sum", args = c("--status", "-c", checksum_file)) if (checksum_ok != 0) { - exit("Checksum validation failed for libarrow binary: %s", zip_file) + lg("Checksum validation failed for libarrow binary: %s", zip_file) + return() } lg("Checksum validated successfully for libarrow binary: %s", zip_file) } From 7a6c452aba7d9ca23130e7497733af8be4160743 Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Wed, 11 Oct 2023 03:05:38 +0200 Subject: [PATCH 27/30] allow checksum bypass via envvar --- r/tools/nixlibs.R | 9 ++++++--- r/tools/winlibs.R | 8 ++++++-- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/r/tools/nixlibs.R b/r/tools/nixlibs.R index 94169cc1ef0..1346e209d8d 100644 --- a/r/tools/nixlibs.R +++ b/r/tools/nixlibs.R @@ -104,10 +104,13 @@ download_binary <- function(lib) { } libfile <- NULL } - + # Explicitly setting the env var to "false" will skip checksum validation + # e.g. in case the included checksums are stale. + skip_checksum <- env_is("ARROW_R_ENFORCE_CHECKSUM", "false") + enforce_checksum <- env_is("ARROW_R_ENFORCE_CHECKSUM", "true") # validate binary checksum for CRAN release only - if (dir.exists(checksum_path) && is_release || - env_is("ARROW_R_ENFORCE_CHECKSUM", "true")) { + if (!skip_checksum && dir.exists(checksum_path) && is_release || + enforce_checksum) { checksum_file <- sub(".+/bin/(.+\\.zip)", "\\1\\.sha512", binary_url) checksum_file <- file.path(checksum_path, checksum_file) checksum_cmd <- "shasum" diff --git a/r/tools/winlibs.R b/r/tools/winlibs.R index 8ddc2284ecc..314062044dc 100644 --- a/r/tools/winlibs.R +++ b/r/tools/winlibs.R @@ -76,9 +76,13 @@ if (length(args) > 1) { } checksum_path <- Sys.getenv("ARROW_R_CHECKSUM_PATH", "tools/checksums") + # Explicitly setting the env var to "false" will skip checksum validation + # e.g. 
in case the included checksums are stale. + skip_checksum <- env_is("ARROW_R_ENFORCE_CHECKSUM", "false") + enforce_checksum <- env_is("ARROW_R_ENFORCE_CHECKSUM", "true") # validate binary checksum for CRAN release only - if (dir.exists(checksum_path) && is_release || - env_is("ARROW_R_ENFORCE_CHECKSUM", "true")) { + if (!skip_checksum && dir.exists(checksum_path) && is_release || + enforce_checksum) { checksum_file <- sprintf("%s/windows/arrow-%s.zip.sha512", checksum_path, VERSION) # rtools does not have shasum with default config checksum_ok <- system2("sha512sum", args = c("--status", "-c", checksum_file)) From 4ff5d4ca97e1d010160c524fff06aece25452d21 Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Wed, 11 Oct 2023 03:07:07 +0200 Subject: [PATCH 28/30] add checksum download to release checklist --- r/Makefile | 2 +- r/PACKAGING.md | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/r/Makefile b/r/Makefile index 3679840ca9f..514f5adf4e1 100644 --- a/r/Makefile +++ b/r/Makefile @@ -67,5 +67,5 @@ clean: -rm src/Makevars.win -rm -rf arrow.Rcheck/ -rm -rf libarrow/ - -rm -rf tools/cpp/ tools/.env tools/NOTICE.txt tools/LICENSE.txt + -rm -rf tools/cpp/ tools/.env tools/NOTICE.txt tools/LICENSE.txt tools/checksums -find . -name "*.orig" -delete diff --git a/r/PACKAGING.md b/r/PACKAGING.md index edfca651e9d..3140ffe2b2b 100644 --- a/r/PACKAGING.md +++ b/r/PACKAGING.md @@ -100,6 +100,7 @@ instead of the *release candidate*: - [ ] Create a PR entitled `WIP: [R] Verify CRAN release-10.0.1-rc0`. Add a comment `@github-actions crossbow submit --group r` to run all R crossbow jobs against the CRAN-specific release branch. +- [ ] Run `Rscript tools/update-checksums.R <version>` to download the checksums for the pre-compiled binaries from the ASF artifactory into the tools directory. 
- [ ] Regenerate arrow_X.X.X.tar.gz (i.e., `make build`) Ensure linux binary packages are available: From 4c8511ea62f0601750966f54aa2c17757db09fb2 Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Wed, 11 Oct 2023 03:11:40 +0200 Subject: [PATCH 29/30] fix name of update script --- r/tools/{update-checksum.R => update-checksums.R} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename r/tools/{update-checksum.R => update-checksums.R} (100%) diff --git a/r/tools/update-checksum.R b/r/tools/update-checksums.R similarity index 100% rename from r/tools/update-checksum.R rename to r/tools/update-checksums.R From dd292b667b56a45a171276b1cb5d8dd0f8f076b3 Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Wed, 11 Oct 2023 03:12:15 +0200 Subject: [PATCH 30/30] update usage --- r/tools/update-checksums.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/tools/update-checksums.R b/r/tools/update-checksums.R index eb241d57b6b..2aa9df31716 100644 --- a/r/tools/update-checksums.R +++ b/r/tools/update-checksums.R @@ -29,7 +29,7 @@ VERSION <- args[1] tools_root <- "" if (length(args) != 1) { - stop("Usage: Rscript tools/update-checksum.R <version>") + stop("Usage: Rscript tools/update-checksums.R <version>") } tasks_yml <- "../dev/tasks/tasks.yml"