-
Notifications
You must be signed in to change notification settings - Fork 4k
GH-37941: [R][CI][Release] Add checksum verification for pre-compiled binaries #38115
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
8974e19
357f62a
b80ee6d
f0cfd43
eceadb4
f1219f2
cc0d876
0beda0e
16ae261
9fbd82a
271574c
b4e324f
0428ad9
28ce9e7
5e01e62
ed758f6
6ef1495
3aad002
cec5269
88dde98
3aaecf6
924286e
7477b23
add36ed
4a9619e
4dae43c
7a6c452
4ff5d4c
77fe890
4c8511e
dd292b6
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -28,11 +28,12 @@ if (test_mode && is.na(VERSION)) { | |
| } | ||
|
|
||
| dev_version <- package_version(VERSION)[1, 4] | ||
| is_release <- is.na(dev_version) || dev_version < "100" | ||
| on_macos <- tolower(Sys.info()[["sysname"]]) == "darwin" | ||
|
|
||
| checksum_path <- Sys.getenv("ARROW_R_CHECKSUM_PATH", "tools/checksums") | ||
|
|
||
| # Small dev versions are added for R-only changes during CRAN submission. | ||
| if (is.na(dev_version) || dev_version < "100") { | ||
| if (is_release) { | ||
| VERSION <- package_version(VERSION)[1, 1:3] | ||
| arrow_repo <- paste0(getOption("arrow.repo", sprintf("https://apache.jfrog.io/artifactory/arrow/r/%s", VERSION)), "/libarrow/") | ||
| } else { | ||
|
|
@@ -88,7 +89,7 @@ thirdparty_dependency_dir <- Sys.getenv("ARROW_THIRDPARTY_DEPENDENCY_DIR", "tool | |
|
|
||
|
|
||
| download_binary <- function(lib) { | ||
| libfile <- tempfile() | ||
| libfile <- paste0("arrow-", VERSION, ".zip") | ||
| binary_url <- paste0(arrow_repo, "bin/", lib, "/arrow-", VERSION, ".zip") | ||
| if (try_download(binary_url, libfile)) { | ||
| if (!quietly) { | ||
|
|
@@ -103,6 +104,42 @@ download_binary <- function(lib) { | |
| } | ||
| libfile <- NULL | ||
| } | ||
| # Explicitly setting the env var to "false" will skip checksum validation | ||
| # e.g. in case the included checksums are stale. | ||
| skip_checksum <- env_is("ARROW_R_ENFORCE_CHECKSUM", "false") | ||
| enforce_checksum <- env_is("ARROW_R_ENFORCE_CHECKSUM", "true") | ||
| # validate binary checksum for CRAN release only | ||
| if (!skip_checksum && dir.exists(checksum_path) && is_release || | ||
| enforce_checksum) { | ||
paleolimbot marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| checksum_file <- sub(".+/bin/(.+\\.zip)", "\\1\\.sha512", binary_url) | ||
assignUser marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| checksum_file <- file.path(checksum_path, checksum_file) | ||
| checksum_cmd <- "shasum" | ||
| checksum_args <- c("--status", "-a", "512", "-c", checksum_file) | ||
|
|
||
| # shasum is not available on all linux versions | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Use
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah nice 👍 |
||
| status_shasum <- try( | ||
| suppressWarnings( | ||
| system2("shasum", args = c("--help"), stdout = FALSE, stderr = FALSE) | ||
| ), | ||
| silent = TRUE | ||
| ) | ||
|
|
||
| if (inherits(status_shasum, "try-error") || is.integer(status_shasum) && status_shasum != 0) { | ||
| checksum_cmd <- "sha512sum" | ||
| checksum_args <- c("--status", "-c", checksum_file) | ||
| } | ||
|
|
||
| checksum_ok <- system2(checksum_cmd, args = checksum_args) | ||
assignUser marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| if (checksum_ok != 0) { | ||
| cat("*** Checksum validation failed for libarrow binary: ", libfile, "\n") | ||
| unlink(libfile) | ||
| libfile <- NULL | ||
| } else { | ||
| cat("*** Checksum validated successfully for libarrow binary: ", libfile, "\n") | ||
| } | ||
| } | ||
|
|
||
| libfile | ||
| } | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,67 @@ | ||
| # Licensed to the Apache Software Foundation (ASF) under one | ||
| # or more contributor license agreements. See the NOTICE file | ||
| # distributed with this work for additional information | ||
| # regarding copyright ownership. The ASF licenses this file | ||
| # to you under the Apache License, Version 2.0 (the | ||
| # "License"); you may not use this file except in compliance | ||
| # with the License. You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, | ||
| # software distributed under the License is distributed on an | ||
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
| # KIND, either express or implied. See the License for the | ||
| # specific language governing permissions and limitations | ||
| # under the License. | ||
|
|
||
| # Run this script AFTER the release was voted and the artifacts | ||
| # are moved into the final dir. This script will download the checksum | ||
| # files and save them to the tools/checksums directory mirroring the | ||
| # artifactory layout. *libs.R uses these files to validate the downloaded | ||
| # binaries when installing the package. | ||
| # | ||
| # Run this script from the r/ directory of the arrow repo with the version | ||
| # as the first argument: $ Rscript tools/update-checksums.R 14.0.0 | ||
|
|
||
| args <- commandArgs(TRUE) | ||
| VERSION <- args[1] | ||
| tools_root <- "" | ||
|
|
||
| if (length(args) != 1) { | ||
| stop("Usage: Rscript tools/update-checksums.R <version>") | ||
| } | ||
|
|
||
| tasks_yml <- "../dev/tasks/tasks.yml" | ||
|
|
||
| if (!file.exists(tasks_yml)) { | ||
| stop("Run this script from the r/ directory of the arrow repo") | ||
| } | ||
|
|
||
| # Get the libarrow binary paths from the tasks.yml file | ||
| binary_paths <- readLines(tasks_yml) |> | ||
| grep("r-lib__libarrow", x = _, value = TRUE) |> | ||
| sub(".+r-lib__libarrow__bin__(.+\\.zip)", "\\1", x = _) |> | ||
| sub("{no_rc_r_version}", VERSION, fixed = TRUE, x = _) |> | ||
| sub("__", "/", x = _) |> | ||
| sub("\\.zip", ".zip", fixed = TRUE, x = _) | ||
|
|
||
| artifactory_root <- "https://apache.jfrog.io/artifactory/arrow/r/%s/libarrow/bin/%s" | ||
|
|
||
| # Get the checksum files from the artifactory | ||
assignUser marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| for (path in binary_paths) { | ||
| sha_path <- paste0(path, ".sha512") | ||
| file <- file.path("tools/checksums", sha_path) | ||
| dirname(file) |> dir.create(path = _, recursive = TRUE, showWarnings = FALSE) | ||
|
|
||
| url <- sprintf(artifactory_root, VERSION, sha_path) | ||
| download.file(url, file, quiet = TRUE, cacheOK = FALSE) | ||
|
|
||
| if (grepl("windows", path)) { | ||
| # Carriage returns (non-UNIX line endings) cause errors with msys2 sha512sum, so strip them | ||
| sed_status <- system2("sed", args = c("-i", "s/\\r//", file)) | ||
| if (sed_status != 0) { | ||
| stop("Failed to remove \\r from windows checksum file. Exit code: ", sed_status) | ||
| } | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -17,55 +17,84 @@ | |
|
|
||
| args <- commandArgs(TRUE) | ||
| VERSION <- args[1] | ||
| if (!file.exists(sprintf("windows/arrow-%s/include/arrow/api.h", VERSION))) { | ||
| if (length(args) > 1) { | ||
| # Arg 2 would be the path/to/lib.zip | ||
| localfile <- args[2] | ||
| cat(sprintf("*** Using RWINLIB_LOCAL %s\n", localfile)) | ||
| if (!file.exists(localfile)) { | ||
| cat(sprintf("*** %s does not exist; build will fail\n", localfile)) | ||
| } | ||
| file.copy(localfile, "lib.zip") | ||
| } else { | ||
| # Download static arrow from the apache artifactory | ||
| quietly <- !identical(tolower(Sys.getenv("ARROW_R_DEV")), "true") | ||
| get_file <- function(template, version) { | ||
| try( | ||
| suppressWarnings( | ||
| download.file(sprintf(template, version), "lib.zip", quiet = quietly) | ||
| ), | ||
| silent = quietly | ||
| ) | ||
| } | ||
| dev_version <- package_version(VERSION)[1, 4] | ||
| # Small dev versions are added for R-only changes during CRAN submission | ||
| is_release <- is.na(dev_version) || dev_version < "100" | ||
| env_is <- function(var, value) identical(tolower(Sys.getenv(var)), value) | ||
| # We want to log the message in the style of the configure script | ||
| # not as an R error. Use `return` to exit the script after logging. | ||
| lg <- function(...) { | ||
| cat("*** ", sprintf(...), "\n") | ||
| } | ||
|
|
||
| # URL templates | ||
| nightly <- paste0( | ||
| getOption("arrow.dev_repo", "https://nightlies.apache.org/arrow/r"), | ||
| "/libarrow/bin/windows/arrow-%s.zip" | ||
| ) | ||
| # %1$s uses the first variable for both substitutions | ||
| artifactory <- paste0( | ||
| getOption("arrow.repo", "https://apache.jfrog.io/artifactory/arrow/r/%1$s"), | ||
| "/libarrow/bin/windows/arrow-%1$s.zip" | ||
| ) | ||
| rwinlib <- "https://github.com/rwinlib/arrow/archive/v%s.zip" | ||
| if (is_release) { | ||
| # This is a release version, so we need to use the major.minor.patch version without | ||
| # the CRAN suffix/dev_version | ||
| VERSION <- package_version(VERSION)[1, 1:3] | ||
| # %1$s uses the first variable for both substitutions | ||
| url_template <- paste0( | ||
| getOption("arrow.repo", "https://apache.jfrog.io/artifactory/arrow/r/%1$s"), | ||
| "/libarrow/bin/windows/arrow-%1$s.zip" | ||
| ) | ||
| } else { | ||
| url_template <- paste0( | ||
assignUser marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| getOption("arrow.dev_repo", "https://nightlies.apache.org/arrow/r"), | ||
| "/libarrow/bin/windows/arrow-%s.zip" | ||
| ) | ||
| } | ||
|
|
||
| dev_version <- package_version(VERSION)[1, 4] | ||
| if (file.exists(sprintf("windows/arrow-%s/include/arrow/api.h", VERSION))) { | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This was previously before the truncation of the version string so theoretically having a local 13.0.0 version of arrow would not have worked with package 13.0.0.1. Likely never happened but still ^^ |
||
| lg("Found local Arrow %s!", VERSION) | ||
| return() | ||
| } | ||
|
|
||
| # Small dev versions are added for R-only changes during CRAN submission. | ||
| if (is.na(dev_version) || dev_version < "100") { | ||
| VERSION <- package_version(VERSION)[1, 1:3] | ||
| get_file(rwinlib, VERSION) | ||
| zip_file <- sprintf("arrow-%s.zip", VERSION) | ||
|
|
||
| # If not found, fall back to apache artifactory | ||
| if (!file.exists("lib.zip")) { | ||
| get_file(artifactory, VERSION) | ||
| } | ||
| } else { | ||
| get_file(nightly, VERSION) | ||
| if (length(args) > 1) { | ||
| # Arg 2 would be the path/to/lib.zip | ||
| localfile <- args[2] | ||
| if (!file.exists(localfile)) { | ||
| lg("RWINLIB_LOCAL '%s' does not exist. Build will fail.", localfile) | ||
| return() | ||
| } else { | ||
| lg("Using RWINLIB_LOCAL %s", localfile) | ||
| } | ||
| file.copy(localfile, zip_file) | ||
| } else { | ||
| quietly <- !identical(tolower(Sys.getenv("ARROW_R_DEV")), "true") | ||
| binary_url <- sprintf(url_template, VERSION) | ||
| try( | ||
| suppressWarnings( | ||
| download.file(binary_url, zip_file, quiet = quietly) | ||
| ), | ||
| silent = quietly | ||
| ) | ||
|
|
||
| if (!file.exists(zip_file) || file.size(zip_file) == 0) { | ||
| lg("Failed to download libarrow binary from %s. Build will fail.", binary_url) | ||
| return() | ||
| } | ||
|
|
||
| checksum_path <- Sys.getenv("ARROW_R_CHECKSUM_PATH", "tools/checksums") | ||
| # Explicitly setting the env var to "false" will skip checksum validation | ||
| # e.g. in case the included checksums are stale. | ||
| skip_checksum <- env_is("ARROW_R_ENFORCE_CHECKSUM", "false") | ||
| enforce_checksum <- env_is("ARROW_R_ENFORCE_CHECKSUM", "true") | ||
| # validate binary checksum for CRAN release only | ||
| if (!skip_checksum && dir.exists(checksum_path) && is_release || | ||
| enforce_checksum) { | ||
| checksum_file <- sprintf("%s/windows/arrow-%s.zip.sha512", checksum_path, VERSION) | ||
| # rtools does not have shasum with default config | ||
| checksum_ok <- system2("sha512sum", args = c("--status", "-c", checksum_file)) | ||
|
|
||
| if (checksum_ok != 0) { | ||
| lg("Checksum validation failed for libarrow binary: %s", zip_file) | ||
| return() | ||
| } | ||
| lg("Checksum validated successfully for libarrow binary: %s", zip_file) | ||
| } | ||
| dir.create("windows", showWarnings = FALSE) | ||
| unzip("lib.zip", exdir = "windows") | ||
| unlink("lib.zip") | ||
| } | ||
|
|
||
| dir.create("windows", showWarnings = FALSE) | ||
| unzip(zip_file, exdir = "windows") | ||
| unlink(zip_file) | ||
Uh oh!
There was an error while loading. Please reload this page.