diff --git a/.env b/.env index d9f875a4d45..ab2e4b4fbe7 100644 --- a/.env +++ b/.env @@ -71,12 +71,12 @@ NUMBA=latest NUMPY=latest PANDAS=latest PYTHON=3.8 -R=4.2 +R=4.4 SPARK=master TURBODBC=latest -# These correspond to images on Docker Hub that contain R, e.g. rhub/ubuntu-gcc-release:latest -R_IMAGE=ubuntu-gcc-release +# These correspond to images on Docker Hub that contain R, e.g. rhub/ubuntu-release:latest +R_IMAGE=ubuntu-release R_ORG=rhub R_TAG=latest diff --git a/.github/workflows/r.yml b/.github/workflows/r.yml index 05c85fa6dc2..8228aaad7ce 100644 --- a/.github/workflows/r.yml +++ b/.github/workflows/r.yml @@ -121,7 +121,7 @@ jobs: strategy: fail-fast: false matrix: - r: ["4.3"] + r: ["4.4"] ubuntu: [20.04] force-tests: ["true"] env: @@ -192,7 +192,7 @@ jobs: fail-fast: false matrix: config: - - { org: "rhub", image: "debian-gcc-devel", tag: "latest", devtoolset: "" } + - { org: "rhub", image: "ubuntu-gcc12", tag: "latest", devtoolset: "" } env: R_ORG: ${{ matrix.config.org }} R_IMAGE: ${{ matrix.config.image }} diff --git a/ci/docker/linux-apt-docs.dockerfile b/ci/docker/linux-apt-docs.dockerfile index 76b5ae6f143..ec424b4e6ea 100644 --- a/ci/docker/linux-apt-docs.dockerfile +++ b/ci/docker/linux-apt-docs.dockerfile @@ -18,7 +18,7 @@ ARG base FROM ${base} -ARG r=4.2 +ARG r=4.4 ARG jdk=8 # See R install instructions at https://cloud.r-project.org/bin/linux/ubuntu/ diff --git a/ci/docker/linux-apt-lint.dockerfile b/ci/docker/linux-apt-lint.dockerfile index 2b94a488718..9ec80440a3c 100644 --- a/ci/docker/linux-apt-lint.dockerfile +++ b/ci/docker/linux-apt-lint.dockerfile @@ -40,7 +40,7 @@ RUN apt-get update && \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* -ARG r=4.2 +ARG r=4.4 RUN wget -qO- https://cloud.r-project.org/bin/linux/ubuntu/marutter_pubkey.asc | \ tee -a /etc/apt/trusted.gpg.d/cran_ubuntu_key.asc && \ # NOTE: Only R >= 4.0 is available in this repo diff --git a/ci/docker/linux-apt-r.dockerfile b/ci/docker/linux-apt-r.dockerfile index d93732abb00..a68354e3abf 100644 --- a/ci/docker/linux-apt-r.dockerfile +++ b/ci/docker/linux-apt-r.dockerfile @@ -35,7 +35,7 @@ ENV LANG=C.UTF-8 # Build R # [1] https://www.digitalocean.com/community/tutorials/how-to-install-r-on-ubuntu-18-04 # [2] https://linuxize.com/post/how-to-install-r-on-ubuntu-18-04/#installing-r-packages-from-cran -ARG r=3.6 +ARG r=4.4 RUN apt-get update -y && \ apt-get install -y \ dirmngr \ diff --git a/ci/etc/valgrind-cran.supp b/ci/etc/valgrind-cran.supp index 4d292202608..e93c2a3465f 100644 --- a/ci/etc/valgrind-cran.supp +++ b/ci/etc/valgrind-cran.supp @@ -16,7 +16,7 @@ # under the License. { - # `testthat::skip()`s cause a valgrind error that does not show up on CRAN. + # `testthat::skip()`s cause a valgrind error that does not show up on CRAN. Memcheck:Cond fun:gregexpr_Regexc @@ -32,3 +32,21 @@ fun:getvar fun:bcEval } +{ + # This also doesn't seem to cause issues on CRAN, so suppress it. 
+ + Memcheck:Leak + match-leak-kinds: possible + fun:malloc + fun:libdeflate_alloc_compressor + fun:do_memCompress + fun:bcEval_loop + fun:bcEval + fun:Rf_eval + fun:R_execClosure + fun:applyClosure_core + fun:Rf_applyClosure + fun:Rf_eval + fun:do_set + fun:Rf_eval +} diff --git a/ci/scripts/r_sanitize.sh b/ci/scripts/r_sanitize.sh index 600ee0fa2cb..f7ed07f0c86 100755 --- a/ci/scripts/r_sanitize.sh +++ b/ci/scripts/r_sanitize.sh @@ -49,7 +49,7 @@ export UBSAN_OPTIONS="print_stacktrace=1,suppressions=/arrow/r/tools/ubsan.supp" # run tests pushd tests -${R_BIN} < testthat.R > testthat.out 2>&1 || { cat testthat.out; exit 1; } +${R_BIN} --no-save < testthat.R > testthat.out 2>&1 || { cat testthat.out; exit 1; } cat testthat.out if grep -q "runtime error" testthat.out; then @@ -58,7 +58,7 @@ fi # run examples popd -${R_BIN} -e 'library(arrow); testthat::test_examples(".")' >> examples.out 2>&1 || { cat examples.out; exit 1; } +${R_BIN} --no-save -e 'library(arrow); testthat::test_examples(".")' >> examples.out 2>&1 || { cat examples.out; exit 1; } cat examples.out if grep -q "runtime error" examples.out; then diff --git a/ci/scripts/r_test.sh b/ci/scripts/r_test.sh index 72078ab3c06..95a49ee83a7 100755 --- a/ci/scripts/r_test.sh +++ b/ci/scripts/r_test.sh @@ -46,7 +46,12 @@ if [ "$ARROW_USE_PKG_CONFIG" != "false" ]; then export LD_LIBRARY_PATH=${ARROW_HOME}/lib:${LD_LIBRARY_PATH} export R_LD_LIBRARY_PATH=${LD_LIBRARY_PATH} fi -export _R_CHECK_COMPILATION_FLAGS_KNOWN_=${ARROW_R_CXXFLAGS} + +export _R_CHECK_COMPILATION_FLAGS_KNOWN_="${_R_CHECK_COMPILATION_FLAGS_KNOWN_} ${ARROW_R_CXXFLAGS}" +# These should generally be picked up, but are slightly wrong in rhub's containers it appears +# https://github.com/r-hub/containers/pull/63 +export _R_CHECK_COMPILATION_FLAGS_KNOWN_="${_R_CHECK_COMPILATION_FLAGS_KNOWN_} -Wno-parentheses -Werror=format-security -Wp,-D_FORTIFY_SOURCE=3" + if [ "$ARROW_R_DEV" = "TRUE" ]; then # These are sometimes used in the Arrow C++ build and are not a problem export _R_CHECK_COMPILATION_FLAGS_KNOWN_="${_R_CHECK_COMPILATION_FLAGS_KNOWN_} -Wno-attributes -msse4.2 -Wno-noexcept-type -Wno-subobject-linkage" diff --git a/ci/scripts/r_valgrind.sh b/ci/scripts/r_valgrind.sh index a14cb803ca8..0e40d792111 100755 --- a/ci/scripts/r_valgrind.sh +++ b/ci/scripts/r_valgrind.sh @@ -33,7 +33,7 @@ ${R_BIN} CMD INSTALL ${INSTALL_ARGS} arrow*.tar.gz pushd tests # to generate suppression files run: -# ${R_BIN} --vanilla -d "valgrind --tool=memcheck --leak-check=full --track-origins=yes --gen-suppressions=all --log-file=memcheck.log" -f testthat.supp +# ${R_BIN} --vanilla -d "valgrind --tool=memcheck --leak-check=full --track-origins=yes --gen-suppressions=all --log-file=memcheck.log" -f testthat.R ${R_BIN} --vanilla -d "valgrind --tool=memcheck --leak-check=full --track-origins=yes --suppressions=/${1}/ci/etc/valgrind-cran.supp" -f testthat.R |& tee testthat.out # valgrind --error-exitcode=1 should return an erroring exit code that we can catch, diff --git a/dev/tasks/r/github.linux.arrow.version.back.compat.yml b/dev/tasks/r/github.linux.arrow.version.back.compat.yml index 804f0d21270..086705dbb9c 100644 --- a/dev/tasks/r/github.linux.arrow.version.back.compat.yml +++ b/dev/tasks/r/github.linux.arrow.version.back.compat.yml @@ -73,6 +73,8 @@ jobs: config: # We use the R version that was released at the time of the arrow release in order # to make sure we can download binaries from RSPM. 
+ - { old_arrow_version: '14.0.2.1', r: '4.3' } + - { old_arrow_version: '13.0.0.1', r: '4.3' } - { old_arrow_version: '12.0.1.1', r: '4.3' } - { old_arrow_version: '11.0.0.3', r: '4.2' } - { old_arrow_version: '10.0.1', r: '4.2' } diff --git a/dev/tasks/r/github.linux.offline.build.yml b/dev/tasks/r/github.linux.offline.build.yml index 7a747ac4800..9ac0ebc4083 100644 --- a/dev/tasks/r/github.linux.offline.build.yml +++ b/dev/tasks/r/github.linux.offline.build.yml @@ -84,7 +84,7 @@ jobs: {{ macros.github_set_sccache_envvars()|indent(8)}} run: | cd arrow/r - R CMD INSTALL --install-tests --no-test-load --no-docs --no-help --no-byte-compile arrow_with_deps.tar.gz + R CMD INSTALL --install-tests --no-test-load --no-byte-compile arrow_with_deps.tar.gz - name: Run the tests run: R -e 'if(tools::testInstalledPackage("arrow") != 0L) stop("There was a test failure.")' - name: Dump test logs diff --git a/dev/tasks/r/github.linux.versions.yml b/dev/tasks/r/github.linux.versions.yml index 48093e9fd5b..753efe61d04 100644 --- a/dev/tasks/r/github.linux.versions.yml +++ b/dev/tasks/r/github.linux.versions.yml @@ -30,9 +30,9 @@ jobs: r_version: # We test devel, release, and oldrel in regular CI. # This is for older versions - - "3.6" - "4.0" - "4.1" + - "4.2" env: R_ORG: "rstudio" R_IMAGE: "r-base" diff --git a/dev/tasks/r/github.packages.yml b/dev/tasks/r/github.packages.yml index 2979f57bb61..9ca7e59a957 100644 --- a/dev/tasks/r/github.packages.yml +++ b/dev/tasks/r/github.packages.yml @@ -227,7 +227,7 @@ jobs: working-directory: 'arrow' extra-packages: cpp11 - name: Set CRAN like openssl - if: contains(matrix.platform.runs_on, 'arm64') + if: contains(matrix.platform.name, 'arm64') run: | # The arm64 runners contain openssl 1.1.1t in this path that is always included first so we need to override the # default setting of the brew --prefix as root dir to avoid version conflicts. @@ -300,16 +300,14 @@ jobs: # an OS that is not in the allowlist, so we have to opt-in to use the # binary. Other env vars used in r_docker_configure.sh can be added # here (like devtoolset) and wired up in the later steps. 
- - {image: "rhub/debian-clang-devel", libarrow_binary: "TRUE"} + - {image: "rhub/ubuntu-clang", libarrow_binary: "TRUE"} # fedora-clang-devel cannot use binaries bc of libc++ (uncomment to see the error) # - {image: "rhub/fedora-clang-devel", libarrow_binary: "TRUE"} - - {image: "rhub/ubuntu-gcc-release"} # currently ubuntu-20.04 (focal) - - {image: "rocker/r-ubuntu:22.04"} # openssl3 - - {image: "rocker/r-ver"} # whatever is latest ubuntu LTS + - {image: "rhub/ubuntu-release"} # currently ubuntu-22.04 - {image: "rocker/r-ver:4.0.0"} # ubuntu-20.04 - - {image: "rocker/r-ver:3.6.3", libarrow_binary: "TRUE"} # debian:buster (10) - {image: "rstudio/r-base:4.1-focal"} # ubuntu-20.04 - {image: "rstudio/r-base:4.2-centos7", devtoolset: "8"} + - {image: "rstudio/r-base:4.3-noble"} steps: # Get the arrow checkout just for the docker config scripts # Don't need submodules for this (hence false arg to macro): they fail on diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml index 3e7f7ea0c43..52a235c688e 100644 --- a/dev/tasks/tasks.yml +++ b/dev/tasks/tasks.yml @@ -888,12 +888,12 @@ tasks: - r-lib__libarrow__bin__darwin-arm64-openssl-3.0__arrow-{no_rc_r_version}\.zip - r-lib__libarrow__bin__darwin-x86_64-openssl-1.1__arrow-{no_rc_r_version}\.zip - r-lib__libarrow__bin__darwin-x86_64-openssl-3.0__arrow-{no_rc_r_version}\.zip + - r-pkg__bin__windows__contrib__4.4__arrow_{no_rc_r_version}\.zip - r-pkg__bin__windows__contrib__4.3__arrow_{no_rc_r_version}\.zip - - r-pkg__bin__windows__contrib__4.2__arrow_{no_rc_r_version}\.zip + - r-pkg__bin__macosx__big-sur-x86_64__contrib__4.4__arrow_{no_rc_r_version}\.tgz - r-pkg__bin__macosx__big-sur-x86_64__contrib__4.3__arrow_{no_rc_r_version}\.tgz - - r-pkg__bin__macosx__contrib__4.2__arrow_{no_rc_r_version}\.tgz + - r-pkg__bin__macosx__big-sur-arm64__contrib__4.4__arrow_{no_rc_r_version}\.tgz - r-pkg__bin__macosx__big-sur-arm64__contrib__4.3__arrow_{no_rc_r_version}\.tgz - - r-pkg__bin__macosx__big-sur-arm64__contrib__4.2__arrow_{no_rc_r_version}\.tgz - r-pkg__src__contrib__arrow_{no_rc_r_version}\.tar\.gz @@ -1356,7 +1356,7 @@ tasks: r_tag: latest r_custom_ccache: true -{% for r_org, r_image, r_tag in [("rhub", "ubuntu-gcc-release", "latest"), +{% for r_org, r_image, r_tag in [("rhub", "ubuntu-release", "latest"), ("rocker", "r-ver", "latest"), ("rstudio", "r-base", "4.2-focal"), ("rstudio", "r-base", "4.1-opensuse153")] %} @@ -1377,9 +1377,9 @@ tasks: template: r/azure.linux.yml params: r_org: rhub - r_image: debian-gcc-devel-lto + r_image: gcc13 r_tag: latest - flags: '-e NOT_CRAN=false -e INSTALL_ARGS=--use-LTO' + flags: '-e INSTALL_ARGS=--use-LTO' # This one has -flto=auto test-r-ubuntu-22.04: diff --git a/docker-compose.yml b/docker-compose.yml index 60edf1420bc..d771fc2d22a 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1459,8 +1459,8 @@ services: # (including building the C++ library) on any Docker image that contains R # # Usage: - # R_ORG=rhub R_IMAGE=ubuntu-gcc-release R_TAG=latest docker-compose build r - # R_ORG=rhub R_IMAGE=ubuntu-gcc-release R_TAG=latest docker-compose run r + # R_ORG=rhub R_IMAGE=ubuntu-release R_TAG=latest docker-compose build r + # R_ORG=rhub R_IMAGE=ubuntu-release R_TAG=latest docker-compose run r image: ${REPO}:r-${R_ORG}-${R_IMAGE}-${R_TAG} build: context: . 
@@ -1523,6 +1523,7 @@ services: cache_from: - ${REPO}:r-rhub-fedora-clang-devel-latest args: + # TODO: change this to rhub/clang-asan base: rhub/fedora-clang-devel-san r_dev: ${ARROW_R_DEV} devtoolset_version: ${DEVTOOLSET_VERSION} diff --git a/r/DESCRIPTION b/r/DESCRIPTION index eeff8168b36..38cbaa94a3c 100644 --- a/r/DESCRIPTION +++ b/r/DESCRIPTION @@ -21,7 +21,7 @@ Description: 'Apache' 'Arrow' is a cross-language language-independent columnar memory format for flat and hierarchical data, organized for efficient analytic operations on modern hardware. This package provides an interface to the 'Arrow C++' library. -Depends: R (>= 3.4) +Depends: R (>= 4.0) License: Apache License (>= 2.0) URL: https://github.com/apache/arrow/, https://arrow.apache.org/docs/r/ BugReports: https://github.com/apache/arrow/issues diff --git a/r/R/dplyr-funcs-type.R b/r/R/dplyr-funcs-type.R index f244682737c..efb3c6b756a 100644 --- a/r/R/dplyr-funcs-type.R +++ b/r/R/dplyr-funcs-type.R @@ -140,7 +140,7 @@ register_bindings_type_cast <- function() { fix.empty.names = TRUE, stringsAsFactors = FALSE) { # we need a specific value of stringsAsFactors because the default was - # TRUE in R <= 3.6 + # TRUE in R <= 3.6 and folks might still be cargoculting to stay in the past. if (!identical(stringsAsFactors, FALSE)) { arrow_not_supported("stringsAsFactors = TRUE") } diff --git a/r/R/util.R b/r/R/util.R index a7cb5b3792d..14e4544ab1e 100644 --- a/r/R/util.R +++ b/r/R/util.R @@ -15,20 +15,6 @@ # specific language governing permissions and limitations # under the License. -# for compatibility with R versions earlier than 4.0.0 -if (!exists("deparse1")) { - deparse1 <- function(expr, collapse = " ", width.cutoff = 500L, ...) { - paste(deparse(expr, width.cutoff, ...), collapse = collapse) - } -} - -# for compatibility with R versions earlier than 3.6.0 -if (!exists("str2lang")) { - str2lang <- function(s) { - parse(text = s, keep.source = FALSE)[[1]] - } -} - oxford_paste <- function(x, conjunction = "and", quote = TRUE, diff --git a/r/tests/testthat/test-Array.R b/r/tests/testthat/test-Array.R index bb005605de3..98068bdea20 100644 --- a/r/tests/testthat/test-Array.R +++ b/r/tests/testthat/test-Array.R @@ -818,11 +818,6 @@ test_that("Handling string data with embedded nuls", { ) array_with_nul <- arrow_array(raws)$cast(utf8()) - # The behavior of the warnings/errors is slightly different with and without - # altrep. Without it (i.e. 
3.5.0 and below, the error would trigger immediately - # on `as.vector()` where as with it, the error only happens on materialization) - skip_on_r_older_than("3.6") - # no error on conversion, because altrep laziness v <- expect_error(as.vector(array_with_nul), NA) diff --git a/r/tests/testthat/test-RecordBatch.R b/r/tests/testthat/test-RecordBatch.R index f29b75dbf40..5987f5a4b7c 100644 --- a/r/tests/testthat/test-RecordBatch.R +++ b/r/tests/testthat/test-RecordBatch.R @@ -595,14 +595,10 @@ test_that("RecordBatch supports cbind", { ) # Rejects Table and ChunkedArray arguments - if (getRversion() >= "4.0.0") { - # R 3.6 cbind dispatch rules cause cbind to fall back to default impl if - # there are multiple arguments with distinct cbind implementations - expect_error( - cbind(record_batch(a = 1:2), arrow_table(b = 3:4)), - regexp = "Cannot cbind a RecordBatch with Tables or ChunkedArrays" - ) - } + expect_error( + cbind(record_batch(a = 1:2), arrow_table(b = 3:4)), + regexp = "Cannot cbind a RecordBatch with Tables or ChunkedArrays" + ) expect_error( cbind(record_batch(a = 1:2), b = chunked_array(1, 2)), regexp = "Cannot cbind a RecordBatch with Tables or ChunkedArrays" @@ -622,10 +618,6 @@ test_that("Handling string data with embedded nuls", { batch_with_nul <- record_batch(a = 1:5, b = raws) batch_with_nul$b <- batch_with_nul$b$cast(utf8()) - # The behavior of the warnings/errors is slightly different with and without - # altrep. Without it (i.e. 3.5.0 and below, the error would trigger immediately - # on `as.vector()` where as with it, the error only happens on materialization) - skip_on_r_older_than("3.6") df <- as.data.frame(batch_with_nul) expect_error( diff --git a/r/tests/testthat/test-Table.R b/r/tests/testthat/test-Table.R index ce3254a158e..f6cec3b2b76 100644 --- a/r/tests/testthat/test-Table.R +++ b/r/tests/testthat/test-Table.R @@ -581,10 +581,6 @@ test_that("Table supports cbind", { }) test_that("cbind.Table handles record batches and tables", { - # R 3.6 cbind dispatch rules cause cbind to fall back to default impl if - # there are multiple arguments with distinct cbind implementations - skip_if(getRversion() < "4.0.0", "R 3.6 cbind dispatch rules prevent this behavior") - expect_equal( cbind(arrow_table(a = 1L:2L), record_batch(b = 4:5)), arrow_table(a = 1L:2L, b = 4:5) diff --git a/r/tests/testthat/test-altrep.R b/r/tests/testthat/test-altrep.R index 7a66d0e7782..50bd40988e5 100644 --- a/r/tests/testthat/test-altrep.R +++ b/r/tests/testthat/test-altrep.R @@ -15,8 +15,6 @@ # specific language governing permissions and limitations # under the License. 
-skip_on_r_older_than("3.6") - test_that("altrep test functions do not include base altrep", { expect_false(is_arrow_altrep(1:10)) expect_identical(test_arrow_altrep_is_materialized(1:10), NA) @@ -373,6 +371,11 @@ test_that("altrep min/max/sum identical to R versions for double", { expect_altrep_roundtrip(x, max) expect_altrep_roundtrip(x, sum) + # On valgrind the NA_real_ is sometimes transformed to NaN + # https://stat.ethz.ch/pipermail/r-devel/2021-April/080683.html + # so we skip these there to avoid complicated NA == NaN logic, + # and they are tested on a number of other platforms / conditions + skip_on_linux_devel() x <- c(1, 2, NA_real_) expect_altrep_roundtrip(x, min, na.rm = TRUE) expect_altrep_roundtrip(x, max, na.rm = TRUE) diff --git a/r/tests/testthat/test-chunked-array.R b/r/tests/testthat/test-chunked-array.R index 223f5022d3b..bb01df427f7 100644 --- a/r/tests/testthat/test-chunked-array.R +++ b/r/tests/testthat/test-chunked-array.R @@ -475,11 +475,6 @@ test_that("Handling string data with embedded nuls", { ) chunked_array_with_nul <- ChunkedArray$create(raws)$cast(utf8()) - # The behavior of the warnings/errors is slightly different with and without - # altrep. Without it (i.e. 3.5.0 and below, the error would trigger immediately - # on `as.vector()` where as with it, the error only happens on materialization) - skip_on_r_older_than("3.6") - v <- expect_error(as.vector(chunked_array_with_nul), NA) expect_error( diff --git a/r/tests/testthat/test-dplyr-collapse.R b/r/tests/testthat/test-dplyr-collapse.R index a8aa5556f1e..f50fa8945db 100644 --- a/r/tests/testthat/test-dplyr-collapse.R +++ b/r/tests/testthat/test-dplyr-collapse.R @@ -185,16 +185,6 @@ See $.data for the source Arrow object", fixed = TRUE ) - skip_if(getRversion() < "3.6.0", "TODO investigate why these aren't equal") - # On older R versions: - # ── Failure (test-dplyr-collapse.R:172:3): Properties of collapsed query ──────── - # head(q, 1) %>% collect() not equal to tibble::tibble(lgl = FALSE, total = 8L, extra = 40). - # Component "total": Mean relative difference: 0.3846154 - # Component "extra": Mean relative difference: 0.3846154 - # ── Failure (test-dplyr-collapse.R:176:3): Properties of collapsed query ──────── - # tail(q, 1) %>% collect() not equal to tibble::tibble(lgl = NA, total = 25L, extra = 125). 
- # Component "total": Mean relative difference: 0.9230769 - # Component "extra": Mean relative difference: 0.9230769 expect_equal( q %>% arrange(lgl) %>% diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R index 4d3226798d3..6f520f6e322 100644 --- a/r/tests/testthat/test-dplyr-funcs-datetime.R +++ b/r/tests/testthat/test-dplyr-funcs-datetime.R @@ -180,7 +180,7 @@ test_that("strptime", { ) # these functions' internals use some string processing which requires the - # RE2 library (not available on Windows with R 3.6) + # RE2 library skip_if_not_available("re2") compare_dplyr_binding( @@ -198,7 +198,7 @@ test_that("strptime works for individual formats", { skip_on_cran() # these functions' internals use some string processing which requires the - # RE2 library (not available on Windows with R 3.6) + # RE2 library skip_if_not_available("re2") expect_equal( @@ -269,7 +269,7 @@ test_that("timestamp round trip correctly via strftime and strptime", { skip_on_cran() # these functions' internals use some string processing which requires the - # RE2 library (not available on Windows with R 3.6) + # RE2 library skip_if_not_available("re2") tz <- "Pacific/Marquesas" @@ -291,7 +291,9 @@ test_that("timestamp round trip correctly via strftime and strptime", { # Some formats are not supported on Windows if (!tolower(Sys.info()[["sysname"]]) == "windows") { - formats <- c(formats, "%a", "%A", "%b", "%B", "%OS", "%I%p", "%r", "%T%z") + # "%r" could also be here, though it is only valid in some locales (those + # that use 12 hour formats, so skip for now) + formats <- c(formats, "%a", "%A", "%b", "%B", "%OS", "%I%p", "%T%z") } for (fmt in formats) { @@ -2080,7 +2082,7 @@ test_that("as_datetime() works with other functions", { test_that("parse_date_time() works with year, month, and date components", { # these functions' internals use some string processing which requires the - # RE2 library (not available on Windows with R 3.6) + # RE2 library skip_if_not_available("re2") compare_dplyr_binding( .input %>% @@ -2139,7 +2141,7 @@ test_that("parse_date_time() works with year, month, and date components", { test_that("parse_date_time() works with a mix of formats and orders", { # these functions' internals use some string processing which requires the - # RE2 library (not available on Windows with R 3.6) + # RE2 library skip_if_not_available("re2") test_df <- tibble( string_combi = c("2021-09-1", "2/09//2021", "09.3.2021") @@ -2169,7 +2171,7 @@ test_that("year, month, day date/time parsers", { ) # these functions' internals use some string processing which requires the - # RE2 library (not available on Windows with R 3.6) + # RE2 library skip_if_not_available("re2") compare_dplyr_binding( .input %>% @@ -2221,7 +2223,7 @@ test_that("ym, my & yq parsers", { ) # these functions' internals use some string processing which requires the - # RE2 library (not available on Windows with R 3.6) + # RE2 library skip_if_not_available("re2") compare_dplyr_binding( .input %>% @@ -2270,7 +2272,7 @@ test_that("ym, my & yq parsers", { test_that("parse_date_time's other formats", { # these functions' internals use some string processing which requires the - # RE2 library (not available on Windows with R 3.6) + # RE2 library skip_if_not_available("re2") compare_dplyr_binding( @@ -2401,7 +2403,7 @@ test_that("lubridate's fast_strptime", { ) # these functions' internals use some string processing which requires the - # RE2 library (not available on Windows with R 3.6) + # RE2 
library skip_if_not_available("re2") compare_dplyr_binding( @@ -2508,7 +2510,7 @@ test_that("parse_date_time with hours, minutes and seconds components", { # the unseparated strings are versions of "1987-08-22 20:13:59" (with %y) # these functions' internals use some string processing which requires the - # RE2 library (not available on Windows with R 3.6) + # RE2 library skip_if_not_available("re2") compare_dplyr_binding( @@ -2638,7 +2640,7 @@ test_that("parse_date_time with month names and HMS", { skip_on_os("windows") # these functions' internals use some string processing which requires the - # RE2 library (not available on Windows with R 3.6 & the minimal nightly builds) + # RE2 library (not available in the minimal nightly builds) skip_if_not_available("re2") test_dates_times2 <- tibble( @@ -2737,7 +2739,7 @@ test_that("parse_date_time with `quiet = FALSE` not supported", { # https://issues.apache.org/jira/browse/ARROW-17146 # these functions' internals use some string processing which requires the - # RE2 library (not available on Windows with R 3.6 & the minimal nightly builds) + # RE2 library (not available in the minimal nightly builds) skip_if_not_available("re2") expect_warning( @@ -2766,7 +2768,7 @@ test_that("parse_date_time with `quiet = FALSE` not supported", { test_that("parse_date_time with truncated formats", { # these functions' internals use some string processing which requires the - # RE2 library (not available on Windows with R 3.6) + # RE2 library skip_if_not_available("re2") test_truncation_df <- tibble( @@ -2853,7 +2855,7 @@ test_that("parse_date_time with `exact = TRUE`, and with regular R objects", { ) # these functions' internals use some string processing which requires the - # RE2 library (not available on Windows with R 3.6) + # RE2 library skip_if_not_available("re2") compare_dplyr_binding( .input %>% diff --git a/r/tests/testthat/test-dplyr-funcs-type.R b/r/tests/testthat/test-dplyr-funcs-type.R index 2624e16156b..ecb6b3b7b45 100644 --- a/r/tests/testthat/test-dplyr-funcs-type.R +++ b/r/tests/testthat/test-dplyr-funcs-type.R @@ -754,11 +754,10 @@ test_that("structs/nested data frames/tibbles can be created", { ) # check that data.frame is mapped too - # stringsAsFactors default is TRUE in R 3.6, which is still tested on CI compare_dplyr_binding( .input %>% transmute( - df_col = data.frame(regular_col1, regular_col2, stringsAsFactors = FALSE) + df_col = data.frame(regular_col1, regular_col2) ) %>% collect() %>% mutate(df_col = as.data.frame(df_col)), diff --git a/r/tests/testthat/test-dplyr-glimpse.R b/r/tests/testthat/test-dplyr-glimpse.R index c93273bdeef..d39fef9e82c 100644 --- a/r/tests/testthat/test-dplyr-glimpse.R +++ b/r/tests/testthat/test-dplyr-glimpse.R @@ -15,11 +15,6 @@ # specific language governing permissions and limitations # under the License. -# The glimpse output for tests with `example_data` is different on R < 3.6 -# because the `lgl` column is generated with `sample()` and the RNG -# algorithm is different in older R versions. 
-skip_on_r_older_than("3.6") - library(dplyr, warn.conflicts = FALSE) test_that("glimpse() Table/ChunkedArray", { diff --git a/r/tests/testthat/test-scalar.R b/r/tests/testthat/test-scalar.R index 06f95650435..8335dc95cd8 100644 --- a/r/tests/testthat/test-scalar.R +++ b/r/tests/testthat/test-scalar.R @@ -94,10 +94,6 @@ test_that("Handling string data with embedded nuls", { ) scalar_with_nul <- scalar(raws, binary())$cast(utf8()) - # The behavior of the warnings/errors is slightly different with and without - # altrep. Without it (i.e. 3.5.0 and below, the error would trigger immediately - # on `as.vector()` where as with it, the error only happens on materialization) - skip_on_r_older_than("3.6") v <- expect_error(as.vector(scalar_with_nul), NA) expect_error( v[1], diff --git a/r/tools/test-nixlibs.R b/r/tools/test-nixlibs.R index ed5192d8069..6996f234ced 100644 --- a/r/tools/test-nixlibs.R +++ b/r/tools/test-nixlibs.R @@ -23,8 +23,9 @@ TESTING <- TRUE # The functions use `on_macos` from the env they were sourced in, so we need tool # explicitly set it in that environment. +# We capture.output for a cleaner testthat output. nixlibs_env <- environment() -source("nixlibs.R", local = nixlibs_env) +capture.output(source("nixlibs.R", local = nixlibs_env)) test_that("identify_binary() based on LIBARROW_BINARY", { expect_null(identify_binary("FALSE")) @@ -157,6 +158,10 @@ test_that("check_allowlist", { }) test_that("find_latest_nightly()", { + skip_if( + getRversion() > "4.4.0", + "long last version components (>8) fail to max on r-devel" + ) tf <- tempfile() tf_uri <- paste0("file://", tf) on.exit(unlink(tf)) diff --git a/r/vignettes/developers/docker.Rmd b/r/vignettes/developers/docker.Rmd index de2795cfa6b..13f60904c94 100644 --- a/r/vignettes/developers/docker.Rmd +++ b/r/vignettes/developers/docker.Rmd @@ -5,23 +5,23 @@ description: > output: rmarkdown::html_vignette --- -Arrow is compatible with a huge number of combinations of OSs, OS versions, -compilers, R versions, and other variables. Sometimes these combinations of -variables means that behaviours are found in some environments which cannot be -replicated in others. In addition, there are different ways of building Arrow, -for example, using environment variables to specify the building of optional +Arrow is compatible with a huge number of combinations of OSs, OS versions, +compilers, R versions, and other variables. Sometimes these combinations of +variables means that behaviours are found in some environments which cannot be +replicated in others. In addition, there are different ways of building Arrow, +for example, using environment variables to specify the building of optional components. -What all this means is that you may need to use a different setup to the one in -which you are working, when diagnosing a bug or testing out a new feature which -you have reason to believe may be affected by these variables. One way to do +What all this means is that you may need to use a different setup to the one in +which you are working, when diagnosing a bug or testing out a new feature which +you have reason to believe may be affected by these variables. One way to do this is so spin up a Docker image containing the desired setup. This article provides a basic guide to using Docker in your R development. ## How do I run a Docker container? 
-There are a number of images which have been created for the convenience of +There are a number of images which have been created for the convenience of Arrow devs and you can find them on [the DockerHub repo](https://hub.docker.com/r/apache/arrow-dev/tags). The code below shows an example command you could use to run a Docker container. @@ -29,7 +29,7 @@ The code below shows an example command you could use to run a Docker container. This should be run in the root directory of a checkout of the arrow repo. ```shell -docker run -it -e ARROW_DEPENDENCY_SOURCE=AUTO -v $(pwd):/arrow apache/arrow-dev:r-rhub-ubuntu-gcc-release-latest +docker run -it -e ARROW_DEPENDENCY_SOURCE=AUTO -v $(pwd):/arrow apache/arrow-dev:r-rhub-ubuntu-release-latest ``` Components: @@ -39,13 +39,13 @@ Components: * `-e ARROW_DEPENDENCY_SOURCE=AUTO` - set the environment variable `ARROW_DEPENDENCY_SOURCE` to the value `AUTO` * `-v $(pwd):/arrow` - mount the current directory at `/arrow` in the container * `apache/arrow-dev` - the DockerHub repo to get this container from -* `r-rhub-ubuntu-gcc-release-latest` - the image tag +* `r-rhub-ubuntu-release-latest` - the image tag -Once you run this command, if you don't have a copy of that particular image +Once you run this command, if you don't have a copy of that particular image saved locally, it will first be downloaded before a container is spun up. -In the example above, mounting the directory in which the Arrow repo was stored -on the local machine, meant that that code could be built and tested on the +In the example above, mounting the directory in which the Arrow repo was stored +on the local machine, meant that that code could be built and tested on the container. ## How do I exit this image? @@ -73,29 +73,29 @@ sudo docker ps -a ## Running existing workflows from docker-compose.yml There are a number of workflows outlined in the file `docker-compose.yml` in the -arrow repo root directory. For example, you can use the workflow called `r` to -test building and installing the R package. This is advantageous as you can use -existing utility scripts and install it onto a container which already has R on +arrow repo root directory. For example, you can use the workflow called `r` to +test building and installing the R package. This is advantageous as you can use +existing utility scripts and install it onto a container which already has R on it. -These workflows are also parameterized, which means you can specify different +These workflows are also parameterized, which means you can specify different options (or just use the defaults, which can be found in `.env`) ### Example - The manual way -If you wanted to run [RHub's latest `ubuntu-gcc-release` image](https://hub.docker.com/r/rhub/ubuntu-gcc-release), you could +If you wanted to run [RHub's latest `ubuntu-release` image](https://hub.docker.com/r/rhub/ubuntu-release), you could run: -```shell -R_ORG=rhub R_IMAGE=ubuntu-gcc-release R_TAG=latest docker-compose build r -R_ORG=rhub R_IMAGE=ubuntu-gcc-release R_TAG=latest docker-compose run r +```shell +R_ORG=rhub R_IMAGE=ubuntu-release R_TAG=latest docker-compose build r +R_ORG=rhub R_IMAGE=ubuntu-release R_TAG=latest docker-compose run r ``` ### Example - Using Archery Alternatively, you may prefer to use the [Archery tool to run docker images](https://arrow.apache.org/docs/developers/docker.html). 
-This has the advantage of making it simpler to build some of the existing Arrow 
-CI jobs which have hierarchical dependencies, and so for example, you could 
+This has the advantage of making it simpler to build some of the existing Arrow
+CI jobs which have hierarchical dependencies, and so for example, you could
 build the R package on a container which already has the C++ code pre-built.
 
 This is the same tool which our CI uses - via a tool called [Crossbow](https://arrow.apache.org/docs/developers/crossbow.html).
 
@@ -103,5 +103,5 @@ This is the same tool which our CI uses - via a tool called [Crossbow](https://a
 If you want to run the `r` workflow discussed above, you could run:
 
 ```shell
-R_ORG=rhub R_IMAGE=ubuntu-gcc-release R_TAG=latest archery docker run r
+R_ORG=rhub R_IMAGE=ubuntu-release R_TAG=latest archery docker run r
 ```
diff --git a/r/vignettes/install.Rmd b/r/vignettes/install.Rmd
index df43a9de36f..cc90c5ff08c 100644
--- a/r/vignettes/install.Rmd
+++ b/r/vignettes/install.Rmd
@@ -69,7 +69,7 @@ The prebuilt binaries come with S3 and GCS support enabled, so you will need to
 
 ## Install release version (easy way)
 
-On macOS and Windows, when you run `install.packages("arrow")` and install arrow from CRAN, you get an R binary package that contains a precompiled version of libarrow. Installing binaries is much easier that installing from source, but CRAN does not host binaries for Linux. This means that the default behaviour when you run `install.packages()` on Linux is to retrieve the source version of the R package and compile both the R package _and_ libarrow from source. We'll talk about this scenario in the next section (the "less easy" way), but first we'll suggest two faster alternatives that are usually much easier. 
+On macOS and Windows, when you run `install.packages("arrow")` and install arrow from CRAN, you get an R binary package that contains a precompiled version of libarrow. Installing binaries is much easier than installing from source, but CRAN does not host binaries for Linux. This means that the default behaviour when you run `install.packages()` on Linux is to retrieve the source version of the R package and compile both the R package _and_ libarrow from source. We'll talk about this scenario in the next section (the "less easy" way), but first we'll suggest two faster alternatives that are usually much easier.
 
 ### Binary R package with libarrow binary via RSPM/conda
 
@@ -154,15 +154,15 @@ knitr::include_graphics("./r_source_libarrow_source.png")
 ```
 The "less easy" way to install arrow is to install both the R package and the
 underlying Arrow C++ library (libarrow) from source. This method is somewhat more
-difficult because compiling and installing R packages with C++ dependencies 
-generally requires installing system packages, which you may not have privileges 
-to do, and/or building the C++ dependencies separately, which introduces all sorts 
+difficult because compiling and installing R packages with C++ dependencies
+generally requires installing system packages, which you may not have privileges
+to do, and/or building the C++ dependencies separately, which introduces all sorts
 of additional ways for things to go wrong.
 
-Installing from the full source build of arrow, compiling both C++ and R 
-bindings, will handle most of the dependency management for you, but it is 
-much slower than using binaries. 
However, if using binaries isn't an option -for you,or you wish to customize your Linux installation, the instructions in +Installing from the full source build of arrow, compiling both C++ and R +bindings, will handle most of the dependency management for you, but it is +much slower than using binaries. However, if using binaries isn't an option +for you,or you wish to customize your Linux installation, the instructions in this section explain how to do that. ### Basic configuration @@ -369,10 +369,10 @@ satisfy C++ dependencies. ## Offline installation -The `install-arrow.R` file mentioned in the previous section includes a -function called `create_package_with_all_dependencies()`. Normally, when -installing on a computer with internet access, the build process will -download third-party dependencies as needed. This function provides a +The `install-arrow.R` file mentioned in the previous section includes a +function called `create_package_with_all_dependencies()`. Normally, when +installing on a computer with internet access, the build process will +download third-party dependencies as needed. This function provides a way to download them in advance, which can be useful when installing Arrow on a computer without internet access. The process is as follows: @@ -380,11 +380,11 @@ on a computer without internet access. The process is as follows: **Step 1.** Using a computer with internet access, download dependencies: * Install the arrow package **or** source the script directly using the following command: - + ```r source("https://raw.githubusercontent.com/apache/arrow/main/r/R/install-arrow.R") ``` - + * Use the `create_package_with_all_dependencies()` function to create the installation bundle: ```r @@ -399,27 +399,27 @@ on a computer without internet access. The process is as follows: ```r install.packages( - "my_arrow_pkg.tar.gz", + "my_arrow_pkg.tar.gz", dependencies = c("Depends", "Imports", "LinkingTo") ) ``` This installation will build from source, so `cmake` must be available - + * Run `arrow_info()` to check installed capabilities Notes: -- arrow _can_ be installed on a computer without internet access -without using this function, but many useful features will be disabled, +- arrow _can_ be installed on a computer without internet access +without using this function, but many useful features will be disabled, as they depend on third-party components. More precisely, `arrow::arrow_info()$capabilities()` will be `FALSE` for every capability. - If you are using binary packages you shouldn't need to this function. You can download the appropriate binary from your package repository, transfer -that to the offline computer, and install that. +that to the offline computer, and install that. - If you're using RStudio Package Manager on Linux (RSPM), and you want to make a source bundle with this function, make sure to set the first repository @@ -523,11 +523,11 @@ so that we can improve the script. ## Contributing -We are constantly working to make the installation process as painless as +We are constantly working to make the installation process as painless as possible. If you find ways to improve the process, please [report an issue](https://github.com/apache/arrow/issues) so that we can document it. 
Similarly, if you find that your Linux distribution
-or version is not supported, we would welcome the contribution of Docker 
-images (hosted on Docker Hub) that we can use in our continuous integration 
+or version is not supported, we would welcome the contribution of Docker
+images (hosted on Docker Hub) that we can use in our continuous integration
 and hopefully improve our coverage. If you do contribute a Docker image, it
 should be as minimal as possible, containing only R and the dependencies it
 requires. For reference, see the images that
@@ -537,19 +537,16 @@ You can test the arrow R package installation using the `docker-compose` setup
 included in the `apache/arrow` git repository. For example,
 
 ```
-R_ORG=rhub R_IMAGE=ubuntu-gcc-release R_TAG=latest docker-compose build r
-R_ORG=rhub R_IMAGE=ubuntu-gcc-release R_TAG=latest docker-compose run r
+R_ORG=rhub R_IMAGE=ubuntu-release R_TAG=latest docker-compose build r
+R_ORG=rhub R_IMAGE=ubuntu-release R_TAG=latest docker-compose run r
 ```
 
 installs the arrow R package, including libarrow, on the
-[rhub/ubuntu-gcc-release](https://hub.docker.com/r/rhub/ubuntu-gcc-release)
+[rhub/ubuntu-release](https://hub.docker.com/r/rhub/ubuntu-release)
 image.
 
 ## Further reading
 
 - To learn about installing development versions, see the article on [installing nightly builds](./install_nightly.html).
-- If you're contributing to the Arrow project, see the [Arrow R developers guide](./developing.html) for resources to help you on set up your development environment. 
+- If you're contributing to the Arrow project, see the [Arrow R developers guide](./developing.html) for resources to help you set up your development environment.
 - Arrow developers may also wish to read a more detailed discussion of the code run during the installation process, described in the [install details article](./developers/install_details.html).
-
-
-