apache · thisisnic · May 3, 2023 · May 3, 2023 · Apr 26, 2023 · May 3, 2023
diff --git a/r/NAMESPACE b/r/NAMESPACE
@@ -503,6 +503,7 @@ importFrom(tidyselect,one_of)
 importFrom(tidyselect,starts_with)
 importFrom(tidyselect,vars_pull)
 importFrom(utils,capture.output)
+importFrom(utils,download.file)
 importFrom(utils,getFromNamespace)
 importFrom(utils,head)
 importFrom(utils,install.packages)

diff --git a/r/R/dplyr-funcs-doc.R b/r/R/dplyr-funcs-doc.R
@@ -83,7 +83,7 @@
 #' Functions can be called either as `pkg::fun()` or just `fun()`, i.e. both
 #' `str_sub()` and `stringr::str_sub()` work.
 #'
-#' In addition to these functions, you can call any of Arrow's 246 compute
+#' In addition to these functions, you can call any of Arrow's 251 compute
 #' functions directly. Arrow has many functions that don't map to an existing R
 #' function. In other cases where there is an R function mapping, you can still
 #' call the Arrow function directly if you don't want the adaptations that the R
@@ -99,30 +99,31 @@
 #'
 #' ## base
 #'
-#' * [`-`][-()]
 #' * [`!`][!()]
 #' * [`!=`][!=()]
-#' * [`*`][*()]
-#' * [`/`][/()]
-#' * [`&`][&()]
-#' * [`%/%`][%/%()]
 #' * [`%%`][%%()]
+#' * [`%/%`][%/%()]
 #' * [`%in%`][%in%()]
-#' * [`^`][^()]
+#' * [`&`][&()]
+#' * [`*`][*()]
 #' * [`+`][+()]
+#' * [`-`][-()]
+#' * [`/`][/()]
 #' * [`<`][<()]
 #' * [`<=`][<=()]
 #' * [`==`][==()]
 #' * [`>`][>()]
 #' * [`>=`][>=()]
-#' * [`|`][|()]
+#' * [`ISOdate()`][base::ISOdate()]
+#' * [`ISOdatetime()`][base::ISOdatetime()]
+#' * [`^`][^()]
 #' * [`abs()`][base::abs()]
 #' * [`acos()`][base::acos()]
 #' * [`all()`][base::all()]
 #' * [`any()`][base::any()]
-#' * [`as.character()`][base::as.character()]
 #' * [`as.Date()`][base::as.Date()]: Multiple `tryFormats` not supported in Arrow.
 #' Consider using the lubridate specialised parsing functions `ymd()`, `ymd()`, etc.
+#' * [`as.character()`][base::as.character()]
 #' * [`as.difftime()`][base::as.difftime()]: only supports `units = "secs"` (the default)
 #' * [`as.double()`][base::as.double()]
 #' * [`as.integer()`][base::as.integer()]
@@ -153,8 +154,6 @@
 #' * [`is.na()`][base::is.na()]
 #' * [`is.nan()`][base::is.nan()]
 #' * [`is.numeric()`][base::is.numeric()]
-#' * [`ISOdate()`][base::ISOdate()]
-#' * [`ISOdatetime()`][base::ISOdatetime()]
 #' * [`log()`][base::log()]
 #' * [`log10()`][base::log10()]
 #' * [`log1p()`][base::log1p()]
@@ -186,6 +185,7 @@
 #' * [`tolower()`][base::tolower()]
 #' * [`toupper()`][base::toupper()]
 #' * [`trunc()`][base::trunc()]
+#' * [`|`][|()]
 #'
 #' ## bit64
 #'
@@ -242,8 +242,8 @@
 #' * [`format_ISO8601()`][lubridate::format_ISO8601()]
 #' * [`hour()`][lubridate::hour()]
 #' * [`is.Date()`][lubridate::is.Date()]
-#' * [`is.instant()`][lubridate::is.instant()]
 #' * [`is.POSIXct()`][lubridate::is.POSIXct()]
+#' * [`is.instant()`][lubridate::is.instant()]
 #' * [`is.timepoint()`][lubridate::is.timepoint()]
 #' * [`isoweek()`][lubridate::isoweek()]
 #' * [`isoyear()`][lubridate::isoyear()]

diff --git a/r/R/io.R b/r/R/io.R
@@ -232,6 +232,7 @@ mmap_open <- function(path, mode = c("read", "write", "readwrite")) {
 #' @param random_access Logical: whether the result must be a RandomAccessFile
 #' @return An `InputStream` or a subclass of one.
 #' @keywords internal
+#' @importFrom utils download.file
 make_readable_file <- function(file, mmap = TRUE, random_access = TRUE) {
   if (inherits(file, "SubTreeFileSystem")) {
     filesystem <- file$base_fs

diff --git a/r/README.md b/r/README.md
@@ -1,9 +1,5 @@
 # arrow <img src="https://arrow.apache.org/img/arrow-logo_hex_black-txt_white-bg.png" align="right" alt="" width="120" />
 
-[![cran](https://www.r-pkg.org/badges/version-last-release/arrow)](https://cran.r-project.org/package=arrow)
-[![CI](https://github.com/apache/arrow/workflows/R/badge.svg?event=push)](https://github.com/apache/arrow/actions?query=workflow%3AR+branch%3Amain+event%3Apush)
-[![conda-forge](https://img.shields.io/conda/vn/conda-forge/r-arrow.svg)](https://anaconda.org/conda-forge/r-arrow)
-
 [Apache Arrow](https://arrow.apache.org/) is a cross-language
 development platform for in-memory and larger-than-memory data. It specifies a standardized
 language-independent columnar memory format for flat and hierarchical

diff --git a/r/inst/build_arrow_static.sh b/r/inst/build_arrow_static.sh
@@ -55,7 +55,7 @@ ${CMAKE} -DARROW_BOOST_USE_SHARED=OFF \
     -DARROW_BUILD_TESTS=OFF \
     -DARROW_BUILD_SHARED=OFF \
     -DARROW_BUILD_STATIC=ON \
-    -DARROW_ACERO=${ARROW_ACERO:-$ARROW_DEFAULT_PARAM} \
+    -DARROW_ACERO=${ARROW_ACERO:-ON} \
     -DARROW_COMPUTE=ON \
     -DARROW_CSV=ON \
     -DARROW_DATASET=${ARROW_DATASET:-ON} \

diff --git a/r/man/acero.Rd b/r/man/acero.Rd
diff --git a/r/man/enums.Rd b/r/man/enums.Rd
diff --git a/r/tests/testthat/test-compute-sort.R b/r/tests/testthat/test-compute-sort.R
@@ -108,29 +108,34 @@ test_that("sort(vector), sort(Array), sort(ChunkedArray) give equivalent results
 })
 
 test_that("sort(vector), sort(Array), sort(ChunkedArray) give equivalent results on floats", {
+
+  test_vec <- tbl$dbl
+  # Arrow sorts NA and NaN differently, but it's not important, so eliminate here
+  test_vec[is.nan(test_vec)] <- NA_real_
+
   compare_expression(
     sort(.input, decreasing = TRUE, na.last = TRUE),
-    tbl$dbl
+    test_vec
   )
   compare_expression(
     sort(.input, decreasing = FALSE, na.last = TRUE),
-    tbl$dbl
+    test_vec
   )
   compare_expression(
     sort(.input, decreasing = TRUE, na.last = NA),
-    tbl$dbl
+    test_vec
   )
   compare_expression(
     sort(.input, decreasing = TRUE, na.last = FALSE),
-    tbl$dbl,
+    test_vec,
   )
   compare_expression(
     sort(.input, decreasing = FALSE, na.last = NA),
-    tbl$dbl
+    test_vec
   )
   compare_expression(
     sort(.input, decreasing = FALSE, na.last = FALSE),
-    tbl$dbl,
+    test_vec,
   )
 })
 

diff --git a/r/tests/testthat/test-dplyr-funcs-conditional.R b/r/tests/testthat/test-dplyr-funcs-conditional.R
@@ -377,8 +377,11 @@ test_that("coalesce()", {
     y = c(NA_real_, 2.2, 3.3),
     z = c(1.1, 2.2, 3.3)
   )
-  compare_dplyr_binding(
-    .input %>%
+
+  # we can't use compare_dplyr_binding here as dplyr silently converts NaN to NA in coalesce()
+  # see https://github.com/tidyverse/dplyr/issues/6833
+  expect_identical(
+    arrow_table(df) %>%
       mutate(
         cw = coalesce(w),
         cz = coalesce(z),
@@ -387,21 +390,29 @@ test_that("coalesce()", {
         cwxyz = coalesce(w, x, y, z)
       ) %>%
       collect(),
-    df
+    mutate(
+      df,
+      cw = c(NA, NaN, NA),
+      cz = c(1.1, 2.2, 3.3),
+      cwx = c(NA, NaN, 3.3),
+      cwxy = c(NA, 2.2, 3.3),
+      cwxyz = c(1.1, 2.2, 3.3)
+    )
   )
+
   # NaNs stay NaN and are not converted to NA in the results
   # (testing this requires expect_identical())
   expect_identical(
     df %>% Table$create() %>% mutate(cwx = coalesce(w, x)) %>% collect(),
-    df %>% mutate(cwx = coalesce(w, x))
+    df %>% mutate(cwx = c(NA, NaN, 3.3))
   )
   expect_identical(
     df %>% Table$create() %>% transmute(cw = coalesce(w)) %>% collect(),
-    df %>% transmute(cw = coalesce(w))
+    df %>% transmute(cw = w)
   )
   expect_identical(
     df %>% Table$create() %>% transmute(cn = coalesce(NaN)) %>% collect(),
-    df %>% transmute(cn = coalesce(NaN))
+    df %>% transmute(cn = NaN)
   )
   # singles stay single
   expect_equal(
@@ -418,8 +429,8 @@ test_that("coalesce()", {
     float32()
   )
   # with R literal values
-  compare_dplyr_binding(
-    .input %>%
+  expect_identical(
+    arrow_table(df) %>%
       mutate(
         c1 = coalesce(4.4),
         c2 = coalesce(NA_real_),
@@ -429,7 +440,15 @@ test_that("coalesce()", {
         c6 = coalesce(w, x, y, NaN)
       ) %>%
       collect(),
-    df
+    mutate(
+      df,
+      c1 = 4.4,
+      c2 = NA_real_,
+      c3 = NaN,
+      c4 = c(5.5, 2.2, 3.3),
+      c5 = c(NA, 2.2, 3.3),
+      c6 = c(NaN, 2.2, 3.3)
+    )
   )
 
   # no arguments