From f2a17ab8798e61933e47721c0da7e72262be8ee3 Mon Sep 17 00:00:00 2001
From: Nic Crane <thisisnic@gmail.com>
Date: Wed, 3 May 2023 11:20:38 +0200
Subject: [PATCH 1/5] Call docgen

---
 r/R/dplyr-funcs-doc.R | 24 ++++++++++++------------
 r/man/acero.Rd        | 24 ++++++++++++------------
 r/man/enums.Rd        |  2 +-
 3 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/r/R/dplyr-funcs-doc.R b/r/R/dplyr-funcs-doc.R
index b619cfe509b..7a1fc14b36e 100644
--- a/r/R/dplyr-funcs-doc.R
+++ b/r/R/dplyr-funcs-doc.R
@@ -83,7 +83,7 @@
 #' Functions can be called either as `pkg::fun()` or just `fun()`, i.e. both
 #' `str_sub()` and `stringr::str_sub()` work.
 #'
-#' In addition to these functions, you can call any of Arrow's 246 compute
+#' In addition to these functions, you can call any of Arrow's 251 compute
 #' functions directly. Arrow has many functions that don't map to an existing R
 #' function. In other cases where there is an R function mapping, you can still
 #' call the Arrow function directly if you don't want the adaptations that the R
@@ -99,30 +99,31 @@
 #'
 #' ## base
 #'
-#' * [`-`][-()]
 #' * [`!`][!()]
 #' * [`!=`][!=()]
-#' * [`*`][*()]
-#' * [`/`][/()]
-#' * [`&`][&()]
-#' * [`%/%`][%/%()]
 #' * [`%%`][%%()]
+#' * [`%/%`][%/%()]
 #' * [`%in%`][%in%()]
-#' * [`^`][^()]
+#' * [`&`][&()]
+#' * [`*`][*()]
 #' * [`+`][+()]
+#' * [`-`][-()]
+#' * [`/`][/()]
 #' * [`<`][<()]
 #' * [`<=`][<=()]
 #' * [`==`][==()]
 #' * [`>`][>()]
 #' * [`>=`][>=()]
-#' * [`|`][|()]
+#' * [`ISOdate()`][base::ISOdate()]
+#' * [`ISOdatetime()`][base::ISOdatetime()]
+#' * [`^`][^()]
 #' * [`abs()`][base::abs()]
 #' * [`acos()`][base::acos()]
 #' * [`all()`][base::all()]
 #' * [`any()`][base::any()]
-#' * [`as.character()`][base::as.character()]
 #' * [`as.Date()`][base::as.Date()]: Multiple `tryFormats` not supported in Arrow.
 #' Consider using the lubridate specialised parsing functions `ymd()`, `ymd()`, etc.
+#' * [`as.character()`][base::as.character()]
 #' * [`as.difftime()`][base::as.difftime()]: only supports `units = "secs"` (the default)
 #' * [`as.double()`][base::as.double()]
 #' * [`as.integer()`][base::as.integer()]
@@ -153,8 +154,6 @@
 #' * [`is.na()`][base::is.na()]
 #' * [`is.nan()`][base::is.nan()]
 #' * [`is.numeric()`][base::is.numeric()]
-#' * [`ISOdate()`][base::ISOdate()]
-#' * [`ISOdatetime()`][base::ISOdatetime()]
 #' * [`log()`][base::log()]
 #' * [`log10()`][base::log10()]
 #' * [`log1p()`][base::log1p()]
@@ -186,6 +185,7 @@
 #' * [`tolower()`][base::tolower()]
 #' * [`toupper()`][base::toupper()]
 #' * [`trunc()`][base::trunc()]
+#' * [`|`][|()]
 #'
 #' ## bit64
 #'
@@ -242,8 +242,8 @@
 #' * [`format_ISO8601()`][lubridate::format_ISO8601()]
 #' * [`hour()`][lubridate::hour()]
 #' * [`is.Date()`][lubridate::is.Date()]
-#' * [`is.instant()`][lubridate::is.instant()]
 #' * [`is.POSIXct()`][lubridate::is.POSIXct()]
+#' * [`is.instant()`][lubridate::is.instant()]
 #' * [`is.timepoint()`][lubridate::is.timepoint()]
 #' * [`isoweek()`][lubridate::isoweek()]
 #' * [`isoyear()`][lubridate::isoyear()]
diff --git a/r/man/acero.Rd b/r/man/acero.Rd
index 6d4476c44c2..9d390002ab0 100644
--- a/r/man/acero.Rd
+++ b/r/man/acero.Rd
@@ -68,7 +68,7 @@ can assume that the function works in Acero just as it does in R.
 Functions can be called either as \code{pkg::fun()} or just \code{fun()}, i.e. both
 \code{str_sub()} and \code{stringr::str_sub()} work.
 
-In addition to these functions, you can call any of Arrow's 246 compute
+In addition to these functions, you can call any of Arrow's 251 compute
 functions directly. Arrow has many functions that don't map to an existing R
 function. In other cases where there is an R function mapping, you can still
 call the Arrow function directly if you don't want the adaptations that the R
@@ -85,30 +85,31 @@ as \code{arrow_ascii_is_decimal}.
 
 \subsection{base}{
 \itemize{
-\item \code{\link[=-]{-}}
 \item \code{\link[=!]{!}}
 \item \code{\link[=!=]{!=}}
-\item \code{\link[=*]{*}}
-\item \code{\link[=/]{/}}
-\item \code{\link[=&]{&}}
-\item \code{\link[=\%/\%]{\%/\%}}
 \item \code{\link[=\%\%]{\%\%}}
+\item \code{\link[=\%/\%]{\%/\%}}
 \item \code{\link[=\%in\%]{\%in\%}}
-\item \code{\link[=^]{^}}
+\item \code{\link[=&]{&}}
+\item \code{\link[=*]{*}}
 \item \code{\link[=+]{+}}
+\item \code{\link[=-]{-}}
+\item \code{\link[=/]{/}}
 \item \code{\link[=<]{<}}
 \item \code{\link[=<=]{<=}}
 \item \code{\link[===]{==}}
 \item \code{\link[=>]{>}}
 \item \code{\link[=>=]{>=}}
-\item \code{\link[=|]{|}}
+\item \code{\link[base:ISOdatetime]{ISOdate()}}
+\item \code{\link[base:ISOdatetime]{ISOdatetime()}}
+\item \code{\link[=^]{^}}
 \item \code{\link[base:MathFun]{abs()}}
 \item \code{\link[base:Trig]{acos()}}
 \item \code{\link[base:all]{all()}}
 \item \code{\link[base:any]{any()}}
-\item \code{\link[base:character]{as.character()}}
 \item \code{\link[base:as.Date]{as.Date()}}: Multiple \code{tryFormats} not supported in Arrow.
 Consider using the lubridate specialised parsing functions \code{ymd()}, \code{ymd()}, etc.
+\item \code{\link[base:character]{as.character()}}
 \item \code{\link[base:difftime]{as.difftime()}}: only supports \code{units = "secs"} (the default)
 \item \code{\link[base:double]{as.double()}}
 \item \code{\link[base:integer]{as.integer()}}
@@ -139,8 +140,6 @@ Consider using the lubridate specialised parsing functions \code{ymd()}, \code{y
 \item \code{\link[base:NA]{is.na()}}
 \item \code{\link[base:is.finite]{is.nan()}}
 \item \code{\link[base:numeric]{is.numeric()}}
-\item \code{\link[base:ISOdatetime]{ISOdate()}}
-\item \code{\link[base:ISOdatetime]{ISOdatetime()}}
 \item \code{\link[base:Log]{log()}}
 \item \code{\link[base:Log]{log10()}}
 \item \code{\link[base:Log]{log1p()}}
@@ -172,6 +171,7 @@ Valid values are "s", "ms" (default), "us", "ns".
 \item \code{\link[base:chartr]{tolower()}}
 \item \code{\link[base:chartr]{toupper()}}
 \item \code{\link[base:Round]{trunc()}}
+\item \code{\link[=|]{|}}
 }
 }
 
@@ -234,8 +234,8 @@ Valid values are "s", "ms" (default), "us", "ns".
 \item \code{\link[lubridate:format_ISO8601]{format_ISO8601()}}
 \item \code{\link[lubridate:hour]{hour()}}
 \item \code{\link[lubridate:date_utils]{is.Date()}}
-\item \code{\link[lubridate:is.instant]{is.instant()}}
 \item \code{\link[lubridate:posix_utils]{is.POSIXct()}}
+\item \code{\link[lubridate:is.instant]{is.instant()}}
 \item \code{\link[lubridate:is.instant]{is.timepoint()}}
 \item \code{\link[lubridate:week]{isoweek()}}
 \item \code{\link[lubridate:year]{isoyear()}}
diff --git a/r/man/enums.Rd b/r/man/enums.Rd
index 614c196fdee..853fa07028f 100644
--- a/r/man/enums.Rd
+++ b/r/man/enums.Rd
@@ -26,7 +26,7 @@ An object of class \code{DateUnit} (inherits from \code{arrow-enum}) of length 2
 
 An object of class \code{Type::type} (inherits from \code{arrow-enum}) of length 37.
 
-An object of class \code{StatusCode} (inherits from \code{arrow-enum}) of length 17.
+An object of class \code{StatusCode} (inherits from \code{arrow-enum}) of length 13.
 
 An object of class \code{FileMode} (inherits from \code{arrow-enum}) of length 3.
 

From 5b305ba2978fa9d6b6da5e6d26d08baf22f387fa Mon Sep 17 00:00:00 2001
From: Nic Crane <thisisnic@gmail.com>
Date: Wed, 3 May 2023 11:22:11 +0200
Subject: [PATCH 2/5] Import download.file from utils to suppress note

---
 r/NAMESPACE | 1 +
 r/R/io.R    | 1 +
 2 files changed, 2 insertions(+)

diff --git a/r/NAMESPACE b/r/NAMESPACE
index 7ab8d5c9020..eec50167bd1 100644
--- a/r/NAMESPACE
+++ b/r/NAMESPACE
@@ -503,6 +503,7 @@ importFrom(tidyselect,one_of)
 importFrom(tidyselect,starts_with)
 importFrom(tidyselect,vars_pull)
 importFrom(utils,capture.output)
+importFrom(utils,download.file)
 importFrom(utils,getFromNamespace)
 importFrom(utils,head)
 importFrom(utils,install.packages)
diff --git a/r/R/io.R b/r/R/io.R
index b2989de78a1..e952d656f8c 100644
--- a/r/R/io.R
+++ b/r/R/io.R
@@ -232,6 +232,7 @@ mmap_open <- function(path, mode = c("read", "write", "readwrite")) {
 #' @param random_access Logical: whether the result must be a RandomAccessFile
 #' @return An `InputStream` or a subclass of one.
 #' @keywords internal
+#' @importFrom utils download.file
 make_readable_file <- function(file, mmap = TRUE, random_access = TRUE) {
   if (inherits(file, "SubTreeFileSystem")) {
     filesystem <- file$base_fs

From 27f86a8e2f29da37f3e8d03c43207c1312562f09 Mon Sep 17 00:00:00 2001
From: Nic Crane <thisisnic@gmail.com>
Date: Wed, 26 Apr 2023 10:12:54 +0100
Subject: [PATCH 3/5] GH-35131: [R] Test failure with dev waldo (#35308)

This PR fixes the tests that fail because the dev version of the waldo package is stricter when comparing NaN and NA_real_ values. (N.b. our CI doesn't yet use the dev version of waldo, so this PR should be tested locally to verify that the tests pass.)
* Closes: #35131

Authored-by: Nic Crane <thisisnic@gmail.com>
Signed-off-by: Nic Crane <thisisnic@gmail.com>
---
 r/tests/testthat/test-compute-sort.R          | 17 ++++++---
 .../testthat/test-dplyr-funcs-conditional.R   | 37 ++++++++++++++-----
 2 files changed, 39 insertions(+), 15 deletions(-)

diff --git a/r/tests/testthat/test-compute-sort.R b/r/tests/testthat/test-compute-sort.R
index f521efeddc5..ba3039c3313 100644
--- a/r/tests/testthat/test-compute-sort.R
+++ b/r/tests/testthat/test-compute-sort.R
@@ -108,29 +108,34 @@ test_that("sort(vector), sort(Array), sort(ChunkedArray) give equivalent results
 })
 
 test_that("sort(vector), sort(Array), sort(ChunkedArray) give equivalent results on floats", {
+
+  test_vec <- tbl$dbl
+  # Arrow sorts NA and NaN differently, but that is not what this test checks, so replace NaN with NA
+  test_vec[is.nan(test_vec)] <- NA_real_
+
   compare_expression(
     sort(.input, decreasing = TRUE, na.last = TRUE),
-    tbl$dbl
+    test_vec
   )
   compare_expression(
     sort(.input, decreasing = FALSE, na.last = TRUE),
-    tbl$dbl
+    test_vec
   )
   compare_expression(
     sort(.input, decreasing = TRUE, na.last = NA),
-    tbl$dbl
+    test_vec
   )
   compare_expression(
     sort(.input, decreasing = TRUE, na.last = FALSE),
-    tbl$dbl,
+    test_vec,
   )
   compare_expression(
     sort(.input, decreasing = FALSE, na.last = NA),
-    tbl$dbl
+    test_vec
   )
   compare_expression(
     sort(.input, decreasing = FALSE, na.last = FALSE),
-    tbl$dbl,
+    test_vec,
   )
 })
 
diff --git a/r/tests/testthat/test-dplyr-funcs-conditional.R b/r/tests/testthat/test-dplyr-funcs-conditional.R
index 85d21b73226..b3d86da8b41 100644
--- a/r/tests/testthat/test-dplyr-funcs-conditional.R
+++ b/r/tests/testthat/test-dplyr-funcs-conditional.R
@@ -377,8 +377,11 @@ test_that("coalesce()", {
     y = c(NA_real_, 2.2, 3.3),
     z = c(1.1, 2.2, 3.3)
   )
-  compare_dplyr_binding(
-    .input %>%
+
+  # we can't use compare_dplyr_binding here as dplyr silently converts NaN to NA in coalesce()
+  # see https://github.com/tidyverse/dplyr/issues/6833
+  expect_identical(
+    arrow_table(df) %>%
       mutate(
         cw = coalesce(w),
         cz = coalesce(z),
@@ -387,21 +390,29 @@ test_that("coalesce()", {
         cwxyz = coalesce(w, x, y, z)
       ) %>%
       collect(),
-    df
+    mutate(
+      df,
+      cw = c(NA, NaN, NA),
+      cz = c(1.1, 2.2, 3.3),
+      cwx = c(NA, NaN, 3.3),
+      cwxy = c(NA, 2.2, 3.3),
+      cwxyz = c(1.1, 2.2, 3.3)
+    )
   )
+
   # NaNs stay NaN and are not converted to NA in the results
   # (testing this requires expect_identical())
   expect_identical(
     df %>% Table$create() %>% mutate(cwx = coalesce(w, x)) %>% collect(),
-    df %>% mutate(cwx = coalesce(w, x))
+    df %>% mutate(cwx = c(NA, NaN, 3.3))
   )
   expect_identical(
     df %>% Table$create() %>% transmute(cw = coalesce(w)) %>% collect(),
-    df %>% transmute(cw = coalesce(w))
+    df %>% transmute(cw = w)
   )
   expect_identical(
     df %>% Table$create() %>% transmute(cn = coalesce(NaN)) %>% collect(),
-    df %>% transmute(cn = coalesce(NaN))
+    df %>% transmute(cn = NaN)
   )
   # singles stay single
   expect_equal(
@@ -418,8 +429,8 @@ test_that("coalesce()", {
     float32()
   )
   # with R literal values
-  compare_dplyr_binding(
-    .input %>%
+  expect_identical(
+    arrow_table(df) %>%
       mutate(
         c1 = coalesce(4.4),
         c2 = coalesce(NA_real_),
@@ -429,7 +440,15 @@ test_that("coalesce()", {
         c6 = coalesce(w, x, y, NaN)
       ) %>%
       collect(),
-    df
+    mutate(
+      df,
+      c1 = 4.4,
+      c2 = NA_real_,
+      c3 = NaN,
+      c4 = c(5.5, 2.2, 3.3),
+      c5 = c(NA, 2.2, 3.3),
+      c6 = c(NaN, 2.2, 3.3)
+    )
   )
 
   # no arguments

From c68226a4323ecbcd1d63da6ae1ce6a1090a9adf0 Mon Sep 17 00:00:00 2001
From: Nic Crane <thisisnic@gmail.com>
Date: Wed, 3 May 2023 12:01:49 +0200
Subject: [PATCH 4/5] Remove badges from README

---
 r/README.md | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/r/README.md b/r/README.md
index d343d6979c0..ee4036d48f3 100644
--- a/r/README.md
+++ b/r/README.md
@@ -1,9 +1,5 @@
 # arrow <img src="https://arrow.apache.org/img/arrow-logo_hex_black-txt_white-bg.png" align="right" alt="" width="120" />
 
-[![cran](https://www.r-pkg.org/badges/version-last-release/arrow)](https://cran.r-project.org/package=arrow)
-[![CI](https://github.com/apache/arrow/workflows/R/badge.svg?event=push)](https://github.com/apache/arrow/actions?query=workflow%3AR+branch%3Amain+event%3Apush)
-[![conda-forge](https://img.shields.io/conda/vn/conda-forge/r-arrow.svg)](https://anaconda.org/conda-forge/r-arrow)
-
 [Apache Arrow](https://arrow.apache.org/) is a cross-language
 development platform for in-memory and larger-than-memory data. It specifies a standardized
 language-independent columnar memory format for flat and hierarchical

From 70dc03ea827017594b7def6f3e937c1a4cdc2c2f Mon Sep 17 00:00:00 2001
From: Neal Richardson <neal.p.richardson@gmail.com>
Date: Wed, 3 May 2023 10:08:05 -0400
Subject: [PATCH 5/5] ARROW_ACERO should be ON by default

---
 r/inst/build_arrow_static.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/r/inst/build_arrow_static.sh b/r/inst/build_arrow_static.sh
index e5a9f127edb..1baf011a412 100755
--- a/r/inst/build_arrow_static.sh
+++ b/r/inst/build_arrow_static.sh
@@ -55,7 +55,7 @@ ${CMAKE} -DARROW_BOOST_USE_SHARED=OFF \
     -DARROW_BUILD_TESTS=OFF \
     -DARROW_BUILD_SHARED=OFF \
     -DARROW_BUILD_STATIC=ON \
-    -DARROW_ACERO=${ARROW_ACERO:-$ARROW_DEFAULT_PARAM} \
+    -DARROW_ACERO=${ARROW_ACERO:-ON} \
     -DARROW_COMPUTE=ON \
     -DARROW_CSV=ON \
     -DARROW_DATASET=${ARROW_DATASET:-ON} \