From 8ebc317795929dfd0d348ae620c2e3cdaa38a651 Mon Sep 17 00:00:00 2001
From: Neal Richardson <neal.p.richardson@gmail.com>
Date: Wed, 31 Aug 2022 12:18:22 -0400
Subject: [PATCH 1/7] First pass at generating function docs

---
 r/DESCRIPTION                  |   1 +
 r/Makefile                     |   1 +
 r/R/dplyr-funcs-datetime.R     |  53 ++++---
 r/R/dplyr-funcs-doc.R          | 244 +++++++++++++++++++++++++++++++++
 r/R/dplyr-funcs-string.R       |  66 +++++----
 r/R/dplyr-funcs.R              |  17 ++-
 r/data-raw/docgen.R            |  91 ++++++++++++
 r/man/arrow-dplyr-functions.Rd | 228 ++++++++++++++++++++++++++++++
 r/man/register_binding.Rd      |  11 +-
 9 files changed, 652 insertions(+), 60 deletions(-)
 create mode 100644 r/R/dplyr-funcs-doc.R
 create mode 100644 r/data-raw/docgen.R
 create mode 100644 r/man/arrow-dplyr-functions.Rd

diff --git a/r/DESCRIPTION b/r/DESCRIPTION
index 7ae6a8de29f..7b60f0c510a 100644
--- a/r/DESCRIPTION
+++ b/r/DESCRIPTION
@@ -103,6 +103,7 @@ Collate:
     'dplyr-funcs-augmented.R'
     'dplyr-funcs-conditional.R'
     'dplyr-funcs-datetime.R'
+    'dplyr-funcs-doc.R'
     'dplyr-funcs-math.R'
     'dplyr-funcs-string.R'
     'dplyr-funcs-type.R'
diff --git a/r/Makefile b/r/Makefile
index 1ddbe595dd2..cb76b4c9775 100644
--- a/r/Makefile
+++ b/r/Makefile
@@ -26,6 +26,7 @@ style-all:
 	R -s -e 'styler::style_file(setdiff(dir(pattern = "R$$", recursive = TRUE), source(".styler_excludes.R")$$value))'
 
 doc: style
+	R -s -f data-raw/docgen.R
 	R -s -e 'roxygen2::roxygenize()'
 	-git add --all man/*.Rd
 
diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R
index 9a010452b84..6106adbc5e4 100644
--- a/r/R/dplyr-funcs-datetime.R
+++ b/r/R/dplyr-funcs-datetime.R
@@ -649,55 +649,59 @@ register_bindings_datetime_parsers <- function() {
 
     build_expr("assume_timezone", coalesce_output, options = list(timezone = tz))
   })
-
 }
 
+# Register bindings for lubridate's round_date(), floor_date() and
+# ceiling_date(). Each binding parses `unit` with parse_period_unit() and
+# creates a "round_temporal", "floor_temporal" or "ceil_temporal" expression;
+# week units are routed through shift_temporal_to_week() with `week_start`.
 register_bindings_datetime_rounding <- function() {
   register_binding(
-    "round_date",
+    "lubridate::round_date",
     function(x,
              unit = "second",
              week_start = getOption("lubridate.week.start", 7)) {
+      opts <- parse_period_unit(unit)
+      if (opts$unit == 7L) { # weeks (unit = 7L) need to accommodate week_start
+        return(shift_temporal_to_week("round_temporal", x, week_start, options = opts))
+      }
 
-    opts <- parse_period_unit(unit)
-    if (opts$unit == 7L) { # weeks (unit = 7L) need to accommodate week_start
-      return(shift_temporal_to_week("round_temporal", x, week_start, options = opts))
+      Expression$create("round_temporal", x, options = opts)
     }
-
-    Expression$create("round_temporal", x, options = opts)
-  })
+  )
 
   register_binding(
-    "floor_date",
+    "lubridate::floor_date",
     function(x,
              unit = "second",
              week_start = getOption("lubridate.week.start", 7)) {
+      opts <- parse_period_unit(unit)
+      if (opts$unit == 7L) { # weeks (unit = 7L) need to accommodate week_start
+        return(shift_temporal_to_week("floor_temporal", x, week_start, options = opts))
+      }
 
-    opts <- parse_period_unit(unit)
-    if (opts$unit == 7L) { # weeks (unit = 7L) need to accommodate week_start
-      return(shift_temporal_to_week("floor_temporal", x, week_start, options = opts))
+      Expression$create("floor_temporal", x, options = opts)
     }
-
-    Expression$create("floor_temporal", x, options = opts)
-  })
+  )
 
   register_binding(
-    "ceiling_date",
+    "lubridate::ceiling_date",
     function(x,
              unit = "second",
              change_on_boundary = NULL,
              week_start = getOption("lubridate.week.start", 7)) {
-    opts <- parse_period_unit(unit)
-    if (is.null(change_on_boundary)) {
-      change_on_boundary <- ifelse(call_binding("is.Date", x), TRUE, FALSE)
-    }
-    opts$ceil_is_strictly_greater <- change_on_boundary
-
-    if (opts$unit == 7L) { # weeks (unit = 7L) need to accommodate week_start
-      return(shift_temporal_to_week("ceil_temporal", x, week_start, options = opts))
-    }
+      opts <- parse_period_unit(unit)
+      # Default when not supplied: TRUE if the is.Date binding is TRUE for x
+      if (is.null(change_on_boundary)) {
+        change_on_boundary <- ifelse(call_binding("is.Date", x), TRUE, FALSE)
+      }
+      opts$ceil_is_strictly_greater <- change_on_boundary
 
-    Expression$create("ceil_temporal", x, options = opts)
-  })
+      if (opts$unit == 7L) { # weeks (unit = 7L) need to accommodate week_start
+        return(shift_temporal_to_week("ceil_temporal", x, week_start, options = opts))
+      }
 
+      Expression$create("ceil_temporal", x, options = opts)
+    }
+  )
 }
diff --git a/r/R/dplyr-funcs-doc.R b/r/R/dplyr-funcs-doc.R
new file mode 100644
index 00000000000..5104735cf1b
--- /dev/null
+++ b/r/R/dplyr-funcs-doc.R
@@ -0,0 +1,244 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Generated by using data-raw/docgen.R -> do not edit by hand
+
+#' Functions available in Arrow dplyr queries
+#'
+#' The `arrow` package contains mappings of 196 R functions to the corresponding
+#' functions in the Arrow compute library. This allows you to write code inside
+#' of `dplyr` methods that call R functions, including many in packages like
+#' `stringr` and `lubridate`, and they will get translated to Arrow and run
+#' on the Arrow query engine (Acero). This document lists all of the mapped
+#' functions.
+#'
+#' In the list below, any differences in behavior or support between Acero and
+#' the R function are listed. If no notes follow the function name, then you
+#' can assume that the function works in Acero just as it does in R.
+#'
+#' Functions can be called either as `pkg::fun()` or just `fun()`, i.e. both
+#' `str_sub()` and `stringr::str_sub()` work.
+#'
+#' In addition to these functions, you can call any of Arrow's 243 compute
+#' functions directly. Arrow has many functions that don't map to an existing R
+#' function. In other cases where there is an R function mapping, you can still
+#' call the Arrow function directly if you don't want the adaptations that the R
+#' mapping has that make Acero behave like R. These functions are listed in the
+#' [C++ documentation](https://arrow.apache.org/docs/cpp/compute.html), and
+#' in the function registry in R, they are named with an `arrow_` prefix, such
+#' as `arrow_ascii_is_decimal`.
+#'
+#' * [-()]
+#' * [!()]
+#' * [!=()]
+#' * [*()]
+#' * [/()]
+#' * [&()]
+#' * [%/%()]
+#' * [%%()]
+#' * [%in%()]
+#' * [^()]
+#' * [+()]
+#' * [<()]
+#' * [<=()]
+#' * [==()]
+#' * [>()]
+#' * [>=()]
+#' * [|()]
+#' * [add_filename()]
+#' * [base::abs()]
+#' * [base::acos()]
+#' * [base::all()]
+#' * [base::any()]
+#' * [base::as.character()]
+#' * [base::as.Date()]
+#' * [base::as.difftime()]
+#' * [base::as.double()]
+#' * [base::as.factor()]
+#' * [base::as.integer()]
+#' * [base::as.logical()]
+#' * [base::as.numeric()]
+#' * [base::asin()]
+#' * [base::ceiling()]
+#' * [base::cos()]
+#' * [base::data.frame()]
+#' * [base::difftime()]
+#' * [base::endsWith()]
+#' * [base::exp()]
+#' * [base::floor()]
+#' * [base::format()]
+#' * [base::grepl()]
+#' * [base::gsub()]
+#' * [base::ifelse()]
+#' * [base::is.character()]
+#' * [base::is.double()]
+#' * [base::is.factor()]
+#' * [base::is.finite()]
+#' * [base::is.infinite()]
+#' * [base::is.integer()]
+#' * [base::is.list()]
+#' * [base::is.logical()]
+#' * [base::is.na()]
+#' * [base::is.nan()]
+#' * [base::is.numeric()]
+#' * [base::ISOdate()]
+#' * [base::ISOdatetime()]
+#' * [base::log()]
+#' * [base::log10()]
+#' * [base::log1p()]
+#' * [base::log2()]
+#' * [base::logb()]
+#' * [base::max()]
+#' * [base::mean()]
+#' * [base::min()]
+#' * [base::nchar()]
+#' * [base::paste()]: the `collapse` argument is not yet supported
+#' * [base::paste0()]: the `collapse` argument is not yet supported
+#' * [base::pmax()]
+#' * [base::pmin()]
+#' * [base::round()]
+#' * [base::sign()]
+#' * [base::sin()]
+#' * [base::sqrt()]
+#' * [base::startsWith()]
+#' * [base::strftime()]
+#' * [base::strptime()]
+#' * [base::strrep()]
+#' * [base::strsplit()]
+#' * [base::sub()]
+#' * [base::substr()]
+#' * [base::substring()]
+#' * [base::sum()]
+#' * [base::tan()]
+#' * [base::tolower()]
+#' * [base::toupper()]
+#' * [base::trunc()]
+#' * [bit64::as.integer64()]
+#' * [bit64::is.integer64()]
+#' * [cast()]
+#' * [dictionary_encode()]
+#' * [dplyr::between()]
+#' * [dplyr::case_when()]
+#' * [dplyr::coalesce()]
+#' * [dplyr::if_else()]
+#' * [dplyr::n()]
+#' * [dplyr::n_distinct()]
+#' * [lubridate::am()]
+#' * [lubridate::as_date()]
+#' * [lubridate::as_datetime()]
+#' * [lubridate::ceiling_date()]
+#' * [lubridate::date()]
+#' * [lubridate::date_decimal()]
+#' * [lubridate::day()]
+#' * [lubridate::ddays()]
+#' * [lubridate::decimal_date()]
+#' * [lubridate::dhours()]
+#' * [lubridate::dmicroseconds()]
+#' * [lubridate::dmilliseconds()]
+#' * [lubridate::dminutes()]
+#' * [lubridate::dmonths()]
+#' * [lubridate::dmy()]
+#' * [lubridate::dmy_h()]
+#' * [lubridate::dmy_hm()]
+#' * [lubridate::dmy_hms()]
+#' * [lubridate::dnanoseconds()]
+#' * [lubridate::dpicoseconds()]
+#' * [lubridate::dseconds()]
+#' * [lubridate::dst()]
+#' * [lubridate::dweeks()]
+#' * [lubridate::dyears()]
+#' * [lubridate::dym()]
+#' * [lubridate::epiweek()]
+#' * [lubridate::epiyear()]
+#' * [lubridate::fast_strptime()]
+#' * [lubridate::floor_date()]
+#' * [lubridate::format_ISO8601()]
+#' * [lubridate::hour()]
+#' * [lubridate::is.Date()]
+#' * [lubridate::is.instant()]
+#' * [lubridate::is.POSIXct()]
+#' * [lubridate::is.timepoint()]
+#' * [lubridate::isoweek()]
+#' * [lubridate::isoyear()]
+#' * [lubridate::leap_year()]
+#' * [lubridate::make_date()]
+#' * [lubridate::make_datetime()]
+#' * [lubridate::make_difftime()]
+#' * [lubridate::mday()]
+#' * [lubridate::mdy()]
+#' * [lubridate::mdy_h()]
+#' * [lubridate::mdy_hm()]
+#' * [lubridate::mdy_hms()]
+#' * [lubridate::minute()]
+#' * [lubridate::month()]
+#' * [lubridate::my()]
+#' * [lubridate::myd()]
+#' * [lubridate::parse_date_time()]
+#' * [lubridate::pm()]
+#' * [lubridate::qday()]
+#' * [lubridate::quarter()]
+#' * [lubridate::round_date()]
+#' * [lubridate::second()]
+#' * [lubridate::semester()]
+#' * [lubridate::tz()]
+#' * [lubridate::wday()]
+#' * [lubridate::week()]
+#' * [lubridate::yday()]
+#' * [lubridate::ydm()]
+#' * [lubridate::ydm_h()]
+#' * [lubridate::ydm_hm()]
+#' * [lubridate::ydm_hms()]
+#' * [lubridate::year()]
+#' * [lubridate::ym()]
+#' * [lubridate::ymd()]
+#' * [lubridate::ymd_h()]
+#' * [lubridate::ymd_hm()]
+#' * [lubridate::ymd_hms()]
+#' * [lubridate::yq()]
+#' * [methods::is()]
+#' * [rlang::is_character()]
+#' * [rlang::is_double()]
+#' * [rlang::is_integer()]
+#' * [rlang::is_list()]
+#' * [rlang::is_logical()]
+#' * [stats::median()]
+#' * [stats::quantile()]
+#' * [stats::sd()]
+#' * [stats::var()]
+#' * [stringi::stri_reverse()]
+#' * [stringr::str_c()]: the `collapse` argument is not yet supported
+#' * [stringr::str_count()]
+#' * [stringr::str_detect()]
+#' * [stringr::str_dup()]
+#' * [stringr::str_ends()]
+#' * [stringr::str_length()]
+#' * [stringr::str_like()]
+#' * [stringr::str_pad()]
+#' * [stringr::str_replace()]
+#' * [stringr::str_replace_all()]
+#' * [stringr::str_split()]
+#' * [stringr::str_starts()]
+#' * [stringr::str_sub()]
+#' * [stringr::str_to_lower()]
+#' * [stringr::str_to_title()]
+#' * [stringr::str_to_upper()]
+#' * [stringr::str_trim()]
+#' * [tibble::tibble()]
+#'
+#' @name arrow-dplyr-functions
+NULL
+
diff --git a/r/R/dplyr-funcs-string.R b/r/R/dplyr-funcs-string.R
index b300d7c439e..71dcce94cc7 100644
--- a/r/R/dplyr-funcs-string.R
+++ b/r/R/dplyr-funcs-string.R
@@ -161,32 +161,44 @@ register_bindings_string_join <- function() {
     }
   }
 
-  register_binding("base::paste", function(..., sep = " ", collapse = NULL, recycle0 = FALSE) {
-    assert_that(
-      is.null(collapse),
-      msg = "paste() with the collapse argument is not yet supported in Arrow"
-    )
-    if (!inherits(sep, "Expression")) {
-      assert_that(!is.na(sep), msg = "Invalid separator")
-    }
-    arrow_string_join_function(NullHandlingBehavior$REPLACE, "NA")(..., sep)
-  })
-
-  register_binding("base::paste0", function(..., collapse = NULL, recycle0 = FALSE) {
-    assert_that(
-      is.null(collapse),
-      msg = "paste0() with the collapse argument is not yet supported in Arrow"
-    )
-    arrow_string_join_function(NullHandlingBehavior$REPLACE, "NA")(..., "")
-  })
-
-  register_binding("stringr::str_c", function(..., sep = "", collapse = NULL) {
-    assert_that(
-      is.null(collapse),
-      msg = "str_c() with the collapse argument is not yet supported in Arrow"
-    )
-    arrow_string_join_function(NullHandlingBehavior$EMIT_NULL)(..., sep)
-  })
+  register_binding(
+    "base::paste",
+    function(..., sep = " ", collapse = NULL, recycle0 = FALSE) {
+      assert_that(
+        is.null(collapse),
+        msg = "paste() with the collapse argument is not yet supported in Arrow"
+      )
+      if (!inherits(sep, "Expression")) {
+        assert_that(!is.na(sep), msg = "Invalid separator")
+      }
+      arrow_string_join_function(NullHandlingBehavior$REPLACE, "NA")(..., sep)
+    },
+    notes = "the `collapse` argument is not yet supported"
+  )
+
+  register_binding(
+    "base::paste0",
+    function(..., collapse = NULL, recycle0 = FALSE) {
+      assert_that(
+        is.null(collapse),
+        msg = "paste0() with the collapse argument is not yet supported in Arrow"
+      )
+      arrow_string_join_function(NullHandlingBehavior$REPLACE, "NA")(..., "")
+    },
+    notes = "the `collapse` argument is not yet supported"
+  )
+
+  register_binding(
+    "stringr::str_c",
+    function(..., sep = "", collapse = NULL) {
+      assert_that(
+        is.null(collapse),
+        msg = "str_c() with the collapse argument is not yet supported in Arrow"
+      )
+      arrow_string_join_function(NullHandlingBehavior$EMIT_NULL)(..., sep)
+    },
+    notes = "the `collapse` argument is not yet supported"
+  )
 }
 
 register_bindings_string_regex <- function() {
@@ -337,7 +349,7 @@ register_bindings_string_regex <- function() {
   register_binding("stringr::str_replace_all", arrow_stringr_string_replace_function(-1L))
 
   register_binding("base::strsplit", function(x, split, fixed = FALSE, perl = FALSE,
-                                        useBytes = FALSE) {
+                                              useBytes = FALSE) {
     assert_that(is.string(split))
 
     arrow_fun <- ifelse(fixed, "split_pattern", "split_pattern_regex")
diff --git a/r/R/dplyr-funcs.R b/r/R/dplyr-funcs.R
index 4dadff54b48..a66db112d98 100644
--- a/r/R/dplyr-funcs.R
+++ b/r/R/dplyr-funcs.R
@@ -59,13 +59,17 @@ NULL
 #'   summarise) because the data mask has to be a list.
 #' @param registry An environment in which the functions should be
 #'   assigned.
-#'
+#' @param notes string for the docs: note any limitations or differences in
+#'   behavior between the Arrow version and the R function.
 #' @return The previously registered binding or `NULL` if no previously
 #'   registered function existed.
 #' @keywords internal
 #'
-register_binding <- function(fun_name, fun, registry = nse_funcs,
-                             update_cache = FALSE) {
+register_binding <- function(fun_name,
+                             fun,
+                             registry = nse_funcs,
+                             update_cache = FALSE,
+                             notes = character(0)) {
   unqualified_name <- sub("^.*?:{+}", "", fun_name)
 
   previous_fun <- registry[[unqualified_name]]
@@ -76,7 +80,8 @@ register_binding <- function(fun_name, fun, registry = nse_funcs,
       paste0(
         "A \"",
         unqualified_name,
-        "\" binding already exists in the registry and will be overwritten.")
+        "\" binding already exists in the registry and will be overwritten."
+      )
     )
   }
 
@@ -85,6 +90,8 @@ register_binding <- function(fun_name, fun, registry = nse_funcs,
   registry[[unqualified_name]] <- fun
   registry[[fun_name]] <- fun
 
+  .cache$docs[[fun_name]] <- notes
+
   if (update_cache) {
     fun_cache <- .cache$functions
     fun_cache[[unqualified_name]] <- fun
@@ -131,7 +138,7 @@ call_binding_agg <- function(fun_name, ...) {
 
 # Called in .onLoad()
 create_binding_cache <- function() {
-  arrow_funcs <- list()
+  .cache$docs <- list()
 
   # Register all available Arrow Compute functions, namespaced as arrow_fun.
   all_arrow_funs <- list_compute_functions()
diff --git a/r/data-raw/docgen.R b/r/data-raw/docgen.R
new file mode 100644
index 00000000000..a41aeb246c8
--- /dev/null
+++ b/r/data-raw/docgen.R
@@ -0,0 +1,91 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# This script generates R/dplyr-funcs-doc.R; run it from the r/ directory.
+# It requires that the arrow package be installed.
+
+file_template <- "# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# \"License\"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Generated by using data-raw/docgen.R -> do not edit by hand
+
+#' Functions available in Arrow dplyr queries
+#'
+#' The `arrow` package contains mappings of %s R functions to the corresponding
+#' functions in the Arrow compute library. This allows you to write code inside
+#' of `dplyr` methods that call R functions, including many in packages like
+#' `stringr` and `lubridate`, and they will get translated to Arrow and run
+#' on the Arrow query engine (Acero). This document lists all of the mapped
+#' functions.
+#'
+#' In the list below, any differences in behavior or support between Acero and
+#' the R function are listed. If no notes follow the function name, then you
+#' can assume that the function works in Acero just as it does in R.
+#'
+#' Functions can be called either as `pkg::fun()` or just `fun()`, i.e. both
+#' `str_sub()` and `stringr::str_sub()` work.
+#'
+#' In addition to these functions, you can call any of Arrow's %s compute
+#' functions directly. Arrow has many functions that don't map to an existing R
+#' function. In other cases where there is an R function mapping, you can still
+#' call the Arrow function directly if you don't want the adaptations that the R
+#' mapping has that make Acero behave like R. These functions are listed in the
+#' [C++ documentation](https://arrow.apache.org/docs/cpp/compute.html), and
+#' in the function registry in R, they are named with an `arrow_` prefix, such
+#' as `arrow_ascii_is_decimal`.
+#'
+%s
+#'
+#' @name arrow-dplyr-functions
+NULL
+"
+
+# Notes stored in .cache$docs by register_binding(), sorted by binding name
+docs <- arrow:::.cache$docs
+docs <- docs[order(names(docs))]
+# TODO: group by package name, create subheadings
+
+# One roxygen bullet per binding: a link, then any notes after a colon
+doclets <- purrr::imap_chr(docs, function(x, n) {
+  bullet <- paste0("#' * [", n, "()]")
+  if (length(x) == 0) {
+    return(bullet)
+  }
+  paste0(bullet, ": ", paste(x, collapse = " "))
+})
+
+rendered <- sprintf(
+  file_template,
+  length(docs),
+  length(arrow::list_compute_functions()),
+  paste(doclets, collapse = "\n")
+)
+writeLines(rendered, "R/dplyr-funcs-doc.R")
diff --git a/r/man/arrow-dplyr-functions.Rd b/r/man/arrow-dplyr-functions.Rd
new file mode 100644
index 00000000000..8cf74c5c78e
--- /dev/null
+++ b/r/man/arrow-dplyr-functions.Rd
@@ -0,0 +1,228 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/dplyr-funcs-doc.R
+\name{arrow-dplyr-functions}
+\alias{arrow-dplyr-functions}
+\title{Functions available in Arrow dplyr queries}
+\description{
+The \code{arrow} package contains mappings of 196 R functions to the corresponding
+functions in the Arrow compute library. This allows you to write code inside
+of \code{dplyr} methods that call R functions, including many in packages like
+\code{stringr} and \code{lubridate}, and they will get translated to Arrow and run
+on the Arrow query engine (Acero). This document lists all of the mapped
+functions.
+}
+\details{
+In the list below, any differences in behavior or support between Acero and
+the R function are listed. If no notes follow the function name, then you
+can assume that the function works in Acero just as it does in R.
+
+Functions can be called either as \code{pkg::fun()} or just \code{fun()}, i.e. both
+\code{str_sub()} and \code{stringr::str_sub()} work.
+
+In addition to these functions, you can call any of Arrow's 243 compute
+functions directly. Arrow has many functions that don't map to an existing R
+function. In other cases where there is an R function mapping, you can still
+call the Arrow function directly if you don't want the adaptations that the R
+mapping has that make Acero behave like R. These functions are listed in the
+\href{https://arrow.apache.org/docs/cpp/compute.html}{C++ documentation}, and
+in the function registry in R, they are named with an \code{arrow_} prefix, such
+as \code{arrow_ascii_is_decimal}.
+\itemize{
+\item \code{\link[=-]{-()}}
+\item \code{\link[=!]{!()}}
+\item \code{\link[=!=]{!=()}}
+\item \code{\link[=*]{*()}}
+\item \code{\link[=/]{/()}}
+\item \code{\link[=&]{&()}}
+\item \code{\link[=\%/\%]{\%/\%()}}
+\item \code{\link[=\%\%]{\%\%()}}
+\item \code{\link[=\%in\%]{\%in\%()}}
+\item \code{\link[=^]{^()}}
+\item \code{\link[=+]{+()}}
+\item \code{\link[=<]{<()}}
+\item \code{\link[=<=]{<=()}}
+\item \code{\link[===]{==()}}
+\item \code{\link[=>]{>()}}
+\item \code{\link[=>=]{>=()}}
+\item \code{\link[=|]{|()}}
+\item \code{\link[=add_filename]{add_filename()}}
+\item \code{\link[base:MathFun]{base::abs()}}
+\item \code{\link[base:Trig]{base::acos()}}
+\item \code{\link[base:all]{base::all()}}
+\item \code{\link[base:any]{base::any()}}
+\item \code{\link[base:character]{base::as.character()}}
+\item \code{\link[base:as.Date]{base::as.Date()}}
+\item \code{\link[base:difftime]{base::as.difftime()}}
+\item \code{\link[base:double]{base::as.double()}}
+\item \code{\link[base:factor]{base::as.factor()}}
+\item \code{\link[base:integer]{base::as.integer()}}
+\item \code{\link[base:logical]{base::as.logical()}}
+\item \code{\link[base:numeric]{base::as.numeric()}}
+\item \code{\link[base:Trig]{base::asin()}}
+\item \code{\link[base:Round]{base::ceiling()}}
+\item \code{\link[base:Trig]{base::cos()}}
+\item \code{\link[base:data.frame]{base::data.frame()}}
+\item \code{\link[base:difftime]{base::difftime()}}
+\item \code{\link[base:startsWith]{base::endsWith()}}
+\item \code{\link[base:Log]{base::exp()}}
+\item \code{\link[base:Round]{base::floor()}}
+\item \code{\link[base:format]{base::format()}}
+\item \code{\link[base:grep]{base::grepl()}}
+\item \code{\link[base:grep]{base::gsub()}}
+\item \code{\link[base:ifelse]{base::ifelse()}}
+\item \code{\link[base:character]{base::is.character()}}
+\item \code{\link[base:double]{base::is.double()}}
+\item \code{\link[base:factor]{base::is.factor()}}
+\item \code{\link[base:is.finite]{base::is.finite()}}
+\item \code{\link[base:is.finite]{base::is.infinite()}}
+\item \code{\link[base:integer]{base::is.integer()}}
+\item \code{\link[base:list]{base::is.list()}}
+\item \code{\link[base:logical]{base::is.logical()}}
+\item \code{\link[base:NA]{base::is.na()}}
+\item \code{\link[base:is.finite]{base::is.nan()}}
+\item \code{\link[base:numeric]{base::is.numeric()}}
+\item \code{\link[base:ISOdatetime]{base::ISOdate()}}
+\item \code{\link[base:ISOdatetime]{base::ISOdatetime()}}
+\item \code{\link[base:Log]{base::log()}}
+\item \code{\link[base:Log]{base::log10()}}
+\item \code{\link[base:Log]{base::log1p()}}
+\item \code{\link[base:Log]{base::log2()}}
+\item \code{\link[base:Log]{base::logb()}}
+\item \code{\link[base:Extremes]{base::max()}}
+\item \code{\link[base:mean]{base::mean()}}
+\item \code{\link[base:Extremes]{base::min()}}
+\item \code{\link[base:nchar]{base::nchar()}}
+\item \code{\link[base:paste]{base::paste()}}: the \code{collapse} argument is not yet supported
+\item \code{\link[base:paste]{base::paste0()}}: the \code{collapse} argument is not yet supported
+\item \code{\link[base:Extremes]{base::pmax()}}
+\item \code{\link[base:Extremes]{base::pmin()}}
+\item \code{\link[base:Round]{base::round()}}
+\item \code{\link[base:sign]{base::sign()}}
+\item \code{\link[base:Trig]{base::sin()}}
+\item \code{\link[base:MathFun]{base::sqrt()}}
+\item \code{\link[base:startsWith]{base::startsWith()}}
+\item \code{\link[base:strptime]{base::strftime()}}
+\item \code{\link[base:strptime]{base::strptime()}}
+\item \code{\link[base:strrep]{base::strrep()}}
+\item \code{\link[base:strsplit]{base::strsplit()}}
+\item \code{\link[base:grep]{base::sub()}}
+\item \code{\link[base:substr]{base::substr()}}
+\item \code{\link[base:substr]{base::substring()}}
+\item \code{\link[base:sum]{base::sum()}}
+\item \code{\link[base:Trig]{base::tan()}}
+\item \code{\link[base:chartr]{base::tolower()}}
+\item \code{\link[base:chartr]{base::toupper()}}
+\item \code{\link[base:Round]{base::trunc()}}
+\item \code{\link[bit64:as.integer64.character]{bit64::as.integer64()}}
+\item \code{\link[bit64:bit64-package]{bit64::is.integer64()}}
+\item \code{\link[=cast]{cast()}}
+\item \code{\link[=dictionary_encode]{dictionary_encode()}}
+\item \code{\link[dplyr:between]{dplyr::between()}}
+\item \code{\link[dplyr:case_when]{dplyr::case_when()}}
+\item \code{\link[dplyr:coalesce]{dplyr::coalesce()}}
+\item \code{\link[dplyr:if_else]{dplyr::if_else()}}
+\item \code{\link[dplyr:context]{dplyr::n()}}
+\item \code{\link[dplyr:n_distinct]{dplyr::n_distinct()}}
+\item \code{\link[lubridate:am]{lubridate::am()}}
+\item \code{\link[lubridate:as_date]{lubridate::as_date()}}
+\item \code{\link[lubridate:as_date]{lubridate::as_datetime()}}
+\item \code{\link[lubridate:round_date]{lubridate::ceiling_date()}}
+\item \code{\link[lubridate:date]{lubridate::date()}}
+\item \code{\link[lubridate:date_decimal]{lubridate::date_decimal()}}
+\item \code{\link[lubridate:day]{lubridate::day()}}
+\item \code{\link[lubridate:duration]{lubridate::ddays()}}
+\item \code{\link[lubridate:decimal_date]{lubridate::decimal_date()}}
+\item \code{\link[lubridate:duration]{lubridate::dhours()}}
+\item \code{\link[lubridate:duration]{lubridate::dmicroseconds()}}
+\item \code{\link[lubridate:duration]{lubridate::dmilliseconds()}}
+\item \code{\link[lubridate:duration]{lubridate::dminutes()}}
+\item \code{\link[lubridate:duration]{lubridate::dmonths()}}
+\item \code{\link[lubridate:ymd]{lubridate::dmy()}}
+\item \code{\link[lubridate:ymd_hms]{lubridate::dmy_h()}}
+\item \code{\link[lubridate:ymd_hms]{lubridate::dmy_hm()}}
+\item \code{\link[lubridate:ymd_hms]{lubridate::dmy_hms()}}
+\item \code{\link[lubridate:duration]{lubridate::dnanoseconds()}}
+\item \code{\link[lubridate:duration]{lubridate::dpicoseconds()}}
+\item \code{\link[lubridate:duration]{lubridate::dseconds()}}
+\item \code{\link[lubridate:dst]{lubridate::dst()}}
+\item \code{\link[lubridate:duration]{lubridate::dweeks()}}
+\item \code{\link[lubridate:duration]{lubridate::dyears()}}
+\item \code{\link[lubridate:ymd]{lubridate::dym()}}
+\item \code{\link[lubridate:week]{lubridate::epiweek()}}
+\item \code{\link[lubridate:year]{lubridate::epiyear()}}
+\item \code{\link[lubridate:parse_date_time]{lubridate::fast_strptime()}}
+\item \code{\link[lubridate:round_date]{lubridate::floor_date()}}
+\item \code{\link[lubridate:format_ISO8601]{lubridate::format_ISO8601()}}
+\item \code{\link[lubridate:hour]{lubridate::hour()}}
+\item \code{\link[lubridate:date_utils]{lubridate::is.Date()}}
+\item \code{\link[lubridate:is.instant]{lubridate::is.instant()}}
+\item \code{\link[lubridate:posix_utils]{lubridate::is.POSIXct()}}
+\item \code{\link[lubridate:is.instant]{lubridate::is.timepoint()}}
+\item \code{\link[lubridate:week]{lubridate::isoweek()}}
+\item \code{\link[lubridate:year]{lubridate::isoyear()}}
+\item \code{\link[lubridate:leap_year]{lubridate::leap_year()}}
+\item \code{\link[lubridate:make_datetime]{lubridate::make_date()}}
+\item \code{\link[lubridate:make_datetime]{lubridate::make_datetime()}}
+\item \code{\link[lubridate:make_difftime]{lubridate::make_difftime()}}
+\item \code{\link[lubridate:day]{lubridate::mday()}}
+\item \code{\link[lubridate:ymd]{lubridate::mdy()}}
+\item \code{\link[lubridate:ymd_hms]{lubridate::mdy_h()}}
+\item \code{\link[lubridate:ymd_hms]{lubridate::mdy_hm()}}
+\item \code{\link[lubridate:ymd_hms]{lubridate::mdy_hms()}}
+\item \code{\link[lubridate:minute]{lubridate::minute()}}
+\item \code{\link[lubridate:month]{lubridate::month()}}
+\item \code{\link[lubridate:ymd]{lubridate::my()}}
+\item \code{\link[lubridate:ymd]{lubridate::myd()}}
+\item \code{\link[lubridate:parse_date_time]{lubridate::parse_date_time()}}
+\item \code{\link[lubridate:am]{lubridate::pm()}}
+\item \code{\link[lubridate:day]{lubridate::qday()}}
+\item \code{\link[lubridate:quarter]{lubridate::quarter()}}
+\item \code{\link[lubridate:round_date]{lubridate::round_date()}}
+\item \code{\link[lubridate:second]{lubridate::second()}}
+\item \code{\link[lubridate:quarter]{lubridate::semester()}}
+\item \code{\link[lubridate:tz]{lubridate::tz()}}
+\item \code{\link[lubridate:day]{lubridate::wday()}}
+\item \code{\link[lubridate:week]{lubridate::week()}}
+\item \code{\link[lubridate:day]{lubridate::yday()}}
+\item \code{\link[lubridate:ymd]{lubridate::ydm()}}
+\item \code{\link[lubridate:ymd_hms]{lubridate::ydm_h()}}
+\item \code{\link[lubridate:ymd_hms]{lubridate::ydm_hm()}}
+\item \code{\link[lubridate:ymd_hms]{lubridate::ydm_hms()}}
+\item \code{\link[lubridate:year]{lubridate::year()}}
+\item \code{\link[lubridate:ymd]{lubridate::ym()}}
+\item \code{\link[lubridate:ymd]{lubridate::ymd()}}
+\item \code{\link[lubridate:ymd_hms]{lubridate::ymd_h()}}
+\item \code{\link[lubridate:ymd_hms]{lubridate::ymd_hm()}}
+\item \code{\link[lubridate:ymd_hms]{lubridate::ymd_hms()}}
+\item \code{\link[lubridate:ymd]{lubridate::yq()}}
+\item \code{\link[methods:is]{methods::is()}}
+\item \code{\link[rlang:type-predicates]{rlang::is_character()}}
+\item \code{\link[rlang:type-predicates]{rlang::is_double()}}
+\item \code{\link[rlang:type-predicates]{rlang::is_integer()}}
+\item \code{\link[rlang:type-predicates]{rlang::is_list()}}
+\item \code{\link[rlang:type-predicates]{rlang::is_logical()}}
+\item \code{\link[stats:median]{stats::median()}}
+\item \code{\link[stats:quantile]{stats::quantile()}}
+\item \code{\link[stats:sd]{stats::sd()}}
+\item \code{\link[stats:cor]{stats::var()}}
+\item \code{\link[stringi:stri_reverse]{stringi::stri_reverse()}}
+\item \code{\link[stringr:str_c]{stringr::str_c()}}: the \code{collapse} argument is not yet supported
+\item \code{\link[stringr:str_count]{stringr::str_count()}}
+\item \code{\link[stringr:str_detect]{stringr::str_detect()}}
+\item \code{\link[stringr:str_dup]{stringr::str_dup()}}
+\item \code{\link[stringr:str_starts]{stringr::str_ends()}}
+\item \code{\link[stringr:str_length]{stringr::str_length()}}
+\item \code{\link[stringr:str_like]{stringr::str_like()}}
+\item \code{\link[stringr:str_pad]{stringr::str_pad()}}
+\item \code{\link[stringr:str_replace]{stringr::str_replace()}}
+\item \code{\link[stringr:str_replace]{stringr::str_replace_all()}}
+\item \code{\link[stringr:str_split]{stringr::str_split()}}
+\item \code{\link[stringr:str_starts]{stringr::str_starts()}}
+\item \code{\link[stringr:str_sub]{stringr::str_sub()}}
+\item \code{\link[stringr:case]{stringr::str_to_lower()}}
+\item \code{\link[stringr:case]{stringr::str_to_title()}}
+\item \code{\link[stringr:case]{stringr::str_to_upper()}}
+\item \code{\link[stringr:str_trim]{stringr::str_trim()}}
+\item \code{\link[tibble:tibble]{tibble::tibble()}}
+}
+}
diff --git a/r/man/register_binding.Rd b/r/man/register_binding.Rd
index c53df707516..d2a4a380543 100644
--- a/r/man/register_binding.Rd
+++ b/r/man/register_binding.Rd
@@ -4,7 +4,13 @@
 \alias{register_binding}
 \title{Register compute bindings}
 \usage{
-register_binding(fun_name, fun, registry = nse_funcs, update_cache = FALSE)
+register_binding(
+  fun_name,
+  fun,
+  registry = nse_funcs,
+  update_cache = FALSE,
+  notes = character(0)
+)
 }
 \arguments{
 \item{fun_name}{A string containing a function name in the form \code{"function"} or
@@ -26,6 +32,9 @@ non-aggregate functions could be revisited...it is currently used
 as the data mask in mutate, filter, and aggregate (but not
 summarise) because the data mask has to be a list.}
 
+\item{notes}{string for the docs: note any limitations or differences in
+behavior between the Arrow version and the R function.}
+
 \item{agg_fun}{An aggregate function or \code{NULL} to un-register a previous
 aggregate function. This function must accept \code{Expression} objects as
 arguments and return a \code{list()} with components:

From 8d036c7d961c233154c79cad3b2775a3f2ccf252 Mon Sep 17 00:00:00 2001
From: Neal Richardson <neal.p.richardson@gmail.com>
Date: Thu, 1 Sep 2022 08:52:18 -0400
Subject: [PATCH 2/7] Group by package name and improve links

---
 r/R/dplyr-funcs-augmented.R    |   2 +-
 r/R/dplyr-funcs-doc.R          | 426 ++++++++++++++++---------------
 r/R/dplyr-funcs-type.R         |  13 +-
 r/data-raw/docgen.R            |  54 +++-
 r/man/arrow-dplyr-functions.Rd | 445 ++++++++++++++++++---------------
 5 files changed, 521 insertions(+), 419 deletions(-)

diff --git a/r/R/dplyr-funcs-augmented.R b/r/R/dplyr-funcs-augmented.R
index 6e751d49f61..828e3df12be 100644
--- a/r/R/dplyr-funcs-augmented.R
+++ b/r/R/dplyr-funcs-augmented.R
@@ -16,7 +16,7 @@
 # under the License.
 
 register_bindings_augmented <- function() {
-  register_binding("add_filename", function() {
+  register_binding("arrow::add_filename", function() { # no args; yields the "__filename" field ref
     Expression$field_ref("__filename")
   })
 }
diff --git a/r/R/dplyr-funcs-doc.R b/r/R/dplyr-funcs-doc.R
index 5104735cf1b..078cf4a9fa4 100644
--- a/r/R/dplyr-funcs-doc.R
+++ b/r/R/dplyr-funcs-doc.R
@@ -19,7 +19,7 @@
 
 #' Functions available in Arrow dplyr queries
 #'
-#' The `arrow` package contains mappings of 196 R functions to the corresponding
+#' The `arrow` package contains mappings of 195 R functions to the corresponding
 #' functions in the Arrow compute library. This allows you to write code inside
 #' of `dplyr` methods that call R functions, including many in packages like
 #' `stringr` and `lubridate`, and they will get translated to Arrow and run
@@ -42,203 +42,233 @@
 #' in the function registry in R, they are named with an `arrow_` prefix, such
 #' as `arrow_ascii_is_decimal`.
 #'
-#' * [-()]
-#' * [!()]
-#' * [!=()]
-#' * [*()]
-#' * [/()]
-#' * [&()]
-#' * [%/%()]
-#' * [%%()]
-#' * [%in%()]
-#' * [^()]
-#' * [+()]
-#' * [<()]
-#' * [<=()]
-#' * [==()]
-#' * [>()]
-#' * [>=()]
-#' * [|()]
-#' * [add_filename()]
-#' * [base::abs()]
-#' * [base::acos()]
-#' * [base::all()]
-#' * [base::any()]
-#' * [base::as.character()]
-#' * [base::as.Date()]
-#' * [base::as.difftime()]
-#' * [base::as.double()]
-#' * [base::as.factor()]
-#' * [base::as.integer()]
-#' * [base::as.logical()]
-#' * [base::as.numeric()]
-#' * [base::asin()]
-#' * [base::ceiling()]
-#' * [base::cos()]
-#' * [base::data.frame()]
-#' * [base::difftime()]
-#' * [base::endsWith()]
-#' * [base::exp()]
-#' * [base::floor()]
-#' * [base::format()]
-#' * [base::grepl()]
-#' * [base::gsub()]
-#' * [base::ifelse()]
-#' * [base::is.character()]
-#' * [base::is.double()]
-#' * [base::is.factor()]
-#' * [base::is.finite()]
-#' * [base::is.infinite()]
-#' * [base::is.integer()]
-#' * [base::is.list()]
-#' * [base::is.logical()]
-#' * [base::is.na()]
-#' * [base::is.nan()]
-#' * [base::is.numeric()]
-#' * [base::ISOdate()]
-#' * [base::ISOdatetime()]
-#' * [base::log()]
-#' * [base::log10()]
-#' * [base::log1p()]
-#' * [base::log2()]
-#' * [base::logb()]
-#' * [base::max()]
-#' * [base::mean()]
-#' * [base::min()]
-#' * [base::nchar()]
-#' * [base::paste()]: the `collapse` argument is not yet supported
-#' * [base::paste0()]: the `collapse` argument is not yet supported
-#' * [base::pmax()]
-#' * [base::pmin()]
-#' * [base::round()]
-#' * [base::sign()]
-#' * [base::sin()]
-#' * [base::sqrt()]
-#' * [base::startsWith()]
-#' * [base::strftime()]
-#' * [base::strptime()]
-#' * [base::strrep()]
-#' * [base::strsplit()]
-#' * [base::sub()]
-#' * [base::substr()]
-#' * [base::substring()]
-#' * [base::sum()]
-#' * [base::tan()]
-#' * [base::tolower()]
-#' * [base::toupper()]
-#' * [base::trunc()]
-#' * [bit64::as.integer64()]
-#' * [bit64::is.integer64()]
-#' * [cast()]
-#' * [dictionary_encode()]
-#' * [dplyr::between()]
-#' * [dplyr::case_when()]
-#' * [dplyr::coalesce()]
-#' * [dplyr::if_else()]
-#' * [dplyr::n()]
-#' * [dplyr::n_distinct()]
-#' * [lubridate::am()]
-#' * [lubridate::as_date()]
-#' * [lubridate::as_datetime()]
-#' * [lubridate::ceiling_date()]
-#' * [lubridate::date()]
-#' * [lubridate::date_decimal()]
-#' * [lubridate::day()]
-#' * [lubridate::ddays()]
-#' * [lubridate::decimal_date()]
-#' * [lubridate::dhours()]
-#' * [lubridate::dmicroseconds()]
-#' * [lubridate::dmilliseconds()]
-#' * [lubridate::dminutes()]
-#' * [lubridate::dmonths()]
-#' * [lubridate::dmy()]
-#' * [lubridate::dmy_h()]
-#' * [lubridate::dmy_hm()]
-#' * [lubridate::dmy_hms()]
-#' * [lubridate::dnanoseconds()]
-#' * [lubridate::dpicoseconds()]
-#' * [lubridate::dseconds()]
-#' * [lubridate::dst()]
-#' * [lubridate::dweeks()]
-#' * [lubridate::dyears()]
-#' * [lubridate::dym()]
-#' * [lubridate::epiweek()]
-#' * [lubridate::epiyear()]
-#' * [lubridate::fast_strptime()]
-#' * [lubridate::floor_date()]
-#' * [lubridate::format_ISO8601()]
-#' * [lubridate::hour()]
-#' * [lubridate::is.Date()]
-#' * [lubridate::is.instant()]
-#' * [lubridate::is.POSIXct()]
-#' * [lubridate::is.timepoint()]
-#' * [lubridate::isoweek()]
-#' * [lubridate::isoyear()]
-#' * [lubridate::leap_year()]
-#' * [lubridate::make_date()]
-#' * [lubridate::make_datetime()]
-#' * [lubridate::make_difftime()]
-#' * [lubridate::mday()]
-#' * [lubridate::mdy()]
-#' * [lubridate::mdy_h()]
-#' * [lubridate::mdy_hm()]
-#' * [lubridate::mdy_hms()]
-#' * [lubridate::minute()]
-#' * [lubridate::month()]
-#' * [lubridate::my()]
-#' * [lubridate::myd()]
-#' * [lubridate::parse_date_time()]
-#' * [lubridate::pm()]
-#' * [lubridate::qday()]
-#' * [lubridate::quarter()]
-#' * [lubridate::round_date()]
-#' * [lubridate::second()]
-#' * [lubridate::semester()]
-#' * [lubridate::tz()]
-#' * [lubridate::wday()]
-#' * [lubridate::week()]
-#' * [lubridate::yday()]
-#' * [lubridate::ydm()]
-#' * [lubridate::ydm_h()]
-#' * [lubridate::ydm_hm()]
-#' * [lubridate::ydm_hms()]
-#' * [lubridate::year()]
-#' * [lubridate::ym()]
-#' * [lubridate::ymd()]
-#' * [lubridate::ymd_h()]
-#' * [lubridate::ymd_hm()]
-#' * [lubridate::ymd_hms()]
-#' * [lubridate::yq()]
-#' * [methods::is()]
-#' * [rlang::is_character()]
-#' * [rlang::is_double()]
-#' * [rlang::is_integer()]
-#' * [rlang::is_list()]
-#' * [rlang::is_logical()]
-#' * [stats::median()]
-#' * [stats::quantile()]
-#' * [stats::sd()]
-#' * [stats::var()]
-#' * [stringi::stri_reverse()]
-#' * [stringr::str_c()]: the `collapse` argument is not yet supported
-#' * [stringr::str_count()]
-#' * [stringr::str_detect()]
-#' * [stringr::str_dup()]
-#' * [stringr::str_ends()]
-#' * [stringr::str_length()]
-#' * [stringr::str_like()]
-#' * [stringr::str_pad()]
-#' * [stringr::str_replace()]
-#' * [stringr::str_replace_all()]
-#' * [stringr::str_split()]
-#' * [stringr::str_starts()]
-#' * [stringr::str_sub()]
-#' * [stringr::str_to_lower()]
-#' * [stringr::str_to_title()]
-#' * [stringr::str_to_upper()]
-#' * [stringr::str_trim()]
-#' * [tibble::tibble()]
+#' ## arrow
+#' 
+#' * [add_filename][arrow::add_filename()]
+#' * [cast][arrow::cast()]
+#'
+#' ## base
+#' 
+#' * [-][-()]
+#' * [!][!()]
+#' * [!=][!=()]
+#' * [*][*()]
+#' * [/][/()]
+#' * [&][&()]
+#' * [%/%][%/%()]
+#' * [%%][%%()]
+#' * [%in%][%in%()]
+#' * [^][^()]
+#' * [+][+()]
+#' * [<][<()]
+#' * [<=][<=()]
+#' * [==][==()]
+#' * [>][>()]
+#' * [>=][>=()]
+#' * [|][|()]
+#' * [abs][base::abs()]
+#' * [acos][base::acos()]
+#' * [all][base::all()]
+#' * [any][base::any()]
+#' * [as.character][base::as.character()]
+#' * [as.Date][base::as.Date()]
+#' * [as.difftime][base::as.difftime()]
+#' * [as.double][base::as.double()]
+#' * [as.factor][base::as.factor()]
+#' * [as.integer][base::as.integer()]
+#' * [as.logical][base::as.logical()]
+#' * [as.numeric][base::as.numeric()]
+#' * [asin][base::asin()]
+#' * [ceiling][base::ceiling()]
+#' * [cos][base::cos()]
+#' * [data.frame][base::data.frame()]
+#' * [difftime][base::difftime()]
+#' * [endsWith][base::endsWith()]
+#' * [exp][base::exp()]
+#' * [floor][base::floor()]
+#' * [format][base::format()]
+#' * [grepl][base::grepl()]
+#' * [gsub][base::gsub()]
+#' * [ifelse][base::ifelse()]
+#' * [is.character][base::is.character()]
+#' * [is.double][base::is.double()]
+#' * [is.factor][base::is.factor()]
+#' * [is.finite][base::is.finite()]
+#' * [is.infinite][base::is.infinite()]
+#' * [is.integer][base::is.integer()]
+#' * [is.list][base::is.list()]
+#' * [is.logical][base::is.logical()]
+#' * [is.na][base::is.na()]
+#' * [is.nan][base::is.nan()]
+#' * [is.numeric][base::is.numeric()]
+#' * [ISOdate][base::ISOdate()]
+#' * [ISOdatetime][base::ISOdatetime()]
+#' * [log][base::log()]
+#' * [log10][base::log10()]
+#' * [log1p][base::log1p()]
+#' * [log2][base::log2()]
+#' * [logb][base::logb()]
+#' * [max][base::max()]
+#' * [mean][base::mean()]
+#' * [min][base::min()]
+#' * [nchar][base::nchar()]
+#' * [paste][base::paste()]: the `collapse` argument is not yet supported
+#' * [paste0][base::paste0()]: the `collapse` argument is not yet supported
+#' * [pmax][base::pmax()]
+#' * [pmin][base::pmin()]
+#' * [round][base::round()]
+#' * [sign][base::sign()]
+#' * [sin][base::sin()]
+#' * [sqrt][base::sqrt()]
+#' * [startsWith][base::startsWith()]
+#' * [strftime][base::strftime()]
+#' * [strptime][base::strptime()]
+#' * [strrep][base::strrep()]
+#' * [strsplit][base::strsplit()]
+#' * [sub][base::sub()]
+#' * [substr][base::substr()]
+#' * [substring][base::substring()]
+#' * [sum][base::sum()]
+#' * [tan][base::tan()]
+#' * [tolower][base::tolower()]
+#' * [toupper][base::toupper()]
+#' * [trunc][base::trunc()]
+#'
+#' ## bit64
+#' 
+#' * [as.integer64][bit64::as.integer64()]
+#' * [is.integer64][bit64::is.integer64()]
+#'
+#' ## dplyr
+#' 
+#' * [between][dplyr::between()]
+#' * [case_when][dplyr::case_when()]
+#' * [coalesce][dplyr::coalesce()]
+#' * [if_else][dplyr::if_else()]
+#' * [n][dplyr::n()]
+#' * [n_distinct][dplyr::n_distinct()]
+#'
+#' ## lubridate
+#' 
+#' * [am][lubridate::am()]
+#' * [as_date][lubridate::as_date()]
+#' * [as_datetime][lubridate::as_datetime()]
+#' * [ceiling_date][lubridate::ceiling_date()]
+#' * [date][lubridate::date()]
+#' * [date_decimal][lubridate::date_decimal()]
+#' * [day][lubridate::day()]
+#' * [ddays][lubridate::ddays()]
+#' * [decimal_date][lubridate::decimal_date()]
+#' * [dhours][lubridate::dhours()]
+#' * [dmicroseconds][lubridate::dmicroseconds()]
+#' * [dmilliseconds][lubridate::dmilliseconds()]
+#' * [dminutes][lubridate::dminutes()]
+#' * [dmonths][lubridate::dmonths()]
+#' * [dmy][lubridate::dmy()]
+#' * [dmy_h][lubridate::dmy_h()]
+#' * [dmy_hm][lubridate::dmy_hm()]
+#' * [dmy_hms][lubridate::dmy_hms()]
+#' * [dnanoseconds][lubridate::dnanoseconds()]
+#' * [dpicoseconds][lubridate::dpicoseconds()]
+#' * [dseconds][lubridate::dseconds()]
+#' * [dst][lubridate::dst()]
+#' * [dweeks][lubridate::dweeks()]
+#' * [dyears][lubridate::dyears()]
+#' * [dym][lubridate::dym()]
+#' * [epiweek][lubridate::epiweek()]
+#' * [epiyear][lubridate::epiyear()]
+#' * [fast_strptime][lubridate::fast_strptime()]
+#' * [floor_date][lubridate::floor_date()]
+#' * [format_ISO8601][lubridate::format_ISO8601()]
+#' * [hour][lubridate::hour()]
+#' * [is.Date][lubridate::is.Date()]
+#' * [is.instant][lubridate::is.instant()]
+#' * [is.POSIXct][lubridate::is.POSIXct()]
+#' * [is.timepoint][lubridate::is.timepoint()]
+#' * [isoweek][lubridate::isoweek()]
+#' * [isoyear][lubridate::isoyear()]
+#' * [leap_year][lubridate::leap_year()]
+#' * [make_date][lubridate::make_date()]
+#' * [make_datetime][lubridate::make_datetime()]
+#' * [make_difftime][lubridate::make_difftime()]
+#' * [mday][lubridate::mday()]
+#' * [mdy][lubridate::mdy()]
+#' * [mdy_h][lubridate::mdy_h()]
+#' * [mdy_hm][lubridate::mdy_hm()]
+#' * [mdy_hms][lubridate::mdy_hms()]
+#' * [minute][lubridate::minute()]
+#' * [month][lubridate::month()]
+#' * [my][lubridate::my()]
+#' * [myd][lubridate::myd()]
+#' * [parse_date_time][lubridate::parse_date_time()]
+#' * [pm][lubridate::pm()]
+#' * [qday][lubridate::qday()]
+#' * [quarter][lubridate::quarter()]
+#' * [round_date][lubridate::round_date()]
+#' * [second][lubridate::second()]
+#' * [semester][lubridate::semester()]
+#' * [tz][lubridate::tz()]
+#' * [wday][lubridate::wday()]
+#' * [week][lubridate::week()]
+#' * [yday][lubridate::yday()]
+#' * [ydm][lubridate::ydm()]
+#' * [ydm_h][lubridate::ydm_h()]
+#' * [ydm_hm][lubridate::ydm_hm()]
+#' * [ydm_hms][lubridate::ydm_hms()]
+#' * [year][lubridate::year()]
+#' * [ym][lubridate::ym()]
+#' * [ymd][lubridate::ymd()]
+#' * [ymd_h][lubridate::ymd_h()]
+#' * [ymd_hm][lubridate::ymd_hm()]
+#' * [ymd_hms][lubridate::ymd_hms()]
+#' * [yq][lubridate::yq()]
+#'
+#' ## methods
+#' 
+#' * [is][methods::is()]
+#'
+#' ## rlang
+#' 
+#' * [is_character][rlang::is_character()]
+#' * [is_double][rlang::is_double()]
+#' * [is_integer][rlang::is_integer()]
+#' * [is_list][rlang::is_list()]
+#' * [is_logical][rlang::is_logical()]
+#'
+#' ## stats
+#' 
+#' * [median][stats::median()]
+#' * [quantile][stats::quantile()]
+#' * [sd][stats::sd()]
+#' * [var][stats::var()]
+#'
+#' ## stringi
+#' 
+#' * [stri_reverse][stringi::stri_reverse()]
+#'
+#' ## stringr
+#' 
+#' * [str_c][stringr::str_c()]: the `collapse` argument is not yet supported
+#' * [str_count][stringr::str_count()]
+#' * [str_detect][stringr::str_detect()]
+#' * [str_dup][stringr::str_dup()]
+#' * [str_ends][stringr::str_ends()]
+#' * [str_length][stringr::str_length()]
+#' * [str_like][stringr::str_like()]
+#' * [str_pad][stringr::str_pad()]
+#' * [str_replace][stringr::str_replace()]
+#' * [str_replace_all][stringr::str_replace_all()]
+#' * [str_split][stringr::str_split()]
+#' * [str_starts][stringr::str_starts()]
+#' * [str_sub][stringr::str_sub()]
+#' * [str_to_lower][stringr::str_to_lower()]
+#' * [str_to_title][stringr::str_to_title()]
+#' * [str_to_upper][stringr::str_to_upper()]
+#' * [str_trim][stringr::str_trim()]
+#'
+#' ## tibble
+#' 
+#' * [tibble][tibble::tibble()]
 #'
 #' @name arrow-dplyr-functions
 NULL
-
diff --git a/r/R/dplyr-funcs-type.R b/r/R/dplyr-funcs-type.R
index 9925d0347f7..2c5112d7f73 100644
--- a/r/R/dplyr-funcs-type.R
+++ b/r/R/dplyr-funcs-type.R
@@ -24,23 +24,12 @@ register_bindings_type <- function() {
 }
 
 register_bindings_type_cast <- function() {
-  register_binding("cast", function(x, target_type, safe = TRUE, ...) {
+  register_binding("arrow::cast", function(x, target_type, safe = TRUE, ...) {
     opts <- cast_options(safe, ...)
     opts$to_type <- as_type(target_type)
     Expression$create("cast", x, options = opts)
   })
 
-  register_binding("dictionary_encode", function(x,
-                                                 null_encoding_behavior = c("mask", "encode")) {
-    behavior <- toupper(match.arg(null_encoding_behavior))
-    null_encoding_behavior <- NullEncodingBehavior[[behavior]]
-    Expression$create(
-      "dictionary_encode",
-      x,
-      options = list(null_encoding_behavior = null_encoding_behavior)
-    )
-  })
-
   # as.* type casting functions
   # as.factor() is mapped in expression.R
   register_binding("base::as.character", function(x) {
diff --git a/r/data-raw/docgen.R b/r/data-raw/docgen.R
index a41aeb246c8..f78c1d47db2 100644
--- a/r/data-raw/docgen.R
+++ b/r/data-raw/docgen.R
@@ -65,27 +65,59 @@ file_template <- "# Licensed to the Apache Software Foundation (ASF) under one
 %s
 #'
 #' @name arrow-dplyr-functions
-NULL
-"
+NULL"
+
+library(dplyr)
+library(purrr)
 
 docs <- arrow:::.cache$docs
-docs <- docs[order(names(docs))]
-# TODO: group by package name, create subheadings
 
-doclets <- purrr::imap_chr(docs, function(x, n) {
-  out <- paste0("#' * [", n, "()]")
-  if (length(x)) {
-    out <- paste0(out, ": ", paste(x, collapse = " "))
-  }
+docs_df <- tibble::tibble( # one row per entry in `docs`
+  pkg_fun = names(docs),
+  notes = docs
+) %>%
+  mutate(
+    has_pkg = grepl("::", pkg_fun),
+    fun = sub("^.*?:+", "", pkg_fun), # strip everything through the "::" separator
+    pkg = sub(":+.*$", "", pkg_fun), # strip the "::" separator and what follows it
+    # We will list operators under "base" (everything else must be pkg::fun)
+    pkg = if_else(has_pkg, pkg, "base"),
+    # Flatten notes to a single string
+    notes = map_chr(notes, ~ paste(., collapse = " "))
+  ) %>%
+  arrange(pkg, fun)
+
+# Vectorized: builds "* [fun][pkg_fun()]" bullets, appending ": notes" where notes is non-empty
+render_fun <- function(fun, pkg_fun, notes) {
+  out <- paste0("* [", fun, "][", pkg_fun, "()]")
+  has_notes <- nzchar(notes) # TRUE for entries whose note string is not ""
+  out[has_notes] <- paste0(out[has_notes], ": ", notes[has_notes])
   out
-})
+}
+
+# Render one package's section: a "## pkg" heading, a blank line, then the bullets
+render_pkg <- function(df, pkg) {
+  bullets <- df %>%
+    transmute(render_fun(fun, pkg_fun, notes)) %>%
+    pull()
+  # Add header; lines are right-trimmed below so the blank one is "#'", not "#' "
+  bullets <- c(
+    paste("##", pkg),
+    "",
+    bullets
+  )
+  paste(trimws(paste("#'", bullets), which = "right"), collapse = "\n")
+}
+
+# Split the rows by package name and render each group as one "## pkg" section
+doclets <- imap_chr(split(docs_df, docs_df$pkg), render_pkg) # one string per package
 
 writeLines(
   sprintf(
     file_template,
     length(docs),
     length(arrow::list_compute_functions()),
-    paste(doclets, collapse = "\n")
+    paste(doclets, collapse = "\n#'\n") # bare "#'" line between package sections
   ),
   "R/dplyr-funcs-doc.R"
 )
diff --git a/r/man/arrow-dplyr-functions.Rd b/r/man/arrow-dplyr-functions.Rd
index 8cf74c5c78e..9fcfeb23b07 100644
--- a/r/man/arrow-dplyr-functions.Rd
+++ b/r/man/arrow-dplyr-functions.Rd
@@ -4,7 +4,7 @@
 \alias{arrow-dplyr-functions}
 \title{Functions available in Arrow dplyr queries}
 \description{
-The \code{arrow} package contains mappings of 196 R functions to the corresponding
+The \code{arrow} package contains mappings of 195 R functions to the corresponding
 functions in the Arrow compute library. This allows you to write code inside
 of \code{dplyr} methods that call R functions, including many in packages like
 \code{stringr} and \code{lubridate}, and they will get translated to Arrow and run
@@ -27,202 +27,253 @@ mapping has that make Acero behave like R. These functions are listed in the
 \href{https://arrow.apache.org/docs/cpp/compute.html}{C++ documentation}, and
 in the function registry in R, they are named with an \code{arrow_} prefix, such
 as \code{arrow_ascii_is_decimal}.
+\subsection{arrow}{
 \itemize{
-\item \code{\link[=-]{-()}}
-\item \code{\link[=!]{!()}}
-\item \code{\link[=!=]{!=()}}
-\item \code{\link[=*]{*()}}
-\item \code{\link[=/]{/()}}
-\item \code{\link[=&]{&()}}
-\item \code{\link[=\%/\%]{\%/\%()}}
-\item \code{\link[=\%\%]{\%\%()}}
-\item \code{\link[=\%in\%]{\%in\%()}}
-\item \code{\link[=^]{^()}}
-\item \code{\link[=+]{+()}}
-\item \code{\link[=<]{<()}}
-\item \code{\link[=<=]{<=()}}
-\item \code{\link[===]{==()}}
-\item \code{\link[=>]{>()}}
-\item \code{\link[=>=]{>=()}}
-\item \code{\link[=|]{|()}}
-\item \code{\link[=add_filename]{add_filename()}}
-\item \code{\link[base:MathFun]{base::abs()}}
-\item \code{\link[base:Trig]{base::acos()}}
-\item \code{\link[base:all]{base::all()}}
-\item \code{\link[base:any]{base::any()}}
-\item \code{\link[base:character]{base::as.character()}}
-\item \code{\link[base:as.Date]{base::as.Date()}}
-\item \code{\link[base:difftime]{base::as.difftime()}}
-\item \code{\link[base:double]{base::as.double()}}
-\item \code{\link[base:factor]{base::as.factor()}}
-\item \code{\link[base:integer]{base::as.integer()}}
-\item \code{\link[base:logical]{base::as.logical()}}
-\item \code{\link[base:numeric]{base::as.numeric()}}
-\item \code{\link[base:Trig]{base::asin()}}
-\item \code{\link[base:Round]{base::ceiling()}}
-\item \code{\link[base:Trig]{base::cos()}}
-\item \code{\link[base:data.frame]{base::data.frame()}}
-\item \code{\link[base:difftime]{base::difftime()}}
-\item \code{\link[base:startsWith]{base::endsWith()}}
-\item \code{\link[base:Log]{base::exp()}}
-\item \code{\link[base:Round]{base::floor()}}
-\item \code{\link[base:format]{base::format()}}
-\item \code{\link[base:grep]{base::grepl()}}
-\item \code{\link[base:grep]{base::gsub()}}
-\item \code{\link[base:ifelse]{base::ifelse()}}
-\item \code{\link[base:character]{base::is.character()}}
-\item \code{\link[base:double]{base::is.double()}}
-\item \code{\link[base:factor]{base::is.factor()}}
-\item \code{\link[base:is.finite]{base::is.finite()}}
-\item \code{\link[base:is.finite]{base::is.infinite()}}
-\item \code{\link[base:integer]{base::is.integer()}}
-\item \code{\link[base:list]{base::is.list()}}
-\item \code{\link[base:logical]{base::is.logical()}}
-\item \code{\link[base:NA]{base::is.na()}}
-\item \code{\link[base:is.finite]{base::is.nan()}}
-\item \code{\link[base:numeric]{base::is.numeric()}}
-\item \code{\link[base:ISOdatetime]{base::ISOdate()}}
-\item \code{\link[base:ISOdatetime]{base::ISOdatetime()}}
-\item \code{\link[base:Log]{base::log()}}
-\item \code{\link[base:Log]{base::log10()}}
-\item \code{\link[base:Log]{base::log1p()}}
-\item \code{\link[base:Log]{base::log2()}}
-\item \code{\link[base:Log]{base::logb()}}
-\item \code{\link[base:Extremes]{base::max()}}
-\item \code{\link[base:mean]{base::mean()}}
-\item \code{\link[base:Extremes]{base::min()}}
-\item \code{\link[base:nchar]{base::nchar()}}
-\item \code{\link[base:paste]{base::paste()}}: the \code{collapse} argument is not yet supported
-\item \code{\link[base:paste]{base::paste0()}}: the \code{collapse} argument is not yet supported
-\item \code{\link[base:Extremes]{base::pmax()}}
-\item \code{\link[base:Extremes]{base::pmin()}}
-\item \code{\link[base:Round]{base::round()}}
-\item \code{\link[base:sign]{base::sign()}}
-\item \code{\link[base:Trig]{base::sin()}}
-\item \code{\link[base:MathFun]{base::sqrt()}}
-\item \code{\link[base:startsWith]{base::startsWith()}}
-\item \code{\link[base:strptime]{base::strftime()}}
-\item \code{\link[base:strptime]{base::strptime()}}
-\item \code{\link[base:strrep]{base::strrep()}}
-\item \code{\link[base:strsplit]{base::strsplit()}}
-\item \code{\link[base:grep]{base::sub()}}
-\item \code{\link[base:substr]{base::substr()}}
-\item \code{\link[base:substr]{base::substring()}}
-\item \code{\link[base:sum]{base::sum()}}
-\item \code{\link[base:Trig]{base::tan()}}
-\item \code{\link[base:chartr]{base::tolower()}}
-\item \code{\link[base:chartr]{base::toupper()}}
-\item \code{\link[base:Round]{base::trunc()}}
-\item \code{\link[bit64:as.integer64.character]{bit64::as.integer64()}}
-\item \code{\link[bit64:bit64-package]{bit64::is.integer64()}}
-\item \code{\link[=cast]{cast()}}
-\item \code{\link[=dictionary_encode]{dictionary_encode()}}
-\item \code{\link[dplyr:between]{dplyr::between()}}
-\item \code{\link[dplyr:case_when]{dplyr::case_when()}}
-\item \code{\link[dplyr:coalesce]{dplyr::coalesce()}}
-\item \code{\link[dplyr:if_else]{dplyr::if_else()}}
-\item \code{\link[dplyr:context]{dplyr::n()}}
-\item \code{\link[dplyr:n_distinct]{dplyr::n_distinct()}}
-\item \code{\link[lubridate:am]{lubridate::am()}}
-\item \code{\link[lubridate:as_date]{lubridate::as_date()}}
-\item \code{\link[lubridate:as_date]{lubridate::as_datetime()}}
-\item \code{\link[lubridate:round_date]{lubridate::ceiling_date()}}
-\item \code{\link[lubridate:date]{lubridate::date()}}
-\item \code{\link[lubridate:date_decimal]{lubridate::date_decimal()}}
-\item \code{\link[lubridate:day]{lubridate::day()}}
-\item \code{\link[lubridate:duration]{lubridate::ddays()}}
-\item \code{\link[lubridate:decimal_date]{lubridate::decimal_date()}}
-\item \code{\link[lubridate:duration]{lubridate::dhours()}}
-\item \code{\link[lubridate:duration]{lubridate::dmicroseconds()}}
-\item \code{\link[lubridate:duration]{lubridate::dmilliseconds()}}
-\item \code{\link[lubridate:duration]{lubridate::dminutes()}}
-\item \code{\link[lubridate:duration]{lubridate::dmonths()}}
-\item \code{\link[lubridate:ymd]{lubridate::dmy()}}
-\item \code{\link[lubridate:ymd_hms]{lubridate::dmy_h()}}
-\item \code{\link[lubridate:ymd_hms]{lubridate::dmy_hm()}}
-\item \code{\link[lubridate:ymd_hms]{lubridate::dmy_hms()}}
-\item \code{\link[lubridate:duration]{lubridate::dnanoseconds()}}
-\item \code{\link[lubridate:duration]{lubridate::dpicoseconds()}}
-\item \code{\link[lubridate:duration]{lubridate::dseconds()}}
-\item \code{\link[lubridate:dst]{lubridate::dst()}}
-\item \code{\link[lubridate:duration]{lubridate::dweeks()}}
-\item \code{\link[lubridate:duration]{lubridate::dyears()}}
-\item \code{\link[lubridate:ymd]{lubridate::dym()}}
-\item \code{\link[lubridate:week]{lubridate::epiweek()}}
-\item \code{\link[lubridate:year]{lubridate::epiyear()}}
-\item \code{\link[lubridate:parse_date_time]{lubridate::fast_strptime()}}
-\item \code{\link[lubridate:round_date]{lubridate::floor_date()}}
-\item \code{\link[lubridate:format_ISO8601]{lubridate::format_ISO8601()}}
-\item \code{\link[lubridate:hour]{lubridate::hour()}}
-\item \code{\link[lubridate:date_utils]{lubridate::is.Date()}}
-\item \code{\link[lubridate:is.instant]{lubridate::is.instant()}}
-\item \code{\link[lubridate:posix_utils]{lubridate::is.POSIXct()}}
-\item \code{\link[lubridate:is.instant]{lubridate::is.timepoint()}}
-\item \code{\link[lubridate:week]{lubridate::isoweek()}}
-\item \code{\link[lubridate:year]{lubridate::isoyear()}}
-\item \code{\link[lubridate:leap_year]{lubridate::leap_year()}}
-\item \code{\link[lubridate:make_datetime]{lubridate::make_date()}}
-\item \code{\link[lubridate:make_datetime]{lubridate::make_datetime()}}
-\item \code{\link[lubridate:make_difftime]{lubridate::make_difftime()}}
-\item \code{\link[lubridate:day]{lubridate::mday()}}
-\item \code{\link[lubridate:ymd]{lubridate::mdy()}}
-\item \code{\link[lubridate:ymd_hms]{lubridate::mdy_h()}}
-\item \code{\link[lubridate:ymd_hms]{lubridate::mdy_hm()}}
-\item \code{\link[lubridate:ymd_hms]{lubridate::mdy_hms()}}
-\item \code{\link[lubridate:minute]{lubridate::minute()}}
-\item \code{\link[lubridate:month]{lubridate::month()}}
-\item \code{\link[lubridate:ymd]{lubridate::my()}}
-\item \code{\link[lubridate:ymd]{lubridate::myd()}}
-\item \code{\link[lubridate:parse_date_time]{lubridate::parse_date_time()}}
-\item \code{\link[lubridate:am]{lubridate::pm()}}
-\item \code{\link[lubridate:day]{lubridate::qday()}}
-\item \code{\link[lubridate:quarter]{lubridate::quarter()}}
-\item \code{\link[lubridate:round_date]{lubridate::round_date()}}
-\item \code{\link[lubridate:second]{lubridate::second()}}
-\item \code{\link[lubridate:quarter]{lubridate::semester()}}
-\item \code{\link[lubridate:tz]{lubridate::tz()}}
-\item \code{\link[lubridate:day]{lubridate::wday()}}
-\item \code{\link[lubridate:week]{lubridate::week()}}
-\item \code{\link[lubridate:day]{lubridate::yday()}}
-\item \code{\link[lubridate:ymd]{lubridate::ydm()}}
-\item \code{\link[lubridate:ymd_hms]{lubridate::ydm_h()}}
-\item \code{\link[lubridate:ymd_hms]{lubridate::ydm_hm()}}
-\item \code{\link[lubridate:ymd_hms]{lubridate::ydm_hms()}}
-\item \code{\link[lubridate:year]{lubridate::year()}}
-\item \code{\link[lubridate:ymd]{lubridate::ym()}}
-\item \code{\link[lubridate:ymd]{lubridate::ymd()}}
-\item \code{\link[lubridate:ymd_hms]{lubridate::ymd_h()}}
-\item \code{\link[lubridate:ymd_hms]{lubridate::ymd_hm()}}
-\item \code{\link[lubridate:ymd_hms]{lubridate::ymd_hms()}}
-\item \code{\link[lubridate:ymd]{lubridate::yq()}}
-\item \code{\link[methods:is]{methods::is()}}
-\item \code{\link[rlang:type-predicates]{rlang::is_character()}}
-\item \code{\link[rlang:type-predicates]{rlang::is_double()}}
-\item \code{\link[rlang:type-predicates]{rlang::is_integer()}}
-\item \code{\link[rlang:type-predicates]{rlang::is_list()}}
-\item \code{\link[rlang:type-predicates]{rlang::is_logical()}}
-\item \code{\link[stats:median]{stats::median()}}
-\item \code{\link[stats:quantile]{stats::quantile()}}
-\item \code{\link[stats:sd]{stats::sd()}}
-\item \code{\link[stats:cor]{stats::var()}}
-\item \code{\link[stringi:stri_reverse]{stringi::stri_reverse()}}
-\item \code{\link[stringr:str_c]{stringr::str_c()}}: the \code{collapse} argument is not yet supported
-\item \code{\link[stringr:str_count]{stringr::str_count()}}
-\item \code{\link[stringr:str_detect]{stringr::str_detect()}}
-\item \code{\link[stringr:str_dup]{stringr::str_dup()}}
-\item \code{\link[stringr:str_starts]{stringr::str_ends()}}
-\item \code{\link[stringr:str_length]{stringr::str_length()}}
-\item \code{\link[stringr:str_like]{stringr::str_like()}}
-\item \code{\link[stringr:str_pad]{stringr::str_pad()}}
-\item \code{\link[stringr:str_replace]{stringr::str_replace()}}
-\item \code{\link[stringr:str_replace]{stringr::str_replace_all()}}
-\item \code{\link[stringr:str_split]{stringr::str_split()}}
-\item \code{\link[stringr:str_starts]{stringr::str_starts()}}
-\item \code{\link[stringr:str_sub]{stringr::str_sub()}}
-\item \code{\link[stringr:case]{stringr::str_to_lower()}}
-\item \code{\link[stringr:case]{stringr::str_to_title()}}
-\item \code{\link[stringr:case]{stringr::str_to_upper()}}
-\item \code{\link[stringr:str_trim]{stringr::str_trim()}}
-\item \code{\link[tibble:tibble]{tibble::tibble()}}
+\item \link[=add_filename]{add_filename}
+\item \link[=cast]{cast}
+}
+}
+
+\subsection{base}{
+\itemize{
+\item \link[=-]{-}
+\item \link[=!]{!}
+\item \link[=!=]{!=}
+\item \link[=*]{*}
+\item \link[=/]{/}
+\item \link[=&]{&}
+\item \link[=\%/\%]{\%/\%}
+\item \link[=\%\%]{\%\%}
+\item \link[=\%in\%]{\%in\%}
+\item \link[=^]{^}
+\item \link[=+]{+}
+\item \link[=<]{<}
+\item \link[=<=]{<=}
+\item \link[===]{==}
+\item \link[=>]{>}
+\item \link[=>=]{>=}
+\item \link[=|]{|}
+\item \link[base:MathFun]{abs}
+\item \link[base:Trig]{acos}
+\item \link[base:all]{all}
+\item \link[base:any]{any}
+\item \link[base:character]{as.character}
+\item \link[base:as.Date]{as.Date}
+\item \link[base:difftime]{as.difftime}
+\item \link[base:double]{as.double}
+\item \link[base:factor]{as.factor}
+\item \link[base:integer]{as.integer}
+\item \link[base:logical]{as.logical}
+\item \link[base:numeric]{as.numeric}
+\item \link[base:Trig]{asin}
+\item \link[base:Round]{ceiling}
+\item \link[base:Trig]{cos}
+\item \link[base:data.frame]{data.frame}
+\item \link[base:difftime]{difftime}
+\item \link[base:startsWith]{endsWith}
+\item \link[base:Log]{exp}
+\item \link[base:Round]{floor}
+\item \link[base:format]{format}
+\item \link[base:grep]{grepl}
+\item \link[base:grep]{gsub}
+\item \link[base:ifelse]{ifelse}
+\item \link[base:character]{is.character}
+\item \link[base:double]{is.double}
+\item \link[base:factor]{is.factor}
+\item \link[base:is.finite]{is.finite}
+\item \link[base:is.finite]{is.infinite}
+\item \link[base:integer]{is.integer}
+\item \link[base:list]{is.list}
+\item \link[base:logical]{is.logical}
+\item \link[base:NA]{is.na}
+\item \link[base:is.finite]{is.nan}
+\item \link[base:numeric]{is.numeric}
+\item \link[base:ISOdatetime]{ISOdate}
+\item \link[base:ISOdatetime]{ISOdatetime}
+\item \link[base:Log]{log}
+\item \link[base:Log]{log10}
+\item \link[base:Log]{log1p}
+\item \link[base:Log]{log2}
+\item \link[base:Log]{logb}
+\item \link[base:Extremes]{max}
+\item \link[base:mean]{mean}
+\item \link[base:Extremes]{min}
+\item \link[base:nchar]{nchar}
+\item \link[base:paste]{paste}: the \code{collapse} argument is not yet supported
+\item \link[base:paste]{paste0}: the \code{collapse} argument is not yet supported
+\item \link[base:Extremes]{pmax}
+\item \link[base:Extremes]{pmin}
+\item \link[base:Round]{round}
+\item \link[base:sign]{sign}
+\item \link[base:Trig]{sin}
+\item \link[base:MathFun]{sqrt}
+\item \link[base:startsWith]{startsWith}
+\item \link[base:strptime]{strftime}
+\item \link[base:strptime]{strptime}
+\item \link[base:strrep]{strrep}
+\item \link[base:strsplit]{strsplit}
+\item \link[base:grep]{sub}
+\item \link[base:substr]{substr}
+\item \link[base:substr]{substring}
+\item \link[base:sum]{sum}
+\item \link[base:Trig]{tan}
+\item \link[base:chartr]{tolower}
+\item \link[base:chartr]{toupper}
+\item \link[base:Round]{trunc}
+}
+}
+
+\subsection{bit64}{
+\itemize{
+\item \link[bit64:as.integer64.character]{as.integer64}
+\item \link[bit64:bit64-package]{is.integer64}
+}
+}
+
+\subsection{dplyr}{
+\itemize{
+\item \link[dplyr:between]{between}
+\item \link[dplyr:case_when]{case_when}
+\item \link[dplyr:coalesce]{coalesce}
+\item \link[dplyr:if_else]{if_else}
+\item \link[dplyr:context]{n}
+\item \link[dplyr:n_distinct]{n_distinct}
+}
+}
+
+\subsection{lubridate}{
+\itemize{
+\item \link[lubridate:am]{am}
+\item \link[lubridate:as_date]{as_date}
+\item \link[lubridate:as_date]{as_datetime}
+\item \link[lubridate:round_date]{ceiling_date}
+\item \link[lubridate:date]{date}
+\item \link[lubridate:date_decimal]{date_decimal}
+\item \link[lubridate:day]{day}
+\item \link[lubridate:duration]{ddays}
+\item \link[lubridate:decimal_date]{decimal_date}
+\item \link[lubridate:duration]{dhours}
+\item \link[lubridate:duration]{dmicroseconds}
+\item \link[lubridate:duration]{dmilliseconds}
+\item \link[lubridate:duration]{dminutes}
+\item \link[lubridate:duration]{dmonths}
+\item \link[lubridate:ymd]{dmy}
+\item \link[lubridate:ymd_hms]{dmy_h}
+\item \link[lubridate:ymd_hms]{dmy_hm}
+\item \link[lubridate:ymd_hms]{dmy_hms}
+\item \link[lubridate:duration]{dnanoseconds}
+\item \link[lubridate:duration]{dpicoseconds}
+\item \link[lubridate:duration]{dseconds}
+\item \link[lubridate:dst]{dst}
+\item \link[lubridate:duration]{dweeks}
+\item \link[lubridate:duration]{dyears}
+\item \link[lubridate:ymd]{dym}
+\item \link[lubridate:week]{epiweek}
+\item \link[lubridate:year]{epiyear}
+\item \link[lubridate:parse_date_time]{fast_strptime}
+\item \link[lubridate:round_date]{floor_date}
+\item \link[lubridate:format_ISO8601]{format_ISO8601}
+\item \link[lubridate:hour]{hour}
+\item \link[lubridate:date_utils]{is.Date}
+\item \link[lubridate:is.instant]{is.instant}
+\item \link[lubridate:posix_utils]{is.POSIXct}
+\item \link[lubridate:is.instant]{is.timepoint}
+\item \link[lubridate:week]{isoweek}
+\item \link[lubridate:year]{isoyear}
+\item \link[lubridate:leap_year]{leap_year}
+\item \link[lubridate:make_datetime]{make_date}
+\item \link[lubridate:make_datetime]{make_datetime}
+\item \link[lubridate:make_difftime]{make_difftime}
+\item \link[lubridate:day]{mday}
+\item \link[lubridate:ymd]{mdy}
+\item \link[lubridate:ymd_hms]{mdy_h}
+\item \link[lubridate:ymd_hms]{mdy_hm}
+\item \link[lubridate:ymd_hms]{mdy_hms}
+\item \link[lubridate:minute]{minute}
+\item \link[lubridate:month]{month}
+\item \link[lubridate:ymd]{my}
+\item \link[lubridate:ymd]{myd}
+\item \link[lubridate:parse_date_time]{parse_date_time}
+\item \link[lubridate:am]{pm}
+\item \link[lubridate:day]{qday}
+\item \link[lubridate:quarter]{quarter}
+\item \link[lubridate:round_date]{round_date}
+\item \link[lubridate:second]{second}
+\item \link[lubridate:quarter]{semester}
+\item \link[lubridate:tz]{tz}
+\item \link[lubridate:day]{wday}
+\item \link[lubridate:week]{week}
+\item \link[lubridate:day]{yday}
+\item \link[lubridate:ymd]{ydm}
+\item \link[lubridate:ymd_hms]{ydm_h}
+\item \link[lubridate:ymd_hms]{ydm_hm}
+\item \link[lubridate:ymd_hms]{ydm_hms}
+\item \link[lubridate:year]{year}
+\item \link[lubridate:ymd]{ym}
+\item \link[lubridate:ymd]{ymd}
+\item \link[lubridate:ymd_hms]{ymd_h}
+\item \link[lubridate:ymd_hms]{ymd_hm}
+\item \link[lubridate:ymd_hms]{ymd_hms}
+\item \link[lubridate:ymd]{yq}
+}
+}
+
+\subsection{methods}{
+\itemize{
+\item \link[methods:is]{is}
+}
+}
+
+\subsection{rlang}{
+\itemize{
+\item \link[rlang:type-predicates]{is_character}
+\item \link[rlang:type-predicates]{is_double}
+\item \link[rlang:type-predicates]{is_integer}
+\item \link[rlang:type-predicates]{is_list}
+\item \link[rlang:type-predicates]{is_logical}
+}
+}
+
+\subsection{stats}{
+\itemize{
+\item \link[stats:median]{median}
+\item \link[stats:quantile]{quantile}
+\item \link[stats:sd]{sd}
+\item \link[stats:cor]{var}
+}
+}
+
+\subsection{stringi}{
+\itemize{
+\item \link[stringi:stri_reverse]{stri_reverse}
+}
+}
+
+\subsection{stringr}{
+\itemize{
+\item \link[stringr:str_c]{str_c}: the \code{collapse} argument is not yet supported
+\item \link[stringr:str_count]{str_count}
+\item \link[stringr:str_detect]{str_detect}
+\item \link[stringr:str_dup]{str_dup}
+\item \link[stringr:str_starts]{str_ends}
+\item \link[stringr:str_length]{str_length}
+\item \link[stringr:str_like]{str_like}
+\item \link[stringr:str_pad]{str_pad}
+\item \link[stringr:str_replace]{str_replace}
+\item \link[stringr:str_replace]{str_replace_all}
+\item \link[stringr:str_split]{str_split}
+\item \link[stringr:str_starts]{str_starts}
+\item \link[stringr:str_sub]{str_sub}
+\item \link[stringr:case]{str_to_lower}
+\item \link[stringr:case]{str_to_title}
+\item \link[stringr:case]{str_to_upper}
+\item \link[stringr:str_trim]{str_trim}
+}
+}
+
+\subsection{tibble}{
+\itemize{
+\item \link[tibble:tibble]{tibble}
+}
 }
 }

From 20233904cf1d54af56a93d5e7b88382d634dbe51 Mon Sep 17 00:00:00 2001
From: Neal Richardson <neal.p.richardson@gmail.com>
Date: Thu, 1 Sep 2022 09:58:22 -0400
Subject: [PATCH 3/7] Add to pkgdown

---
 r/_pkgdown.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/r/_pkgdown.yml b/r/_pkgdown.yml
index dfb0998ddff..6c23305a8f3 100644
--- a/r/_pkgdown.yml
+++ b/r/_pkgdown.yml
@@ -216,6 +216,7 @@ reference:
       - codec_is_available
   - title: Computation
     contents:
+      - arrow-dplyr-functions
       - call_function
       - match_arrow
       - value_counts

From 42fb685ab716ad2e9de20851481f970bad2d0f89 Mon Sep 17 00:00:00 2001
From: Neal Richardson <neal.p.richardson@gmail.com>
Date: Fri, 9 Sep 2022 10:09:21 -0400
Subject: [PATCH 4/7] Add dplyr verbs and tidyselect to the page; add docs for
 cast and add_filename

---
 r/R/arrow-package.R            |  51 ++++++++++++-----
 r/R/dplyr-funcs-augmented.R    |  16 ++++++
 r/R/dplyr-funcs-doc.R          |  65 +++++++++++++++++++--
 r/R/dplyr-funcs-string.R       |  20 ++++---
 r/R/dplyr-funcs-type.R         |  26 +++++++++
 r/data-raw/docgen.R            | 100 ++++++++++++++++++++++++++-------
 r/man/add_filename.Rd          |  20 +++++++
 r/man/arrow-dplyr-functions.Rd |  69 +++++++++++++++++++++--
 r/man/cast.Rd                  |  36 ++++++++++++
 9 files changed, 351 insertions(+), 52 deletions(-)
 create mode 100644 r/man/add_filename.Rd
 create mode 100644 r/man/cast.Rd

diff --git a/r/R/arrow-package.R b/r/R/arrow-package.R
index 53fb0280a50..46a517c1e22 100644
--- a/r/R/arrow-package.R
+++ b/r/R/arrow-package.R
@@ -31,25 +31,50 @@
 #' @keywords internal
 "_PACKAGE"
 
+# Include notes about features not supported here.
+supported_dplyr_methods <- list(
+  select = NULL,
+  filter = NULL,
+  collect = NULL,
+  summarise = NULL,
+  group_by = NULL,
+  groups = NULL,
+  group_vars = NULL,
+  group_by_drop_default = NULL,
+  ungroup = NULL,
+  mutate = NULL,
+  transmute = NULL,
+  arrange = NULL,
+  rename = NULL,
+  pull = NULL,
+  relocate = NULL,
+  compute = NULL,
+  collapse = NULL,
+  distinct = NULL,
+  left_join = NULL,
+  right_join = NULL,
+  inner_join = NULL,
+  full_join = NULL,
+  semi_join = NULL,
+  anti_join = NULL,
+  count = NULL,
+  tally = NULL,
+  rename_with = NULL,
+  union = NULL,
+  union_all = NULL,
+  glimpse = NULL,
+  show_query = NULL,
+  explain = NULL
+)
+
 #' @importFrom vctrs s3_register vec_size vec_cast vec_unique
 .onLoad <- function(...) {
   # Make sure C++ knows on which thread it is safe to call the R API
   InitializeMainRThread()
 
-  dplyr_methods <- paste0(
-    "dplyr::",
-    c(
-      "select", "filter", "collect", "summarise", "group_by", "groups",
-      "group_vars", "group_by_drop_default", "ungroup", "mutate", "transmute",
-      "arrange", "rename", "pull", "relocate", "compute", "collapse",
-      "distinct", "left_join", "right_join", "inner_join", "full_join",
-      "semi_join", "anti_join", "count", "tally", "rename_with", "union",
-      "union_all", "glimpse", "show_query", "explain"
-    )
-  )
   for (cl in c("Dataset", "ArrowTabular", "RecordBatchReader", "arrow_dplyr_query")) {
-    for (m in dplyr_methods) {
-      s3_register(m, cl)
+    for (m in names(supported_dplyr_methods)) {
+      s3_register(paste0("dplyr::", m), cl)
     }
   }
   s3_register("dplyr::tbl_vars", "arrow_dplyr_query")
diff --git a/r/R/dplyr-funcs-augmented.R b/r/R/dplyr-funcs-augmented.R
index 828e3df12be..efb62139368 100644
--- a/r/R/dplyr-funcs-augmented.R
+++ b/r/R/dplyr-funcs-augmented.R
@@ -15,6 +15,22 @@
 # specific language governing permissions and limitations
 # under the License.
 
+#' Add the data filename as a column
+#'
+#' This function only exists inside `arrow` `dplyr` queries, and it only is
+#' valid when quering on a `FileSystemDataset`.
+#'
+#' @return A `FieldRef` `Expression` that refers to the filename augmented
+#' column.
+#' @examples
+#' \dontrun{
+#' open_dataset("nyc-taxi") %>%
+#'   mutate(file = add_filename())
+#' }
+#' @keywords internal
+#' @name add_filename
+NULL
+
 register_bindings_augmented <- function() {
   register_binding("arrow::add_filename", function() {
     Expression$field_ref("__filename")
diff --git a/r/R/dplyr-funcs-doc.R b/r/R/dplyr-funcs-doc.R
index 078cf4a9fa4..d0a6d3dfcc6 100644
--- a/r/R/dplyr-funcs-doc.R
+++ b/r/R/dplyr-funcs-doc.R
@@ -19,13 +19,58 @@
 
 #' Functions available in Arrow dplyr queries
 #'
-#' The `arrow` package contains mappings of 195 R functions to the corresponding
-#' functions in the Arrow compute library. This allows you to write code inside
+#' The `arrow` package contains methods for 32 `dplyr` table functions, many of
+#' which are "verbs" that do transformations to one or more tables.
+#' The package also has mappings of 204 R functions to the corresponding
+#' functions in the Arrow compute library. These allow you to write code inside
 #' of `dplyr` methods that call R functions, including many in packages like
 #' `stringr` and `lubridate`, and they will get translated to Arrow and run
 #' on the Arrow query engine (Acero). This document lists all of the mapped
 #' functions.
 #'
+#' # `dplyr` verbs
+#'
+#' Most verb functions return an `arrow_dplyr_query` object, similar in spirit
+#' to a `dbplyr::tbl_lazy`. This means that the verbs do not eagerly evaluate
+#' the query on the data. To run the query, call either `compute()`,
+#' which returns an `arrow` [Table], or `collect()`, which pulls the resulting
+#' Table into an R `data.frame`.
+#'
+#' * [anti_join][dplyr::anti_join()]
+#' * [arrange][dplyr::arrange()]
+#' * [collapse][dplyr::collapse()]
+#' * [collect][dplyr::collect()]
+#' * [compute][dplyr::compute()]
+#' * [count][dplyr::count()]
+#' * [distinct][dplyr::distinct()]
+#' * [explain][dplyr::explain()]
+#' * [filter][dplyr::filter()]
+#' * [full_join][dplyr::full_join()]
+#' * [glimpse][dplyr::glimpse()]
+#' * [group_by][dplyr::group_by()]
+#' * [group_by_drop_default][dplyr::group_by_drop_default()]
+#' * [group_vars][dplyr::group_vars()]
+#' * [groups][dplyr::groups()]
+#' * [inner_join][dplyr::inner_join()]
+#' * [left_join][dplyr::left_join()]
+#' * [mutate][dplyr::mutate()]
+#' * [pull][dplyr::pull()]
+#' * [relocate][dplyr::relocate()]
+#' * [rename][dplyr::rename()]
+#' * [rename_with][dplyr::rename_with()]
+#' * [right_join][dplyr::right_join()]
+#' * [select][dplyr::select()]
+#' * [semi_join][dplyr::semi_join()]
+#' * [show_query][dplyr::show_query()]
+#' * [summarise][dplyr::summarise()]
+#' * [tally][dplyr::tally()]
+#' * [transmute][dplyr::transmute()]
+#' * [ungroup][dplyr::ungroup()]
+#' * [union][dplyr::union()]
+#' * [union_all][dplyr::union_all()]
+#'
+#' # Function mappings
+#'
 #' In the list below, any differences in behavior or support between Acero and
 #' the R function are listed. If no notes follow the function name, then you
 #' can assume that the function works in Acero just as it does in R.
@@ -74,7 +119,6 @@
 #' * [as.Date][base::as.Date()]
 #' * [as.difftime][base::as.difftime()]
 #' * [as.double][base::as.double()]
-#' * [as.factor][base::as.factor()]
 #' * [as.integer][base::as.integer()]
 #' * [as.logical][base::as.logical()]
 #' * [as.numeric][base::as.numeric()]
@@ -141,6 +185,7 @@
 #'
 #' ## dplyr
 #' 
+#' * [across][dplyr::across()]: only supported inside `mutate()`; purrr-style lambda functions not yet supported
 #' * [between][dplyr::between()]
 #' * [case_when][dplyr::case_when()]
 #' * [coalesce][dplyr::coalesce()]
@@ -254,7 +299,7 @@
 #' * [str_dup][stringr::str_dup()]
 #' * [str_ends][stringr::str_ends()]
 #' * [str_length][stringr::str_length()]
-#' * [str_like][stringr::str_like()]
+#' * `str_like`: not yet in a released version of `stringr`, but it is supported in `arrow`
 #' * [str_pad][stringr::str_pad()]
 #' * [str_replace][stringr::str_replace()]
 #' * [str_replace_all][stringr::str_replace_all()]
@@ -270,5 +315,17 @@
 #' 
 #' * [tibble][tibble::tibble()]
 #'
+#' ## tidyselect
+#' 
+#' * [all_of][tidyselect::all_of()]
+#' * [contains][tidyselect::contains()]
+#' * [ends_with][tidyselect::ends_with()]
+#' * [everything][tidyselect::everything()]
+#' * [last_col][tidyselect::last_col()]
+#' * [matches][tidyselect::matches()]
+#' * [num_range][tidyselect::num_range()]
+#' * [one_of][tidyselect::one_of()]
+#' * [starts_with][tidyselect::starts_with()]
+#'
 #' @name arrow-dplyr-functions
 NULL
diff --git a/r/R/dplyr-funcs-string.R b/r/R/dplyr-funcs-string.R
index 71dcce94cc7..eb2326ed056 100644
--- a/r/R/dplyr-funcs-string.R
+++ b/r/R/dplyr-funcs-string.R
@@ -239,15 +239,17 @@ register_bindings_string_regex <- function() {
     out
   })
 
-  register_binding("stringr::str_like", function(string,
-                                                 pattern,
-                                                 ignore_case = TRUE) {
-    Expression$create(
-      "match_like",
-      string,
-      options = list(pattern = pattern, ignore_case = ignore_case)
-    )
-  })
+  register_binding(
+    "stringr::str_like",
+    function(string, pattern, ignore_case = TRUE) {
+      Expression$create(
+        "match_like",
+        string,
+        options = list(pattern = pattern, ignore_case = ignore_case)
+      )
+    },
+    notes = "not yet in a released version of `stringr`, but it is supported in `arrow`"
+  )
 
   register_binding("stringr::str_count", function(string, pattern) {
     opts <- get_stringr_pattern_options(enexpr(pattern))
diff --git a/r/R/dplyr-funcs-type.R b/r/R/dplyr-funcs-type.R
index 2c5112d7f73..3d03476a881 100644
--- a/r/R/dplyr-funcs-type.R
+++ b/r/R/dplyr-funcs-type.R
@@ -23,6 +23,32 @@ register_bindings_type <- function() {
   register_bindings_type_format()
 }
 
+#' Change the type of an array or column
+#'
+#' The `cast()` function only exists inside of `arrow` `dplyr` queries. Use it
+#' as a more convenient way of changing the type of a value or field inside of
+#' a `mutate()` call. To cast an `Array` or `ChunkedArray` outside of a query,
+#' call the `$cast()` method on the object, which has the same semantics.
+#'
+#' @param x an `Expression`
+#' @param target_type [DataType] to cast to
+#' @param safe logical: only allow the type conversion if no data is lost
+#' (truncation, overflow, etc.). Default is `TRUE`
+#' @param ... specific `CastOptions` to set
+#' @return an `Expression`
+#'
+#' @examples
+#' \dontrun{
+#' mtcars %>%
+#'   arrow_table() %>%
+#'   mutate(cyl = cast(cyl, string()))
+#' }
+#' @keywords internal
+#' @name cast
+#' @seealso https://arrow.apache.org/docs/cpp/api/compute.html for the list of
+#' supported CastOptions.
+NULL
+
 register_bindings_type_cast <- function() {
   register_binding("arrow::cast", function(x, target_type, safe = TRUE, ...) {
     opts <- cast_options(safe, ...)
diff --git a/r/data-raw/docgen.R b/r/data-raw/docgen.R
index f78c1d47db2..5d767f27041 100644
--- a/r/data-raw/docgen.R
+++ b/r/data-raw/docgen.R
@@ -39,13 +39,27 @@ file_template <- "# Licensed to the Apache Software Foundation (ASF) under one
 
 #' Functions available in Arrow dplyr queries
 #'
-#' The `arrow` package contains mappings of %s R functions to the corresponding
-#' functions in the Arrow compute library. This allows you to write code inside
+#' The `arrow` package contains methods for %s `dplyr` table functions, many of
+#' which are \"verbs\" that do transformations to one or more tables.
+#' The package also has mappings of %s R functions to the corresponding
+#' functions in the Arrow compute library. These allow you to write code inside
 #' of `dplyr` methods that call R functions, including many in packages like
 #' `stringr` and `lubridate`, and they will get translated to Arrow and run
 #' on the Arrow query engine (Acero). This document lists all of the mapped
 #' functions.
 #'
+#' # `dplyr` verbs
+#'
+#' Most verb functions return an `arrow_dplyr_query` object, similar in spirit
+#' to a `dbplyr::tbl_lazy`. This means that the verbs do not eagerly evaluate
+#' the query on the data. To run the query, call either `compute()`,
+#' which returns an `arrow` [Table], or `collect()`, which pulls the resulting
+#' Table into an R `data.frame`.
+#'
+%s
+#'
+#' # Function mappings
+#'
 #' In the list below, any differences in behavior or support between Acero and
 #' the R function are listed. If no notes follow the function name, then you
 #' can assume that the function works in Acero just as it does in R.
@@ -70,26 +84,18 @@ NULL"
 library(dplyr)
 library(purrr)
 
-docs <- arrow:::.cache$docs
-
-docs_df <- tibble::tibble(
-  pkg_fun = names(docs),
-  notes = docs
-) %>%
-  mutate(
-    has_pkg = grepl("::", pkg_fun),
-    fun = sub("^.*?:{+}", "", pkg_fun),
-    pkg = sub(":{+}.*$", "", pkg_fun),
-    # We will list operators under "base" (everything else must be pkg::fun)
-    pkg = if_else(has_pkg, pkg, "base"),
-    # Flatten notes to a single string
-    notes = map_chr(notes, ~ paste(., collapse = " "))
-  ) %>%
-  arrange(pkg, fun)
+# Functions that for whatever reason cause xref problems, so don't hyperlink
+do_not_link <- c(
+  "stringr::str_like" # Still only in the unreleased version
+)
 
 # Vectorized function to make entries for each function
 render_fun <- function(fun, pkg_fun, notes) {
-  out <- paste0("* [", fun, "][", pkg_fun, "()]")
+  out <- ifelse(
+    pkg_fun %in% do_not_link,
+    paste0("* `", fun, "`"),
+    paste0("* [", fun, "][", pkg_fun, "()]")
+  )
   has_notes <- nzchar(notes)
   out[has_notes] <- paste0(out[has_notes], ": ", notes[has_notes])
   out
@@ -109,15 +115,67 @@ render_pkg <- function(df, pkg) {
   paste("#'", bullets, collapse = "\n")
 }
 
+docs <- arrow:::.cache$docs
+
+# Add some functions
+
+# across() is handled by manipulating the quosures, not by nse_funcs
+docs[["dplyr::across"]] <- c(
+  "only supported inside `mutate()`;", # TODO(ARROW-17362, ARROW-17387)
+  "purrr-style lambda functions not yet supported" # TODO(ARROW-17366)
+)
+
+# add tidyselect helpers by parsing the reexports file
+tidyselect <- grep("^tidyselect::", readLines("R/reexports-tidyselect.R"), value = TRUE)
+
+docs <- c(docs, setNames(rep(list(NULL), length(tidyselect)), tidyselect))
+
+# TODO: add doc pages for add_filename() and cast()
+
+fun_df <- tibble::tibble(
+  pkg_fun = names(docs),
+  notes = docs
+) %>%
+  mutate(
+    has_pkg = grepl("::", pkg_fun),
+    fun = sub("^.*?:{+}", "", pkg_fun),
+    pkg = sub(":{+}.*$", "", pkg_fun),
+    # We will list operators under "base" (everything else must be pkg::fun)
+    pkg = if_else(has_pkg, pkg, "base"),
+    # Flatten notes to a single string
+    notes = map_chr(notes, ~ paste(., collapse = " "))
+  ) %>%
+  arrange(pkg, fun)
+
 # Group by package name and render the lists
-doclets <- imap_chr(split(docs_df, docs_df$pkg), render_pkg)
+fun_doclets <- imap_chr(split(fun_df, fun_df$pkg), render_pkg)
+
+dplyr_verbs <- c(
+  arrow:::supported_dplyr_methods,
+  # Only has an arrow_dplyr_query method, so not in the main list; list(NULL) because c() drops a bare NULL
+  tbl_vars = list(NULL)
+)
+
+verb_bullets <- tibble::tibble(
+  fun = names(dplyr_verbs),
+  notes = dplyr_verbs
+) %>%
+  mutate(
+    pkg_fun = paste0("dplyr::", fun),
+    notes = map_chr(notes, ~ paste(., collapse = " "))
+  ) %>%
+  arrange(fun) %>%
+  transmute(render_fun(fun, pkg_fun, notes)) %>%
+  pull()
 
 writeLines(
   sprintf(
     file_template,
+    length(dplyr_verbs),
     length(docs),
+    paste("#'", verb_bullets, collapse = "\n"),
     length(arrow::list_compute_functions()),
-    paste(doclets, collapse = "\n#'\n")
+    paste(fun_doclets, collapse = "\n#'\n")
   ),
   "R/dplyr-funcs-doc.R"
 )
diff --git a/r/man/add_filename.Rd b/r/man/add_filename.Rd
new file mode 100644
index 00000000000..56731281ab9
--- /dev/null
+++ b/r/man/add_filename.Rd
@@ -0,0 +1,20 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/dplyr-funcs-augmented.R
+\name{add_filename}
+\alias{add_filename}
+\title{Add the data filename as a column}
+\value{
+A \code{FieldRef} \code{Expression} that refers to the filename augmented
+column.
+}
+\description{
+This function only exists inside \code{arrow} \code{dplyr} queries, and it only is
+valid when querying on a \code{FileSystemDataset}.
+}
+\examples{
+\dontrun{
+open_dataset("nyc-taxi") \%>\%
+  mutate(file = add_filename())
+}
+}
+\keyword{internal}
diff --git a/r/man/arrow-dplyr-functions.Rd b/r/man/arrow-dplyr-functions.Rd
index 9fcfeb23b07..438d8b64025 100644
--- a/r/man/arrow-dplyr-functions.Rd
+++ b/r/man/arrow-dplyr-functions.Rd
@@ -4,14 +4,58 @@
 \alias{arrow-dplyr-functions}
 \title{Functions available in Arrow dplyr queries}
 \description{
-The \code{arrow} package contains mappings of 195 R functions to the corresponding
-functions in the Arrow compute library. This allows you to write code inside
+The \code{arrow} package contains methods for 32 \code{dplyr} table functions, many of
+which are "verbs" that do transformations to one or more tables.
+The package also has mappings of 204 R functions to the corresponding
+functions in the Arrow compute library. These allow you to write code inside
 of \code{dplyr} methods that call R functions, including many in packages like
 \code{stringr} and \code{lubridate}, and they will get translated to Arrow and run
 on the Arrow query engine (Acero). This document lists all of the mapped
 functions.
 }
-\details{
+\section{\code{dplyr} verbs}{
+Most verb functions return an \code{arrow_dplyr_query} object, similar in spirit
+to a \code{dbplyr::tbl_lazy}. This means that the verbs do not eagerly evaluate
+the query on the data. To run the query, call either \code{compute()},
+which returns an \code{arrow} \link{Table}, or \code{collect()}, which pulls the resulting
+Table into an R \code{data.frame}.
+\itemize{
+\item \link[dplyr:filter-joins]{anti_join}
+\item \link[dplyr:arrange]{arrange}
+\item \link[dplyr:compute]{collapse}
+\item \link[dplyr:compute]{collect}
+\item \link[dplyr:compute]{compute}
+\item \link[dplyr:count]{count}
+\item \link[dplyr:distinct]{distinct}
+\item \link[dplyr:explain]{explain}
+\item \link[dplyr:filter]{filter}
+\item \link[dplyr:mutate-joins]{full_join}
+\item \link[dplyr:glimpse]{glimpse}
+\item \link[dplyr:group_by]{group_by}
+\item \link[dplyr:group_by_drop_default]{group_by_drop_default}
+\item \link[dplyr:group_data]{group_vars}
+\item \link[dplyr:group_data]{groups}
+\item \link[dplyr:mutate-joins]{inner_join}
+\item \link[dplyr:mutate-joins]{left_join}
+\item \link[dplyr:mutate]{mutate}
+\item \link[dplyr:pull]{pull}
+\item \link[dplyr:relocate]{relocate}
+\item \link[dplyr:rename]{rename}
+\item \link[dplyr:rename]{rename_with}
+\item \link[dplyr:mutate-joins]{right_join}
+\item \link[dplyr:select]{select}
+\item \link[dplyr:filter-joins]{semi_join}
+\item \link[dplyr:explain]{show_query}
+\item \link[dplyr:summarise]{summarise}
+\item \link[dplyr:count]{tally}
+\item \link[dplyr:mutate]{transmute}
+\item \link[dplyr:group_by]{ungroup}
+\item \link[dplyr:reexports]{union}
+\item \link[dplyr:setops]{union_all}
+}
+}
+
+\section{Function mappings}{
 In the list below, any differences in behavior or support between Acero and
 the R function are listed. If no notes follow the function name, then you
 can assume that the function works in Acero just as it does in R.
@@ -61,7 +105,6 @@ as \code{arrow_ascii_is_decimal}.
 \item \link[base:as.Date]{as.Date}
 \item \link[base:difftime]{as.difftime}
 \item \link[base:double]{as.double}
-\item \link[base:factor]{as.factor}
 \item \link[base:integer]{as.integer}
 \item \link[base:logical]{as.logical}
 \item \link[base:numeric]{as.numeric}
@@ -132,6 +175,7 @@ as \code{arrow_ascii_is_decimal}.
 
 \subsection{dplyr}{
 \itemize{
+\item \link[dplyr:across]{across}: only supported inside \code{mutate()}; purrr-style lambda functions not yet supported
 \item \link[dplyr:between]{between}
 \item \link[dplyr:case_when]{case_when}
 \item \link[dplyr:coalesce]{coalesce}
@@ -257,7 +301,7 @@ as \code{arrow_ascii_is_decimal}.
 \item \link[stringr:str_dup]{str_dup}
 \item \link[stringr:str_starts]{str_ends}
 \item \link[stringr:str_length]{str_length}
-\item \link[stringr:str_like]{str_like}
+\item \code{str_like}: not yet in a released version of \code{stringr}, but it is supported in \code{arrow}
 \item \link[stringr:str_pad]{str_pad}
 \item \link[stringr:str_replace]{str_replace}
 \item \link[stringr:str_replace]{str_replace_all}
@@ -276,4 +320,19 @@ as \code{arrow_ascii_is_decimal}.
 \item \link[tibble:tibble]{tibble}
 }
 }
+
+\subsection{tidyselect}{
+\itemize{
+\item \link[tidyselect:all_of]{all_of}
+\item \link[tidyselect:starts_with]{contains}
+\item \link[tidyselect:starts_with]{ends_with}
+\item \link[tidyselect:everything]{everything}
+\item \link[tidyselect:everything]{last_col}
+\item \link[tidyselect:starts_with]{matches}
+\item \link[tidyselect:starts_with]{num_range}
+\item \link[tidyselect:one_of]{one_of}
+\item \link[tidyselect:starts_with]{starts_with}
+}
 }
+}
+
diff --git a/r/man/cast.Rd b/r/man/cast.Rd
new file mode 100644
index 00000000000..90f0230d210
--- /dev/null
+++ b/r/man/cast.Rd
@@ -0,0 +1,36 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/dplyr-funcs-type.R
+\name{cast}
+\alias{cast}
+\title{Change the type of an array or column}
+\arguments{
+\item{x}{an \code{Expression}}
+
+\item{target_type}{\link{DataType} to cast to}
+
+\item{safe}{logical: only allow the type conversion if no data is lost
+(truncation, overflow, etc.). Default is \code{TRUE}}
+
+\item{...}{specific \code{CastOptions} to set}
+}
+\value{
+an \code{Expression}
+}
+\description{
+The \code{cast()} function only exists inside of \code{arrow} \code{dplyr} queries. Use it
+as a more convenient way of changing the type of a value or field inside of
+a \code{mutate()} call. To cast an \code{Array} or \code{ChunkedArray} outside of a query,
+call the \verb{$cast()} method on the object, which has the same semantics.
+}
+\examples{
+\dontrun{
+mtcars \%>\%
+  arrow_table() \%>\%
+  mutate(cyl = cast(cyl, string()))
+}
+}
+\seealso{
+https://arrow.apache.org/docs/cpp/api/compute.html for the list of
+supported CastOptions.
+}
+\keyword{internal}

From c48ff881924df02da7b8595de9573f6e1b0d84e8 Mon Sep 17 00:00:00 2001
From: Neal Richardson <neal.p.richardson@gmail.com>
Date: Fri, 9 Sep 2022 14:15:50 -0400
Subject: [PATCH 5/7] Add todo jira and fill in usage for new docs

---
 r/R/arrow-package.R         | 2 +-
 r/R/dplyr-funcs-augmented.R | 2 ++
 r/R/dplyr-funcs-type.R      | 1 +
 r/man/add_filename.Rd       | 3 +++
 r/man/cast.Rd               | 3 +++
 5 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/r/R/arrow-package.R b/r/R/arrow-package.R
index 46a517c1e22..e6b3f481e21 100644
--- a/r/R/arrow-package.R
+++ b/r/R/arrow-package.R
@@ -31,7 +31,7 @@
 #' @keywords internal
 "_PACKAGE"
 
-# Include notes about features not supported here.
+# TODO(ARROW-17666): Include notes about features not supported here.
 supported_dplyr_methods <- list(
   select = NULL,
   filter = NULL,
diff --git a/r/R/dplyr-funcs-augmented.R b/r/R/dplyr-funcs-augmented.R
index efb62139368..d1359005e37 100644
--- a/r/R/dplyr-funcs-augmented.R
+++ b/r/R/dplyr-funcs-augmented.R
@@ -20,6 +20,8 @@
 #' This function only exists inside `arrow` `dplyr` queries, and it only is
 #' valid when quering on a `FileSystemDataset`.
 #'
+#' @usage add_filename()
+#'
 #' @return A `FieldRef` `Expression` that refers to the filename augmented
 #' column.
 #' @examples
diff --git a/r/R/dplyr-funcs-type.R b/r/R/dplyr-funcs-type.R
index 3d03476a881..35734ced05d 100644
--- a/r/R/dplyr-funcs-type.R
+++ b/r/R/dplyr-funcs-type.R
@@ -30,6 +30,7 @@ register_bindings_type <- function() {
 #' a `mutate()` call. To cast an `Array` or `ChunkedArray` outside of a query,
 #' call the `$cast()` method on the object, which has the same semantics.
 #'
+#' @usage cast(x, target_type, safe = TRUE, ...)
 #' @param x an `Expression`
 #' @param target_type [DataType] to cast to
 #' @param safe logical: only allow the type conversion if no data is lost
diff --git a/r/man/add_filename.Rd b/r/man/add_filename.Rd
index 56731281ab9..ca7ed0e4b17 100644
--- a/r/man/add_filename.Rd
+++ b/r/man/add_filename.Rd
@@ -3,6 +3,9 @@
 \name{add_filename}
 \alias{add_filename}
 \title{Add the data filename as a column}
+\usage{
+add_filename()
+}
 \value{
 A \code{FieldRef} \code{Expression} that refers to the filename augmented
 column.
diff --git a/r/man/cast.Rd b/r/man/cast.Rd
index 90f0230d210..81abfa6567d 100644
--- a/r/man/cast.Rd
+++ b/r/man/cast.Rd
@@ -3,6 +3,9 @@
 \name{cast}
 \alias{cast}
 \title{Change the type of an array or column}
+\usage{
+cast(x, target_type, safe = TRUE, ...)
+}
 \arguments{
 \item{x}{an \code{Expression}}
 

From 2ced7b6c3e541bd6b3da34c8e7a58093f170425a Mon Sep 17 00:00:00 2001
From: Neal Richardson <neal.p.richardson@gmail.com>
Date: Thu, 15 Sep 2022 12:47:17 -0400
Subject: [PATCH 6/7] Rename to acero.Rd and update stuff

---
 r/R/dplyr-funcs-doc.R          | 477 +++++++++++++++++----------------
 r/_pkgdown.yml                 |   2 +-
 r/data-raw/docgen.R            |  27 +-
 r/man/acero.Rd                 | 339 +++++++++++++++++++++++
 r/man/arrow-dplyr-functions.Rd | 338 -----------------------
 5 files changed, 598 insertions(+), 585 deletions(-)
 create mode 100644 r/man/acero.Rd
 delete mode 100644 r/man/arrow-dplyr-functions.Rd

diff --git a/r/R/dplyr-funcs-doc.R b/r/R/dplyr-funcs-doc.R
index d0a6d3dfcc6..cac0310f49b 100644
--- a/r/R/dplyr-funcs-doc.R
+++ b/r/R/dplyr-funcs-doc.R
@@ -21,7 +21,7 @@
 #'
 #' The `arrow` package contains methods for 32 `dplyr` table functions, many of
 #' which are "verbs" that do transformations to one or more tables.
-#' The package also has mappings of 204 R functions to the corresponding
+#' The package also has mappings of 205 R functions to the corresponding
 #' functions in the Arrow compute library. These allow you to write code inside
 #' of `dplyr` methods that call R functions, including many in packages like
 #' `stringr` and `lubridate`, and they will get translated to Arrow and run
@@ -36,38 +36,38 @@
 #' which returns an `arrow` [Table], or `collect()`, which pulls the resulting
 #' Table into an R `data.frame`.
 #'
-#' * [anti_join][dplyr::anti_join()]
-#' * [arrange][dplyr::arrange()]
-#' * [collapse][dplyr::collapse()]
-#' * [collect][dplyr::collect()]
-#' * [compute][dplyr::compute()]
-#' * [count][dplyr::count()]
-#' * [distinct][dplyr::distinct()]
-#' * [explain][dplyr::explain()]
-#' * [filter][dplyr::filter()]
-#' * [full_join][dplyr::full_join()]
-#' * [glimpse][dplyr::glimpse()]
-#' * [group_by][dplyr::group_by()]
-#' * [group_by_drop_default][dplyr::group_by_drop_default()]
-#' * [group_vars][dplyr::group_vars()]
-#' * [groups][dplyr::groups()]
-#' * [inner_join][dplyr::inner_join()]
-#' * [left_join][dplyr::left_join()]
-#' * [mutate][dplyr::mutate()]
-#' * [pull][dplyr::pull()]
-#' * [relocate][dplyr::relocate()]
-#' * [rename][dplyr::rename()]
-#' * [rename_with][dplyr::rename_with()]
-#' * [right_join][dplyr::right_join()]
-#' * [select][dplyr::select()]
-#' * [semi_join][dplyr::semi_join()]
-#' * [show_query][dplyr::show_query()]
-#' * [summarise][dplyr::summarise()]
-#' * [tally][dplyr::tally()]
-#' * [transmute][dplyr::transmute()]
-#' * [ungroup][dplyr::ungroup()]
-#' * [union][dplyr::union()]
-#' * [union_all][dplyr::union_all()]
+#' * [`anti_join()`][dplyr::anti_join()]
+#' * [`arrange()`][dplyr::arrange()]
+#' * [`collapse()`][dplyr::collapse()]
+#' * [`collect()`][dplyr::collect()]
+#' * [`compute()`][dplyr::compute()]
+#' * [`count()`][dplyr::count()]
+#' * [`distinct()`][dplyr::distinct()]
+#' * [`explain()`][dplyr::explain()]
+#' * [`filter()`][dplyr::filter()]
+#' * [`full_join()`][dplyr::full_join()]
+#' * [`glimpse()`][dplyr::glimpse()]
+#' * [`group_by()`][dplyr::group_by()]
+#' * [`group_by_drop_default()`][dplyr::group_by_drop_default()]
+#' * [`group_vars()`][dplyr::group_vars()]
+#' * [`groups()`][dplyr::groups()]
+#' * [`inner_join()`][dplyr::inner_join()]
+#' * [`left_join()`][dplyr::left_join()]
+#' * [`mutate()`][dplyr::mutate()]
+#' * [`pull()`][dplyr::pull()]
+#' * [`relocate()`][dplyr::relocate()]
+#' * [`rename()`][dplyr::rename()]
+#' * [`rename_with()`][dplyr::rename_with()]
+#' * [`right_join()`][dplyr::right_join()]
+#' * [`select()`][dplyr::select()]
+#' * [`semi_join()`][dplyr::semi_join()]
+#' * [`show_query()`][dplyr::show_query()]
+#' * [`summarise()`][dplyr::summarise()]
+#' * [`tally()`][dplyr::tally()]
+#' * [`transmute()`][dplyr::transmute()]
+#' * [`ungroup()`][dplyr::ungroup()]
+#' * [`union()`][dplyr::union()]
+#' * [`union_all()`][dplyr::union_all()]
 #'
 #' # Function mappings
 #'
@@ -89,243 +89,244 @@
 #'
 #' ## arrow
 #' 
-#' * [add_filename][arrow::add_filename()]
-#' * [cast][arrow::cast()]
+#' * [`add_filename()`][arrow::add_filename()]
+#' * [`cast()`][arrow::cast()]
 #'
 #' ## base
 #' 
-#' * [-][-()]
-#' * [!][!()]
-#' * [!=][!=()]
-#' * [*][*()]
-#' * [/][/()]
-#' * [&][&()]
-#' * [%/%][%/%()]
-#' * [%%][%%()]
-#' * [%in%][%in%()]
-#' * [^][^()]
-#' * [+][+()]
-#' * [<][<()]
-#' * [<=][<=()]
-#' * [==][==()]
-#' * [>][>()]
-#' * [>=][>=()]
-#' * [|][|()]
-#' * [abs][base::abs()]
-#' * [acos][base::acos()]
-#' * [all][base::all()]
-#' * [any][base::any()]
-#' * [as.character][base::as.character()]
-#' * [as.Date][base::as.Date()]
-#' * [as.difftime][base::as.difftime()]
-#' * [as.double][base::as.double()]
-#' * [as.integer][base::as.integer()]
-#' * [as.logical][base::as.logical()]
-#' * [as.numeric][base::as.numeric()]
-#' * [asin][base::asin()]
-#' * [ceiling][base::ceiling()]
-#' * [cos][base::cos()]
-#' * [data.frame][base::data.frame()]
-#' * [difftime][base::difftime()]
-#' * [endsWith][base::endsWith()]
-#' * [exp][base::exp()]
-#' * [floor][base::floor()]
-#' * [format][base::format()]
-#' * [grepl][base::grepl()]
-#' * [gsub][base::gsub()]
-#' * [ifelse][base::ifelse()]
-#' * [is.character][base::is.character()]
-#' * [is.double][base::is.double()]
-#' * [is.factor][base::is.factor()]
-#' * [is.finite][base::is.finite()]
-#' * [is.infinite][base::is.infinite()]
-#' * [is.integer][base::is.integer()]
-#' * [is.list][base::is.list()]
-#' * [is.logical][base::is.logical()]
-#' * [is.na][base::is.na()]
-#' * [is.nan][base::is.nan()]
-#' * [is.numeric][base::is.numeric()]
-#' * [ISOdate][base::ISOdate()]
-#' * [ISOdatetime][base::ISOdatetime()]
-#' * [log][base::log()]
-#' * [log10][base::log10()]
-#' * [log1p][base::log1p()]
-#' * [log2][base::log2()]
-#' * [logb][base::logb()]
-#' * [max][base::max()]
-#' * [mean][base::mean()]
-#' * [min][base::min()]
-#' * [nchar][base::nchar()]
-#' * [paste][base::paste()]: the `collapse` argument is not yet supported
-#' * [paste0][base::paste0()]: the `collapse` argument is not yet supported
-#' * [pmax][base::pmax()]
-#' * [pmin][base::pmin()]
-#' * [round][base::round()]
-#' * [sign][base::sign()]
-#' * [sin][base::sin()]
-#' * [sqrt][base::sqrt()]
-#' * [startsWith][base::startsWith()]
-#' * [strftime][base::strftime()]
-#' * [strptime][base::strptime()]
-#' * [strrep][base::strrep()]
-#' * [strsplit][base::strsplit()]
-#' * [sub][base::sub()]
-#' * [substr][base::substr()]
-#' * [substring][base::substring()]
-#' * [sum][base::sum()]
-#' * [tan][base::tan()]
-#' * [tolower][base::tolower()]
-#' * [toupper][base::toupper()]
-#' * [trunc][base::trunc()]
+#' * [`-`][-()]
+#' * [`!`][!()]
+#' * [`!=`][!=()]
+#' * [`*`][*()]
+#' * [`/`][/()]
+#' * [`&`][&()]
+#' * [`%/%`][%/%()]
+#' * [`%%`][%%()]
+#' * [`%in%`][%in%()]
+#' * [`^`][^()]
+#' * [`+`][+()]
+#' * [`<`][<()]
+#' * [`<=`][<=()]
+#' * [`==`][==()]
+#' * [`>`][>()]
+#' * [`>=`][>=()]
+#' * [`|`][|()]
+#' * [`abs()`][base::abs()]
+#' * [`acos()`][base::acos()]
+#' * [`all()`][base::all()]
+#' * [`any()`][base::any()]
+#' * [`as.character()`][base::as.character()]
+#' * [`as.Date()`][base::as.Date()]
+#' * [`as.difftime()`][base::as.difftime()]
+#' * [`as.double()`][base::as.double()]
+#' * [`as.integer()`][base::as.integer()]
+#' * [`as.logical()`][base::as.logical()]
+#' * [`as.numeric()`][base::as.numeric()]
+#' * [`asin()`][base::asin()]
+#' * [`ceiling()`][base::ceiling()]
+#' * [`cos()`][base::cos()]
+#' * [`data.frame()`][base::data.frame()]
+#' * [`difftime()`][base::difftime()]
+#' * [`endsWith()`][base::endsWith()]
+#' * [`exp()`][base::exp()]
+#' * [`floor()`][base::floor()]
+#' * [`format()`][base::format()]
+#' * [`grepl()`][base::grepl()]
+#' * [`gsub()`][base::gsub()]
+#' * [`ifelse()`][base::ifelse()]
+#' * [`is.character()`][base::is.character()]
+#' * [`is.double()`][base::is.double()]
+#' * [`is.factor()`][base::is.factor()]
+#' * [`is.finite()`][base::is.finite()]
+#' * [`is.infinite()`][base::is.infinite()]
+#' * [`is.integer()`][base::is.integer()]
+#' * [`is.list()`][base::is.list()]
+#' * [`is.logical()`][base::is.logical()]
+#' * [`is.na()`][base::is.na()]
+#' * [`is.nan()`][base::is.nan()]
+#' * [`is.numeric()`][base::is.numeric()]
+#' * [`ISOdate()`][base::ISOdate()]
+#' * [`ISOdatetime()`][base::ISOdatetime()]
+#' * [`log()`][base::log()]
+#' * [`log10()`][base::log10()]
+#' * [`log1p()`][base::log1p()]
+#' * [`log2()`][base::log2()]
+#' * [`logb()`][base::logb()]
+#' * [`max()`][base::max()]
+#' * [`mean()`][base::mean()]
+#' * [`min()`][base::min()]
+#' * [`nchar()`][base::nchar()]
+#' * [`paste()`][base::paste()]: the `collapse` argument is not yet supported
+#' * [`paste0()`][base::paste0()]: the `collapse` argument is not yet supported
+#' * [`pmax()`][base::pmax()]
+#' * [`pmin()`][base::pmin()]
+#' * [`round()`][base::round()]
+#' * [`sign()`][base::sign()]
+#' * [`sin()`][base::sin()]
+#' * [`sqrt()`][base::sqrt()]
+#' * [`startsWith()`][base::startsWith()]
+#' * [`strftime()`][base::strftime()]
+#' * [`strptime()`][base::strptime()]
+#' * [`strrep()`][base::strrep()]
+#' * [`strsplit()`][base::strsplit()]
+#' * [`sub()`][base::sub()]
+#' * [`substr()`][base::substr()]
+#' * [`substring()`][base::substring()]
+#' * [`sum()`][base::sum()]
+#' * [`tan()`][base::tan()]
+#' * [`tolower()`][base::tolower()]
+#' * [`toupper()`][base::toupper()]
+#' * [`trunc()`][base::trunc()]
 #'
 #' ## bit64
 #' 
-#' * [as.integer64][bit64::as.integer64()]
-#' * [is.integer64][bit64::is.integer64()]
+#' * [`as.integer64()`][bit64::as.integer64()]
+#' * [`is.integer64()`][bit64::is.integer64()]
 #'
 #' ## dplyr
 #' 
-#' * [across][dplyr::across()]: only supported inside `mutate()`; purrr-style lambda functions not yet supported
-#' * [between][dplyr::between()]
-#' * [case_when][dplyr::case_when()]
-#' * [coalesce][dplyr::coalesce()]
-#' * [if_else][dplyr::if_else()]
-#' * [n][dplyr::n()]
-#' * [n_distinct][dplyr::n_distinct()]
+#' * [`across()`][dplyr::across()]: only supported inside `mutate()`, `summarize()`, and `arrange()`; purrr-style lambda functions and use of `where()` selection helper not yet supported
+#' * [`between()`][dplyr::between()]
+#' * [`case_when()`][dplyr::case_when()]
+#' * [`coalesce()`][dplyr::coalesce()]
+#' * [`desc()`][dplyr::desc()]
+#' * [`if_else()`][dplyr::if_else()]
+#' * [`n()`][dplyr::n()]
+#' * [`n_distinct()`][dplyr::n_distinct()]
 #'
 #' ## lubridate
 #' 
-#' * [am][lubridate::am()]
-#' * [as_date][lubridate::as_date()]
-#' * [as_datetime][lubridate::as_datetime()]
-#' * [ceiling_date][lubridate::ceiling_date()]
-#' * [date][lubridate::date()]
-#' * [date_decimal][lubridate::date_decimal()]
-#' * [day][lubridate::day()]
-#' * [ddays][lubridate::ddays()]
-#' * [decimal_date][lubridate::decimal_date()]
-#' * [dhours][lubridate::dhours()]
-#' * [dmicroseconds][lubridate::dmicroseconds()]
-#' * [dmilliseconds][lubridate::dmilliseconds()]
-#' * [dminutes][lubridate::dminutes()]
-#' * [dmonths][lubridate::dmonths()]
-#' * [dmy][lubridate::dmy()]
-#' * [dmy_h][lubridate::dmy_h()]
-#' * [dmy_hm][lubridate::dmy_hm()]
-#' * [dmy_hms][lubridate::dmy_hms()]
-#' * [dnanoseconds][lubridate::dnanoseconds()]
-#' * [dpicoseconds][lubridate::dpicoseconds()]
-#' * [dseconds][lubridate::dseconds()]
-#' * [dst][lubridate::dst()]
-#' * [dweeks][lubridate::dweeks()]
-#' * [dyears][lubridate::dyears()]
-#' * [dym][lubridate::dym()]
-#' * [epiweek][lubridate::epiweek()]
-#' * [epiyear][lubridate::epiyear()]
-#' * [fast_strptime][lubridate::fast_strptime()]
-#' * [floor_date][lubridate::floor_date()]
-#' * [format_ISO8601][lubridate::format_ISO8601()]
-#' * [hour][lubridate::hour()]
-#' * [is.Date][lubridate::is.Date()]
-#' * [is.instant][lubridate::is.instant()]
-#' * [is.POSIXct][lubridate::is.POSIXct()]
-#' * [is.timepoint][lubridate::is.timepoint()]
-#' * [isoweek][lubridate::isoweek()]
-#' * [isoyear][lubridate::isoyear()]
-#' * [leap_year][lubridate::leap_year()]
-#' * [make_date][lubridate::make_date()]
-#' * [make_datetime][lubridate::make_datetime()]
-#' * [make_difftime][lubridate::make_difftime()]
-#' * [mday][lubridate::mday()]
-#' * [mdy][lubridate::mdy()]
-#' * [mdy_h][lubridate::mdy_h()]
-#' * [mdy_hm][lubridate::mdy_hm()]
-#' * [mdy_hms][lubridate::mdy_hms()]
-#' * [minute][lubridate::minute()]
-#' * [month][lubridate::month()]
-#' * [my][lubridate::my()]
-#' * [myd][lubridate::myd()]
-#' * [parse_date_time][lubridate::parse_date_time()]
-#' * [pm][lubridate::pm()]
-#' * [qday][lubridate::qday()]
-#' * [quarter][lubridate::quarter()]
-#' * [round_date][lubridate::round_date()]
-#' * [second][lubridate::second()]
-#' * [semester][lubridate::semester()]
-#' * [tz][lubridate::tz()]
-#' * [wday][lubridate::wday()]
-#' * [week][lubridate::week()]
-#' * [yday][lubridate::yday()]
-#' * [ydm][lubridate::ydm()]
-#' * [ydm_h][lubridate::ydm_h()]
-#' * [ydm_hm][lubridate::ydm_hm()]
-#' * [ydm_hms][lubridate::ydm_hms()]
-#' * [year][lubridate::year()]
-#' * [ym][lubridate::ym()]
-#' * [ymd][lubridate::ymd()]
-#' * [ymd_h][lubridate::ymd_h()]
-#' * [ymd_hm][lubridate::ymd_hm()]
-#' * [ymd_hms][lubridate::ymd_hms()]
-#' * [yq][lubridate::yq()]
+#' * [`am()`][lubridate::am()]
+#' * [`as_date()`][lubridate::as_date()]
+#' * [`as_datetime()`][lubridate::as_datetime()]
+#' * [`ceiling_date()`][lubridate::ceiling_date()]
+#' * [`date()`][lubridate::date()]
+#' * [`date_decimal()`][lubridate::date_decimal()]
+#' * [`day()`][lubridate::day()]
+#' * [`ddays()`][lubridate::ddays()]
+#' * [`decimal_date()`][lubridate::decimal_date()]
+#' * [`dhours()`][lubridate::dhours()]
+#' * [`dmicroseconds()`][lubridate::dmicroseconds()]
+#' * [`dmilliseconds()`][lubridate::dmilliseconds()]
+#' * [`dminutes()`][lubridate::dminutes()]
+#' * [`dmonths()`][lubridate::dmonths()]
+#' * [`dmy()`][lubridate::dmy()]
+#' * [`dmy_h()`][lubridate::dmy_h()]
+#' * [`dmy_hm()`][lubridate::dmy_hm()]
+#' * [`dmy_hms()`][lubridate::dmy_hms()]
+#' * [`dnanoseconds()`][lubridate::dnanoseconds()]
+#' * [`dpicoseconds()`][lubridate::dpicoseconds()]
+#' * [`dseconds()`][lubridate::dseconds()]
+#' * [`dst()`][lubridate::dst()]
+#' * [`dweeks()`][lubridate::dweeks()]
+#' * [`dyears()`][lubridate::dyears()]
+#' * [`dym()`][lubridate::dym()]
+#' * [`epiweek()`][lubridate::epiweek()]
+#' * [`epiyear()`][lubridate::epiyear()]
+#' * [`fast_strptime()`][lubridate::fast_strptime()]
+#' * [`floor_date()`][lubridate::floor_date()]
+#' * [`format_ISO8601()`][lubridate::format_ISO8601()]
+#' * [`hour()`][lubridate::hour()]
+#' * [`is.Date()`][lubridate::is.Date()]
+#' * [`is.instant()`][lubridate::is.instant()]
+#' * [`is.POSIXct()`][lubridate::is.POSIXct()]
+#' * [`is.timepoint()`][lubridate::is.timepoint()]
+#' * [`isoweek()`][lubridate::isoweek()]
+#' * [`isoyear()`][lubridate::isoyear()]
+#' * [`leap_year()`][lubridate::leap_year()]
+#' * [`make_date()`][lubridate::make_date()]
+#' * [`make_datetime()`][lubridate::make_datetime()]
+#' * [`make_difftime()`][lubridate::make_difftime()]
+#' * [`mday()`][lubridate::mday()]
+#' * [`mdy()`][lubridate::mdy()]
+#' * [`mdy_h()`][lubridate::mdy_h()]
+#' * [`mdy_hm()`][lubridate::mdy_hm()]
+#' * [`mdy_hms()`][lubridate::mdy_hms()]
+#' * [`minute()`][lubridate::minute()]
+#' * [`month()`][lubridate::month()]
+#' * [`my()`][lubridate::my()]
+#' * [`myd()`][lubridate::myd()]
+#' * [`parse_date_time()`][lubridate::parse_date_time()]
+#' * [`pm()`][lubridate::pm()]
+#' * [`qday()`][lubridate::qday()]
+#' * [`quarter()`][lubridate::quarter()]
+#' * [`round_date()`][lubridate::round_date()]
+#' * [`second()`][lubridate::second()]
+#' * [`semester()`][lubridate::semester()]
+#' * [`tz()`][lubridate::tz()]
+#' * [`wday()`][lubridate::wday()]
+#' * [`week()`][lubridate::week()]
+#' * [`yday()`][lubridate::yday()]
+#' * [`ydm()`][lubridate::ydm()]
+#' * [`ydm_h()`][lubridate::ydm_h()]
+#' * [`ydm_hm()`][lubridate::ydm_hm()]
+#' * [`ydm_hms()`][lubridate::ydm_hms()]
+#' * [`year()`][lubridate::year()]
+#' * [`ym()`][lubridate::ym()]
+#' * [`ymd()`][lubridate::ymd()]
+#' * [`ymd_h()`][lubridate::ymd_h()]
+#' * [`ymd_hm()`][lubridate::ymd_hm()]
+#' * [`ymd_hms()`][lubridate::ymd_hms()]
+#' * [`yq()`][lubridate::yq()]
 #'
 #' ## methods
 #' 
-#' * [is][methods::is()]
+#' * [`is()`][methods::is()]
 #'
 #' ## rlang
 #' 
-#' * [is_character][rlang::is_character()]
-#' * [is_double][rlang::is_double()]
-#' * [is_integer][rlang::is_integer()]
-#' * [is_list][rlang::is_list()]
-#' * [is_logical][rlang::is_logical()]
+#' * [`is_character()`][rlang::is_character()]
+#' * [`is_double()`][rlang::is_double()]
+#' * [`is_integer()`][rlang::is_integer()]
+#' * [`is_list()`][rlang::is_list()]
+#' * [`is_logical()`][rlang::is_logical()]
 #'
 #' ## stats
 #' 
-#' * [median][stats::median()]
-#' * [quantile][stats::quantile()]
-#' * [sd][stats::sd()]
-#' * [var][stats::var()]
+#' * [`median()`][stats::median()]
+#' * [`quantile()`][stats::quantile()]
+#' * [`sd()`][stats::sd()]
+#' * [`var()`][stats::var()]
 #'
 #' ## stringi
 #' 
-#' * [stri_reverse][stringi::stri_reverse()]
+#' * [`stri_reverse()`][stringi::stri_reverse()]
 #'
 #' ## stringr
 #' 
-#' * [str_c][stringr::str_c()]: the `collapse` argument is not yet supported
-#' * [str_count][stringr::str_count()]
-#' * [str_detect][stringr::str_detect()]
-#' * [str_dup][stringr::str_dup()]
-#' * [str_ends][stringr::str_ends()]
-#' * [str_length][stringr::str_length()]
-#' * `str_like`: not yet in a released version of `stringr`, but it is supported in `arrow`
-#' * [str_pad][stringr::str_pad()]
-#' * [str_replace][stringr::str_replace()]
-#' * [str_replace_all][stringr::str_replace_all()]
-#' * [str_split][stringr::str_split()]
-#' * [str_starts][stringr::str_starts()]
-#' * [str_sub][stringr::str_sub()]
-#' * [str_to_lower][stringr::str_to_lower()]
-#' * [str_to_title][stringr::str_to_title()]
-#' * [str_to_upper][stringr::str_to_upper()]
-#' * [str_trim][stringr::str_trim()]
+#' * [`str_c()`][stringr::str_c()]: the `collapse` argument is not yet supported
+#' * [`str_count()`][stringr::str_count()]
+#' * [`str_detect()`][stringr::str_detect()]
+#' * [`str_dup()`][stringr::str_dup()]
+#' * [`str_ends()`][stringr::str_ends()]
+#' * [`str_length()`][stringr::str_length()]
+#' * `str_like()`: not yet in a released version of `stringr`, but it is supported in `arrow`
+#' * [`str_pad()`][stringr::str_pad()]
+#' * [`str_replace()`][stringr::str_replace()]
+#' * [`str_replace_all()`][stringr::str_replace_all()]
+#' * [`str_split()`][stringr::str_split()]
+#' * [`str_starts()`][stringr::str_starts()]
+#' * [`str_sub()`][stringr::str_sub()]
+#' * [`str_to_lower()`][stringr::str_to_lower()]
+#' * [`str_to_title()`][stringr::str_to_title()]
+#' * [`str_to_upper()`][stringr::str_to_upper()]
+#' * [`str_trim()`][stringr::str_trim()]
 #'
 #' ## tibble
 #' 
-#' * [tibble][tibble::tibble()]
+#' * [`tibble()`][tibble::tibble()]
 #'
 #' ## tidyselect
 #' 
-#' * [all_of][tidyselect::all_of()]
-#' * [contains][tidyselect::contains()]
-#' * [ends_with][tidyselect::ends_with()]
-#' * [everything][tidyselect::everything()]
-#' * [last_col][tidyselect::last_col()]
-#' * [matches][tidyselect::matches()]
-#' * [num_range][tidyselect::num_range()]
-#' * [one_of][tidyselect::one_of()]
-#' * [starts_with][tidyselect::starts_with()]
+#' * [`all_of()`][tidyselect::all_of()]
+#' * [`contains()`][tidyselect::contains()]
+#' * [`ends_with()`][tidyselect::ends_with()]
+#' * [`everything()`][tidyselect::everything()]
+#' * [`last_col()`][tidyselect::last_col()]
+#' * [`matches()`][tidyselect::matches()]
+#' * [`num_range()`][tidyselect::num_range()]
+#' * [`one_of()`][tidyselect::one_of()]
+#' * [`starts_with()`][tidyselect::starts_with()]
 #'
-#' @name arrow-dplyr-functions
+#' @name acero
 NULL
diff --git a/r/_pkgdown.yml b/r/_pkgdown.yml
index 6c23305a8f3..70bd7ac518c 100644
--- a/r/_pkgdown.yml
+++ b/r/_pkgdown.yml
@@ -216,7 +216,7 @@ reference:
       - codec_is_available
   - title: Computation
     contents:
-      - arrow-dplyr-functions
+      - acero
       - call_function
       - match_arrow
       - value_counts
diff --git a/r/data-raw/docgen.R b/r/data-raw/docgen.R
index 5d767f27041..ef39bec272f 100644
--- a/r/data-raw/docgen.R
+++ b/r/data-raw/docgen.R
@@ -78,7 +78,7 @@ file_template <- "# Licensed to the Apache Software Foundation (ASF) under one
 #'
 %s
 #'
-#' @name arrow-dplyr-functions
+#' @name acero
 NULL"
 
 library(dplyr)
@@ -91,14 +91,22 @@ do_not_link <- c(
 
 # Vectorized function to make entries for each function
 render_fun <- function(fun, pkg_fun, notes) {
+  # Add () to fun if it's not an operator
+  not_operators <- grepl("^[[:alpha:]]", fun)
+  fun[not_operators] <- paste0(fun[not_operators], "()")
+  # Wrap in backticks so it renders as \code{} for better formatting
+  fun <- paste0("`", fun, "`")
+  # Wrap in \link{}
   out <- ifelse(
     pkg_fun %in% do_not_link,
-    paste0("* `", fun, "`"),
-    paste0("* [", fun, "][", pkg_fun, "()]")
+    fun,
+    paste0("[", fun, "][", pkg_fun, "()]")
   )
+  # Append notes after ": ", if there are any
   has_notes <- nzchar(notes)
   out[has_notes] <- paste0(out[has_notes], ": ", notes[has_notes])
-  out
+  # Make bullets
+  paste("*", out)
 }
 
 # This renders a bulleted list under a package heading
@@ -121,17 +129,20 @@ docs <- arrow:::.cache$docs
 
 # across() is handled by manipulating the quosures, not by nse_funcs
 docs[["dplyr::across"]] <- c(
-  "only supported inside `mutate()`;", # TODO(ARROW-17362, ARROW-17387)
-  "purrr-style lambda functions not yet supported" # TODO(ARROW-17366)
+  # TODO(ARROW-17387, ARROW-17389, ARROW-17390)
+  "only supported inside `mutate()`, `summarize()`, and `arrange()`;",
+  # TODO(ARROW-17366)
+  "purrr-style lambda functions",
+  "and use of `where()` selection helper not yet supported"
 )
+# desc() is a special helper handled inside of arrange()
+docs[["dplyr::desc"]] <- character(0)
 
 # add tidyselect helpers by parsing the reexports file
 tidyselect <- grep("^tidyselect::", readLines("R/reexports-tidyselect.R"), value = TRUE)
 
 docs <- c(docs, setNames(rep(list(NULL), length(tidyselect)), tidyselect))
 
-# TODO: add doc pages for add_filename() and cast()
-
 fun_df <- tibble::tibble(
   pkg_fun = names(docs),
   notes = docs
diff --git a/r/man/acero.Rd b/r/man/acero.Rd
new file mode 100644
index 00000000000..5b5920f386e
--- /dev/null
+++ b/r/man/acero.Rd
@@ -0,0 +1,339 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/dplyr-funcs-doc.R
+\name{acero}
+\alias{acero}
+\title{Functions available in Arrow dplyr queries}
+\description{
+The \code{arrow} package contains methods for 32 \code{dplyr} table functions, many of
+which are "verbs" that do transformations to one or more tables.
+The package also has mappings of 205 R functions to the corresponding
+functions in the Arrow compute library. These allow you to write code inside
+of \code{dplyr} methods that call R functions, including many in packages like
+\code{stringr} and \code{lubridate}, and they will get translated to Arrow and run
+on the Arrow query engine (Acero). This document lists all of the mapped
+functions.
+}
+\section{\code{dplyr} verbs}{
+Most verb functions return an \code{arrow_dplyr_query} object, similar in spirit
+to a \code{dbplyr::tbl_lazy}. This means that the verbs do not eagerly evaluate
+the query on the data. To run the query, call either \code{compute()},
+which returns an \code{arrow} \link{Table}, or \code{collect()}, which pulls the resulting
+Table into an R \code{data.frame}.
+\itemize{
+\item \code{\link[dplyr:filter-joins]{anti_join()}}
+\item \code{\link[dplyr:arrange]{arrange()}}
+\item \code{\link[dplyr:compute]{collapse()}}
+\item \code{\link[dplyr:compute]{collect()}}
+\item \code{\link[dplyr:compute]{compute()}}
+\item \code{\link[dplyr:count]{count()}}
+\item \code{\link[dplyr:distinct]{distinct()}}
+\item \code{\link[dplyr:explain]{explain()}}
+\item \code{\link[dplyr:filter]{filter()}}
+\item \code{\link[dplyr:mutate-joins]{full_join()}}
+\item \code{\link[dplyr:glimpse]{glimpse()}}
+\item \code{\link[dplyr:group_by]{group_by()}}
+\item \code{\link[dplyr:group_by_drop_default]{group_by_drop_default()}}
+\item \code{\link[dplyr:group_data]{group_vars()}}
+\item \code{\link[dplyr:group_data]{groups()}}
+\item \code{\link[dplyr:mutate-joins]{inner_join()}}
+\item \code{\link[dplyr:mutate-joins]{left_join()}}
+\item \code{\link[dplyr:mutate]{mutate()}}
+\item \code{\link[dplyr:pull]{pull()}}
+\item \code{\link[dplyr:relocate]{relocate()}}
+\item \code{\link[dplyr:rename]{rename()}}
+\item \code{\link[dplyr:rename]{rename_with()}}
+\item \code{\link[dplyr:mutate-joins]{right_join()}}
+\item \code{\link[dplyr:select]{select()}}
+\item \code{\link[dplyr:filter-joins]{semi_join()}}
+\item \code{\link[dplyr:explain]{show_query()}}
+\item \code{\link[dplyr:summarise]{summarise()}}
+\item \code{\link[dplyr:count]{tally()}}
+\item \code{\link[dplyr:mutate]{transmute()}}
+\item \code{\link[dplyr:group_by]{ungroup()}}
+\item \code{\link[dplyr:reexports]{union()}}
+\item \code{\link[dplyr:setops]{union_all()}}
+}
+}
+
+\section{Function mappings}{
+In the list below, any differences in behavior or support between Acero and
+the R function are listed. If no notes follow the function name, then you
+can assume that the function works in Acero just as it does in R.
+
+Functions can be called either as \code{pkg::fun()} or just \code{fun()}, i.e. both
+\code{str_sub()} and \code{stringr::str_sub()} work.
+
+In addition to these functions, you can call any of Arrow's 243 compute
+functions directly. Arrow has many functions that don't map to an existing R
+function. In other cases where there is an R function mapping, you can still
+call the Arrow function directly if you don't want the adaptations that the R
+mapping has that make Acero behave like R. These functions are listed in the
+\href{https://arrow.apache.org/docs/cpp/compute.html}{C++ documentation}, and
+in the function registry in R, they are named with an \code{arrow_} prefix, such
+as \code{arrow_ascii_is_decimal}.
+\subsection{arrow}{
+\itemize{
+\item \code{\link[=add_filename]{add_filename()}}
+\item \code{\link[=cast]{cast()}}
+}
+}
+
+\subsection{base}{
+\itemize{
+\item \code{\link[=-]{-}}
+\item \code{\link[=!]{!}}
+\item \code{\link[=!=]{!=}}
+\item \code{\link[=*]{*}}
+\item \code{\link[=/]{/}}
+\item \code{\link[=&]{&}}
+\item \code{\link[=\%/\%]{\%/\%}}
+\item \code{\link[=\%\%]{\%\%}}
+\item \code{\link[=\%in\%]{\%in\%}}
+\item \code{\link[=^]{^}}
+\item \code{\link[=+]{+}}
+\item \code{\link[=<]{<}}
+\item \code{\link[=<=]{<=}}
+\item \code{\link[===]{==}}
+\item \code{\link[=>]{>}}
+\item \code{\link[=>=]{>=}}
+\item \code{\link[=|]{|}}
+\item \code{\link[base:MathFun]{abs()}}
+\item \code{\link[base:Trig]{acos()}}
+\item \code{\link[base:all]{all()}}
+\item \code{\link[base:any]{any()}}
+\item \code{\link[base:character]{as.character()}}
+\item \code{\link[base:as.Date]{as.Date()}}
+\item \code{\link[base:difftime]{as.difftime()}}
+\item \code{\link[base:double]{as.double()}}
+\item \code{\link[base:integer]{as.integer()}}
+\item \code{\link[base:logical]{as.logical()}}
+\item \code{\link[base:numeric]{as.numeric()}}
+\item \code{\link[base:Trig]{asin()}}
+\item \code{\link[base:Round]{ceiling()}}
+\item \code{\link[base:Trig]{cos()}}
+\item \code{\link[base:data.frame]{data.frame()}}
+\item \code{\link[base:difftime]{difftime()}}
+\item \code{\link[base:startsWith]{endsWith()}}
+\item \code{\link[base:Log]{exp()}}
+\item \code{\link[base:Round]{floor()}}
+\item \code{\link[base:format]{format()}}
+\item \code{\link[base:grep]{grepl()}}
+\item \code{\link[base:grep]{gsub()}}
+\item \code{\link[base:ifelse]{ifelse()}}
+\item \code{\link[base:character]{is.character()}}
+\item \code{\link[base:double]{is.double()}}
+\item \code{\link[base:factor]{is.factor()}}
+\item \code{\link[base:is.finite]{is.finite()}}
+\item \code{\link[base:is.finite]{is.infinite()}}
+\item \code{\link[base:integer]{is.integer()}}
+\item \code{\link[base:list]{is.list()}}
+\item \code{\link[base:logical]{is.logical()}}
+\item \code{\link[base:NA]{is.na()}}
+\item \code{\link[base:is.finite]{is.nan()}}
+\item \code{\link[base:numeric]{is.numeric()}}
+\item \code{\link[base:ISOdatetime]{ISOdate()}}
+\item \code{\link[base:ISOdatetime]{ISOdatetime()}}
+\item \code{\link[base:Log]{log()}}
+\item \code{\link[base:Log]{log10()}}
+\item \code{\link[base:Log]{log1p()}}
+\item \code{\link[base:Log]{log2()}}
+\item \code{\link[base:Log]{logb()}}
+\item \code{\link[base:Extremes]{max()}}
+\item \code{\link[base:mean]{mean()}}
+\item \code{\link[base:Extremes]{min()}}
+\item \code{\link[base:nchar]{nchar()}}
+\item \code{\link[base:paste]{paste()}}: the \code{collapse} argument is not yet supported
+\item \code{\link[base:paste]{paste0()}}: the \code{collapse} argument is not yet supported
+\item \code{\link[base:Extremes]{pmax()}}
+\item \code{\link[base:Extremes]{pmin()}}
+\item \code{\link[base:Round]{round()}}
+\item \code{\link[base:sign]{sign()}}
+\item \code{\link[base:Trig]{sin()}}
+\item \code{\link[base:MathFun]{sqrt()}}
+\item \code{\link[base:startsWith]{startsWith()}}
+\item \code{\link[base:strptime]{strftime()}}
+\item \code{\link[base:strptime]{strptime()}}
+\item \code{\link[base:strrep]{strrep()}}
+\item \code{\link[base:strsplit]{strsplit()}}
+\item \code{\link[base:grep]{sub()}}
+\item \code{\link[base:substr]{substr()}}
+\item \code{\link[base:substr]{substring()}}
+\item \code{\link[base:sum]{sum()}}
+\item \code{\link[base:Trig]{tan()}}
+\item \code{\link[base:chartr]{tolower()}}
+\item \code{\link[base:chartr]{toupper()}}
+\item \code{\link[base:Round]{trunc()}}
+}
+}
+
+\subsection{bit64}{
+\itemize{
+\item \code{\link[bit64:as.integer64.character]{as.integer64()}}
+\item \code{\link[bit64:bit64-package]{is.integer64()}}
+}
+}
+
+\subsection{dplyr}{
+\itemize{
+\item \code{\link[dplyr:across]{across()}}: only supported inside \code{mutate()}, \code{summarize()}, and \code{arrange()}; purrr-style lambda functions and use of \code{where()} selection helper not yet supported
+\item \code{\link[dplyr:between]{between()}}
+\item \code{\link[dplyr:case_when]{case_when()}}
+\item \code{\link[dplyr:coalesce]{coalesce()}}
+\item \code{\link[dplyr:desc]{desc()}}
+\item \code{\link[dplyr:if_else]{if_else()}}
+\item \code{\link[dplyr:context]{n()}}
+\item \code{\link[dplyr:n_distinct]{n_distinct()}}
+}
+}
+
+\subsection{lubridate}{
+\itemize{
+\item \code{\link[lubridate:am]{am()}}
+\item \code{\link[lubridate:as_date]{as_date()}}
+\item \code{\link[lubridate:as_date]{as_datetime()}}
+\item \code{\link[lubridate:round_date]{ceiling_date()}}
+\item \code{\link[lubridate:date]{date()}}
+\item \code{\link[lubridate:date_decimal]{date_decimal()}}
+\item \code{\link[lubridate:day]{day()}}
+\item \code{\link[lubridate:duration]{ddays()}}
+\item \code{\link[lubridate:decimal_date]{decimal_date()}}
+\item \code{\link[lubridate:duration]{dhours()}}
+\item \code{\link[lubridate:duration]{dmicroseconds()}}
+\item \code{\link[lubridate:duration]{dmilliseconds()}}
+\item \code{\link[lubridate:duration]{dminutes()}}
+\item \code{\link[lubridate:duration]{dmonths()}}
+\item \code{\link[lubridate:ymd]{dmy()}}
+\item \code{\link[lubridate:ymd_hms]{dmy_h()}}
+\item \code{\link[lubridate:ymd_hms]{dmy_hm()}}
+\item \code{\link[lubridate:ymd_hms]{dmy_hms()}}
+\item \code{\link[lubridate:duration]{dnanoseconds()}}
+\item \code{\link[lubridate:duration]{dpicoseconds()}}
+\item \code{\link[lubridate:duration]{dseconds()}}
+\item \code{\link[lubridate:dst]{dst()}}
+\item \code{\link[lubridate:duration]{dweeks()}}
+\item \code{\link[lubridate:duration]{dyears()}}
+\item \code{\link[lubridate:ymd]{dym()}}
+\item \code{\link[lubridate:week]{epiweek()}}
+\item \code{\link[lubridate:year]{epiyear()}}
+\item \code{\link[lubridate:parse_date_time]{fast_strptime()}}
+\item \code{\link[lubridate:round_date]{floor_date()}}
+\item \code{\link[lubridate:format_ISO8601]{format_ISO8601()}}
+\item \code{\link[lubridate:hour]{hour()}}
+\item \code{\link[lubridate:date_utils]{is.Date()}}
+\item \code{\link[lubridate:is.instant]{is.instant()}}
+\item \code{\link[lubridate:posix_utils]{is.POSIXct()}}
+\item \code{\link[lubridate:is.instant]{is.timepoint()}}
+\item \code{\link[lubridate:week]{isoweek()}}
+\item \code{\link[lubridate:year]{isoyear()}}
+\item \code{\link[lubridate:leap_year]{leap_year()}}
+\item \code{\link[lubridate:make_datetime]{make_date()}}
+\item \code{\link[lubridate:make_datetime]{make_datetime()}}
+\item \code{\link[lubridate:make_difftime]{make_difftime()}}
+\item \code{\link[lubridate:day]{mday()}}
+\item \code{\link[lubridate:ymd]{mdy()}}
+\item \code{\link[lubridate:ymd_hms]{mdy_h()}}
+\item \code{\link[lubridate:ymd_hms]{mdy_hm()}}
+\item \code{\link[lubridate:ymd_hms]{mdy_hms()}}
+\item \code{\link[lubridate:minute]{minute()}}
+\item \code{\link[lubridate:month]{month()}}
+\item \code{\link[lubridate:ymd]{my()}}
+\item \code{\link[lubridate:ymd]{myd()}}
+\item \code{\link[lubridate:parse_date_time]{parse_date_time()}}
+\item \code{\link[lubridate:am]{pm()}}
+\item \code{\link[lubridate:day]{qday()}}
+\item \code{\link[lubridate:quarter]{quarter()}}
+\item \code{\link[lubridate:round_date]{round_date()}}
+\item \code{\link[lubridate:second]{second()}}
+\item \code{\link[lubridate:quarter]{semester()}}
+\item \code{\link[lubridate:tz]{tz()}}
+\item \code{\link[lubridate:day]{wday()}}
+\item \code{\link[lubridate:week]{week()}}
+\item \code{\link[lubridate:day]{yday()}}
+\item \code{\link[lubridate:ymd]{ydm()}}
+\item \code{\link[lubridate:ymd_hms]{ydm_h()}}
+\item \code{\link[lubridate:ymd_hms]{ydm_hm()}}
+\item \code{\link[lubridate:ymd_hms]{ydm_hms()}}
+\item \code{\link[lubridate:year]{year()}}
+\item \code{\link[lubridate:ymd]{ym()}}
+\item \code{\link[lubridate:ymd]{ymd()}}
+\item \code{\link[lubridate:ymd_hms]{ymd_h()}}
+\item \code{\link[lubridate:ymd_hms]{ymd_hm()}}
+\item \code{\link[lubridate:ymd_hms]{ymd_hms()}}
+\item \code{\link[lubridate:ymd]{yq()}}
+}
+}
+
+\subsection{methods}{
+\itemize{
+\item \code{\link[methods:is]{is()}}
+}
+}
+
+\subsection{rlang}{
+\itemize{
+\item \code{\link[rlang:type-predicates]{is_character()}}
+\item \code{\link[rlang:type-predicates]{is_double()}}
+\item \code{\link[rlang:type-predicates]{is_integer()}}
+\item \code{\link[rlang:type-predicates]{is_list()}}
+\item \code{\link[rlang:type-predicates]{is_logical()}}
+}
+}
+
+\subsection{stats}{
+\itemize{
+\item \code{\link[stats:median]{median()}}
+\item \code{\link[stats:quantile]{quantile()}}
+\item \code{\link[stats:sd]{sd()}}
+\item \code{\link[stats:cor]{var()}}
+}
+}
+
+\subsection{stringi}{
+\itemize{
+\item \code{\link[stringi:stri_reverse]{stri_reverse()}}
+}
+}
+
+\subsection{stringr}{
+\itemize{
+\item \code{\link[stringr:str_c]{str_c()}}: the \code{collapse} argument is not yet supported
+\item \code{\link[stringr:str_count]{str_count()}}
+\item \code{\link[stringr:str_detect]{str_detect()}}
+\item \code{\link[stringr:str_dup]{str_dup()}}
+\item \code{\link[stringr:str_starts]{str_ends()}}
+\item \code{\link[stringr:str_length]{str_length()}}
+\item \code{str_like()}: not yet in a released version of \code{stringr}, but it is supported in \code{arrow}
+\item \code{\link[stringr:str_pad]{str_pad()}}
+\item \code{\link[stringr:str_replace]{str_replace()}}
+\item \code{\link[stringr:str_replace]{str_replace_all()}}
+\item \code{\link[stringr:str_split]{str_split()}}
+\item \code{\link[stringr:str_starts]{str_starts()}}
+\item \code{\link[stringr:str_sub]{str_sub()}}
+\item \code{\link[stringr:case]{str_to_lower()}}
+\item \code{\link[stringr:case]{str_to_title()}}
+\item \code{\link[stringr:case]{str_to_upper()}}
+\item \code{\link[stringr:str_trim]{str_trim()}}
+}
+}
+
+\subsection{tibble}{
+\itemize{
+\item \code{\link[tibble:tibble]{tibble()}}
+}
+}
+
+\subsection{tidyselect}{
+\itemize{
+\item \code{\link[tidyselect:all_of]{all_of()}}
+\item \code{\link[tidyselect:starts_with]{contains()}}
+\item \code{\link[tidyselect:starts_with]{ends_with()}}
+\item \code{\link[tidyselect:everything]{everything()}}
+\item \code{\link[tidyselect:everything]{last_col()}}
+\item \code{\link[tidyselect:starts_with]{matches()}}
+\item \code{\link[tidyselect:starts_with]{num_range()}}
+\item \code{\link[tidyselect:one_of]{one_of()}}
+\item \code{\link[tidyselect:starts_with]{starts_with()}}
+}
+}
+}
+
diff --git a/r/man/arrow-dplyr-functions.Rd b/r/man/arrow-dplyr-functions.Rd
deleted file mode 100644
index 438d8b64025..00000000000
--- a/r/man/arrow-dplyr-functions.Rd
+++ /dev/null
@@ -1,338 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/dplyr-funcs-doc.R
-\name{arrow-dplyr-functions}
-\alias{arrow-dplyr-functions}
-\title{Functions available in Arrow dplyr queries}
-\description{
-The \code{arrow} package contains methods for 32 \code{dplyr} table functions, many of
-which are "verbs" that do transformations to one or more tables.
-The package also has mappings of 204 R functions to the corresponding
-functions in the Arrow compute library. These allow you to write code inside
-of \code{dplyr} methods that call R functions, including many in packages like
-\code{stringr} and \code{lubridate}, and they will get translated to Arrow and run
-on the Arrow query engine (Acero). This document lists all of the mapped
-functions.
-}
-\section{\code{dplyr} verbs}{
-Most verb functions return an \code{arrow_dplyr_query} object, similar in spirit
-to a \code{dbplyr::tbl_lazy}. This means that the verbs do not eagerly evaluate
-the query on the data. To run the query, call either \code{compute()},
-which returns an \code{arrow} \link{Table}, or \code{collect()}, which pulls the resulting
-Table into an R \code{data.frame}.
-\itemize{
-\item \link[dplyr:filter-joins]{anti_join}
-\item \link[dplyr:arrange]{arrange}
-\item \link[dplyr:compute]{collapse}
-\item \link[dplyr:compute]{collect}
-\item \link[dplyr:compute]{compute}
-\item \link[dplyr:count]{count}
-\item \link[dplyr:distinct]{distinct}
-\item \link[dplyr:explain]{explain}
-\item \link[dplyr:filter]{filter}
-\item \link[dplyr:mutate-joins]{full_join}
-\item \link[dplyr:glimpse]{glimpse}
-\item \link[dplyr:group_by]{group_by}
-\item \link[dplyr:group_by_drop_default]{group_by_drop_default}
-\item \link[dplyr:group_data]{group_vars}
-\item \link[dplyr:group_data]{groups}
-\item \link[dplyr:mutate-joins]{inner_join}
-\item \link[dplyr:mutate-joins]{left_join}
-\item \link[dplyr:mutate]{mutate}
-\item \link[dplyr:pull]{pull}
-\item \link[dplyr:relocate]{relocate}
-\item \link[dplyr:rename]{rename}
-\item \link[dplyr:rename]{rename_with}
-\item \link[dplyr:mutate-joins]{right_join}
-\item \link[dplyr:select]{select}
-\item \link[dplyr:filter-joins]{semi_join}
-\item \link[dplyr:explain]{show_query}
-\item \link[dplyr:summarise]{summarise}
-\item \link[dplyr:count]{tally}
-\item \link[dplyr:mutate]{transmute}
-\item \link[dplyr:group_by]{ungroup}
-\item \link[dplyr:reexports]{union}
-\item \link[dplyr:setops]{union_all}
-}
-}
-
-\section{Function mappings}{
-In the list below, any differences in behavior or support between Acero and
-the R function are listed. If no notes follow the function name, then you
-can assume that the function works in Acero just as it does in R.
-
-Functions can be called either as \code{pkg::fun()} or just \code{fun()}, i.e. both
-\code{str_sub()} and \code{stringr::str_sub()} work.
-
-In addition to these functions, you can call any of Arrow's 243 compute
-functions directly. Arrow has many functions that don't map to an existing R
-function. In other cases where there is an R function mapping, you can still
-call the Arrow function directly if you don't want the adaptations that the R
-mapping has that make Acero behave like R. These functions are listed in the
-\href{https://arrow.apache.org/docs/cpp/compute.html}{C++ documentation}, and
-in the function registry in R, they are named with an \code{arrow_} prefix, such
-as \code{arrow_ascii_is_decimal}.
-\subsection{arrow}{
-\itemize{
-\item \link[=add_filename]{add_filename}
-\item \link[=cast]{cast}
-}
-}
-
-\subsection{base}{
-\itemize{
-\item \link[=-]{-}
-\item \link[=!]{!}
-\item \link[=!=]{!=}
-\item \link[=*]{*}
-\item \link[=/]{/}
-\item \link[=&]{&}
-\item \link[=\%/\%]{\%/\%}
-\item \link[=\%\%]{\%\%}
-\item \link[=\%in\%]{\%in\%}
-\item \link[=^]{^}
-\item \link[=+]{+}
-\item \link[=<]{<}
-\item \link[=<=]{<=}
-\item \link[===]{==}
-\item \link[=>]{>}
-\item \link[=>=]{>=}
-\item \link[=|]{|}
-\item \link[base:MathFun]{abs}
-\item \link[base:Trig]{acos}
-\item \link[base:all]{all}
-\item \link[base:any]{any}
-\item \link[base:character]{as.character}
-\item \link[base:as.Date]{as.Date}
-\item \link[base:difftime]{as.difftime}
-\item \link[base:double]{as.double}
-\item \link[base:integer]{as.integer}
-\item \link[base:logical]{as.logical}
-\item \link[base:numeric]{as.numeric}
-\item \link[base:Trig]{asin}
-\item \link[base:Round]{ceiling}
-\item \link[base:Trig]{cos}
-\item \link[base:data.frame]{data.frame}
-\item \link[base:difftime]{difftime}
-\item \link[base:startsWith]{endsWith}
-\item \link[base:Log]{exp}
-\item \link[base:Round]{floor}
-\item \link[base:format]{format}
-\item \link[base:grep]{grepl}
-\item \link[base:grep]{gsub}
-\item \link[base:ifelse]{ifelse}
-\item \link[base:character]{is.character}
-\item \link[base:double]{is.double}
-\item \link[base:factor]{is.factor}
-\item \link[base:is.finite]{is.finite}
-\item \link[base:is.finite]{is.infinite}
-\item \link[base:integer]{is.integer}
-\item \link[base:list]{is.list}
-\item \link[base:logical]{is.logical}
-\item \link[base:NA]{is.na}
-\item \link[base:is.finite]{is.nan}
-\item \link[base:numeric]{is.numeric}
-\item \link[base:ISOdatetime]{ISOdate}
-\item \link[base:ISOdatetime]{ISOdatetime}
-\item \link[base:Log]{log}
-\item \link[base:Log]{log10}
-\item \link[base:Log]{log1p}
-\item \link[base:Log]{log2}
-\item \link[base:Log]{logb}
-\item \link[base:Extremes]{max}
-\item \link[base:mean]{mean}
-\item \link[base:Extremes]{min}
-\item \link[base:nchar]{nchar}
-\item \link[base:paste]{paste}: the \code{collapse} argument is not yet supported
-\item \link[base:paste]{paste0}: the \code{collapse} argument is not yet supported
-\item \link[base:Extremes]{pmax}
-\item \link[base:Extremes]{pmin}
-\item \link[base:Round]{round}
-\item \link[base:sign]{sign}
-\item \link[base:Trig]{sin}
-\item \link[base:MathFun]{sqrt}
-\item \link[base:startsWith]{startsWith}
-\item \link[base:strptime]{strftime}
-\item \link[base:strptime]{strptime}
-\item \link[base:strrep]{strrep}
-\item \link[base:strsplit]{strsplit}
-\item \link[base:grep]{sub}
-\item \link[base:substr]{substr}
-\item \link[base:substr]{substring}
-\item \link[base:sum]{sum}
-\item \link[base:Trig]{tan}
-\item \link[base:chartr]{tolower}
-\item \link[base:chartr]{toupper}
-\item \link[base:Round]{trunc}
-}
-}
-
-\subsection{bit64}{
-\itemize{
-\item \link[bit64:as.integer64.character]{as.integer64}
-\item \link[bit64:bit64-package]{is.integer64}
-}
-}
-
-\subsection{dplyr}{
-\itemize{
-\item \link[dplyr:across]{across}: only supported inside \code{mutate()}; purrr-style lambda functions not yet supported
-\item \link[dplyr:between]{between}
-\item \link[dplyr:case_when]{case_when}
-\item \link[dplyr:coalesce]{coalesce}
-\item \link[dplyr:if_else]{if_else}
-\item \link[dplyr:context]{n}
-\item \link[dplyr:n_distinct]{n_distinct}
-}
-}
-
-\subsection{lubridate}{
-\itemize{
-\item \link[lubridate:am]{am}
-\item \link[lubridate:as_date]{as_date}
-\item \link[lubridate:as_date]{as_datetime}
-\item \link[lubridate:round_date]{ceiling_date}
-\item \link[lubridate:date]{date}
-\item \link[lubridate:date_decimal]{date_decimal}
-\item \link[lubridate:day]{day}
-\item \link[lubridate:duration]{ddays}
-\item \link[lubridate:decimal_date]{decimal_date}
-\item \link[lubridate:duration]{dhours}
-\item \link[lubridate:duration]{dmicroseconds}
-\item \link[lubridate:duration]{dmilliseconds}
-\item \link[lubridate:duration]{dminutes}
-\item \link[lubridate:duration]{dmonths}
-\item \link[lubridate:ymd]{dmy}
-\item \link[lubridate:ymd_hms]{dmy_h}
-\item \link[lubridate:ymd_hms]{dmy_hm}
-\item \link[lubridate:ymd_hms]{dmy_hms}
-\item \link[lubridate:duration]{dnanoseconds}
-\item \link[lubridate:duration]{dpicoseconds}
-\item \link[lubridate:duration]{dseconds}
-\item \link[lubridate:dst]{dst}
-\item \link[lubridate:duration]{dweeks}
-\item \link[lubridate:duration]{dyears}
-\item \link[lubridate:ymd]{dym}
-\item \link[lubridate:week]{epiweek}
-\item \link[lubridate:year]{epiyear}
-\item \link[lubridate:parse_date_time]{fast_strptime}
-\item \link[lubridate:round_date]{floor_date}
-\item \link[lubridate:format_ISO8601]{format_ISO8601}
-\item \link[lubridate:hour]{hour}
-\item \link[lubridate:date_utils]{is.Date}
-\item \link[lubridate:is.instant]{is.instant}
-\item \link[lubridate:posix_utils]{is.POSIXct}
-\item \link[lubridate:is.instant]{is.timepoint}
-\item \link[lubridate:week]{isoweek}
-\item \link[lubridate:year]{isoyear}
-\item \link[lubridate:leap_year]{leap_year}
-\item \link[lubridate:make_datetime]{make_date}
-\item \link[lubridate:make_datetime]{make_datetime}
-\item \link[lubridate:make_difftime]{make_difftime}
-\item \link[lubridate:day]{mday}
-\item \link[lubridate:ymd]{mdy}
-\item \link[lubridate:ymd_hms]{mdy_h}
-\item \link[lubridate:ymd_hms]{mdy_hm}
-\item \link[lubridate:ymd_hms]{mdy_hms}
-\item \link[lubridate:minute]{minute}
-\item \link[lubridate:month]{month}
-\item \link[lubridate:ymd]{my}
-\item \link[lubridate:ymd]{myd}
-\item \link[lubridate:parse_date_time]{parse_date_time}
-\item \link[lubridate:am]{pm}
-\item \link[lubridate:day]{qday}
-\item \link[lubridate:quarter]{quarter}
-\item \link[lubridate:round_date]{round_date}
-\item \link[lubridate:second]{second}
-\item \link[lubridate:quarter]{semester}
-\item \link[lubridate:tz]{tz}
-\item \link[lubridate:day]{wday}
-\item \link[lubridate:week]{week}
-\item \link[lubridate:day]{yday}
-\item \link[lubridate:ymd]{ydm}
-\item \link[lubridate:ymd_hms]{ydm_h}
-\item \link[lubridate:ymd_hms]{ydm_hm}
-\item \link[lubridate:ymd_hms]{ydm_hms}
-\item \link[lubridate:year]{year}
-\item \link[lubridate:ymd]{ym}
-\item \link[lubridate:ymd]{ymd}
-\item \link[lubridate:ymd_hms]{ymd_h}
-\item \link[lubridate:ymd_hms]{ymd_hm}
-\item \link[lubridate:ymd_hms]{ymd_hms}
-\item \link[lubridate:ymd]{yq}
-}
-}
-
-\subsection{methods}{
-\itemize{
-\item \link[methods:is]{is}
-}
-}
-
-\subsection{rlang}{
-\itemize{
-\item \link[rlang:type-predicates]{is_character}
-\item \link[rlang:type-predicates]{is_double}
-\item \link[rlang:type-predicates]{is_integer}
-\item \link[rlang:type-predicates]{is_list}
-\item \link[rlang:type-predicates]{is_logical}
-}
-}
-
-\subsection{stats}{
-\itemize{
-\item \link[stats:median]{median}
-\item \link[stats:quantile]{quantile}
-\item \link[stats:sd]{sd}
-\item \link[stats:cor]{var}
-}
-}
-
-\subsection{stringi}{
-\itemize{
-\item \link[stringi:stri_reverse]{stri_reverse}
-}
-}
-
-\subsection{stringr}{
-\itemize{
-\item \link[stringr:str_c]{str_c}: the \code{collapse} argument is not yet supported
-\item \link[stringr:str_count]{str_count}
-\item \link[stringr:str_detect]{str_detect}
-\item \link[stringr:str_dup]{str_dup}
-\item \link[stringr:str_starts]{str_ends}
-\item \link[stringr:str_length]{str_length}
-\item \code{str_like}: not yet in a released version of \code{stringr}, but it is supported in \code{arrow}
-\item \link[stringr:str_pad]{str_pad}
-\item \link[stringr:str_replace]{str_replace}
-\item \link[stringr:str_replace]{str_replace_all}
-\item \link[stringr:str_split]{str_split}
-\item \link[stringr:str_starts]{str_starts}
-\item \link[stringr:str_sub]{str_sub}
-\item \link[stringr:case]{str_to_lower}
-\item \link[stringr:case]{str_to_title}
-\item \link[stringr:case]{str_to_upper}
-\item \link[stringr:str_trim]{str_trim}
-}
-}
-
-\subsection{tibble}{
-\itemize{
-\item \link[tibble:tibble]{tibble}
-}
-}
-
-\subsection{tidyselect}{
-\itemize{
-\item \link[tidyselect:all_of]{all_of}
-\item \link[tidyselect:starts_with]{contains}
-\item \link[tidyselect:starts_with]{ends_with}
-\item \link[tidyselect:everything]{everything}
-\item \link[tidyselect:everything]{last_col}
-\item \link[tidyselect:starts_with]{matches}
-\item \link[tidyselect:starts_with]{num_range}
-\item \link[tidyselect:one_of]{one_of}
-\item \link[tidyselect:starts_with]{starts_with}
-}
-}
-}
-

From 8f50f440efe63de96a0c46f625d984f982103013 Mon Sep 17 00:00:00 2001
From: Neal Richardson <neal.p.richardson@gmail.com>
Date: Thu, 15 Sep 2022 17:48:51 -0400
Subject: [PATCH 7/7] Fix check warning

---
 r/R/dplyr-funcs-augmented.R |  9 ++-------
 r/R/dplyr-funcs-type.R      | 23 +++++++++--------------
 r/R/expression.R            | 11 +++--------
 r/man/cast.Rd               | 13 ++++++-------
 4 files changed, 20 insertions(+), 36 deletions(-)

diff --git a/r/R/dplyr-funcs-augmented.R b/r/R/dplyr-funcs-augmented.R
index d1359005e37..1067f15573b 100644
--- a/r/R/dplyr-funcs-augmented.R
+++ b/r/R/dplyr-funcs-augmented.R
@@ -20,8 +20,6 @@
 #' This function only exists inside `arrow` `dplyr` queries, and it only is
-#' valid when quering on a `FileSystemDataset`.
+#' valid when querying on a `FileSystemDataset`.
 #'
-#' @usage add_filename()
-#'
 #' @return A `FieldRef` `Expression` that refers to the filename augmented
 #' column.
 #' @examples
@@ -30,11 +28,8 @@
 #'   mutate(file = add_filename())
 #' }
 #' @keywords internal
-#' @name add_filename
-NULL
+add_filename <- function() Expression$field_ref("__filename")
 
 register_bindings_augmented <- function() {
-  register_binding("arrow::add_filename", function() {
-    Expression$field_ref("__filename")
-  })
+  register_binding("arrow::add_filename", add_filename)
 }
diff --git a/r/R/dplyr-funcs-type.R b/r/R/dplyr-funcs-type.R
index 35734ced05d..aa50cdebc5d 100644
--- a/r/R/dplyr-funcs-type.R
+++ b/r/R/dplyr-funcs-type.R
@@ -25,14 +25,12 @@ register_bindings_type <- function() {
 
 #' Change the type of an array or column
 #'
-#' The `cast()` function only exists inside of `arrow` `dplyr` queries. Use it
-#' as a more convenient way of changing the type of a value or field inside of
-#' a `mutate()` call. To cast an `Array` or `ChunkedArray` outside of a query,
-#' call the `$cast()` method on the object, which has the same semantics.
+#' This is a wrapper around the `$cast()` method that many Arrow objects have.
+#' It is more convenient to call inside `dplyr` pipelines than the method.
 #'
-#' @usage cast(x, target_type, safe = TRUE, ...)
-#' @param x an `Expression`
-#' @param target_type [DataType] to cast to
+#' @param x an `Array`, `Table`, `Expression`, or similar Arrow data object.
+#' @param to [DataType] to cast to; for [Table] and [RecordBatch],
+#' it should be a [Schema].
 #' @param safe logical: only allow the type conversion if no data is lost
 #' (truncation, overflow, etc.). Default is `TRUE`
 #' @param ... specific `CastOptions` to set
@@ -45,17 +43,14 @@ register_bindings_type <- function() {
 #'   mutate(cyl = cast(cyl, string()))
 #' }
 #' @keywords internal
-#' @name cast
 #' @seealso https://arrow.apache.org/docs/cpp/api/compute.html for the list of
 #' supported CastOptions.
-NULL
+cast <- function(x, to, safe = TRUE, ...) {
+  x$cast(to, safe = safe, ...)
+}
 
 register_bindings_type_cast <- function() {
-  register_binding("arrow::cast", function(x, target_type, safe = TRUE, ...) {
-    opts <- cast_options(safe, ...)
-    opts$to_type <- as_type(target_type)
-    Expression$create("cast", x, options = opts)
-  })
+  register_binding("arrow::cast", cast)
 
   # as.* type casting functions
   # as.factor() is mapped in expression.R
diff --git a/r/R/expression.R b/r/R/expression.R
index 09a8ea24608..7a5a600d956 100644
--- a/r/R/expression.R
+++ b/r/R/expression.R
@@ -76,7 +76,6 @@
   "lubridate::yday" = "day_of_year",
   "lubridate::year" = "year",
   "lubridate::leap_year" = "is_leap_year"
-
 )
 
 .binary_function_map <- list(
@@ -158,13 +157,9 @@ Expression <- R6Class("Expression",
       compute___expr__type_id(self, schema)
     },
     cast = function(to_type, safe = TRUE, ...) {
-      opts <- list(
-        to_type = to_type,
-        allow_int_overflow = !safe,
-        allow_time_truncate = !safe,
-        allow_float_truncate = !safe
-      )
-      Expression$create("cast", self, options = modifyList(opts, list(...)))
+      opts <- cast_options(safe, ...)
+      opts$to_type <- as_type(to_type)
+      Expression$create("cast", self, options = opts)
     }
   ),
   active = list(
diff --git a/r/man/cast.Rd b/r/man/cast.Rd
index 81abfa6567d..88134f2e022 100644
--- a/r/man/cast.Rd
+++ b/r/man/cast.Rd
@@ -4,12 +4,13 @@
 \alias{cast}
 \title{Change the type of an array or column}
 \usage{
-cast(x, target_type, safe = TRUE, ...)
+cast(x, to, safe = TRUE, ...)
 }
 \arguments{
-\item{x}{an \code{Expression}}
+\item{x}{an \code{Array}, \code{Table}, \code{Expression}, or similar Arrow data object.}
 
-\item{target_type}{\link{DataType} to cast to}
+\item{to}{\link{DataType} to cast to; for \link{Table} and \link{RecordBatch},
+it should be a \link{Schema}.}
 
 \item{safe}{logical: only allow the type conversion if no data is lost
 (truncation, overflow, etc.). Default is \code{TRUE}}
@@ -20,10 +21,8 @@ cast(x, target_type, safe = TRUE, ...)
 an \code{Expression}
 }
 \description{
-The \code{cast()} function only exists inside of \code{arrow} \code{dplyr} queries. Use it
-as a more convenient way of changing the type of a value or field inside of
-a \code{mutate()} call. To cast an \code{Array} or \code{ChunkedArray} outside of a query,
-call the \verb{$cast()} method on the object, which has the same semantics.
+This is a wrapper around the \verb{$cast()} method that many Arrow objects have.
+It is more convenient to call inside \code{dplyr} pipelines than the method.
 }
 \examples{
 \dontrun{