diff --git a/r/DESCRIPTION b/r/DESCRIPTION
index 7ae6a8de29f..7b60f0c510a 100644
--- a/r/DESCRIPTION
+++ b/r/DESCRIPTION
@@ -103,6 +103,7 @@ Collate:
     'dplyr-funcs-augmented.R'
     'dplyr-funcs-conditional.R'
     'dplyr-funcs-datetime.R'
+    'dplyr-funcs-doc.R'
     'dplyr-funcs-math.R'
     'dplyr-funcs-string.R'
     'dplyr-funcs-type.R'
diff --git a/r/Makefile b/r/Makefile
index 1ddbe595dd2..cb76b4c9775 100644
--- a/r/Makefile
+++ b/r/Makefile
@@ -26,6 +26,7 @@ style-all:
 	R -s -e 'styler::style_file(setdiff(dir(pattern = "R$$", recursive = TRUE), source(".styler_excludes.R")$$value))'
 
 doc: style
+	R -s -f data-raw/docgen.R
 	R -s -e 'roxygen2::roxygenize()'
 	-git add --all man/*.Rd
 
diff --git a/r/R/arrow-package.R b/r/R/arrow-package.R
index 53fb0280a50..e6b3f481e21 100644
--- a/r/R/arrow-package.R
+++ b/r/R/arrow-package.R
@@ -31,25 +31,50 @@
 #' @keywords internal
 "_PACKAGE"
 
+# TODO(ARROW-17666): Include notes about features not supported here.
+supported_dplyr_methods <- list(
+  select = NULL,
+  filter = NULL,
+  collect = NULL,
+  summarise = NULL,
+  group_by = NULL,
+  groups = NULL,
+  group_vars = NULL,
+  group_by_drop_default = NULL,
+  ungroup = NULL,
+  mutate = NULL,
+  transmute = NULL,
+  arrange = NULL,
+  rename = NULL,
+  pull = NULL,
+  relocate = NULL,
+  compute = NULL,
+  collapse = NULL,
+  distinct = NULL,
+  left_join = NULL,
+  right_join = NULL,
+  inner_join = NULL,
+  full_join = NULL,
+  semi_join = NULL,
+  anti_join = NULL,
+  count = NULL,
+  tally = NULL,
+  rename_with = NULL,
+  union = NULL,
+  union_all = NULL,
+  glimpse = NULL,
+  show_query = NULL,
+  explain = NULL
+)
+
 #' @importFrom vctrs s3_register vec_size vec_cast vec_unique
 .onLoad <- function(...) {
   # Make sure C++ knows on which thread it is safe to call the R API
   InitializeMainRThread()
 
-  dplyr_methods <- paste0(
-    "dplyr::",
-    c(
-      "select", "filter", "collect", "summarise", "group_by", "groups",
-      "group_vars", "group_by_drop_default", "ungroup", "mutate", "transmute",
-      "arrange", "rename", "pull", "relocate", "compute", "collapse",
-      "distinct", "left_join", "right_join", "inner_join", "full_join",
-      "semi_join", "anti_join", "count", "tally", "rename_with", "union",
-      "union_all", "glimpse", "show_query", "explain"
-    )
-  )
   for (cl in c("Dataset", "ArrowTabular", "RecordBatchReader", "arrow_dplyr_query")) {
-    for (m in dplyr_methods) {
-      s3_register(m, cl)
+    for (m in names(supported_dplyr_methods)) {
+      s3_register(paste0("dplyr::", m), cl)
     }
   }
   s3_register("dplyr::tbl_vars", "arrow_dplyr_query")
diff --git a/r/R/dplyr-funcs-augmented.R b/r/R/dplyr-funcs-augmented.R
index 6e751d49f61..1067f15573b 100644
--- a/r/R/dplyr-funcs-augmented.R
+++ b/r/R/dplyr-funcs-augmented.R
@@ -15,8 +15,21 @@
 # specific language governing permissions and limitations
 # under the License.
 
+#' Add the data filename as a column
+#'
+#' This function only exists inside `arrow` `dplyr` queries, and it is only
+#' valid when querying a `FileSystemDataset`.
+#'
+#' @return A `FieldRef` `Expression` that refers to the filename augmented
+#' column.
+#' @examples
+#' \dontrun{
+#' open_dataset("nyc-taxi") %>%
+#'   mutate(file = add_filename())
+#' }
+#' @keywords internal
+add_filename <- function() Expression$field_ref("__filename")
+
 register_bindings_augmented <- function() {
-  register_binding("add_filename", function() {
-    Expression$field_ref("__filename")
-  })
+  register_binding("arrow::add_filename", add_filename)
 }
diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R
index 9a010452b84..6106adbc5e4 100644
--- a/r/R/dplyr-funcs-datetime.R
+++ b/r/R/dplyr-funcs-datetime.R
@@ -649,55 +649,54 @@ register_bindings_datetime_parsers <- function() {
 
     build_expr("assume_timezone", coalesce_output, options = list(timezone = tz))
   })
-
 }
 
 register_bindings_datetime_rounding <- function() {
   register_binding(
-    "round_date",
+    "lubridate::round_date",
     function(x,
              unit = "second",
              week_start = getOption("lubridate.week.start", 7)) {
+      opts <- parse_period_unit(unit)
+      if (opts$unit == 7L) { # weeks (unit = 7L) need to accommodate week_start
+        return(shift_temporal_to_week("round_temporal", x, week_start, options = opts))
+      }
 
-    opts <- parse_period_unit(unit)
-    if (opts$unit == 7L) { # weeks (unit = 7L) need to accommodate week_start
-      return(shift_temporal_to_week("round_temporal", x, week_start, options = opts))
+      Expression$create("round_temporal", x, options = opts)
     }
-
-    Expression$create("round_temporal", x, options = opts)
-  })
+  )
 
   register_binding(
-    "floor_date",
+    "lubridate::floor_date",
     function(x,
              unit = "second",
              week_start = getOption("lubridate.week.start", 7)) {
+      opts <- parse_period_unit(unit)
+      if (opts$unit == 7L) { # weeks (unit = 7L) need to accommodate week_start
+        return(shift_temporal_to_week("floor_temporal", x, week_start, options = opts))
+      }
 
-    opts <- parse_period_unit(unit)
-    if (opts$unit == 7L) { # weeks (unit = 7L) need to accommodate week_start
-      return(shift_temporal_to_week("floor_temporal", x, week_start, options = opts))
+      Expression$create("floor_temporal", x, options = opts)
     }
-
-    Expression$create("floor_temporal", x, options = opts)
-  })
+  )
 
   register_binding(
-    "ceiling_date",
+    "lubridate::ceiling_date",
     function(x,
              unit = "second",
              change_on_boundary = NULL,
              week_start = getOption("lubridate.week.start", 7)) {
-    opts <- parse_period_unit(unit)
-    if (is.null(change_on_boundary)) {
-      change_on_boundary <- ifelse(call_binding("is.Date", x), TRUE, FALSE)
-    }
-    opts$ceil_is_strictly_greater <- change_on_boundary
-
-    if (opts$unit == 7L) { # weeks (unit = 7L) need to accommodate week_start
-      return(shift_temporal_to_week("ceil_temporal", x, week_start, options = opts))
-    }
+      opts <- parse_period_unit(unit)
+      if (is.null(change_on_boundary)) {
+        change_on_boundary <- ifelse(call_binding("is.Date", x), TRUE, FALSE)
+      }
+      opts$ceil_is_strictly_greater <- change_on_boundary
 
-    Expression$create("ceil_temporal", x, options = opts)
-  })
+      if (opts$unit == 7L) { # weeks (unit = 7L) need to accommodate week_start
+        return(shift_temporal_to_week("ceil_temporal", x, week_start, options = opts))
+      }
 
+      Expression$create("ceil_temporal", x, options = opts)
+    }
+  )
 }
diff --git a/r/R/dplyr-funcs-doc.R b/r/R/dplyr-funcs-doc.R
new file mode 100644
index 00000000000..cac0310f49b
--- /dev/null
+++ b/r/R/dplyr-funcs-doc.R
@@ -0,0 +1,332 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Generated by using data-raw/docgen.R -> do not edit by hand
+
+#' Functions available in Arrow dplyr queries
+#'
+#' The `arrow` package contains methods for 32 `dplyr` table functions, many of
+#' which are "verbs" that do transformations to one or more tables.
+#' The package also has mappings of 205 R functions to the corresponding
+#' functions in the Arrow compute library. These allow you to write code inside
+#' of `dplyr` methods that call R functions, including many in packages like
+#' `stringr` and `lubridate`, and they will get translated to Arrow and run
+#' on the Arrow query engine (Acero). This document lists all of the mapped
+#' functions.
+#'
+#' # `dplyr` verbs
+#'
+#' Most verb functions return an `arrow_dplyr_query` object, similar in spirit
+#' to a `dbplyr::tbl_lazy`. This means that the verbs do not eagerly evaluate
+#' the query on the data. To run the query, call either `compute()`,
+#' which returns an `arrow` [Table], or `collect()`, which pulls the resulting
+#' Table into an R `data.frame`.
+#'
+#' * [`anti_join()`][dplyr::anti_join()]
+#' * [`arrange()`][dplyr::arrange()]
+#' * [`collapse()`][dplyr::collapse()]
+#' * [`collect()`][dplyr::collect()]
+#' * [`compute()`][dplyr::compute()]
+#' * [`count()`][dplyr::count()]
+#' * [`distinct()`][dplyr::distinct()]
+#' * [`explain()`][dplyr::explain()]
+#' * [`filter()`][dplyr::filter()]
+#' * [`full_join()`][dplyr::full_join()]
+#' * [`glimpse()`][dplyr::glimpse()]
+#' * [`group_by()`][dplyr::group_by()]
+#' * [`group_by_drop_default()`][dplyr::group_by_drop_default()]
+#' * [`group_vars()`][dplyr::group_vars()]
+#' * [`groups()`][dplyr::groups()]
+#' * [`inner_join()`][dplyr::inner_join()]
+#' * [`left_join()`][dplyr::left_join()]
+#' * [`mutate()`][dplyr::mutate()]
+#' * [`pull()`][dplyr::pull()]
+#' * [`relocate()`][dplyr::relocate()]
+#' * [`rename()`][dplyr::rename()]
+#' * [`rename_with()`][dplyr::rename_with()]
+#' * [`right_join()`][dplyr::right_join()]
+#' * [`select()`][dplyr::select()]
+#' * [`semi_join()`][dplyr::semi_join()]
+#' * [`show_query()`][dplyr::show_query()]
+#' * [`summarise()`][dplyr::summarise()]
+#' * [`tally()`][dplyr::tally()]
+#' * [`transmute()`][dplyr::transmute()]
+#' * [`ungroup()`][dplyr::ungroup()]
+#' * [`union()`][dplyr::union()]
+#' * [`union_all()`][dplyr::union_all()]
+#'
+#' # Function mappings
+#'
+#' In the list below, any differences in behavior or support between Acero and
+#' the R function are listed. If no notes follow the function name, then you
+#' can assume that the function works in Acero just as it does in R.
+#'
+#' Functions can be called either as `pkg::fun()` or just `fun()`, i.e. both
+#' `str_sub()` and `stringr::str_sub()` work.
+#'
+#' In addition to these functions, you can call any of Arrow's 243 compute
+#' functions directly. Arrow has many functions that don't map to an existing R
+#' function. In other cases where there is an R function mapping, you can still
+#' call the Arrow function directly if you don't want the adaptations that the R
+#' mapping has that make Acero behave like R. These functions are listed in the
+#' [C++ documentation](https://arrow.apache.org/docs/cpp/compute.html), and
+#' in the function registry in R, they are named with an `arrow_` prefix, such
+#' as `arrow_ascii_is_decimal`.
+#'
+#' ## arrow
+#' 
+#' * [`add_filename()`][arrow::add_filename()]
+#' * [`cast()`][arrow::cast()]
+#'
+#' ## base
+#' 
+#' * [`-`][-()]
+#' * [`!`][!()]
+#' * [`!=`][!=()]
+#' * [`*`][*()]
+#' * [`/`][/()]
+#' * [`&`][&()]
+#' * [`%/%`][%/%()]
+#' * [`%%`][%%()]
+#' * [`%in%`][%in%()]
+#' * [`^`][^()]
+#' * [`+`][+()]
+#' * [`<`][<()]
+#' * [`<=`][<=()]
+#' * [`==`][==()]
+#' * [`>`][>()]
+#' * [`>=`][>=()]
+#' * [`|`][|()]
+#' * [`abs()`][base::abs()]
+#' * [`acos()`][base::acos()]
+#' * [`all()`][base::all()]
+#' * [`any()`][base::any()]
+#' * [`as.character()`][base::as.character()]
+#' * [`as.Date()`][base::as.Date()]
+#' * [`as.difftime()`][base::as.difftime()]
+#' * [`as.double()`][base::as.double()]
+#' * [`as.integer()`][base::as.integer()]
+#' * [`as.logical()`][base::as.logical()]
+#' * [`as.numeric()`][base::as.numeric()]
+#' * [`asin()`][base::asin()]
+#' * [`ceiling()`][base::ceiling()]
+#' * [`cos()`][base::cos()]
+#' * [`data.frame()`][base::data.frame()]
+#' * [`difftime()`][base::difftime()]
+#' * [`endsWith()`][base::endsWith()]
+#' * [`exp()`][base::exp()]
+#' * [`floor()`][base::floor()]
+#' * [`format()`][base::format()]
+#' * [`grepl()`][base::grepl()]
+#' * [`gsub()`][base::gsub()]
+#' * [`ifelse()`][base::ifelse()]
+#' * [`is.character()`][base::is.character()]
+#' * [`is.double()`][base::is.double()]
+#' * [`is.factor()`][base::is.factor()]
+#' * [`is.finite()`][base::is.finite()]
+#' * [`is.infinite()`][base::is.infinite()]
+#' * [`is.integer()`][base::is.integer()]
+#' * [`is.list()`][base::is.list()]
+#' * [`is.logical()`][base::is.logical()]
+#' * [`is.na()`][base::is.na()]
+#' * [`is.nan()`][base::is.nan()]
+#' * [`is.numeric()`][base::is.numeric()]
+#' * [`ISOdate()`][base::ISOdate()]
+#' * [`ISOdatetime()`][base::ISOdatetime()]
+#' * [`log()`][base::log()]
+#' * [`log10()`][base::log10()]
+#' * [`log1p()`][base::log1p()]
+#' * [`log2()`][base::log2()]
+#' * [`logb()`][base::logb()]
+#' * [`max()`][base::max()]
+#' * [`mean()`][base::mean()]
+#' * [`min()`][base::min()]
+#' * [`nchar()`][base::nchar()]
+#' * [`paste()`][base::paste()]: the `collapse` argument is not yet supported
+#' * [`paste0()`][base::paste0()]: the `collapse` argument is not yet supported
+#' * [`pmax()`][base::pmax()]
+#' * [`pmin()`][base::pmin()]
+#' * [`round()`][base::round()]
+#' * [`sign()`][base::sign()]
+#' * [`sin()`][base::sin()]
+#' * [`sqrt()`][base::sqrt()]
+#' * [`startsWith()`][base::startsWith()]
+#' * [`strftime()`][base::strftime()]
+#' * [`strptime()`][base::strptime()]
+#' * [`strrep()`][base::strrep()]
+#' * [`strsplit()`][base::strsplit()]
+#' * [`sub()`][base::sub()]
+#' * [`substr()`][base::substr()]
+#' * [`substring()`][base::substring()]
+#' * [`sum()`][base::sum()]
+#' * [`tan()`][base::tan()]
+#' * [`tolower()`][base::tolower()]
+#' * [`toupper()`][base::toupper()]
+#' * [`trunc()`][base::trunc()]
+#'
+#' ## bit64
+#' 
+#' * [`as.integer64()`][bit64::as.integer64()]
+#' * [`is.integer64()`][bit64::is.integer64()]
+#'
+#' ## dplyr
+#' 
+#' * [`across()`][dplyr::across()]: only supported inside `mutate()`, `summarize()`, and `arrange()`; purrr-style lambda functions and use of `where()` selection helper not yet supported
+#' * [`between()`][dplyr::between()]
+#' * [`case_when()`][dplyr::case_when()]
+#' * [`coalesce()`][dplyr::coalesce()]
+#' * [`desc()`][dplyr::desc()]
+#' * [`if_else()`][dplyr::if_else()]
+#' * [`n()`][dplyr::n()]
+#' * [`n_distinct()`][dplyr::n_distinct()]
+#'
+#' ## lubridate
+#' 
+#' * [`am()`][lubridate::am()]
+#' * [`as_date()`][lubridate::as_date()]
+#' * [`as_datetime()`][lubridate::as_datetime()]
+#' * [`ceiling_date()`][lubridate::ceiling_date()]
+#' * [`date()`][lubridate::date()]
+#' * [`date_decimal()`][lubridate::date_decimal()]
+#' * [`day()`][lubridate::day()]
+#' * [`ddays()`][lubridate::ddays()]
+#' * [`decimal_date()`][lubridate::decimal_date()]
+#' * [`dhours()`][lubridate::dhours()]
+#' * [`dmicroseconds()`][lubridate::dmicroseconds()]
+#' * [`dmilliseconds()`][lubridate::dmilliseconds()]
+#' * [`dminutes()`][lubridate::dminutes()]
+#' * [`dmonths()`][lubridate::dmonths()]
+#' * [`dmy()`][lubridate::dmy()]
+#' * [`dmy_h()`][lubridate::dmy_h()]
+#' * [`dmy_hm()`][lubridate::dmy_hm()]
+#' * [`dmy_hms()`][lubridate::dmy_hms()]
+#' * [`dnanoseconds()`][lubridate::dnanoseconds()]
+#' * [`dpicoseconds()`][lubridate::dpicoseconds()]
+#' * [`dseconds()`][lubridate::dseconds()]
+#' * [`dst()`][lubridate::dst()]
+#' * [`dweeks()`][lubridate::dweeks()]
+#' * [`dyears()`][lubridate::dyears()]
+#' * [`dym()`][lubridate::dym()]
+#' * [`epiweek()`][lubridate::epiweek()]
+#' * [`epiyear()`][lubridate::epiyear()]
+#' * [`fast_strptime()`][lubridate::fast_strptime()]
+#' * [`floor_date()`][lubridate::floor_date()]
+#' * [`format_ISO8601()`][lubridate::format_ISO8601()]
+#' * [`hour()`][lubridate::hour()]
+#' * [`is.Date()`][lubridate::is.Date()]
+#' * [`is.instant()`][lubridate::is.instant()]
+#' * [`is.POSIXct()`][lubridate::is.POSIXct()]
+#' * [`is.timepoint()`][lubridate::is.timepoint()]
+#' * [`isoweek()`][lubridate::isoweek()]
+#' * [`isoyear()`][lubridate::isoyear()]
+#' * [`leap_year()`][lubridate::leap_year()]
+#' * [`make_date()`][lubridate::make_date()]
+#' * [`make_datetime()`][lubridate::make_datetime()]
+#' * [`make_difftime()`][lubridate::make_difftime()]
+#' * [`mday()`][lubridate::mday()]
+#' * [`mdy()`][lubridate::mdy()]
+#' * [`mdy_h()`][lubridate::mdy_h()]
+#' * [`mdy_hm()`][lubridate::mdy_hm()]
+#' * [`mdy_hms()`][lubridate::mdy_hms()]
+#' * [`minute()`][lubridate::minute()]
+#' * [`month()`][lubridate::month()]
+#' * [`my()`][lubridate::my()]
+#' * [`myd()`][lubridate::myd()]
+#' * [`parse_date_time()`][lubridate::parse_date_time()]
+#' * [`pm()`][lubridate::pm()]
+#' * [`qday()`][lubridate::qday()]
+#' * [`quarter()`][lubridate::quarter()]
+#' * [`round_date()`][lubridate::round_date()]
+#' * [`second()`][lubridate::second()]
+#' * [`semester()`][lubridate::semester()]
+#' * [`tz()`][lubridate::tz()]
+#' * [`wday()`][lubridate::wday()]
+#' * [`week()`][lubridate::week()]
+#' * [`yday()`][lubridate::yday()]
+#' * [`ydm()`][lubridate::ydm()]
+#' * [`ydm_h()`][lubridate::ydm_h()]
+#' * [`ydm_hm()`][lubridate::ydm_hm()]
+#' * [`ydm_hms()`][lubridate::ydm_hms()]
+#' * [`year()`][lubridate::year()]
+#' * [`ym()`][lubridate::ym()]
+#' * [`ymd()`][lubridate::ymd()]
+#' * [`ymd_h()`][lubridate::ymd_h()]
+#' * [`ymd_hm()`][lubridate::ymd_hm()]
+#' * [`ymd_hms()`][lubridate::ymd_hms()]
+#' * [`yq()`][lubridate::yq()]
+#'
+#' ## methods
+#' 
+#' * [`is()`][methods::is()]
+#'
+#' ## rlang
+#' 
+#' * [`is_character()`][rlang::is_character()]
+#' * [`is_double()`][rlang::is_double()]
+#' * [`is_integer()`][rlang::is_integer()]
+#' * [`is_list()`][rlang::is_list()]
+#' * [`is_logical()`][rlang::is_logical()]
+#'
+#' ## stats
+#' 
+#' * [`median()`][stats::median()]
+#' * [`quantile()`][stats::quantile()]
+#' * [`sd()`][stats::sd()]
+#' * [`var()`][stats::var()]
+#'
+#' ## stringi
+#' 
+#' * [`stri_reverse()`][stringi::stri_reverse()]
+#'
+#' ## stringr
+#' 
+#' * [`str_c()`][stringr::str_c()]: the `collapse` argument is not yet supported
+#' * [`str_count()`][stringr::str_count()]
+#' * [`str_detect()`][stringr::str_detect()]
+#' * [`str_dup()`][stringr::str_dup()]
+#' * [`str_ends()`][stringr::str_ends()]
+#' * [`str_length()`][stringr::str_length()]
+#' * `str_like()`: not yet in a released version of `stringr`, but it is supported in `arrow`
+#' * [`str_pad()`][stringr::str_pad()]
+#' * [`str_replace()`][stringr::str_replace()]
+#' * [`str_replace_all()`][stringr::str_replace_all()]
+#' * [`str_split()`][stringr::str_split()]
+#' * [`str_starts()`][stringr::str_starts()]
+#' * [`str_sub()`][stringr::str_sub()]
+#' * [`str_to_lower()`][stringr::str_to_lower()]
+#' * [`str_to_title()`][stringr::str_to_title()]
+#' * [`str_to_upper()`][stringr::str_to_upper()]
+#' * [`str_trim()`][stringr::str_trim()]
+#'
+#' ## tibble
+#' 
+#' * [`tibble()`][tibble::tibble()]
+#'
+#' ## tidyselect
+#' 
+#' * [`all_of()`][tidyselect::all_of()]
+#' * [`contains()`][tidyselect::contains()]
+#' * [`ends_with()`][tidyselect::ends_with()]
+#' * [`everything()`][tidyselect::everything()]
+#' * [`last_col()`][tidyselect::last_col()]
+#' * [`matches()`][tidyselect::matches()]
+#' * [`num_range()`][tidyselect::num_range()]
+#' * [`one_of()`][tidyselect::one_of()]
+#' * [`starts_with()`][tidyselect::starts_with()]
+#'
+#' @name acero
+NULL
diff --git a/r/R/dplyr-funcs-string.R b/r/R/dplyr-funcs-string.R
index b300d7c439e..eb2326ed056 100644
--- a/r/R/dplyr-funcs-string.R
+++ b/r/R/dplyr-funcs-string.R
@@ -161,32 +161,44 @@ register_bindings_string_join <- function() {
     }
   }
 
-  register_binding("base::paste", function(..., sep = " ", collapse = NULL, recycle0 = FALSE) {
-    assert_that(
-      is.null(collapse),
-      msg = "paste() with the collapse argument is not yet supported in Arrow"
-    )
-    if (!inherits(sep, "Expression")) {
-      assert_that(!is.na(sep), msg = "Invalid separator")
-    }
-    arrow_string_join_function(NullHandlingBehavior$REPLACE, "NA")(..., sep)
-  })
-
-  register_binding("base::paste0", function(..., collapse = NULL, recycle0 = FALSE) {
-    assert_that(
-      is.null(collapse),
-      msg = "paste0() with the collapse argument is not yet supported in Arrow"
-    )
-    arrow_string_join_function(NullHandlingBehavior$REPLACE, "NA")(..., "")
-  })
-
-  register_binding("stringr::str_c", function(..., sep = "", collapse = NULL) {
-    assert_that(
-      is.null(collapse),
-      msg = "str_c() with the collapse argument is not yet supported in Arrow"
-    )
-    arrow_string_join_function(NullHandlingBehavior$EMIT_NULL)(..., sep)
-  })
+  register_binding(
+    "base::paste",
+    function(..., sep = " ", collapse = NULL, recycle0 = FALSE) {
+      assert_that(
+        is.null(collapse),
+        msg = "paste() with the collapse argument is not yet supported in Arrow"
+      )
+      if (!inherits(sep, "Expression")) {
+        assert_that(!is.na(sep), msg = "Invalid separator")
+      }
+      arrow_string_join_function(NullHandlingBehavior$REPLACE, "NA")(..., sep)
+    },
+    notes = "the `collapse` argument is not yet supported"
+  )
+
+  register_binding(
+    "base::paste0",
+    function(..., collapse = NULL, recycle0 = FALSE) {
+      assert_that(
+        is.null(collapse),
+        msg = "paste0() with the collapse argument is not yet supported in Arrow"
+      )
+      arrow_string_join_function(NullHandlingBehavior$REPLACE, "NA")(..., "")
+    },
+    notes = "the `collapse` argument is not yet supported"
+  )
+
+  register_binding(
+    "stringr::str_c",
+    function(..., sep = "", collapse = NULL) {
+      assert_that(
+        is.null(collapse),
+        msg = "str_c() with the collapse argument is not yet supported in Arrow"
+      )
+      arrow_string_join_function(NullHandlingBehavior$EMIT_NULL)(..., sep)
+    },
+    notes = "the `collapse` argument is not yet supported"
+  )
 }
 
 register_bindings_string_regex <- function() {
@@ -227,15 +239,17 @@ register_bindings_string_regex <- function() {
     out
   })
 
-  register_binding("stringr::str_like", function(string,
-                                                 pattern,
-                                                 ignore_case = TRUE) {
-    Expression$create(
-      "match_like",
-      string,
-      options = list(pattern = pattern, ignore_case = ignore_case)
-    )
-  })
+  register_binding(
+    "stringr::str_like",
+    function(string, pattern, ignore_case = TRUE) {
+      Expression$create(
+        "match_like",
+        string,
+        options = list(pattern = pattern, ignore_case = ignore_case)
+      )
+    },
+    notes = "not yet in a released version of `stringr`, but it is supported in `arrow`"
+  )
 
   register_binding("stringr::str_count", function(string, pattern) {
     opts <- get_stringr_pattern_options(enexpr(pattern))
@@ -337,7 +351,7 @@ register_bindings_string_regex <- function() {
   register_binding("stringr::str_replace_all", arrow_stringr_string_replace_function(-1L))
 
   register_binding("base::strsplit", function(x, split, fixed = FALSE, perl = FALSE,
-                                        useBytes = FALSE) {
+                                              useBytes = FALSE) {
     assert_that(is.string(split))
 
     arrow_fun <- ifelse(fixed, "split_pattern", "split_pattern_regex")
diff --git a/r/R/dplyr-funcs-type.R b/r/R/dplyr-funcs-type.R
index 9925d0347f7..aa50cdebc5d 100644
--- a/r/R/dplyr-funcs-type.R
+++ b/r/R/dplyr-funcs-type.R
@@ -23,23 +23,34 @@ register_bindings_type <- function() {
   register_bindings_type_format()
 }
 
-register_bindings_type_cast <- function() {
-  register_binding("cast", function(x, target_type, safe = TRUE, ...) {
-    opts <- cast_options(safe, ...)
-    opts$to_type <- as_type(target_type)
-    Expression$create("cast", x, options = opts)
-  })
+#' Change the type of an array or column
+#'
+#' This is a wrapper around the `$cast()` method that many Arrow objects have.
+#' It is more convenient to call inside `dplyr` pipelines than the method.
+#'
+#' @param x an `Array`, `Table`, `Expression`, or similar Arrow data object.
+#' @param to [DataType] to cast to; for [Table] and [RecordBatch],
+#' it should be a [Schema].
+#' @param safe logical: only allow the type conversion if no data is lost
+#' (truncation, overflow, etc.). Default is `TRUE`
+#' @param ... specific `CastOptions` to set
+#' @return an `Expression` when `x` is one; otherwise the result of `x$cast()`
+#'
+#' @examples
+#' \dontrun{
+#' mtcars %>%
+#'   arrow_table() %>%
+#'   mutate(cyl = cast(cyl, string()))
+#' }
+#' @keywords internal
+#' @seealso <https://arrow.apache.org/docs/cpp/api/compute.html> for the list of
+#' supported CastOptions.
+cast <- function(x, to, safe = TRUE, ...) {
+  x$cast(to, safe = safe, ...)
+}
 
-  register_binding("dictionary_encode", function(x,
-                                                 null_encoding_behavior = c("mask", "encode")) {
-    behavior <- toupper(match.arg(null_encoding_behavior))
-    null_encoding_behavior <- NullEncodingBehavior[[behavior]]
-    Expression$create(
-      "dictionary_encode",
-      x,
-      options = list(null_encoding_behavior = null_encoding_behavior)
-    )
-  })
+register_bindings_type_cast <- function() {
+  register_binding("arrow::cast", cast)
 
   # as.* type casting functions
   # as.factor() is mapped in expression.R
diff --git a/r/R/dplyr-funcs.R b/r/R/dplyr-funcs.R
index 4dadff54b48..a66db112d98 100644
--- a/r/R/dplyr-funcs.R
+++ b/r/R/dplyr-funcs.R
@@ -59,13 +59,17 @@ NULL
 #'   summarise) because the data mask has to be a list.
 #' @param registry An environment in which the functions should be
 #'   assigned.
-#'
+#' @param notes string for the docs: note any limitations or differences in
+#'   behavior between the Arrow version and the R function.
 #' @return The previously registered binding or `NULL` if no previously
 #'   registered function existed.
 #' @keywords internal
 #'
-register_binding <- function(fun_name, fun, registry = nse_funcs,
-                             update_cache = FALSE) {
+register_binding <- function(fun_name,
+                             fun,
+                             registry = nse_funcs,
+                             update_cache = FALSE,
+                             notes = character(0)) {
   unqualified_name <- sub("^.*?:{+}", "", fun_name)
 
   previous_fun <- registry[[unqualified_name]]
@@ -76,7 +80,8 @@ register_binding <- function(fun_name, fun, registry = nse_funcs,
       paste0(
         "A \"",
         unqualified_name,
-        "\" binding already exists in the registry and will be overwritten.")
+        "\" binding already exists in the registry and will be overwritten."
+      )
     )
   }
 
@@ -85,6 +90,8 @@ register_binding <- function(fun_name, fun, registry = nse_funcs,
   registry[[unqualified_name]] <- fun
   registry[[fun_name]] <- fun
 
+  .cache$docs[[fun_name]] <- notes
+
   if (update_cache) {
     fun_cache <- .cache$functions
     fun_cache[[unqualified_name]] <- fun
@@ -131,7 +138,7 @@ call_binding_agg <- function(fun_name, ...) {
 
 # Called in .onLoad()
 create_binding_cache <- function() {
-  arrow_funcs <- list()
+  .cache$docs <- list()
 
   # Register all available Arrow Compute functions, namespaced as arrow_fun.
   all_arrow_funs <- list_compute_functions()
diff --git a/r/R/expression.R b/r/R/expression.R
index 09a8ea24608..7a5a600d956 100644
--- a/r/R/expression.R
+++ b/r/R/expression.R
@@ -76,7 +76,6 @@
   "lubridate::yday" = "day_of_year",
   "lubridate::year" = "year",
   "lubridate::leap_year" = "is_leap_year"
-
 )
 
 .binary_function_map <- list(
@@ -158,13 +157,9 @@ Expression <- R6Class("Expression",
       compute___expr__type_id(self, schema)
     },
     cast = function(to_type, safe = TRUE, ...) {
-      opts <- list(
-        to_type = to_type,
-        allow_int_overflow = !safe,
-        allow_time_truncate = !safe,
-        allow_float_truncate = !safe
-      )
-      Expression$create("cast", self, options = modifyList(opts, list(...)))
+      opts <- cast_options(safe, ...)
+      opts$to_type <- as_type(to_type)
+      Expression$create("cast", self, options = opts)
     }
   ),
   active = list(
diff --git a/r/_pkgdown.yml b/r/_pkgdown.yml
index dfb0998ddff..70bd7ac518c 100644
--- a/r/_pkgdown.yml
+++ b/r/_pkgdown.yml
@@ -216,6 +216,7 @@ reference:
       - codec_is_available
   - title: Computation
     contents:
+      - acero
       - call_function
       - match_arrow
       - value_counts
diff --git a/r/data-raw/docgen.R b/r/data-raw/docgen.R
new file mode 100644
index 00000000000..ef39bec272f
--- /dev/null
+++ b/r/data-raw/docgen.R
@@ -0,0 +1,192 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# This script generates R/dplyr-funcs-doc.R. It requires that the arrow package
+# be installed, and it must be run from the package root because it uses R/ paths.
+
+file_template <- "# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# \"License\"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Generated by using data-raw/docgen.R -> do not edit by hand
+
+#' Functions available in Arrow dplyr queries
+#'
+#' The `arrow` package contains methods for %s `dplyr` table functions, many of
+#' which are \"verbs\" that do transformations to one or more tables.
+#' The package also has mappings of %s R functions to the corresponding
+#' functions in the Arrow compute library. These allow you to write code inside
+#' of `dplyr` methods that call R functions, including many in packages like
+#' `stringr` and `lubridate`, and they will get translated to Arrow and run
+#' on the Arrow query engine (Acero). This document lists all of the mapped
+#' functions.
+#'
+#' # `dplyr` verbs
+#'
+#' Most verb functions return an `arrow_dplyr_query` object, similar in spirit
+#' to a `dbplyr::tbl_lazy`. This means that the verbs do not eagerly evaluate
+#' the query on the data. To run the query, call either `compute()`,
+#' which returns an `arrow` [Table], or `collect()`, which pulls the resulting
+#' Table into an R `data.frame`.
+#'
+%s
+#'
+#' # Function mappings
+#'
+#' In the list below, any differences in behavior or support between Acero and
+#' the R function are listed. If no notes follow the function name, then you
+#' can assume that the function works in Acero just as it does in R.
+#'
+#' Functions can be called either as `pkg::fun()` or just `fun()`, i.e. both
+#' `str_sub()` and `stringr::str_sub()` work.
+#'
+#' In addition to these functions, you can call any of Arrow's %s compute
+#' functions directly. Arrow has many functions that don't map to an existing R
+#' function. In other cases where there is an R function mapping, you can still
+#' call the Arrow function directly if you don't want the adaptations that the R
+#' mapping has that make Acero behave like R. These functions are listed in the
+#' [C++ documentation](https://arrow.apache.org/docs/cpp/compute.html), and
+#' in the function registry in R, they are named with an `arrow_` prefix, such
+#' as `arrow_ascii_is_decimal`.
+#'
+%s
+#'
+#' @name acero
+NULL"
+
+library(dplyr)
+library(purrr)
+
+# Functions that cause xref problems when hyperlinked; render_fun() shows them
+# as plain code instead
+# (stringr::str_like is still only in the unreleased version)
+do_not_link <- c("stringr::str_like")
+
+# Build one markdown bullet per function; all arguments are parallel vectors
+render_fun <- function(fun, pkg_fun, notes) {
+  # Names starting with a letter are ordinary functions and get "()" appended;
+  # anything else is an operator and is left bare
+  is_named <- grepl("^[[:alpha:]]", fun)
+  fun[is_named] <- paste0(fun[is_named], "()")
+  # Backticks so the label renders as \code{}
+  label <- paste0("`", fun, "`")
+  # Link to the R help topic unless the function is listed in do_not_link
+  skip_link <- pkg_fun %in% do_not_link
+  linked <- paste0("[", label, "][", pkg_fun, "()]")
+  entry <- ifelse(skip_link, label, linked)
+  # Append notes after ": " wherever they are non-empty
+  annotated <- nzchar(notes)
+  entry[annotated] <- paste0(entry[annotated], ": ", notes[annotated])
+  # Turn every entry into a markdown bullet
+  bullet <- paste("*", entry)
+  bullet
+}
+
+# Render one package's section: a "## pkg" heading followed by its bullets,
+# every line prefixed with the roxygen comment marker
+render_pkg <- function(df, pkg) {
+  # One rendered bullet per row of df
+  one_col <- transmute(df, render_fun(fun, pkg_fun, notes))
+  rendered <- pull(one_col)
+  # Heading, a blank line, then the bullets
+  heading <- paste("##", pkg)
+  section <- c(heading, "", rendered)
+  # Prefix each line and join them into a single string
+  roxy_lines <- paste("#'", section)
+  paste(roxy_lines, collapse = "\n")
+}
+
+# Notes for each mapped function, taken from arrow's internal cache
+docs <- arrow:::.cache$docs
+
+# Hand-written entries are added below
+# across() is handled by manipulating the quosures, not by nse_funcs
+across_notes <- c(
+  # TODO(ARROW-17387, ARROW-17389, ARROW-17390)
+  "only supported inside `mutate()`, `summarize()`, and `arrange()`;",
+  # TODO(ARROW-17366)
+  "purrr-style lambda functions",
+  "and use of `where()` selection helper not yet supported"
+)
+docs[["dplyr::across"]] <- across_notes
+# desc() is a special helper handled inside of arrange(); it has no notes
+docs[["dplyr::desc"]] <- character(0)
+# tidyselect helpers are found by parsing the reexports file; no notes either
+reexports <- readLines("R/reexports-tidyselect.R")
+tidyselect <- grep("^tidyselect::", reexports, value = TRUE)
+docs <- c(docs, setNames(vector("list", length(tidyselect)), tidyselect))
+
+# One row per mapped function: split "pkg::fun" keys into package and function
+fun_df <- tibble::tibble(pkg_fun = names(docs), notes = docs) %>%
+  mutate(
+    has_pkg = grepl("::", pkg_fun),
+    # Strip everything up to and including the "::" separator
+    fun = sub("^[^:]*::", "", pkg_fun),
+    # Strip the "::" separator and everything after it
+    pkg = sub("::.*$", "", pkg_fun),
+    # We will list operators under "base" (everything else must be pkg::fun)
+    pkg = if_else(has_pkg, pkg, "base"),
+    # Flatten notes to a single string
+    notes = map_chr(notes, ~ paste(., collapse = " "))
+  ) %>%
+  arrange(pkg, fun)
+
+# Group by package name and render the lists
+fun_doclets <- imap_chr(split(fun_df, fun_df$pkg), render_pkg)
+
+# tbl_vars only has a method for arrow_dplyr_query, so it is not in the main
+# list and is appended here. It has to be wrapped in list(): c() silently
+# drops a bare NULL argument, which would leave tbl_vars out of the generated
+# docs.
+dplyr_verbs <- c(arrow:::supported_dplyr_methods, list(tbl_vars = NULL))
+
+# One row per dplyr verb, named with its dplyr:: prefix so it can be linked;
+# notes are flattened to a single string
+verb_df <- tibble::tibble(fun = names(dplyr_verbs), notes = dplyr_verbs)
+verb_df <- mutate(
+  verb_df,
+  pkg_fun = paste0("dplyr::", fun),
+  notes = map_chr(notes, ~ paste(., collapse = " "))
+)
+# Sort by verb name, then render one bullet per verb
+verb_df <- arrange(verb_df, fun)
+verb_bullets <- pull(transmute(verb_df, render_fun(fun, pkg_fun, notes)))
+
+# Fill the template's five %s placeholders, in order, then write the result
+# to the generated R source file
+generated <- sprintf(
+  file_template,
+  length(dplyr_verbs),
+  length(docs),
+  paste("#'", verb_bullets, collapse = "\n"),
+  length(arrow::list_compute_functions()),
+  paste(fun_doclets, collapse = "\n#'\n")
+)
+writeLines(generated, "R/dplyr-funcs-doc.R")
diff --git a/r/man/acero.Rd b/r/man/acero.Rd
new file mode 100644
index 00000000000..5b5920f386e
--- /dev/null
+++ b/r/man/acero.Rd
@@ -0,0 +1,339 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/dplyr-funcs-doc.R
+\name{acero}
+\alias{acero}
+\title{Functions available in Arrow dplyr queries}
+\description{
+The \code{arrow} package contains methods for 32 \code{dplyr} table functions, many of
+which are "verbs" that do transformations to one or more tables.
+The package also has mappings of 205 R functions to the corresponding
+functions in the Arrow compute library. These allow you to write code inside
+of \code{dplyr} methods that call R functions, including many in packages like
+\code{stringr} and \code{lubridate}, and they will get translated to Arrow and run
+on the Arrow query engine (Acero). This document lists all of the mapped
+functions.
+}
+\section{\code{dplyr} verbs}{
+Most verb functions return an \code{arrow_dplyr_query} object, similar in spirit
+to a \code{dbplyr::tbl_lazy}. This means that the verbs do not eagerly evaluate
+the query on the data. To run the query, call either \code{compute()},
+which returns an \code{arrow} \link{Table}, or \code{collect()}, which pulls the resulting
+Table into an R \code{data.frame}.
+\itemize{
+\item \code{\link[dplyr:filter-joins]{anti_join()}}
+\item \code{\link[dplyr:arrange]{arrange()}}
+\item \code{\link[dplyr:compute]{collapse()}}
+\item \code{\link[dplyr:compute]{collect()}}
+\item \code{\link[dplyr:compute]{compute()}}
+\item \code{\link[dplyr:count]{count()}}
+\item \code{\link[dplyr:distinct]{distinct()}}
+\item \code{\link[dplyr:explain]{explain()}}
+\item \code{\link[dplyr:filter]{filter()}}
+\item \code{\link[dplyr:mutate-joins]{full_join()}}
+\item \code{\link[dplyr:glimpse]{glimpse()}}
+\item \code{\link[dplyr:group_by]{group_by()}}
+\item \code{\link[dplyr:group_by_drop_default]{group_by_drop_default()}}
+\item \code{\link[dplyr:group_data]{group_vars()}}
+\item \code{\link[dplyr:group_data]{groups()}}
+\item \code{\link[dplyr:mutate-joins]{inner_join()}}
+\item \code{\link[dplyr:mutate-joins]{left_join()}}
+\item \code{\link[dplyr:mutate]{mutate()}}
+\item \code{\link[dplyr:pull]{pull()}}
+\item \code{\link[dplyr:relocate]{relocate()}}
+\item \code{\link[dplyr:rename]{rename()}}
+\item \code{\link[dplyr:rename]{rename_with()}}
+\item \code{\link[dplyr:mutate-joins]{right_join()}}
+\item \code{\link[dplyr:select]{select()}}
+\item \code{\link[dplyr:filter-joins]{semi_join()}}
+\item \code{\link[dplyr:explain]{show_query()}}
+\item \code{\link[dplyr:summarise]{summarise()}}
+\item \code{\link[dplyr:count]{tally()}}
+\item \code{\link[dplyr:mutate]{transmute()}}
+\item \code{\link[dplyr:group_by]{ungroup()}}
+\item \code{\link[dplyr:reexports]{union()}}
+\item \code{\link[dplyr:setops]{union_all()}}
+}
+}
+
+\section{Function mappings}{
+In the list below, any differences in behavior or support between Acero and
+the R function are listed. If no notes follow the function name, then you
+can assume that the function works in Acero just as it does in R.
+
+Functions can be called either as \code{pkg::fun()} or just \code{fun()}, i.e. both
+\code{str_sub()} and \code{stringr::str_sub()} work.
+
+In addition to these functions, you can call any of Arrow's 243 compute
+functions directly. Arrow has many functions that don't map to an existing R
+function. In other cases where there is an R function mapping, you can still
+call the Arrow function directly if you don't want the adaptations that the R
+mapping has that make Acero behave like R. These functions are listed in the
+\href{https://arrow.apache.org/docs/cpp/compute.html}{C++ documentation}, and
+in the function registry in R, they are named with an \code{arrow_} prefix, such
+as \code{arrow_ascii_is_decimal}.
+\subsection{arrow}{
+\itemize{
+\item \code{\link[=add_filename]{add_filename()}}
+\item \code{\link[=cast]{cast()}}
+}
+}
+
+\subsection{base}{
+\itemize{
+\item \code{\link[=-]{-}}
+\item \code{\link[=!]{!}}
+\item \code{\link[=!=]{!=}}
+\item \code{\link[=*]{*}}
+\item \code{\link[=/]{/}}
+\item \code{\link[=&]{&}}
+\item \code{\link[=\%/\%]{\%/\%}}
+\item \code{\link[=\%\%]{\%\%}}
+\item \code{\link[=\%in\%]{\%in\%}}
+\item \code{\link[=^]{^}}
+\item \code{\link[=+]{+}}
+\item \code{\link[=<]{<}}
+\item \code{\link[=<=]{<=}}
+\item \code{\link[===]{==}}
+\item \code{\link[=>]{>}}
+\item \code{\link[=>=]{>=}}
+\item \code{\link[=|]{|}}
+\item \code{\link[base:MathFun]{abs()}}
+\item \code{\link[base:Trig]{acos()}}
+\item \code{\link[base:all]{all()}}
+\item \code{\link[base:any]{any()}}
+\item \code{\link[base:character]{as.character()}}
+\item \code{\link[base:as.Date]{as.Date()}}
+\item \code{\link[base:difftime]{as.difftime()}}
+\item \code{\link[base:double]{as.double()}}
+\item \code{\link[base:integer]{as.integer()}}
+\item \code{\link[base:logical]{as.logical()}}
+\item \code{\link[base:numeric]{as.numeric()}}
+\item \code{\link[base:Trig]{asin()}}
+\item \code{\link[base:Round]{ceiling()}}
+\item \code{\link[base:Trig]{cos()}}
+\item \code{\link[base:data.frame]{data.frame()}}
+\item \code{\link[base:difftime]{difftime()}}
+\item \code{\link[base:startsWith]{endsWith()}}
+\item \code{\link[base:Log]{exp()}}
+\item \code{\link[base:Round]{floor()}}
+\item \code{\link[base:format]{format()}}
+\item \code{\link[base:grep]{grepl()}}
+\item \code{\link[base:grep]{gsub()}}
+\item \code{\link[base:ifelse]{ifelse()}}
+\item \code{\link[base:character]{is.character()}}
+\item \code{\link[base:double]{is.double()}}
+\item \code{\link[base:factor]{is.factor()}}
+\item \code{\link[base:is.finite]{is.finite()}}
+\item \code{\link[base:is.finite]{is.infinite()}}
+\item \code{\link[base:integer]{is.integer()}}
+\item \code{\link[base:list]{is.list()}}
+\item \code{\link[base:logical]{is.logical()}}
+\item \code{\link[base:NA]{is.na()}}
+\item \code{\link[base:is.finite]{is.nan()}}
+\item \code{\link[base:numeric]{is.numeric()}}
+\item \code{\link[base:ISOdatetime]{ISOdate()}}
+\item \code{\link[base:ISOdatetime]{ISOdatetime()}}
+\item \code{\link[base:Log]{log()}}
+\item \code{\link[base:Log]{log10()}}
+\item \code{\link[base:Log]{log1p()}}
+\item \code{\link[base:Log]{log2()}}
+\item \code{\link[base:Log]{logb()}}
+\item \code{\link[base:Extremes]{max()}}
+\item \code{\link[base:mean]{mean()}}
+\item \code{\link[base:Extremes]{min()}}
+\item \code{\link[base:nchar]{nchar()}}
+\item \code{\link[base:paste]{paste()}}: the \code{collapse} argument is not yet supported
+\item \code{\link[base:paste]{paste0()}}: the \code{collapse} argument is not yet supported
+\item \code{\link[base:Extremes]{pmax()}}
+\item \code{\link[base:Extremes]{pmin()}}
+\item \code{\link[base:Round]{round()}}
+\item \code{\link[base:sign]{sign()}}
+\item \code{\link[base:Trig]{sin()}}
+\item \code{\link[base:MathFun]{sqrt()}}
+\item \code{\link[base:startsWith]{startsWith()}}
+\item \code{\link[base:strptime]{strftime()}}
+\item \code{\link[base:strptime]{strptime()}}
+\item \code{\link[base:strrep]{strrep()}}
+\item \code{\link[base:strsplit]{strsplit()}}
+\item \code{\link[base:grep]{sub()}}
+\item \code{\link[base:substr]{substr()}}
+\item \code{\link[base:substr]{substring()}}
+\item \code{\link[base:sum]{sum()}}
+\item \code{\link[base:Trig]{tan()}}
+\item \code{\link[base:chartr]{tolower()}}
+\item \code{\link[base:chartr]{toupper()}}
+\item \code{\link[base:Round]{trunc()}}
+}
+}
+
+\subsection{bit64}{
+\itemize{
+\item \code{\link[bit64:as.integer64.character]{as.integer64()}}
+\item \code{\link[bit64:bit64-package]{is.integer64()}}
+}
+}
+
+\subsection{dplyr}{
+\itemize{
+\item \code{\link[dplyr:across]{across()}}: only supported inside \code{mutate()}, \code{summarize()}, and \code{arrange()}; purrr-style lambda functions and use of \code{where()} selection helper not yet supported
+\item \code{\link[dplyr:between]{between()}}
+\item \code{\link[dplyr:case_when]{case_when()}}
+\item \code{\link[dplyr:coalesce]{coalesce()}}
+\item \code{\link[dplyr:desc]{desc()}}
+\item \code{\link[dplyr:if_else]{if_else()}}
+\item \code{\link[dplyr:context]{n()}}
+\item \code{\link[dplyr:n_distinct]{n_distinct()}}
+}
+}
+
+\subsection{lubridate}{
+\itemize{
+\item \code{\link[lubridate:am]{am()}}
+\item \code{\link[lubridate:as_date]{as_date()}}
+\item \code{\link[lubridate:as_date]{as_datetime()}}
+\item \code{\link[lubridate:round_date]{ceiling_date()}}
+\item \code{\link[lubridate:date]{date()}}
+\item \code{\link[lubridate:date_decimal]{date_decimal()}}
+\item \code{\link[lubridate:day]{day()}}
+\item \code{\link[lubridate:duration]{ddays()}}
+\item \code{\link[lubridate:decimal_date]{decimal_date()}}
+\item \code{\link[lubridate:duration]{dhours()}}
+\item \code{\link[lubridate:duration]{dmicroseconds()}}
+\item \code{\link[lubridate:duration]{dmilliseconds()}}
+\item \code{\link[lubridate:duration]{dminutes()}}
+\item \code{\link[lubridate:duration]{dmonths()}}
+\item \code{\link[lubridate:ymd]{dmy()}}
+\item \code{\link[lubridate:ymd_hms]{dmy_h()}}
+\item \code{\link[lubridate:ymd_hms]{dmy_hm()}}
+\item \code{\link[lubridate:ymd_hms]{dmy_hms()}}
+\item \code{\link[lubridate:duration]{dnanoseconds()}}
+\item \code{\link[lubridate:duration]{dpicoseconds()}}
+\item \code{\link[lubridate:duration]{dseconds()}}
+\item \code{\link[lubridate:dst]{dst()}}
+\item \code{\link[lubridate:duration]{dweeks()}}
+\item \code{\link[lubridate:duration]{dyears()}}
+\item \code{\link[lubridate:ymd]{dym()}}
+\item \code{\link[lubridate:week]{epiweek()}}
+\item \code{\link[lubridate:year]{epiyear()}}
+\item \code{\link[lubridate:parse_date_time]{fast_strptime()}}
+\item \code{\link[lubridate:round_date]{floor_date()}}
+\item \code{\link[lubridate:format_ISO8601]{format_ISO8601()}}
+\item \code{\link[lubridate:hour]{hour()}}
+\item \code{\link[lubridate:date_utils]{is.Date()}}
+\item \code{\link[lubridate:is.instant]{is.instant()}}
+\item \code{\link[lubridate:posix_utils]{is.POSIXct()}}
+\item \code{\link[lubridate:is.instant]{is.timepoint()}}
+\item \code{\link[lubridate:week]{isoweek()}}
+\item \code{\link[lubridate:year]{isoyear()}}
+\item \code{\link[lubridate:leap_year]{leap_year()}}
+\item \code{\link[lubridate:make_datetime]{make_date()}}
+\item \code{\link[lubridate:make_datetime]{make_datetime()}}
+\item \code{\link[lubridate:make_difftime]{make_difftime()}}
+\item \code{\link[lubridate:day]{mday()}}
+\item \code{\link[lubridate:ymd]{mdy()}}
+\item \code{\link[lubridate:ymd_hms]{mdy_h()}}
+\item \code{\link[lubridate:ymd_hms]{mdy_hm()}}
+\item \code{\link[lubridate:ymd_hms]{mdy_hms()}}
+\item \code{\link[lubridate:minute]{minute()}}
+\item \code{\link[lubridate:month]{month()}}
+\item \code{\link[lubridate:ymd]{my()}}
+\item \code{\link[lubridate:ymd]{myd()}}
+\item \code{\link[lubridate:parse_date_time]{parse_date_time()}}
+\item \code{\link[lubridate:am]{pm()}}
+\item \code{\link[lubridate:day]{qday()}}
+\item \code{\link[lubridate:quarter]{quarter()}}
+\item \code{\link[lubridate:round_date]{round_date()}}
+\item \code{\link[lubridate:second]{second()}}
+\item \code{\link[lubridate:quarter]{semester()}}
+\item \code{\link[lubridate:tz]{tz()}}
+\item \code{\link[lubridate:day]{wday()}}
+\item \code{\link[lubridate:week]{week()}}
+\item \code{\link[lubridate:day]{yday()}}
+\item \code{\link[lubridate:ymd]{ydm()}}
+\item \code{\link[lubridate:ymd_hms]{ydm_h()}}
+\item \code{\link[lubridate:ymd_hms]{ydm_hm()}}
+\item \code{\link[lubridate:ymd_hms]{ydm_hms()}}
+\item \code{\link[lubridate:year]{year()}}
+\item \code{\link[lubridate:ymd]{ym()}}
+\item \code{\link[lubridate:ymd]{ymd()}}
+\item \code{\link[lubridate:ymd_hms]{ymd_h()}}
+\item \code{\link[lubridate:ymd_hms]{ymd_hm()}}
+\item \code{\link[lubridate:ymd_hms]{ymd_hms()}}
+\item \code{\link[lubridate:ymd]{yq()}}
+}
+}
+
+\subsection{methods}{
+\itemize{
+\item \code{\link[methods:is]{is()}}
+}
+}
+
+\subsection{rlang}{
+\itemize{
+\item \code{\link[rlang:type-predicates]{is_character()}}
+\item \code{\link[rlang:type-predicates]{is_double()}}
+\item \code{\link[rlang:type-predicates]{is_integer()}}
+\item \code{\link[rlang:type-predicates]{is_list()}}
+\item \code{\link[rlang:type-predicates]{is_logical()}}
+}
+}
+
+\subsection{stats}{
+\itemize{
+\item \code{\link[stats:median]{median()}}
+\item \code{\link[stats:quantile]{quantile()}}
+\item \code{\link[stats:sd]{sd()}}
+\item \code{\link[stats:cor]{var()}}
+}
+}
+
+\subsection{stringi}{
+\itemize{
+\item \code{\link[stringi:stri_reverse]{stri_reverse()}}
+}
+}
+
+\subsection{stringr}{
+\itemize{
+\item \code{\link[stringr:str_c]{str_c()}}: the \code{collapse} argument is not yet supported
+\item \code{\link[stringr:str_count]{str_count()}}
+\item \code{\link[stringr:str_detect]{str_detect()}}
+\item \code{\link[stringr:str_dup]{str_dup()}}
+\item \code{\link[stringr:str_starts]{str_ends()}}
+\item \code{\link[stringr:str_length]{str_length()}}
+\item \code{str_like()}: not yet in a released version of \code{stringr}, but it is supported in \code{arrow}
+\item \code{\link[stringr:str_pad]{str_pad()}}
+\item \code{\link[stringr:str_replace]{str_replace()}}
+\item \code{\link[stringr:str_replace]{str_replace_all()}}
+\item \code{\link[stringr:str_split]{str_split()}}
+\item \code{\link[stringr:str_starts]{str_starts()}}
+\item \code{\link[stringr:str_sub]{str_sub()}}
+\item \code{\link[stringr:case]{str_to_lower()}}
+\item \code{\link[stringr:case]{str_to_title()}}
+\item \code{\link[stringr:case]{str_to_upper()}}
+\item \code{\link[stringr:str_trim]{str_trim()}}
+}
+}
+
+\subsection{tibble}{
+\itemize{
+\item \code{\link[tibble:tibble]{tibble()}}
+}
+}
+
+\subsection{tidyselect}{
+\itemize{
+\item \code{\link[tidyselect:all_of]{all_of()}}
+\item \code{\link[tidyselect:starts_with]{contains()}}
+\item \code{\link[tidyselect:starts_with]{ends_with()}}
+\item \code{\link[tidyselect:everything]{everything()}}
+\item \code{\link[tidyselect:everything]{last_col()}}
+\item \code{\link[tidyselect:starts_with]{matches()}}
+\item \code{\link[tidyselect:starts_with]{num_range()}}
+\item \code{\link[tidyselect:one_of]{one_of()}}
+\item \code{\link[tidyselect:starts_with]{starts_with()}}
+}
+}
+}
+
diff --git a/r/man/add_filename.Rd b/r/man/add_filename.Rd
new file mode 100644
index 00000000000..ca7ed0e4b17
--- /dev/null
+++ b/r/man/add_filename.Rd
@@ -0,0 +1,23 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/dplyr-funcs-augmented.R
+\name{add_filename}
+\alias{add_filename}
+\title{Add the data filename as a column}
+\usage{
+add_filename()
+}
+\value{
+A \code{FieldRef} \code{Expression} that refers to the filename augmented
+column.
+}
+\description{
+This function only exists inside \code{arrow} \code{dplyr} queries, and it only is
+valid when querying on a \code{FileSystemDataset}.
+}
+\examples{
+\dontrun{
+open_dataset("nyc-taxi") \%>\%
+  mutate(file = add_filename())
+}
+}
+\keyword{internal}
diff --git a/r/man/cast.Rd b/r/man/cast.Rd
new file mode 100644
index 00000000000..88134f2e022
--- /dev/null
+++ b/r/man/cast.Rd
@@ -0,0 +1,38 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/dplyr-funcs-type.R
+\name{cast}
+\alias{cast}
+\title{Change the type of an array or column}
+\usage{
+cast(x, to, safe = TRUE, ...)
+}
+\arguments{
+\item{x}{an \code{Array}, \code{Table}, \code{Expression}, or similar Arrow data object.}
+
+\item{to}{\link{DataType} to cast to; for \link{Table} and \link{RecordBatch},
+it should be a \link{Schema}.}
+
+\item{safe}{logical: only allow the type conversion if no data is lost
+(truncation, overflow, etc.). Default is \code{TRUE}}
+
+\item{...}{specific \code{CastOptions} to set}
+}
+\value{
+an \code{Expression}
+}
+\description{
+This is a wrapper around the \verb{$cast()} method that many Arrow objects have.
+It is more convenient to call inside \code{dplyr} pipelines than the method.
+}
+\examples{
+\dontrun{
+mtcars \%>\%
+  arrow_table() \%>\%
+  mutate(cyl = cast(cyl, string()))
+}
+}
+\seealso{
+https://arrow.apache.org/docs/cpp/api/compute.html for the list of
+supported CastOptions.
+}
+\keyword{internal}
diff --git a/r/man/register_binding.Rd b/r/man/register_binding.Rd
index c53df707516..d2a4a380543 100644
--- a/r/man/register_binding.Rd
+++ b/r/man/register_binding.Rd
@@ -4,7 +4,13 @@
 \alias{register_binding}
 \title{Register compute bindings}
 \usage{
-register_binding(fun_name, fun, registry = nse_funcs, update_cache = FALSE)
+register_binding(
+  fun_name,
+  fun,
+  registry = nse_funcs,
+  update_cache = FALSE,
+  notes = character(0)
+)
 }
 \arguments{
 \item{fun_name}{A string containing a function name in the form \code{"function"} or
@@ -26,6 +32,9 @@ non-aggregate functions could be revisited...it is currently used
 as the data mask in mutate, filter, and aggregate (but not
 summarise) because the data mask has to be a list.}
 
+\item{notes}{string for the docs: note any limitations or differences in
+behavior between the Arrow version and the R function.}
+
 \item{agg_fun}{An aggregate function or \code{NULL} to un-register a previous
 aggregate function. This function must accept \code{Expression} objects as
 arguments and return a \code{list()} with components: