diff --git a/ci/scripts/PKGBUILD b/ci/scripts/PKGBUILD
index 4306f644082..1d9e41bba7a 100644
--- a/ci/scripts/PKGBUILD
+++ b/ci/scripts/PKGBUILD
@@ -25,6 +25,8 @@ arch=("any")
 url="https://arrow.apache.org/"
 license=("Apache-2.0")
 depends=("${MINGW_PACKAGE_PREFIX}-aws-sdk-cpp"
+         "${MINGW_PACKAGE_PREFIX}-libutf8proc"
+         "${MINGW_PACKAGE_PREFIX}-re2"
          "${MINGW_PACKAGE_PREFIX}-thrift"
          "${MINGW_PACKAGE_PREFIX}-snappy"
          "${MINGW_PACKAGE_PREFIX}-zlib"
@@ -103,9 +105,7 @@ build() {
     -DARROW_SNAPPY_USE_SHARED=OFF \
     -DARROW_USE_GLOG=OFF \
     -DARROW_WITH_LZ4=ON \
-    -DARROW_WITH_RE2=OFF \
     -DARROW_WITH_SNAPPY=ON \
-    -DARROW_WITH_UTF8PROC=OFF \
     -DARROW_WITH_ZLIB=ON \
     -DARROW_WITH_ZSTD=ON \
     -DARROW_ZSTD_USE_SHARED=OFF \
diff --git a/ci/scripts/r_windows_build.sh b/ci/scripts/r_windows_build.sh
index cb33e676a7d..be03b75f5ad 100755
--- a/ci/scripts/r_windows_build.sh
+++ b/ci/scripts/r_windows_build.sh
@@ -96,8 +96,8 @@ cp $MSYS_LIB_DIR/mingw64/lib/lib{thrift,snappy}.a $DST_DIR/${RWINLIB_LIB_DIR}/x6
 cp $MSYS_LIB_DIR/mingw32/lib/lib{thrift,snappy}.a $DST_DIR/${RWINLIB_LIB_DIR}/i386
 
 # These are from https://dl.bintray.com/rtools/mingw{32,64}/
-cp $MSYS_LIB_DIR/mingw64/lib/lib{zstd,lz4,crypto,aws*}.a $DST_DIR/lib/x64
-cp $MSYS_LIB_DIR/mingw32/lib/lib{zstd,lz4,crypto,aws*}.a $DST_DIR/lib/i386
+cp $MSYS_LIB_DIR/mingw64/lib/lib{zstd,lz4,crypto,utf8proc,re2,aws*}.a $DST_DIR/lib/x64
+cp $MSYS_LIB_DIR/mingw32/lib/lib{zstd,lz4,crypto,utf8proc,re2,aws*}.a $DST_DIR/lib/i386
 
 # Create build artifact
 zip -r ${DST_DIR}.zip $DST_DIR
diff --git a/dev/tasks/homebrew-formulae/autobrew/apache-arrow.rb b/dev/tasks/homebrew-formulae/autobrew/apache-arrow.rb
index 8779ad19570..351d7764603 100644
--- a/dev/tasks/homebrew-formulae/autobrew/apache-arrow.rb
+++ b/dev/tasks/homebrew-formulae/autobrew/apache-arrow.rb
@@ -57,9 +57,7 @@ def install
       -DARROW_USE_GLOG=OFF
       -DARROW_VERBOSE_THIRDPARTY_BUILD=ON
       -DARROW_WITH_LZ4=ON
-      -DARROW_WITH_RE2=OFF
       -DARROW_WITH_SNAPPY=ON
-      -DARROW_WITH_UTF8PROC=OFF
       -DARROW_WITH_ZLIB=ON
       -DARROW_WITH_ZSTD=ON
       -DCMAKE_UNITY_BUILD=OFF
diff --git a/r/DESCRIPTION b/r/DESCRIPTION
index 145a5aeef9b..f37e6a4e84f 100644
--- a/r/DESCRIPTION
+++ b/r/DESCRIPTION
@@ -46,6 +46,7 @@ Suggests:
     pkgload,
     reticulate,
     rmarkdown,
+    stringr,
     testthat,
     tibble
 LinkingTo: cpp11 (>= 0.2.0)
diff --git a/r/NAMESPACE b/r/NAMESPACE
index fbc71e9edf0..54061128ac7 100644
--- a/r/NAMESPACE
+++ b/r/NAMESPACE
@@ -284,6 +284,7 @@ importFrom(rlang,is_false)
 importFrom(rlang,is_integerish)
 importFrom(rlang,list2)
 importFrom(rlang,new_data_mask)
+importFrom(rlang,new_environment)
 importFrom(rlang,quo_is_null)
 importFrom(rlang,quos)
 importFrom(rlang,set_names)
diff --git a/r/R/arrow-package.R b/r/R/arrow-package.R
index fd3f8b47856..66694a97867 100644
--- a/r/R/arrow-package.R
+++ b/r/R/arrow-package.R
@@ -18,7 +18,7 @@
 #' @importFrom R6 R6Class
 #' @importFrom purrr as_mapper map map2 map_chr map_dfr map_int map_lgl keep
 #' @importFrom assertthat assert_that is.string
-#' @importFrom rlang list2 %||% is_false abort dots_n warn enquo quo_is_null enquos is_integerish quos eval_tidy new_data_mask syms env env_bind as_label set_names exec is_bare_character
+#' @importFrom rlang list2 %||% is_false abort dots_n warn enquo quo_is_null enquos is_integerish quos eval_tidy new_data_mask syms env new_environment env_bind as_label set_names exec is_bare_character
 #' @importFrom tidyselect vars_select
 #' @useDynLib arrow, .registration = TRUE
 #' @keywords internal
diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R
index ec0aae94f30..3d0f31ce8f3 100644
--- a/r/R/arrowExports.R
+++ b/r/R/arrowExports.R
@@ -296,6 +296,10 @@ compute__CallFunction <- function(func_name, args, options){
     .Call(`_arrow_compute__CallFunction`, func_name, args, options)
 }
 
+list_compute_functions <- function(){
+    .Call(`_arrow_list_compute_functions`)
+}
+
 csv___ReadOptions__initialize <- function(options){
     .Call(`_arrow_csv___ReadOptions__initialize`, options)
 }
diff --git a/r/R/dplyr.R b/r/R/dplyr.R
index 8bc64ce089d..f9df04d5172 100644
--- a/r/R/dplyr.R
+++ b/r/R/dplyr.R
@@ -238,31 +238,50 @@ filter.arrow_dplyr_query <- function(.data, ..., .preserve = FALSE) {
 }
 filter.Dataset <- filter.ArrowTabular <- filter.arrow_dplyr_query
 
+# Helper to assemble the functions that go in the NSE data mask
+# The only difference between the Dataset and the Table/RecordBatch versions
+# is that they use a different wrapping function (FUN) to hold the unevaluated
+# expression.
+build_function_list <- function(FUN) {
+  wrapper <- function(operator) {
+    force(operator)
+    function(e1, e2) FUN(operator, e1, e2)
+  }
+
+  c(
+    lapply(set_names(names(.array_function_map)), wrapper),
+    # TODO: lapply also for the arrow spellings?
+    # See list_compute_functions()
+    # (would want to do these first, and then modifyList with the R ones
+    # in case of name collision)
+    # Would need to generalize FUN to accept ... args
+    str_trim = function(string, side = c("both", "left", "right")) {
+      side <- match.arg(side)
+      switch(
+        side,
+        left = FUN("utf8_ltrim_whitespace", string),
+        right = FUN("utf8_rtrim_whitespace", string),
+        both = FUN("utf8_trim_whitespace", string)
+      )
+    }
+  )
+}
+
+# Create these once, at package build time
+dataset_function_list <- build_function_list(build_dataset_expression)
+array_function_list <- build_function_list(build_array_expression)
+
 # Create a data mask for evaluating a filter expression
 filter_mask <- function(.data) {
-  f_env <- env()
-
-  # Insert functions/operators and field references
-  # TODO: define functions in env once, outside of this function
-  # filter_env <- env(parent = if (data_is_dataset) function_env1 else function_env2)
   if (query_on_dataset(.data)) {
-    comp_func <- function(operator) {
-      force(operator)
-      function(e1, e2) build_dataset_expression(operator, e1, e2)
-    }
+    f_env <- new_environment(dataset_function_list)
     var_binder <- function(x) Expression$field_ref(x)
   } else {
-    comp_func <- function(operator) {
-      force(operator)
-      function(e1, e2) build_array_expression(operator, e1, e2)
-    }
+    f_env <- new_environment(array_function_list)
     var_binder <- function(x) .data$.data[[x]]
   }
 
-  # First add the functions
-  func_names <- set_names(names(.array_function_map))
-  env_bind(f_env, !!!lapply(func_names, comp_func))
-  # Then add the column references
+  # Add the column references
   # Renaming is handled automatically by the named list
   data_pronoun <- lapply(.data$selected_columns, var_binder)
   env_bind(f_env, !!!data_pronoun)
diff --git a/r/R/expression.R b/r/R/expression.R
index 5475f7a44bc..878b800c652 100644
--- a/r/R/expression.R
+++ b/r/R/expression.R
@@ -51,8 +51,8 @@ Ops.array_expression <- function(e1, e2) {
 }
 
 build_array_expression <- function(.Generic, e1, e2, ...) {
-  if (.Generic %in% names(.unary_function_map)) {
-    expr <- array_expression(.unary_function_map[[.Generic]], e1)
+  if (.Generic %in% names(.unary_function_map) || nargs() == 2L) {
+    expr <- array_expression(.unary_function_map[[.Generic]] %||% .Generic, e1)
   } else {
     e1 <- .wrap_arrow(e1, .Generic)
     e2 <- .wrap_arrow(e2, .Generic)
@@ -79,7 +79,7 @@ build_array_expression <- function(.Generic, e1, e2, ...) {
       return(build_array_expression("-", e1, base))
     }
 
-    expr <- array_expression(.binary_function_map[[.Generic]], e1, e2, ...)
+    expr <- array_expression(.binary_function_map[[.Generic]] %||% .Generic, e1, e2, ...)
   }
   expr
 }
@@ -110,7 +110,14 @@ cast_array_expression <- function(x, to_type, safe = TRUE, ...) {
 .unary_function_map <- list(
   "!" = "invert",
   "is.na" = "is_null",
-  "is.nan" = "is_nan"
+  "is.nan" = "is_nan",
+  "nchar" = "binary_length",
+  "tolower" = "utf8_lower",
+  "toupper" = "utf8_upper",
+  # stringr spellings of those
+  "str_length" = "binary_length",
+  "str_to_lower" = "utf8_lower",
+  "str_to_upper" = "utf8_upper"
 )
 
 .binary_function_map <- list(
@@ -228,8 +235,8 @@ Expression$scalar <- function(x) {
 }
 
 build_dataset_expression <- function(.Generic, e1, e2, ...) {
-  if (.Generic %in% names(.unary_function_map)) {
-    expr <- Expression$create(.unary_function_map[[.Generic]], e1)
+  if (.Generic %in% names(.unary_function_map) || nargs() == 2L) {
+    expr <- Expression$create(.unary_function_map[[.Generic]] %||% .Generic, e1)
   } else if (.Generic == "%in%") {
     # Special-case %in%, which is different from the Array function name
     expr <- Expression$create("is_in", e1,
@@ -260,7 +267,7 @@ build_dataset_expression <- function(.Generic, e1, e2, ...) {
       return(e1 - e2 * ( e1 %/% e2 ))
     }
 
-    expr <- Expression$create(.binary_function_map[[.Generic]], e1, e2, ...)
+    expr <- Expression$create(.binary_function_map[[.Generic]] %||% .Generic, e1, e2, ...)
   }
   expr
 }
diff --git a/r/configure.win b/r/configure.win
index 32d90ce8303..80529e702ac 100644
--- a/r/configure.win
+++ b/r/configure.win
@@ -50,7 +50,7 @@ AWS_LIBS="-laws-cpp-sdk-config -laws-cpp-sdk-transfer -laws-cpp-sdk-identity-man
 # NOTE: If you make changes to the libraries below, you should also change
 # ci/scripts/r_windows_build.sh and ci/scripts/PKGBUILD
 PKG_CFLAGS="-I${RWINLIB}/include -DARROW_STATIC -DPARQUET_STATIC -DARROW_DS_STATIC -DARROW_R_WITH_ARROW"
-PKG_LIBS="-L${RWINLIB}/lib"'$(subst gcc,,$(COMPILED_BY))$(R_ARCH) '"-L${RWINLIB}/lib"'$(R_ARCH) '"-lparquet -larrow_dataset -larrow -larrow_bundled_dependencies -lthrift -lsnappy -lz -lzstd -llz4 ${MIMALLOC_LIBS} ${OPENSSL_LIBS}"
+PKG_LIBS="-L${RWINLIB}/lib"'$(subst gcc,,$(COMPILED_BY))$(R_ARCH) '"-L${RWINLIB}/lib"'$(R_ARCH) '"-lparquet -larrow_dataset -larrow -larrow_bundled_dependencies -lutf8proc -lre2 -lthrift -lsnappy -lz -lzstd -llz4 ${MIMALLOC_LIBS} ${OPENSSL_LIBS}"
 
 # S3 support only for Rtools40 (i.e. R >= 4.0)
 "${R_HOME}/bin${R_ARCH_BIN}/Rscript.exe" -e 'R.version$major >= 4' | grep TRUE >/dev/null 2>&1
diff --git a/r/inst/build_arrow_static.sh b/r/inst/build_arrow_static.sh
index 3000f5826f3..61dd5930de0 100755
--- a/r/inst/build_arrow_static.sh
+++ b/r/inst/build_arrow_static.sh
@@ -61,9 +61,7 @@ ${CMAKE} -DARROW_BOOST_USE_SHARED=OFF \
     -DARROW_WITH_BROTLI=${ARROW_WITH_BROTLI:-$ARROW_DEFAULT_PARAM} \
     -DARROW_WITH_BZ2=${ARROW_WITH_BZ2:-$ARROW_DEFAULT_PARAM} \
     -DARROW_WITH_LZ4=${ARROW_WITH_LZ4:-$ARROW_DEFAULT_PARAM} \
-    -DARROW_WITH_RE2=OFF \
     -DARROW_WITH_SNAPPY=${ARROW_WITH_SNAPPY:-$ARROW_DEFAULT_PARAM} \
-    -DARROW_WITH_UTF8PROC=OFF \
     -DARROW_WITH_ZLIB=${ARROW_WITH_ZLIB:-$ARROW_DEFAULT_PARAM} \
     -DARROW_WITH_ZSTD=${ARROW_WITH_ZSTD:-$ARROW_DEFAULT_PARAM} \
     -DCMAKE_BUILD_TYPE=Release \
diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp
index 2fbfecacfa1..839c9d6c173 100644
--- a/r/src/arrowExports.cpp
+++ b/r/src/arrowExports.cpp
@@ -642,6 +642,13 @@ BEGIN_CPP11
 	return cpp11::as_sexp(compute__CallFunction(func_name, args, options));
 END_CPP11
 }
+// compute.cpp
+std::vector<std::string> list_compute_functions();
+extern "C" SEXP _arrow_list_compute_functions(){
+BEGIN_CPP11
+	return cpp11::as_sexp(list_compute_functions());
+END_CPP11
+}
 // csv.cpp
 std::shared_ptr<arrow::csv::ReadOptions> csv___ReadOptions__initialize(cpp11::list options);
 extern "C" SEXP _arrow_csv___ReadOptions__initialize(SEXP options_sexp){
@@ -3583,6 +3590,7 @@ static const R_CallMethodDef CallEntries[] = {
 		{ "_arrow_RecordBatch__cast", (DL_FUNC) &_arrow_RecordBatch__cast, 3}, 
 		{ "_arrow_Table__cast", (DL_FUNC) &_arrow_Table__cast, 3}, 
 		{ "_arrow_compute__CallFunction", (DL_FUNC) &_arrow_compute__CallFunction, 3}, 
+		{ "_arrow_list_compute_functions", (DL_FUNC) &_arrow_list_compute_functions, 0}, 
 		{ "_arrow_csv___ReadOptions__initialize", (DL_FUNC) &_arrow_csv___ReadOptions__initialize, 1}, 
 		{ "_arrow_csv___ParseOptions__initialize", (DL_FUNC) &_arrow_csv___ParseOptions__initialize, 1}, 
 		{ "_arrow_csv___ReadOptions__column_names", (DL_FUNC) &_arrow_csv___ReadOptions__column_names, 1}, 
diff --git a/r/src/compute.cpp b/r/src/compute.cpp
index 2d69d8029c6..7bcded78f0d 100644
--- a/r/src/compute.cpp
+++ b/r/src/compute.cpp
@@ -199,4 +199,9 @@ SEXP compute__CallFunction(std::string func_name, cpp11::list args, cpp11::list
   return from_datum(std::move(out));
 }
 
+// [[arrow::export]]
+std::vector<std::string> list_compute_functions() {
+  return arrow::compute::GetFunctionRegistry()->GetFunctionNames();
+}
+
 #endif
diff --git a/r/tests/testthat/helper-data.R b/r/tests/testthat/helper-data.R
index 15ea0fca31f..5fac5481f26 100644
--- a/r/tests/testthat/helper-data.R
+++ b/r/tests/testthat/helper-data.R
@@ -63,6 +63,49 @@ example_with_times <- tibble::tibble(
   posixlt_tz = as.POSIXlt(lubridate::ymd_hms("2018-10-07 19:04:05", tz = "US/Eastern") + 1:10)
 )
 
+verses <- list(
+  # Since we tend to test with dataframes with 10 rows, here are verses from
+  # "Milonga del moro judío", by Jorge Drexler. They are décimas, 10-line
+  # poems with a particular meter and rhyme scheme.
+  # (They also have non-ASCII characters, which is nice for testing)
+  c(
+    "Por cada muro, un lamento",
+    "En Jerusalén la dorada",
+    "Y mil vidas malgastadas",
+    "Por cada mandamiento",
+    "Yo soy polvo de tu viento",
+    "Y aunque sangro de tu herida",
+    "Y cada piedra querida",
+    "Guarda mi amor más profundo",
+    "No hay una piedra en el mundo",
+    "Que valga lo que una vida"
+  ),
+  c(
+    "No hay muerto que no me duela",
+    "No hay un bando ganador",
+    "No hay nada más que dolor",
+    "Y otra vida que se vuela",
+    "La guerra es muy mala escuela",
+    "No importa el disfraz que viste",
+    "Perdonen que no me aliste",
+    "Bajo ninguna bandera",
+    "Vale más cualquier quimera",
+    "Que un trozo de tela triste"
+  ),
+  c(
+    "Y a nadie le di permiso",
+    "Para matar en mi nombre",
+    "Un hombre no es más que un hombre",
+    "Y si hay Dios, así lo quiso",
+    "El mismo suelo que piso",
+    "Seguirá, yo me habré ido",
+    "Rumbo también del olvido",
+    "No hay doctrina que no vaya",
+    "Y no hay pueblo que no se haya",
+    "Creído el pueblo elegido"
+  )
+)
+
 make_big_string <- function() {
   # This creates a character vector that would exceed the capacity of BinaryArray
   rep(purrr::map_chr(2047:2050, ~paste(sample(letters, ., replace = TRUE), collapse = "")), 2^18)
diff --git a/r/tests/testthat/test-dataset.R b/r/tests/testthat/test-dataset.R
index 990f024212e..e84eb12b08a 100644
--- a/r/tests/testthat/test-dataset.R
+++ b/r/tests/testthat/test-dataset.R
@@ -492,7 +492,7 @@ test_that("filter() with %in%", {
     tibble(int = df1$int[c(3, 4, 6)], part = 1)
   )
 
-# ARROW-9606: bug in %in% filter on partition column with >1 partition columns
+  # ARROW-9606: bug in %in% filter on partition column with >1 partition columns
   ds <- open_dataset(hive_dir)
   expect_equivalent(
     ds %>%
@@ -503,6 +503,25 @@ test_that("filter() with %in%", {
   )
 })
 
+test_that("filter() with strings", {
+  ds <- open_dataset(dataset_dir, partitioning = schema(part = uint8()))
+  expect_equivalent(
+    ds %>%
+      select(chr, part) %>%
+      filter(chr == "b", part == 1) %>%
+      collect(),
+    tibble(chr = "b", part = 1)
+  )
+
+  expect_equivalent(
+    ds %>%
+      select(chr, part) %>%
+      filter(toupper(chr) == "B", part == 1) %>%
+      collect(),
+    tibble(chr = "b", part = 1)
+  )
+})
+
 test_that("filter() with .data", {
   ds <- open_dataset(dataset_dir, partitioning = schema(part = uint8()))
   expect_equivalent(
diff --git a/r/tests/testthat/test-dplyr.R b/r/tests/testthat/test-dplyr.R
index a80e17c6f3e..3082019ed98 100644
--- a/r/tests/testthat/test-dplyr.R
+++ b/r/tests/testthat/test-dplyr.R
@@ -18,6 +18,7 @@
 context("dplyr verbs")
 
 library(dplyr)
+library(stringr)
 
 expect_dplyr_equal <- function(expr, # A dplyr pipeline with `input` as its start
                                tbl,  # A tbl/df as reference, will make RB/Table with
@@ -83,6 +84,11 @@ expect_dplyr_error <- function(expr, # A dplyr pipeline with `input` as its star
 }
 
 tbl <- example_data
+# Add some better string data
+tbl$verses <- verses[[1]]
+# c(" a ", "  b  ", "   c   ", ...) increasing padding
+# nchar =   3  5  7  9 11 13 15 17 19 21
+tbl$padded_strings <- stringr::str_pad(letters[1:10], width = 2*(1:10)+1, side = "both")
 
 test_that("basic select/filter/collect", {
   batch <- record_batch(tbl)
@@ -256,6 +262,50 @@ test_that("filter() with %in%", {
   )
 })
 
+test_that("filter() with string ops", {
+  # Extra instrumentation to ensure that we're calling Arrow compute here
+  # because many base R string functions implicitly call as.character,
+  # which means they still work on Arrays but actually force data into R
+  # 1) wrapper that raises a warning if as.character is called. Can't wrap
+  #    the whole test because as.character apparently gets called in other
+  #    (presumably legitimate) places
+  # 2) Wrap the test in expect_warning(expr, NA) to catch the warning
+
+  with_no_as_character <- function(expr) {
+    trace(
+      "as.character",
+      tracer = quote(warning("as.character was called")),
+      print = FALSE,
+      where = toupper
+    )
+    on.exit(untrace("as.character", where = toupper))
+    force(expr)
+  }
+
+  expect_warning(
+    expect_dplyr_equal(
+      input %>%
+        filter(dbl > 2, with_no_as_character(toupper(chr)) %in% c("D", "F")) %>%
+        collect(),
+      tbl
+    ),
+  NA)
+
+  expect_dplyr_equal(
+    input %>%
+      filter(dbl > 2, str_length(verses) > 25) %>%
+      collect(),
+    tbl
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      filter(dbl > 2, str_length(str_trim(padded_strings, "left")) > 5) %>%
+      collect(),
+    tbl
+  )
+})
+
 test_that("filter environment scope", {
   # "object 'b_var' not found"
   expect_dplyr_error(input %>% filter(batch, chr == b_var))
diff --git a/r/tests/testthat/test-python.R b/r/tests/testthat/test-python.R
index a073b73479f..821e14a493b 100644
--- a/r/tests/testthat/test-python.R
+++ b/r/tests/testthat/test-python.R
@@ -20,6 +20,8 @@ context("To/from Python")
 test_that("install_pyarrow", {
   skip_on_cran()
   skip_if_not_dev_mode()
+  # Python problems on Apple M1 still
+  skip_if(grepl("arm-apple", R.Version()$platform))
   skip_if_not_installed("reticulate")
   venv <- try(reticulate::virtualenv_create("arrow-test"))
   # Bail out if virtualenv isn't available