diff --git a/r/R/dplyr-functions.R b/r/R/dplyr-functions.R
index 0ce4bdcdd5e..78f2e393305 100644
--- a/r/R/dplyr-functions.R
+++ b/r/R/dplyr-functions.R
@@ -330,6 +330,37 @@ arrow_string_join_function <- function(null_handling, null_replacement = NULL) {
   }
 }
 
+# Arrow does not currently support a locale option for string case conversion
+# functions, in contrast to stringr's API, so the 'locale' argument is only
+# valid for stringr's default value ("en"); any other value raises an error.
+# The following string functions take 'locale' as their second argument:
+#   str_to_lower
+#   str_to_upper
+#   str_to_title
+#
+# Locale support in Arrow is planned under ARROW-14126
+stop_if_locale_provided <- function(locale) {
+  if (!identical(locale, "en")) {
+    stop("Providing a value for 'locale' other than the default ('en') is not supported by Arrow. ",
+      "To change locale, use 'Sys.setlocale()'", call. = FALSE)
+  }
+}
+
+nse_funcs$str_to_lower <- function(string, locale = "en") {
+  stop_if_locale_provided(locale)
+  Expression$create("utf8_lower", string)
+}
+
+nse_funcs$str_to_upper <- function(string, locale = "en") {
+  stop_if_locale_provided(locale)
+  Expression$create("utf8_upper", string)
+}
+
+nse_funcs$str_to_title <- function(string, locale = "en") {
+  stop_if_locale_provided(locale)
+  Expression$create("utf8_title", string)
+}
+
 nse_funcs$str_trim <- function(string, side = c("both", "left", "right")) {
   side <- match.arg(side)
   trim_fun <- switch(side,
diff --git a/r/R/expression.R b/r/R/expression.R
index 857be74f5ce..e3e425c02fd 100644
--- a/r/R/expression.R
+++ b/r/R/expression.R
@@ -50,9 +50,9 @@
   "str_length" = "utf8_length",
   # str_pad is defined in dplyr-functions.R
   # str_sub is defined in dplyr-functions.R
-  "str_to_lower" = "utf8_lower",
-  "str_to_title" = "utf8_title",
-  "str_to_upper" = "utf8_upper",
+  # str_to_lower is defined in dplyr-functions.R
+  # str_to_title is defined in dplyr-functions.R
+  # str_to_upper is defined in dplyr-functions.R
   # str_trim is defined in dplyr-functions.R
   "stri_reverse" = "utf8_reverse",
   # substr is defined in dplyr-functions.R
diff --git a/r/tests/testthat/test-dplyr-string-functions.R b/r/tests/testthat/test-dplyr-string-functions.R
index 2946ebdb606..8e603bc0d97 100644
--- a/r/tests/testthat/test-dplyr-string-functions.R
+++ b/r/tests/testthat/test-dplyr-string-functions.R
@@ -467,6 +467,27 @@ test_that("strsplit and str_split", {
   )
 })
 
+test_that("str_to_lower, str_to_upper, and str_to_title", {
+  df <- tibble(x = c("foo1", " \tB a R\n", "!apACHe aRroW!"))
+  expect_dplyr_equal(
+    input %>%
+      transmute(
+        x_lower = str_to_lower(x),
+        x_upper = str_to_upper(x),
+        x_title = str_to_title(x)
+      ) %>%
+      collect(),
+    df
+  )
+
+  # Check the locale error on one function only; all three call the same validation helper.
+  expect_error(
+    nse_funcs$str_to_lower("Apache Arrow", locale = "sp"),
+    "Providing a value for 'locale' other than the default ('en') is not supported by Arrow",
+    fixed = TRUE
+  )
+})
+
 test_that("arrow_*_split_whitespace functions", {
   # use only ASCII whitespace characters
   df_ascii <- tibble(x = c("Foo\nand bar", "baz\tand qux and quux"))