Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions r/R/dplyr-functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -330,6 +330,37 @@ arrow_string_join_function <- function(null_handling, null_replacement = NULL) {
}
}

# Currently, Arrow does not support a locale option for string case conversion
# functions, in contrast to stringr's API, so the 'locale' argument is only valid
# for stringr's default value ("en"). The following string functions
# take a 'locale' option as their second argument:
# str_to_lower
# str_to_upper
# str_to_title
#
# Arrow locale will be supported with ARROW-14126
# Guard used by the case-conversion bindings: accept only the stringr default
# locale ("en") and raise an error for anything else.
#
# locale: the value the caller passed for the 'locale' argument.
# Returns NULL invisibly when `locale` is exactly "en"; otherwise signals an
# error (without the calling function attached to the condition).
stop_if_locale_provided <- function(locale) {
  # Happy path first: the default locale needs no further action.
  if (identical(locale, "en")) {
    return(invisible(NULL))
  }

  stop(
    "Providing a value for 'locale' other than the default ('en') is not supported by Arrow. ",
    "To change locale, use 'Sys.setlocale()'",
    call. = FALSE
  )
}

# Binding for str_to_lower(): validates `locale`, then builds an Expression
# that applies "utf8_lower" to `string`.
#
# string: the input passed through to Expression$create().
# locale: must be the default "en"; any other value raises an error.
nse_funcs$str_to_lower <- function(string, locale = "en") {
  # Reject unsupported locales before any expression is constructed.
  stop_if_locale_provided(locale)

  lowered <- Expression$create("utf8_lower", string)
  lowered
}

# Binding for str_to_upper(): validates `locale`, then builds an Expression
# that applies "utf8_upper" to `string`.
#
# string: the input passed through to Expression$create().
# locale: must be the default "en"; any other value raises an error.
nse_funcs$str_to_upper <- function(string, locale = "en") {
  # Reject unsupported locales before any expression is constructed.
  stop_if_locale_provided(locale)

  uppered <- Expression$create("utf8_upper", string)
  uppered
}

# Binding for str_to_title(): validates `locale`, then builds an Expression
# that applies "utf8_title" to `string`.
#
# string: the input passed through to Expression$create().
# locale: must be the default "en"; any other value raises an error.
nse_funcs$str_to_title <- function(string, locale = "en") {
  # Reject unsupported locales before any expression is constructed.
  stop_if_locale_provided(locale)

  titled <- Expression$create("utf8_title", string)
  titled
}

nse_funcs$str_trim <- function(string, side = c("both", "left", "right")) {
side <- match.arg(side)
trim_fun <- switch(side,
Expand Down
6 changes: 3 additions & 3 deletions r/R/expression.R
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,9 @@
"str_length" = "utf8_length",
# str_pad is defined in dplyr-functions.R
# str_sub is defined in dplyr-functions.R
"str_to_lower" = "utf8_lower",
"str_to_title" = "utf8_title",
"str_to_upper" = "utf8_upper",
# str_to_lower is defined in dplyr-functions.R
# str_to_title is defined in dplyr-functions.R
# str_to_upper is defined in dplyr-functions.R
# str_trim is defined in dplyr-functions.R
"stri_reverse" = "utf8_reverse",
# substr is defined in dplyr-functions.R
Expand Down
21 changes: 21 additions & 0 deletions r/tests/testthat/test-dplyr-string-functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -467,6 +467,27 @@ test_that("strsplit and str_split", {
)
})

# Covers the three case-conversion bindings (str_to_lower, str_to_upper,
# str_to_title) plus the error raised for a non-default 'locale'.
test_that("str_to_lower, str_to_upper, and str_to_title", {
# Fixture: strings mixing digits, leading/trailing whitespace escapes,
# punctuation and mixed-case letters.
df <- tibble(x = c("foo1", " \tB a R\n", "!apACHe aRroW!"))
# NOTE(review): expect_dplyr_equal is a project test helper not shown here;
# presumably it evaluates the pipeline with `input` bound to `df` both as a
# plain data frame and as an Arrow object and compares the results - confirm.
expect_dplyr_equal(
input %>%
transmute(
x_lower = str_to_lower(x),
x_upper = str_to_upper(x),
x_title = str_to_title(x)
) %>%
collect(),
df
)

# Error checking a single function because they all use the same code path.
# The binding is called directly with locale = "sp"; `fixed = TRUE` makes the
# expected message match literally, so the parentheses are not treated as
# regex syntax.
expect_error(
nse_funcs$str_to_lower("Apache Arrow", locale = "sp"),
"Providing a value for 'locale' other than the default ('en') is not supported by Arrow",
fixed = TRUE
)
})

test_that("arrow_*_split_whitespace functions", {
# use only ASCII whitespace characters
df_ascii <- tibble(x = c("Foo\nand bar", "baz\tand qux and quux"))
Expand Down