diff --git a/r/R/dplyr-functions.R b/r/R/dplyr-functions.R
index dbb9d5f46f6..1a2bf4d9f65 100644
--- a/r/R/dplyr-functions.R
+++ b/r/R/dplyr-functions.R
@@ -341,6 +341,7 @@ arrow_string_join_function <- function(null_handling, null_replacement = NULL) {
 # str_to_lower
 # str_to_upper
 # str_to_title
+# str_to_sentence
 #
 # Arrow locale will be supported with ARROW-14126
 stop_if_locale_provided <- function(locale) {
@@ -367,6 +368,15 @@ nse_funcs$str_to_title <- function(string, locale = "en") {
   Expression$create("utf8_title", string)
 }
 
+# Maps str_to_sentence() onto Arrow's "utf8_capitalize" function.
+# NOTE(review): utf8_capitalize presumably treats each string as a single
+# sentence, whereas stringr capitalises after every sentence break it
+# detects, so results can differ for multi-sentence strings -- confirm.
+nse_funcs$str_to_sentence <- function(string, locale = "en") {
+  stop_if_locale_provided(locale)
+  Expression$create("utf8_capitalize", string)
+}
+
 nse_funcs$str_trim <- function(string, side = c("both", "left", "right")) {
   side <- match.arg(side)
   trim_fun <- switch(side,
diff --git a/r/R/expression.R b/r/R/expression.R
index b1b6635f538..f3110f40ef0 100644
--- a/r/R/expression.R
+++ b/r/R/expression.R
@@ -53,6 +53,7 @@
   # str_to_lower is defined in dplyr-functions.R
   # str_to_title is defined in dplyr-functions.R
   # str_to_upper is defined in dplyr-functions.R
+  # str_to_sentence is defined in dplyr-functions.R
   # str_trim is defined in dplyr-functions.R
   "stri_reverse" = "utf8_reverse",
   # substr is defined in dplyr-functions.R
diff --git a/r/tests/testthat/test-dplyr-funcs-string.R b/r/tests/testthat/test-dplyr-funcs-string.R
index dd59b5ac55d..f6fc5f313c4 100644
--- a/r/tests/testthat/test-dplyr-funcs-string.R
+++ b/r/tests/testthat/test-dplyr-funcs-string.R
@@ -1336,3 +1336,39 @@ test_that("str_starts, str_ends, startsWith, endsWith", {
     df
   )
 })
+
+test_that("str_to_sentence", {
+  df <- tibble(
+    one_sent = c("first word", "the second word", "the third word"),
+    two_sent = c("first word. second word? third word! \nfourth word",
+                 "second word", "third word")
+  )
+
+  # Single-sentence strings: Arrow and stringr are expected to agree.
+  expect_dplyr_equal(
+    input %>%
+      mutate(sentence_case = str_to_sentence(one_sent)) %>%
+      collect(),
+    df
+  )
+
+  # Multi-sentence strings: expect_dplyr_error() expects a failure, but
+  # neither backend raises one here, so it cannot be used for this case.
+  # stringr capitalises after the sentence breaks it detects (but, as first
+  # noted here, not after a `.` followed by lower case), whereas Arrow's
+  # utf8_capitalize only upper-cases the first character of each string.
+  # The results therefore differ, so pin Arrow's output explicitly.
+  expect_equal(
+    df %>%
+      Table$create() %>%
+      mutate(sentence_case_two = str_to_sentence(two_sent)) %>%
+      collect(),
+    df %>%
+      mutate(
+        sentence_case_two = c(
+          "First word. second word? third word! \nfourth word",
+          "Second word", "Third word"
+        )
+      )
+  )
+})