diff --git a/r/DESCRIPTION b/r/DESCRIPTION
index a6536015530..3ad9472a209 100644
--- a/r/DESCRIPTION
+++ b/r/DESCRIPTION
@@ -48,6 +48,7 @@ Suggests:
     pkgload,
     reticulate,
     rmarkdown,
+    stringi,
     stringr,
     testthat,
     tibble,
diff --git a/r/R/expression.R b/r/R/expression.R
index 26351d61aa4..bfbaa2f0ab1 100644
--- a/r/R/expression.R
+++ b/r/R/expression.R
@@ -30,6 +30,8 @@
   "str_length" = "utf8_length",
   "str_to_lower" = "utf8_lower",
   "str_to_upper" = "utf8_upper",
+  # stri_reverse comes from stringi (note the "stri_" prefix, not "str_")
+  "stri_reverse" = "utf8_reverse",
   # str_trim is defined in dplyr-functions.R
   "year" = "year",
   "isoyear" = "iso_year",
diff --git a/r/tests/testthat/test-dplyr-string-functions.R b/r/tests/testthat/test-dplyr-string-functions.R
index a58a04eb109..4cb07c9e39d 100644
--- a/r/tests/testthat/test-dplyr-string-functions.R
+++ b/r/tests/testthat/test-dplyr-string-functions.R
@@ -20,6 +20,7 @@ skip_if_not_available("utf8proc")
 
 library(dplyr)
 library(stringr)
+library(stringi)
 
 test_that("paste, paste0, and str_c", {
   df <- tibble(
@@ -712,7 +713,6 @@ test_that("strptime", {
     tstamp,
     check.tzone = FALSE
   )
-
 })
 
 test_that("errors in strptime", {
@@ -725,6 +725,46 @@ test_that("errors in strptime", {
   )
 })
 
+test_that("stri_reverse and arrow_ascii_reverse functions", {
+
+  df_ascii <- tibble(x = c("Foo\nand bar", "baz\tand qux and quux"))
+
+  # Non-ASCII whitespace (U+00A0, U+3000) makes these strings invalid as ASCII
+  df_utf8 <- tibble(x = c("Foo\u00A0\u0061nd\u00A0bar", "\u0062az\u00A0and\u00A0qux\u3000and\u00A0quux"))
+
+  expect_dplyr_equal(
+    input %>%
+      mutate(x = stri_reverse(x)) %>%
+      collect(),
+    df_utf8
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      mutate(x = stri_reverse(x)) %>%
+      collect(),
+    df_ascii
+  )
+
+  # arrow_ascii_reverse on ASCII-only input returns each string reversed
+  expect_equivalent(
+    df_ascii %>%
+      Table$create() %>%
+      mutate(x = arrow_ascii_reverse(x)) %>%
+      collect(),
+    tibble(x = c("rab dna\nooF", "xuuq dna xuq dna\tzab"))
+  )
+
+  # ... and raises an error on input containing non-ASCII characters
+  expect_error(
+    df_utf8 %>%
+      Table$create() %>%
+      mutate(x = arrow_ascii_reverse(x)) %>%
+      collect(),
+    "Invalid: Non-ASCII sequence in input"
+  )
+})
+
 test_that("str_like", {
   df <- tibble(x = c("Foo and bar", "baz and qux and quux"))
 
@@ -783,7 +823,6 @@ test_that("str_like", {
     input %>%
       mutate(x = str_like(x, "%baz%")) %>%
       collect(),
-    df,
+    df
   )
-
 })