diff --git a/r/R/dplyr-functions.R b/r/R/dplyr-functions.R
index a91db55257a..3c61157f260 100644
--- a/r/R/dplyr-functions.R
+++ b/r/R/dplyr-functions.R
@@ -601,6 +601,72 @@ nse_funcs$str_pad <- function(string, width, side = c("left", "right", "both"),
   )
 }
 
+# nse_funcs entry for startsWith(): builds a "starts_with" expression on `x`,
+# passing `prefix` as the `pattern` option.
+nse_funcs$startsWith <- function(x, prefix) {
+  Expression$create(
+    "starts_with",
+    x,
+    options = list(pattern = prefix)
+  )
+}
+
+# nse_funcs entry for endsWith(): builds an "ends_with" expression on `x`,
+# passing `suffix` as the `pattern` option.
+nse_funcs$endsWith <- function(x, suffix) {
+  Expression$create(
+    "ends_with",
+    x,
+    options = list(pattern = suffix)
+  )
+}
+
+# nse_funcs entry for str_starts(). Patterns flagged as fixed are delegated to
+# nse_funcs$startsWith; all others are matched as regular expressions through
+# nse_funcs$grepl. `negate = TRUE` inverts the result.
+nse_funcs$str_starts <- function(string, pattern, negate = FALSE) {
+  opts <- get_stringr_pattern_options(enexpr(pattern))
+  if (opts$fixed) {
+    out <- nse_funcs$startsWith(x = string, prefix = opts$pattern)
+  } else {
+    # Group the pattern before anchoring so "^" applies to every alternative:
+    # "^a|b" would match a "b" anywhere, whereas "^(?:a|b)" does not.
+    out <- nse_funcs$grepl(
+      pattern = paste0("^(?:", opts$pattern, ")"),
+      x = string,
+      fixed = FALSE
+    )
+  }
+
+  if (negate) {
+    out <- !out
+  }
+  out
+}
+
+# nse_funcs entry for str_ends(). Patterns flagged as fixed are delegated to
+# nse_funcs$endsWith; all others are matched as regular expressions through
+# nse_funcs$grepl. `negate = TRUE` inverts the result.
+nse_funcs$str_ends <- function(string, pattern, negate = FALSE) {
+  opts <- get_stringr_pattern_options(enexpr(pattern))
+  if (opts$fixed) {
+    out <- nse_funcs$endsWith(x = string, suffix = opts$pattern)
+  } else {
+    # Group the pattern before anchoring so "$" applies to every alternative:
+    # "a|b$" would match an "a" anywhere, whereas "(?:a|b)$" does not.
+    out <- nse_funcs$grepl(
+      pattern = paste0("(?:", opts$pattern, ")$"),
+      x = string,
+      fixed = FALSE
+    )
+  }
+
+  if (negate) {
+    out <- !out
+  }
+  out
+}
+
 # String function helpers
 
 # format `pattern` as needed for case insensitivity and literal matching by RE2
diff --git a/r/tests/testthat/test-dplyr-funcs-string.R b/r/tests/testthat/test-dplyr-funcs-string.R
index 1a226eff25a..dd59b5ac55d 100644
--- a/r/tests/testthat/test-dplyr-funcs-string.R
+++ b/r/tests/testthat/test-dplyr-funcs-string.R
@@ -1248,3 +1248,107 @@ test_that("str_sub", {
     "`end` must be length 1 - other lengths are not supported in Arrow"
   )
 })
+
+test_that("str_starts, str_ends, startsWith, endsWith", {
+  df <- tibble(x = c("Foo", "bar", "baz", "qux"))
+
+  expect_dplyr_equal(
+    input %>%
+      filter(str_starts(x, "b.*")) %>%
+      collect(),
+    df
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      filter(str_starts(x, "b.*", negate = TRUE)) %>%
+      collect(),
+    df
+  )
+
+  # alternation: the start anchor must apply to every alternative
+  expect_dplyr_equal(
+    input %>%
+      filter(str_starts(x, "F|a")) %>%
+      collect(),
+    df
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      filter(str_starts(x, fixed("b.*"))) %>%
+      collect(),
+    df
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      filter(str_starts(x, fixed("b"))) %>%
+      collect(),
+    df
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      filter(str_ends(x, "r")) %>%
+      collect(),
+    df
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      filter(str_ends(x, "r", negate = TRUE)) %>%
+      collect(),
+    df
+  )
+
+  # alternation: the end anchor must apply to every alternative
+  expect_dplyr_equal(
+    input %>%
+      filter(str_ends(x, "a|x")) %>%
+      collect(),
+    df
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      filter(str_ends(x, fixed("r$"))) %>%
+      collect(),
+    df
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      filter(str_ends(x, fixed("r"))) %>%
+      collect(),
+    df
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      filter(startsWith(x, "b")) %>%
+      collect(),
+    df
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      filter(endsWith(x, "r")) %>%
+      collect(),
+    df
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      filter(startsWith(x, "b.*")) %>%
+      collect(),
+    df
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      filter(endsWith(x, "r$")) %>%
+      collect(),
+    df
+  )
+})