diff --git a/r/R/dplyr-functions.R b/r/R/dplyr-functions.R
index 1cf6fabebee..7356c469eb1 100644
--- a/r/R/dplyr-functions.R
+++ b/r/R/dplyr-functions.R
@@ -303,6 +303,16 @@ nse_funcs$str_detect <- function(string, pattern, negate = FALSE) {
   out
 }
 
+# Translate a str_like() call into an Arrow "match_like" expression,
+# forwarding `pattern` and `ignore_case` as the function options.
+nse_funcs$str_like <- function(string, pattern, ignore_case = TRUE) {
+  Expression$create(
+    "match_like",
+    string,
+    options = list(pattern = pattern, ignore_case = ignore_case)
+  )
+}
+
 # Encapsulate some common logic for sub/gsub/str_replace/str_replace_all
 arrow_r_string_replace_function <- function(max_replacements) {
   function(pattern, replacement, x, ignore.case = FALSE, fixed = FALSE) {
diff --git a/r/src/compute.cpp b/r/src/compute.cpp
index 3d322ab6c71..01bc684c6df 100644
--- a/r/src/compute.cpp
+++ b/r/src/compute.cpp
@@ -232,7 +232,8 @@ std::shared_ptr make_compute_options(
     return out;
   }
 
-  if (func_name == "match_substring" || func_name == "match_substring_regex") {
+  if (func_name == "match_substring" || func_name == "match_substring_regex" ||
+      func_name == "match_like") {
     using Options = arrow::compute::MatchSubstringOptions;
     bool ignore_case = false;
     if (!Rf_isNull(options["ignore_case"])) {
diff --git a/r/tests/testthat/test-dplyr-string-functions.R b/r/tests/testthat/test-dplyr-string-functions.R
index 4afb88e5732..a58a04eb109 100644
--- a/r/tests/testthat/test-dplyr-string-functions.R
+++ b/r/tests/testthat/test-dplyr-string-functions.R
@@ -724,3 +724,66 @@ test_that("errors in strptime", {
     'Time zone argument not supported by Arrow'
   )
 })
+
+test_that("str_like", {
+
+  df <- tibble(x = c("Foo and bar", "baz and qux and quux"))
+
+  # TODO: After new version of stringr with str_like has been released, update all
+  # these tests to use expect_dplyr_equal
+
+  # No match - entire string
+  expect_equivalent(
+    df %>%
+      Table$create() %>%
+      mutate(x = str_like(x, "baz")) %>%
+      collect(),
+    tibble(x = c(FALSE, FALSE))
+  )
+
+  # Match - entire string
+  expect_equivalent(
+    df %>%
+      Table$create() %>%
+      mutate(x = str_like(x, "Foo and bar")) %>%
+      collect(),
+    tibble(x = c(TRUE, FALSE))
+  )
+
+  # Wildcard, ignoring case (ignore_case = TRUE)
+  expect_equivalent(
+    df %>%
+      Table$create() %>%
+      mutate(x = str_like(x, "f%", ignore_case = TRUE)) %>%
+      collect(),
+    tibble(x = c(TRUE, FALSE))
+  )
+
+  # Wildcard, case-sensitive (ignore_case = FALSE)
+  expect_equivalent(
+    df %>%
+      Table$create() %>%
+      mutate(x = str_like(x, "f%", ignore_case = FALSE)) %>%
+      collect(),
+    tibble(x = c(FALSE, FALSE))
+  )
+
+  # Single character
+  expect_equivalent(
+    df %>%
+      Table$create() %>%
+      mutate(x = str_like(x, "_a%")) %>%
+      collect(),
+    tibble(x = c(FALSE, TRUE))
+  )
+
+  # This will give an error until a new version of stringr with str_like has been released
+  skip("Test will fail until stringr > 1.4.0 is released")
+  expect_dplyr_equal(
+    input %>%
+      mutate(x = str_like(x, "%baz%")) %>%
+      collect(),
+    df
+  )
+
+})