Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions r/R/dplyr-functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,14 @@ nse_funcs$str_detect <- function(string, pattern, negate = FALSE) {
out
}

# Binding for str_like() inside Arrow dplyr expressions.
#
# Builds a "match_like" compute Expression over `string`, forwarding `pattern`
# and `ignore_case` as the function options. `ignore_case` defaults to TRUE,
# so matching is case-insensitive unless the caller opts out.
nse_funcs$str_like <- function(string, pattern, ignore_case = TRUE) {
  like_options <- list(pattern = pattern, ignore_case = ignore_case)
  Expression$create("match_like", string, options = like_options)
}

# Encapsulate some common logic for sub/gsub/str_replace/str_replace_all
arrow_r_string_replace_function <- function(max_replacements) {
function(pattern, replacement, x, ignore.case = FALSE, fixed = FALSE) {
Expand Down
3 changes: 2 additions & 1 deletion r/src/compute.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,8 @@ std::shared_ptr<arrow::compute::FunctionOptions> make_compute_options(
return out;
}

if (func_name == "match_substring" || func_name == "match_substring_regex") {
if (func_name == "match_substring" || func_name == "match_substring_regex" ||
func_name == "match_like") {
using Options = arrow::compute::MatchSubstringOptions;
bool ignore_case = false;
if (!Rf_isNull(options["ignore_case"])) {
Expand Down
63 changes: 63 additions & 0 deletions r/tests/testthat/test-dplyr-string-functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -724,3 +724,66 @@ test_that("errors in strptime", {
'Time zone argument not supported by Arrow'
)
})

test_that("str_like", {
  # Two rows, one starting with an upper-case letter and one entirely lower
  # case, so the cases below can tell case handling apart from wildcards.
  df <- tibble(x = c("Foo and bar", "baz and qux and quux"))

  # TODO: After new version of stringr with str_like has been released,
  # update all these tests to use expect_dplyr_equal

  # No match - a pattern without wildcards has to match the entire string,
  # so "baz" does not match the row that merely starts with "baz"
  expect_equivalent(
    df %>%
      Table$create() %>%
      mutate(x = str_like(x, "baz")) %>%
      collect(),
    tibble(x = c(FALSE, FALSE))
  )

  # Match - entire string
  expect_equivalent(
    df %>%
      Table$create() %>%
      mutate(x = str_like(x, "Foo and bar")) %>%
      collect(),
    tibble(x = c(TRUE, FALSE))
  )

  # Wildcard with ignore_case = TRUE: lower-case "f%" matches "Foo and bar"
  expect_equivalent(
    df %>%
      Table$create() %>%
      mutate(x = str_like(x, "f%", ignore_case = TRUE)) %>%
      collect(),
    tibble(x = c(TRUE, FALSE))
  )

  # Same wildcard with ignore_case = FALSE: "f%" no longer matches "Foo..."
  expect_equivalent(
    df %>%
      Table$create() %>%
      mutate(x = str_like(x, "f%", ignore_case = FALSE)) %>%
      collect(),
    tibble(x = c(FALSE, FALSE))
  )

  # Single character: "_" stands for exactly one character, so "_a%" matches
  # "baz ..." (second character "a") but not "Foo ..."
  expect_equivalent(
    df %>%
      Table$create() %>%
      mutate(x = str_like(x, "_a%")) %>%
      collect(),
    tibble(x = c(FALSE, TRUE))
  )

  # This will give an error until a new version of stringr with str_like has
  # been released, so everything below the skip() is intentionally not run yet
  skip("Test will fail until stringr > 1.4.0 is released")
  expect_dplyr_equal(
    input %>%
      mutate(x = str_like(x, "%baz%")) %>%
      collect(),
    df
  )
})