From a3a38967c4636c6b04775a922ecad6db639c48aa Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Thu, 24 Jun 2021 07:30:03 +0100 Subject: [PATCH 1/6] Add str_like and associated tests --- r/R/dplyr-functions.R | 8 +++ r/src/compute.cpp | 2 +- .../testthat/test-dplyr-string-functions.R | 65 +++++++++++++++++++ 3 files changed, 74 insertions(+), 1 deletion(-) diff --git a/r/R/dplyr-functions.R b/r/R/dplyr-functions.R index 1cf6fabebee..7356c469eb1 100644 --- a/r/R/dplyr-functions.R +++ b/r/R/dplyr-functions.R @@ -303,6 +303,14 @@ nse_funcs$str_detect <- function(string, pattern, negate = FALSE) { out } +nse_funcs$str_like <- function(string, pattern, ignore_case = TRUE) { + Expression$create( + "match_like", + string, + options = list(pattern = pattern, ignore_case = ignore_case) + ) +} + # Encapsulate some common logic for sub/gsub/str_replace/str_replace_all arrow_r_string_replace_function <- function(max_replacements) { function(pattern, replacement, x, ignore.case = FALSE, fixed = FALSE) { diff --git a/r/src/compute.cpp b/r/src/compute.cpp index 3d322ab6c71..2f7c3d1aa53 100644 --- a/r/src/compute.cpp +++ b/r/src/compute.cpp @@ -232,7 +232,7 @@ std::shared_ptr make_compute_options( return out; } - if (func_name == "match_substring" || func_name == "match_substring_regex") { + if (func_name == "match_substring" || func_name == "match_substring_regex" || func_name == "match_like") { using Options = arrow::compute::MatchSubstringOptions; bool ignore_case = false; if (!Rf_isNull(options["ignore_case"])) { diff --git a/r/tests/testthat/test-dplyr-string-functions.R b/r/tests/testthat/test-dplyr-string-functions.R index 4afb88e5732..26eb5fdcc9a 100644 --- a/r/tests/testthat/test-dplyr-string-functions.R +++ b/r/tests/testthat/test-dplyr-string-functions.R @@ -724,3 +724,68 @@ test_that("errors in strptime", { 'Time zone argument not supported by Arrow' ) }) + +test_that("str_like", { + + df <- tibble(x = c("Foo and bar", "baz and qux and quux")) + + # This will give an error until a new version of stringr with str_like has been released + expect_error( + expect_dplyr_equal( + input %>% + mutate(x = str_like(x, "%baz%")) %>% + collect(), + df + ), + 'could not find function "str_like"' + ) + + # After new version of stringr with str_like has been released, update all these + # tests to use expect_dplyr_equal + + # No match - entire string + expect_equivalent( + df %>% + Table$create() %>% + mutate(x = str_like(x, "baz")) %>% + collect(), + tibble(x = c(FALSE, FALSE)) + ) + + # Match - entire string + expect_equivalent( + df %>% + Table$create() %>% + mutate(x = str_like(x, "Foo and bar")) %>% + collect(), + tibble(x = c(TRUE, FALSE)) + ) + + # Wildcard + expect_equivalent( + df %>% + Table$create() %>% + mutate(x = str_like(x, "f%", ignore_case = TRUE)) %>% + collect(), + tibble(x = c(TRUE, FALSE)) + ) + + # Ignore case + expect_equivalent( + df %>% + Table$create() %>% + mutate(x = str_like(x, "f%", ignore_case = FALSE)) %>% + collect(), + tibble(x = c(FALSE, FALSE)) + ) + + # Single character + expect_equivalent( + df %>% + Table$create() %>% + mutate(x = str_like(x, "_a%")) %>% + collect(), + tibble(x = c(FALSE, TRUE)) + ) + +}) From 9a06ed600fd0525a6ac0fe8b11ed0b28268bf993 Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Thu, 24 Jun 2021 10:07:01 +0100 Subject: [PATCH 2/6] Fix weird language error --- .../testthat/test-dplyr-string-functions.R | 21 +++++++++++-------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/r/tests/testthat/test-dplyr-string-functions.R b/r/tests/testthat/test-dplyr-string-functions.R index 26eb5fdcc9a..a66872d680d 100644 --- a/r/tests/testthat/test-dplyr-string-functions.R +++ b/r/tests/testthat/test-dplyr-string-functions.R @@ -730,15 +730,18 @@ test_that("str_like", { df <- tibble(x = c("Foo and bar", "baz and qux and quux")) # This will give an error until a new version of stringr with str_like has been released - expect_error( - expect_dplyr_equal( - input %>% - mutate(x = str_like(x, "%baz%")) %>% - collect(), - df - ), - 'could not find function "str_like"' - ) + with_language("en", { + expect_error( + expect_dplyr_equal( + input %>% + mutate(x = str_like(x, "%baz%")) %>% + collect(), + df + ), + 'could not find function "str_like"' + ) + }) + # After new version of stringr with str_like has been released, update all these # tests to use expect_dplyr_equal From 8168d3952279b75cc21755cc7a7084cdd837f644 Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Thu, 24 Jun 2021 10:16:41 +0100 Subject: [PATCH 3/6] Fix linting --- r/src/compute.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/r/src/compute.cpp b/r/src/compute.cpp index 2f7c3d1aa53..01bc684c6df 100644 --- a/r/src/compute.cpp +++ b/r/src/compute.cpp @@ -232,7 +232,8 @@ std::shared_ptr make_compute_options( return out; } - if (func_name == "match_substring" || func_name == "match_substring_regex" || func_name == "match_like") { + if (func_name == "match_substring" || func_name == "match_substring_regex" || + func_name == "match_like") { using Options = arrow::compute::MatchSubstringOptions; bool ignore_case = false; if (!Rf_isNull(options["ignore_case"])) { From a88819cb4857584f5e43641f3521156f1e555a35 Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Thu, 24 Jun 2021 12:23:03 +0100 Subject: [PATCH 4/6] Refactor test --- r/tests/testthat/test-dplyr-string-functions.R | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/r/tests/testthat/test-dplyr-string-functions.R b/r/tests/testthat/test-dplyr-string-functions.R index a66872d680d..2c766750dae 100644 --- a/r/tests/testthat/test-dplyr-string-functions.R +++ b/r/tests/testthat/test-dplyr-string-functions.R @@ -730,18 +730,14 @@ test_that("str_like", { df <- tibble(x = c("Foo and bar", "baz and qux and quux")) # This will give an error until a new version of stringr with str_like has been released - with_language("en", { - expect_error( + expect_error( expect_dplyr_equal( input %>% mutate(x = str_like(x, "%baz%")) %>% collect(), - df - ), - 'could not find function "str_like"' - ) - }) - + df, + ) + ) # After new version of stringr with str_like has been released, update all these # tests to use expect_dplyr_equal From 2188a48ce2d5a08a85233b2145be77ca377c5642 Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Thu, 24 Jun 2021 14:12:18 +0100 Subject: [PATCH 5/6] Move test to skip it --- .../testthat/test-dplyr-string-functions.R | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/r/tests/testthat/test-dplyr-string-functions.R b/r/tests/testthat/test-dplyr-string-functions.R index 2c766750dae..5952e5a9b8e 100644 --- a/r/tests/testthat/test-dplyr-string-functions.R +++ b/r/tests/testthat/test-dplyr-string-functions.R @@ -729,16 +729,6 @@ test_that("str_like", { df <- tibble(x = c("Foo and bar", "baz and qux and quux")) - # This will give an error until a new version of stringr with str_like has been released - expect_error( - expect_dplyr_equal( - input %>% - mutate(x = str_like(x, "%baz%")) %>% - collect(), - df, - ) - ) - # After new version of stringr with str_like has been released, update all these # tests to use expect_dplyr_equal @@ -787,4 +777,13 @@ test_that("str_like", { tibble(x = c(FALSE, TRUE)) ) + # This will give an error until a new version of stringr with str_like has been released + skip("Test will fail until stringr > 1.4.0 is release") + expect_dplyr_equal( + input %>% + mutate(x = str_like(x, "%baz%")) %>% + collect(), + df, + ) + }) From 71cfae80b0ca25675f099bdab4afa087f13fdf6c Mon Sep 17 00:00:00 2001 From: Nic Date: Thu, 24 Jun 2021 14:12:40 +0100 Subject: [PATCH 6/6] Update r/tests/testthat/test-dplyr-string-functions.R Co-authored-by: Ian Cook --- r/tests/testthat/test-dplyr-string-functions.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/r/tests/testthat/test-dplyr-string-functions.R b/r/tests/testthat/test-dplyr-string-functions.R index 5952e5a9b8e..a58a04eb109 100644 --- a/r/tests/testthat/test-dplyr-string-functions.R +++ b/r/tests/testthat/test-dplyr-string-functions.R @@ -729,8 +729,8 @@ test_that("str_like", { df <- tibble(x = c("Foo and bar", "baz and qux and quux")) - # After new version of stringr with str_like has been released, update all these - # tests to use expect_dplyr_equal + # TODO: After new version of stringr with str_like has been released, update all + # these tests to use expect_dplyr_equal # No match - entire string expect_equivalent(