From 18481383e1dc420f30f53e5025fb121ab2f40b3f Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Thu, 24 Jun 2021 06:15:19 +0100 Subject: [PATCH 1/8] Implement find_substring and appropriate tests --- r/src/compute.cpp | 2 +- .../testthat/test-dplyr-string-functions.R | 23 +++++++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/r/src/compute.cpp b/r/src/compute.cpp index 01bc684c6df..170a7d1572d 100644 --- a/r/src/compute.cpp +++ b/r/src/compute.cpp @@ -233,7 +233,7 @@ std::shared_ptr make_compute_options( } if (func_name == "match_substring" || func_name == "match_substring_regex" || - func_name == "match_like") { + func_name == "match_like" || func_name == "find_substring") { using Options = arrow::compute::MatchSubstringOptions; bool ignore_case = false; if (!Rf_isNull(options["ignore_case"])) { diff --git a/r/tests/testthat/test-dplyr-string-functions.R b/r/tests/testthat/test-dplyr-string-functions.R index 4cb07c9e39d..ec6cdaaf1d9 100644 --- a/r/tests/testthat/test-dplyr-string-functions.R +++ b/r/tests/testthat/test-dplyr-string-functions.R @@ -823,3 +823,26 @@ test_that("str_like", { df ) }) + +test_that("arrow_find_substring", { + + df <- tibble(x = c("Foo and Bar", "baz and qux and quux")) + + expect_equivalent( + df %>% + Table$create() %>% + mutate(x = arrow_find_substring(x, options = list(pattern = "b"))) %>% + collect(), + tibble(x = c(-1, 0)) + ) + + # This should no longer result in an error after ARROW-13157 is resolved + expect_error( + df %>% + Table$create() %>% + mutate(x = arrow_find_substring(x, options = list(pattern = "b", ignore_case = TRUE))) %>% + collect(), + "NotImplemented: find_substring with ignore_case" + ) + +}) From 9314f406a45b06014a8db183a69faf01f3c1b670 Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Thu, 24 Jun 2021 14:21:28 +0100 Subject: [PATCH 2/8] Swap comment for skip() --- r/tests/testthat/test-dplyr-string-functions.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/tests/testthat/test-dplyr-string-functions.R b/r/tests/testthat/test-dplyr-string-functions.R index ec6cdaaf1d9..2c32294acbb 100644 --- a/r/tests/testthat/test-dplyr-string-functions.R +++ b/r/tests/testthat/test-dplyr-string-functions.R @@ -836,7 +836,7 @@ test_that("arrow_find_substring", { tibble(x = c(-1, 0)) ) - # This should no longer result in an error after ARROW-13157 is resolved + skip("ARROW-13157: find_substring not implemented with ignore_case") expect_error( df %>% Table$create() %>% From c480af27e1db7e92a035b374951a171415c591aa Mon Sep 17 00:00:00 2001 From: Ian Cook Date: Fri, 25 Jun 2021 12:22:41 -0400 Subject: [PATCH 3/8] Update test --- .../testthat/test-dplyr-string-functions.R | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/r/tests/testthat/test-dplyr-string-functions.R b/r/tests/testthat/test-dplyr-string-functions.R index 2c32294acbb..33e9af2115e 100644 --- a/r/tests/testthat/test-dplyr-string-functions.R +++ b/r/tests/testthat/test-dplyr-string-functions.R @@ -725,6 +725,29 @@ test_that("errors in strptime", { ) }) +test_that("arrow_find_substring", { + + df <- tibble(x = c("Foo and Bar", "baz and qux and quux")) + + expect_equivalent( + df %>% + Table$create() %>% + mutate(x = arrow_find_substring(x, options = list(pattern = "b"))) %>% + collect(), + tibble(x = c(-1, 0)) + ) + + skip("ARROW-13157: find_substring not implemented with ignore_case") + expect_equivalent( + df %>% + Table$create() %>% + mutate(x = arrow_find_substring(x, options = list(pattern = "b", ignore_case = TRUE))) %>% + collect(), + tibble(x = c(8, 0)) + ) + +}) + test_that("stri_reverse and arrow_ascii_reverse functions", { df_ascii <- tibble(x = c("Foo\nand bar", "baz\tand qux and quux")) From 9f4b3d7c97e084b66a996c87b81c314885aa4b69 Mon Sep 17 00:00:00 2001 From: Ian Cook Date: Mon, 28 Jun 2021 14:32:30 -0400 Subject: [PATCH 4/8] Remove skipped test --- r/tests/testthat/test-dplyr-string-functions.R | 1 - 1 file changed, 1 deletion(-) diff --git a/r/tests/testthat/test-dplyr-string-functions.R b/r/tests/testthat/test-dplyr-string-functions.R index 33e9af2115e..ec81caf918c 100644 --- a/r/tests/testthat/test-dplyr-string-functions.R +++ b/r/tests/testthat/test-dplyr-string-functions.R @@ -737,7 +737,6 @@ test_that("arrow_find_substring", { tibble(x = c(-1, 0)) ) - skip("ARROW-13157: find_substring not implemented with ignore_case") expect_equivalent( df %>% Table$create() %>% From 6bfb3224e18e1b727accc6150257e8183d1ee4b6 Mon Sep 17 00:00:00 2001 From: Ian Cook Date: Mon, 28 Jun 2021 14:35:23 -0400 Subject: [PATCH 5/8] Remove dup test code --- .../testthat/test-dplyr-string-functions.R | 25 ------------------- 1 file changed, 25 deletions(-) diff --git a/r/tests/testthat/test-dplyr-string-functions.R b/r/tests/testthat/test-dplyr-string-functions.R index ec81caf918c..c32ef030f07 100644 --- a/r/tests/testthat/test-dplyr-string-functions.R +++ b/r/tests/testthat/test-dplyr-string-functions.R @@ -736,7 +736,6 @@ test_that("arrow_find_substring", { collect(), tibble(x = c(-1, 0)) ) - expect_equivalent( df %>% Table$create() %>% @@ -744,7 +743,6 @@ test_that("arrow_find_substring", { collect(), tibble(x = c(8, 0)) ) - }) test_that("stri_reverse and arrow_ascii_reverse functions", { @@ -845,26 +843,3 @@ test_that("str_like", { df ) }) - -test_that("arrow_find_substring", { - - df <- tibble(x = c("Foo and Bar", "baz and qux and quux")) - - expect_equivalent( - df %>% - Table$create() %>% - mutate(x = arrow_find_substring(x, options = list(pattern = "b"))) %>% - collect(), - tibble(x = c(-1, 0)) - ) - - skip("ARROW-13157: find_substring not implemented with ignore_case") - expect_error( - df %>% - Table$create() %>% - mutate(x = arrow_find_substring(x, options = list(pattern = "b", ignore_case = TRUE))) %>% - collect(), - "NotImplemented: find_substring with ignore_case" - ) - -}) From a26e59d2634a437256271bc1bfba4253d1ae5752 Mon Sep 17 00:00:00 2001 From: Ian Cook Date: Mon, 28 Jun 2021 16:03:01 -0400 Subject: [PATCH 6/8] Add binding for find_substring_regex options --- r/src/compute.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/r/src/compute.cpp b/r/src/compute.cpp index 170a7d1572d..9a05dd02859 100644 --- a/r/src/compute.cpp +++ b/r/src/compute.cpp @@ -233,7 +233,8 @@ std::shared_ptr make_compute_options( } if (func_name == "match_substring" || func_name == "match_substring_regex" || - func_name == "match_like" || func_name == "find_substring") { + func_name == "find_substring" || func_name == "find_substring_regex" || + func_name == "match_like") { using Options = arrow::compute::MatchSubstringOptions; bool ignore_case = false; if (!Rf_isNull(options["ignore_case"])) { From 41391e995689cba359b163450540ed795a25b486 Mon Sep 17 00:00:00 2001 From: Ian Cook Date: Mon, 28 Jun 2021 16:03:56 -0400 Subject: [PATCH 7/8] Add find_substring_regex tests --- .../testthat/test-dplyr-string-functions.R | 24 +++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/r/tests/testthat/test-dplyr-string-functions.R b/r/tests/testthat/test-dplyr-string-functions.R index c32ef030f07..3b88502cd68 100644 --- a/r/tests/testthat/test-dplyr-string-functions.R +++ b/r/tests/testthat/test-dplyr-string-functions.R @@ -725,8 +725,8 @@ test_that("errors in strptime", { ) }) -test_that("arrow_find_substring", { - +test_that("arrow_find_substring and arrow_find_substring_regex", { + df <- tibble(x = c("Foo and Bar", "baz and qux and quux")) expect_equivalent( @@ -743,6 +743,26 @@ test_that("arrow_find_substring", { collect(), tibble(x = c(8, 0)) ) + expect_equivalent( + df %>% + Table$create() %>% + mutate(x = arrow_find_substring_regex( + x, + options = list(pattern = "^[fb]") + )) %>% + collect(), + tibble(x = c(-1, 0)) + ) + expect_equivalent( + df %>% + Table$create() %>% + mutate(x = arrow_find_substring_regex( + x, + options = list(pattern = "[AEIOU]", ignore_case = TRUE) + )) %>% + collect(), + tibble(x = c(1, 1)) + ) }) test_that("stri_reverse and arrow_ascii_reverse functions", { From 6ea2a47b27a9f1e2314f2e77de95a828f3dee32e Mon Sep 17 00:00:00 2001 From: Ian Cook Date: Mon, 28 Jun 2021 16:04:17 -0400 Subject: [PATCH 8/8] Formatting --- r/tests/testthat/test-dplyr-string-functions.R | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/r/tests/testthat/test-dplyr-string-functions.R b/r/tests/testthat/test-dplyr-string-functions.R index 3b88502cd68..ecbe2f00f2d 100644 --- a/r/tests/testthat/test-dplyr-string-functions.R +++ b/r/tests/testthat/test-dplyr-string-functions.R @@ -728,7 +728,7 @@ test_that("errors in strptime", { test_that("arrow_find_substring and arrow_find_substring_regex", { df <- tibble(x = c("Foo and Bar", "baz and qux and quux")) - + expect_equivalent( df %>% Table$create() %>% @@ -739,7 +739,10 @@ test_that("arrow_find_substring and arrow_find_substring_regex", { expect_equivalent( df %>% Table$create() %>% - mutate(x = arrow_find_substring(x, options = list(pattern = "b", ignore_case = TRUE))) %>% + mutate(x = arrow_find_substring( + x, + options = list(pattern = "b", ignore_case = TRUE) + )) %>% collect(), tibble(x = c(8, 0)) )