From 9dadba23c00164b9d97a6f5f8e4c9768336fecb4 Mon Sep 17 00:00:00 2001 From: Ian Cook Date: Tue, 6 Apr 2021 18:10:05 -0400 Subject: [PATCH 1/7] Do assignment of stringr modifiers in a separate function --- r/R/dplyr.R | 96 ++++++++++++++++++++++++++++++++++------------------- 1 file changed, 61 insertions(+), 35 deletions(-) diff --git a/r/R/dplyr.R b/r/R/dplyr.R index 7c2ef4889ad..796aeebcf9f 100644 --- a/r/R/dplyr.R +++ b/r/R/dplyr.R @@ -474,42 +474,14 @@ arrow_r_string_replace_function <- function(FUN, max_replacements) { arrow_stringr_string_replace_function <- function(FUN, max_replacements) { function(string, pattern, replacement) { - # Assign stringr pattern modifier functions locally - fixed <- function(pattern, ignore_case = FALSE, ...) { - check_dots(...) - list(pattern = pattern, fixed = TRUE, ignore_case = ignore_case) - } - regex <- function(pattern, ignore_case = FALSE, ...) { - check_dots(...) - list(pattern = pattern, fixed = FALSE, ignore_case = ignore_case) - } - coll <- boundary <- function(...) { - stop( - "Pattern modifier `", - match.call()[[1]], - "()` is not supported in Arrow", - call. = FALSE - ) - } - check_dots <- function(...) { - dots <- list(...) - if (length(dots)) { - warning( - "Ignoring pattern modifier ", - ngettext(length(dots), "argument ", "arguments "), - "not supported in Arrow: ", - oxford_paste(names(dots)), - call. 
= FALSE - ) - } - } - ensure_opts <- function(opts) { - if (is.character(opts)) { - opts <- list(pattern = opts, fixed = TRUE, ignore_case = FALSE) - } - opts - } + + # Assign the stringr pattern modifier functions locally in this function + assign_stringr_helpers() + + # Evaluate `pattern` in this function environment where the stringr pattern + # modifier functions are defined opts <- ensure_opts(eval(enexpr(pattern))) + arrow_r_string_replace_function(FUN, max_replacements)( pattern = opts$pattern, replacement = replacement, @@ -520,6 +492,60 @@ arrow_stringr_string_replace_function <- function(FUN, max_replacements) { } } +# this function assigns definitions for the stringr pattern modifier functions +# (fixed, regex, etc.) plus a required helper function (ensure_opts) in the +# calling function's environment, where they are required to evaluate `pattern` +assign_stringr_helpers <- function() { + check_dots <- function(...) { + dots <- list(...) + if (length(dots)) { + warning( + "Ignoring pattern modifier ", + ngettext(length(dots), "argument ", "arguments "), + "not supported in Arrow: ", + oxford_paste(names(dots)), + call. = FALSE + ) + } + } + assign( + "fixed", + function(pattern, ignore_case = FALSE, ...) { + check_dots(...) + list(pattern = pattern, fixed = TRUE, ignore_case = ignore_case) + }, + parent.frame() + ) + assign( + "regex", + function(pattern, ignore_case = FALSE, ...) { + check_dots(...) + list(pattern = pattern, fixed = FALSE, ignore_case = ignore_case) + }, + parent.frame() + ) + unsup <- function(...) { + stop( + "Pattern modifier `", + match.call()[[1]], + "()` is not supported in Arrow", + call. = FALSE + ) + } + assign("coll", unsup, parent.frame()) + assign("boundary", unsup, parent.frame()) + assign( + "ensure_opts", + function(opts) { + if (is.character(opts)) { + opts <- list(pattern = opts, fixed = TRUE, ignore_case = FALSE) + } + opts + }, + parent.frame() + ) +} + # We'll populate these at package load time. 
dplyr_functions <- NULL init_env <- function () { From 6e57af9ee20fbf39b7257576b7d2a98811e6c8b7 Mon Sep 17 00:00:00 2001 From: Ian Cook Date: Tue, 6 Apr 2021 18:11:42 -0400 Subject: [PATCH 2/7] Implement grepl() translation --- r/R/dplyr.R | 21 +++++++++++++++++++++ r/src/compute.cpp | 5 +++++ 2 files changed, 26 insertions(+) diff --git a/r/R/dplyr.R b/r/R/dplyr.R index 796aeebcf9f..4da50cb5bc7 100644 --- a/r/R/dplyr.R +++ b/r/R/dplyr.R @@ -422,6 +422,7 @@ build_function_list <- function(FUN) { both = FUN("utf8_trim_whitespace", string) ) }, + grepl = arrow_r_string_match_function(FUN), sub = arrow_r_string_replace_function(FUN, 1L), gsub = arrow_r_string_replace_function(FUN, -1L), str_replace = arrow_stringr_string_replace_function(FUN, 1L), @@ -438,6 +439,26 @@ build_function_list <- function(FUN) { ) } +arrow_r_string_match_function <- function(FUN) { + function(pattern, x, ignore.case = FALSE, fixed = FALSE) { + if (ignore.case) { + # see the comments in the definition of `arrow_r_string_replace_function` + # below for an explanation of how this handles `ignore.case` and `fixed` + if (fixed) { + pattern <- gsub("\\E", "\\e", pattern, fixed = TRUE) + pattern <- paste0("(?i)\\Q", pattern, "\\E") + } else { + pattern <- paste0("(?i)", pattern) + } + } + FUN( + ifelse(fixed && !ignore.case, "match_substring", "match_substring_regex"), + x, + options = list(pattern = pattern) + ) + } +} + arrow_r_string_replace_function <- function(FUN, max_replacements) { function(pattern, replacement, x, ignore.case = FALSE, fixed = FALSE) { if (ignore.case) { diff --git a/r/src/compute.cpp b/r/src/compute.cpp index 9600eb0d621..f33153069f2 100644 --- a/r/src/compute.cpp +++ b/r/src/compute.cpp @@ -206,6 +206,11 @@ std::shared_ptr make_compute_options( return make_cast_options(options); } + if (func_name == "match_substring" || func_name == "match_substring_regex") { + using Options = arrow::compute::MatchSubstringOptions; + return 
std::make_shared<Options>(cpp11::as_cpp<std::string>(options["pattern"])); + } + if (func_name == "replace_substring" || func_name == "replace_substring_regex") { using Options = arrow::compute::ReplaceSubstringOptions; int64_t max_replacements = -1; From f3a8b52d6c520e5310089b6c8d94e2e3066bf168 Mon Sep 17 00:00:00 2001 From: Ian Cook Date: Tue, 6 Apr 2021 18:12:08 -0400 Subject: [PATCH 3/7] Implement str_detect() translation --- r/R/dplyr.R | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/r/R/dplyr.R b/r/R/dplyr.R index 4da50cb5bc7..f5ab3e45df3 100644 --- a/r/R/dplyr.R +++ b/r/R/dplyr.R @@ -423,6 +423,7 @@ build_function_list <- function(FUN) { ) }, grepl = arrow_r_string_match_function(FUN), + str_detect = arrow_stringr_string_match_function(FUN), sub = arrow_r_string_replace_function(FUN, 1L), gsub = arrow_r_string_replace_function(FUN, -1L), str_replace = arrow_stringr_string_replace_function(FUN, 1L), @@ -459,6 +460,28 @@ arrow_r_string_match_function <- function(FUN) { } } +arrow_stringr_string_match_function <- function(FUN) { + function(string, pattern, negate = FALSE) { + browser() + + # Assign the stringr pattern modifier functions locally in this function + assign_stringr_helpers() + + # Evaluate `pattern` in this function environment where the stringr pattern + # modifier functions are defined + opts <- ensure_opts(eval(enexpr(pattern))) + + out <- arrow_r_string_match_function(FUN)( + pattern = opts$pattern, + x = string, + ignore.case = opts$ignore_case, + fixed = opts$fixed + ) + if (negate) out <- FUN("invert", out) + out + } +} + arrow_r_string_replace_function <- function(FUN, max_replacements) { function(pattern, replacement, x, ignore.case = FALSE, fixed = FALSE) { if (ignore.case) { From 5bf0cf4148d2c572b00ca1e998d5a0590d4e5e5d Mon Sep 17 00:00:00 2001 From: Ian Cook Date: Tue, 6 Apr 2021 19:37:02 -0400 Subject: [PATCH 4/7] Simplify handling of stringr pattern modifier functions --- r/R/dplyr.R | 81
+++++++++++++++++------------------------------------ 1 file changed, 26 insertions(+), 55 deletions(-) diff --git a/r/R/dplyr.R b/r/R/dplyr.R index f5ab3e45df3..b064f2039e1 100644 --- a/r/R/dplyr.R +++ b/r/R/dplyr.R @@ -462,15 +462,7 @@ arrow_r_string_match_function <- function(FUN) { arrow_stringr_string_match_function <- function(FUN) { function(string, pattern, negate = FALSE) { - browser() - - # Assign the stringr pattern modifier functions locally in this function - assign_stringr_helpers() - - # Evaluate `pattern` in this function environment where the stringr pattern - # modifier functions are defined - opts <- ensure_opts(eval(enexpr(pattern))) - + opts <- get_stringr_pattern_options(pattern) out <- arrow_r_string_match_function(FUN)( pattern = opts$pattern, x = string, @@ -518,14 +510,7 @@ arrow_r_string_replace_function <- function(FUN, max_replacements) { arrow_stringr_string_replace_function <- function(FUN, max_replacements) { function(string, pattern, replacement) { - - # Assign the stringr pattern modifier functions locally in this function - assign_stringr_helpers() - - # Evaluate `pattern` in this function environment where the stringr pattern - # modifier functions are defined - opts <- ensure_opts(eval(enexpr(pattern))) - + opts <- get_stringr_pattern_options(pattern) arrow_r_string_replace_function(FUN, max_replacements)( pattern = opts$pattern, replacement = replacement, @@ -537,9 +522,24 @@ arrow_stringr_string_replace_function <- function(FUN, max_replacements) { } # this function assigns definitions for the stringr pattern modifier functions -# (fixed, regex, etc.) plus a required helper function (ensure_opts) in the -# calling function's environment, where they are required to evaluate `pattern` -assign_stringr_helpers <- function() { +# functions (fixed, regex, etc.) in itself, and uses them to evaluate `pattern` +get_stringr_pattern_options <- function(pattern) { + fixed <- function(pattern, ignore_case = FALSE, ...) { + check_dots(...) 
+ list(pattern = pattern, fixed = TRUE, ignore_case = ignore_case) + } + regex <- function(pattern, ignore_case = FALSE, ...) { + check_dots(...) + list(pattern = pattern, fixed = FALSE, ignore_case = ignore_case) + } + coll <- boundary <- function(...) { + stop( + "Pattern modifier `", + match.call()[[1]], + "()` is not supported in Arrow", + call. = FALSE + ) + } check_dots <- function(...) { dots <- list(...) if (length(dots)) { @@ -552,42 +552,13 @@ assign_stringr_helpers <- function() { ) } } - assign( - "fixed", - function(pattern, ignore_case = FALSE, ...) { - check_dots(...) - list(pattern = pattern, fixed = TRUE, ignore_case = ignore_case) - }, - parent.frame() - ) - assign( - "regex", - function(pattern, ignore_case = FALSE, ...) { - check_dots(...) - list(pattern = pattern, fixed = FALSE, ignore_case = ignore_case) - }, - parent.frame() - ) - unsup <- function(...) { - stop( - "Pattern modifier `", - match.call()[[1]], - "()` is not supported in Arrow", - call. = FALSE - ) + ensure_opts <- function(opts) { + if (is.character(opts)) { + opts <- list(pattern = opts, fixed = TRUE, ignore_case = FALSE) + } + opts } - assign("coll", unsup, parent.frame()) - assign("boundary", unsup, parent.frame()) - assign( - "ensure_opts", - function(opts) { - if (is.character(opts)) { - opts <- list(pattern = opts, fixed = TRUE, ignore_case = FALSE) - } - opts - }, - parent.frame() - ) + ensure_opts(eval(enexpr(pattern))) } # We'll populate these at package load time. 
From f05321292daa48bd0a58a151819e99ec6a14bc62 Mon Sep 17 00:00:00 2001 From: Ian Cook Date: Tue, 6 Apr 2021 19:43:37 -0400 Subject: [PATCH 5/7] Simplify preparing pattern, replacement for RE2 --- r/R/dplyr.R | 68 +++++++++++++++++++++++++++-------------------------- 1 file changed, 35 insertions(+), 33 deletions(-) diff --git a/r/R/dplyr.R b/r/R/dplyr.R index b064f2039e1..e118cba5736 100644 --- a/r/R/dplyr.R +++ b/r/R/dplyr.R @@ -442,20 +442,10 @@ build_function_list <- function(FUN) { arrow_r_string_match_function <- function(FUN) { function(pattern, x, ignore.case = FALSE, fixed = FALSE) { - if (ignore.case) { - # see the comments in the definition of `arrow_r_string_replace_function` - # below for an explanation of how this handles `ignore.case` and `fixed` - if (fixed) { - pattern <- gsub("\\E", "\\e", pattern, fixed = TRUE) - pattern <- paste0("(?i)\\Q", pattern, "\\E") - } else { - pattern <- paste0("(?i)", pattern) - } - } FUN( ifelse(fixed && !ignore.case, "match_substring", "match_substring_regex"), x, - options = list(pattern = pattern) + options = list(pattern = format_string_pattern(pattern, ignore.case, fixed)) ) } } @@ -476,32 +466,12 @@ arrow_stringr_string_match_function <- function(FUN) { arrow_r_string_replace_function <- function(FUN, max_replacements) { function(pattern, replacement, x, ignore.case = FALSE, fixed = FALSE) { - if (ignore.case) { - # Prepend "(?i)" to the regex for case insensitivity - if (fixed) { - # Arrow lacks native support for case-insensitive literal string - # replacement, so we use the regular expression engine (RE2) to do this. - # https://github.com/google/re2/wiki/Syntax - # - # Everything between "\Q" and "\E" is treated as literal text. 
- # - # If the search text contains any literal "\E" strings, make them - # lowercase so they won't signal the end of the literal text: - pattern <- gsub("\\E", "\\e", pattern, fixed = TRUE) - pattern <- paste0("(?i)\\Q", pattern, "\\E") - # Escape single backslashes in the regex replacement text so they are - # interpreted as literal backslashes: - replacement <- gsub("\\", "\\\\", replacement, fixed = TRUE) - } else { - pattern <- paste0("(?i)", pattern) - } - } FUN( ifelse(fixed && !ignore.case, "replace_substring", "replace_substring_regex"), x, options = list( - pattern = pattern, - replacement = replacement, + pattern = format_string_pattern(pattern, ignore.case, fixed), + replacement = format_string_replacement(replacement, ignore.case, fixed), max_replacements = max_replacements ) ) @@ -521,6 +491,38 @@ arrow_stringr_string_replace_function <- function(FUN, max_replacements) { } } +# format `pattern` as needed for case insensitivity and literal matching by RE2 +format_string_pattern <- function(pattern, ignore.case, fixed) { + # Arrow lacks native support for case-insensitive literal string matching and + # replacement, so we use the regular expression engine (RE2) to do this. + # https://github.com/google/re2/wiki/Syntax + if (ignore.case) { + if (fixed) { + # Everything between "\Q" and "\E" is treated as literal text. + # If the search text contains any literal "\E" strings, make them + # lowercase so they won't signal the end of the literal text: + pattern <- gsub("\\E", "\\e", pattern, fixed = TRUE) + pattern <- paste0("\\Q", pattern, "\\E") + } + # Prepend "(?i)" for case-insensitive matching + pattern <- paste0("(?i)", pattern) + } + pattern +} + +# format `replacement` as needed for literal replacement by RE2 +format_string_replacement <- function(replacement, ignore.case, fixed) { + # Arrow lacks native support for case-insensitive literal string + # replacement, so we use the regular expression engine (RE2) to do this. 
+ # https://github.com/google/re2/wiki/Syntax + if (ignore.case && fixed) { + # Escape single backslashes in the regex replacement text so they are + # interpreted as literal backslashes: + replacement <- gsub("\\", "\\\\", replacement, fixed = TRUE) + } + replacement +} + # this function assigns definitions for the stringr pattern modifier functions # functions (fixed, regex, etc.) in itself, and uses them to evaluate `pattern` get_stringr_pattern_options <- function(pattern) { From a04184ba4c4e48976ada69981edab4762ba507cd Mon Sep 17 00:00:00 2001 From: Ian Cook Date: Tue, 6 Apr 2021 20:08:32 -0400 Subject: [PATCH 6/7] Move enexpr() to the right place --- r/R/dplyr.R | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/r/R/dplyr.R b/r/R/dplyr.R index e118cba5736..32211894810 100644 --- a/r/R/dplyr.R +++ b/r/R/dplyr.R @@ -452,7 +452,7 @@ arrow_r_string_match_function <- function(FUN) { arrow_stringr_string_match_function <- function(FUN) { function(string, pattern, negate = FALSE) { - opts <- get_stringr_pattern_options(pattern) + opts <- get_stringr_pattern_options(enexpr(pattern)) out <- arrow_r_string_match_function(FUN)( pattern = opts$pattern, x = string, @@ -480,7 +480,7 @@ arrow_r_string_replace_function <- function(FUN, max_replacements) { arrow_stringr_string_replace_function <- function(FUN, max_replacements) { function(string, pattern, replacement) { - opts <- get_stringr_pattern_options(pattern) + opts <- get_stringr_pattern_options(enexpr(pattern)) arrow_r_string_replace_function(FUN, max_replacements)( pattern = opts$pattern, replacement = replacement, @@ -524,7 +524,8 @@ format_string_replacement <- function(replacement, ignore.case, fixed) { } # this function assigns definitions for the stringr pattern modifier functions -# functions (fixed, regex, etc.) in itself, and uses them to evaluate `pattern` +# (fixed, regex, etc.) 
in itself, and uses them to evaluate the quoted +# expression `pattern` get_stringr_pattern_options <- function(pattern) { fixed <- function(pattern, ignore_case = FALSE, ...) { check_dots(...) @@ -560,7 +561,7 @@ get_stringr_pattern_options <- function(pattern) { } opts } - ensure_opts(eval(enexpr(pattern))) + ensure_opts(eval(pattern)) } # We'll populate these at package load time. From 191aa2f9131fdac04bc3181dd9fb538da41fd84f Mon Sep 17 00:00:00 2001 From: Ian Cook Date: Tue, 6 Apr 2021 22:24:44 -0400 Subject: [PATCH 7/7] Add tests --- .../testthat/test-dplyr-string-functions.R | 166 ++++++++++++++++-- 1 file changed, 155 insertions(+), 11 deletions(-) diff --git a/r/tests/testthat/test-dplyr-string-functions.R b/r/tests/testthat/test-dplyr-string-functions.R index adbff443e2e..5faf2436f55 100644 --- a/r/tests/testthat/test-dplyr-string-functions.R +++ b/r/tests/testthat/test-dplyr-string-functions.R @@ -20,9 +20,18 @@ skip_if_not_available("utf8proc") library(dplyr) library(stringr) -test_that("sub and gsub with ignore.case = FALSE and fixed = TRUE", { +test_that("grepl with ignore.case = FALSE and fixed = TRUE", { df <- tibble(x = c("Foo", "bar")) + expect_dplyr_equal( + input %>% + filter(grepl("o", x, fixed = TRUE)) %>% + collect(), + df + ) +}) +test_that("sub and gsub with ignore.case = FALSE and fixed = TRUE", { + df <- tibble(x = c("Foo", "bar")) expect_dplyr_equal( input %>% transmute(x = sub("Foo", "baz", x, fixed = TRUE)) %>% @@ -35,12 +44,109 @@ test_that("sub and gsub with ignore.case = FALSE and fixed = TRUE", { collect(), df ) - }) # many of the remainder of these tests require RE2 skip_if_not_available("re2") +test_that("grepl", { + df <- tibble(x = c("Foo", "bar")) + + for(fixed in c(TRUE, FALSE)) { + + expect_dplyr_equal( + input %>% + filter(grepl("Foo", x, fixed = fixed)) %>% + collect(), + df + ) + expect_dplyr_equal( + input %>% + transmute(x = grepl("^B.+", x, ignore.case = FALSE, fixed = fixed)) %>% + collect(), + df + ) + 
expect_dplyr_equal( + input %>% + filter(grepl("Foo", x, ignore.case = FALSE, fixed = fixed)) %>% + collect(), + df + ) + + } + +}) + +test_that("grepl with ignore.case = TRUE and fixed = TRUE", { + df <- tibble(x = c("Foo", "bar")) + + # base::grepl() ignores ignore.case = TRUE with a warning when fixed = TRUE, + # so we can't use expect_dplyr_equal() for these tests + expect_equal( + df %>% + Table$create() %>% + filter(grepl("O", x, ignore.case = TRUE, fixed = TRUE)) %>% + collect(), + tibble(x = "Foo") + ) + expect_equal( + df %>% + Table$create() %>% + filter(x = grepl("^B.+", x, ignore.case = TRUE, fixed = TRUE)) %>% + collect(), + tibble(x = character(0)) + ) + +}) + +test_that("str_detect", { + df <- tibble(x = c("Foo", "bar")) + + expect_dplyr_equal( + input %>% + filter(str_detect(x, regex("^F"))) %>% + collect(), + df + ) + expect_dplyr_equal( + input %>% + transmute(x = str_detect(x, regex("^f[A-Z]{2}", ignore_case = TRUE))) %>% + collect(), + df + ) + expect_dplyr_equal( + input %>% + transmute(x = str_detect(x, regex("^f[A-Z]{2}", ignore_case = TRUE), negate = TRUE)) %>% + collect(), + df + ) + expect_dplyr_equal( + input %>% + filter(str_detect(x, fixed("o"))) %>% + collect(), + df + ) + expect_dplyr_equal( + input %>% + filter(str_detect(x, fixed("O"))) %>% + collect(), + df + ) + expect_dplyr_equal( + input %>% + filter(str_detect(x, fixed("O", ignore_case = TRUE))) %>% + collect(), + df + ) + expect_dplyr_equal( + input %>% + filter(str_detect(x, fixed("O", ignore_case = TRUE), negate = TRUE)) %>% + collect(), + df + ) + +}) + test_that("sub and gsub", { df <- tibble(x = c("Foo", "bar")) @@ -100,8 +206,6 @@ test_that("sub and gsub with ignore.case = TRUE and fixed = TRUE", { test_that("str_replace and str_replace_all", { df <- tibble(x = c("Foo", "bar")) - library(stringr) - expect_dplyr_equal( input %>% transmute(x = str_replace_all(x, regex("^F"), "baz")) %>% @@ -135,7 +239,19 @@ test_that("str_replace and str_replace_all", { }) 
-test_that("backreferences", { +test_that("backreferences in pattern", { + skip("RE2 does not support backreferences in pattern (https://github.com/google/re2/issues/101)") + df <- tibble(x = c("Foo", "bar")) + + expect_dplyr_equal( + input %>% + filter(str_detect(x, regex("F([aeiou])\\1"))) %>% + collect(), + df + ) +}) + +test_that("backreferences (substitutions) in replacement", { df <- tibble(x = c("Foo", "bar")) expect_dplyr_equal( @@ -155,14 +271,27 @@ test_that("backreferences", { collect(), df ) + expect_dplyr_equal( + input %>% + transmute(x = str_replace(x, regex("^(\\w)o(.*)", ignore_case = TRUE), "\\1\\2p")) %>% + collect(), + df + ) }) test_that("edge cases", { - # in case-insensitive fixed replace, test that "\\E" in the search string and - # backslashes in the replacement string are interpreted literally. - # this test does not use expect_dplyr_equal() because base::sub() does not - # support ignore.case = TRUE when fixed = TRUE. + # in case-insensitive fixed match/replace, test that "\\E" in the search + # string and backslashes in the replacement string are interpreted literally. + # this test does not use expect_dplyr_equal() because base::sub() and + # base::grepl() do not support ignore.case = TRUE when fixed = TRUE. 
+ expect_equal( + tibble(x = c("\\Q\\e\\D")) %>% + Table$create() %>% + filter(grepl("\\E", x, ignore.case = TRUE, fixed = TRUE)) %>% + collect(), + tibble(x = c("\\Q\\e\\D")) + ) expect_equal( tibble(x = c("\\Q\\e\\D")) %>% Table$create() %>% @@ -172,7 +301,13 @@ test_that("edge cases", { ) # test that a user's "(?i)" prefix does not break the "(?i)" prefix that's - # added in case-insensitive regex replace + # added in case-insensitive regex match/replace + expect_dplyr_equal( + input %>% + filter(grepl("(?i)^[abc]{3}$", x, ignore.case = TRUE, fixed = FALSE)) %>% + collect(), + tibble(x = c("ABC")) + ) expect_dplyr_equal( input %>% transmute(x = sub("(?i)^[abc]{3}$", "123", x, ignore.case = TRUE, fixed = FALSE)) %>% @@ -185,8 +320,15 @@ test_that("edge cases", { test_that("errors and warnings", { df <- tibble(x = c("Foo", "bar")) - # This condition generates an error, but abandon_ship() catches the error, + # These conditions generate an error, but abandon_ship() catches the error, # issues a warning, and pulls the data into R + expect_warning( + df %>% + Table$create() %>% + filter(str_detect(x, boundary(type = "character"))) %>% + collect(), + "not implemented" + ) expect_warning( df %>% Table$create() %>% @@ -194,6 +336,8 @@ test_that("errors and warnings", { collect(), "not supported" ) + + # This condition generates a warning expect_warning( df %>% Table$create() %>%