From c2db8a353184a59d3f2fe35e83a79cb26fb2b35e Mon Sep 17 00:00:00 2001 From: Ian Cook Date: Thu, 17 Jun 2021 01:33:23 -0400 Subject: [PATCH 01/14] Add NullHandlingBehavior enum --- r/NAMESPACE | 1 + r/R/enums.R | 6 ++++++ r/man/enums.Rd | 5 +++++ 3 files changed, 12 insertions(+) diff --git a/r/NAMESPACE b/r/NAMESPACE index f298ba905ee..ab45aa9985e 100644 --- a/r/NAMESPACE +++ b/r/NAMESPACE @@ -153,6 +153,7 @@ export(MessageReader) export(MessageType) export(MetadataVersion) export(NullEncodingBehavior) +export(NullHandlingBehavior) export(ParquetArrowReaderProperties) export(ParquetFileFormat) export(ParquetFileReader) diff --git a/r/R/enums.R b/r/R/enums.R index 4271f2ad138..8a5bf7366a9 100644 --- a/r/R/enums.R +++ b/r/R/enums.R @@ -140,3 +140,9 @@ QuantileInterpolation <- enum("QuantileInterpolation", NullEncodingBehavior <- enum("NullEncodingBehavior", ENCODE = 0L, MASK = 1L ) + +#' @export +#' @rdname enums +NullHandlingBehavior <- enum("NullHandlingBehavior", + EMIT_NULL = 0L, SKIP = 1L, REPLACE = 2L +) diff --git a/r/man/enums.Rd b/r/man/enums.Rd index b871516def8..57ec3ba115e 100644 --- a/r/man/enums.Rd +++ b/r/man/enums.Rd @@ -15,6 +15,7 @@ \alias{MetadataVersion} \alias{QuantileInterpolation} \alias{NullEncodingBehavior} +\alias{NullHandlingBehavior} \title{Arrow enums} \format{ An object of class \code{TimeUnit::type} (inherits from \code{arrow-enum}) of length 4. @@ -40,6 +41,8 @@ An object of class \code{MetadataVersion} (inherits from \code{arrow-enum}) of l An object of class \code{QuantileInterpolation} (inherits from \code{arrow-enum}) of length 5. An object of class \code{NullEncodingBehavior} (inherits from \code{arrow-enum}) of length 2. + +An object of class \code{NullHandlingBehavior} (inherits from \code{arrow-enum}) of length 3. } \usage{ TimeUnit @@ -65,6 +68,8 @@ MetadataVersion QuantileInterpolation NullEncodingBehavior + +NullHandlingBehavior } \description{ Arrow enums From 664084dedaab8247fe83e3b301dce3a95de51cc1 Mon Sep 17 00:00:00 2001 From: Ian Cook Date: Thu, 17 Jun 2021 02:04:35 -0400 Subject: [PATCH 02/14] Handle options --- r/src/compute.cpp | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/r/src/compute.cpp b/r/src/compute.cpp index eab9db54134..3d322ab6c71 100644 --- a/r/src/compute.cpp +++ b/r/src/compute.cpp @@ -218,6 +218,20 @@ std::shared_ptr make_compute_options( return make_cast_options(options); } + if (func_name == "binary_join_element_wise") { + using Options = arrow::compute::JoinOptions; + auto out = std::make_shared(Options::Defaults()); + if (!Rf_isNull(options["null_handling"])) { + out->null_handling = + cpp11::as_cpp( + options["null_handling"]); + } + if (!Rf_isNull(options["null_replacement"])) { + out->null_replacement = cpp11::as_cpp(options["null_replacement"]); + } + return out; + } + if (func_name == "match_substring" || func_name == "match_substring_regex") { using Options = arrow::compute::MatchSubstringOptions; bool ignore_case = false; From e2983af5322b3c60c1c52c746670b1b25e434370 Mon Sep 17 00:00:00 2001 From: Ian Cook Date: Thu, 17 Jun 2021 02:04:51 -0400 Subject: [PATCH 03/14] Implement paste, paste0, str_c --- r/R/dplyr-functions.R | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/r/R/dplyr-functions.R b/r/R/dplyr-functions.R index 91d1b21ad88..821f867136c 100644 --- a/r/R/dplyr-functions.R +++ b/r/R/dplyr-functions.R @@ -215,6 +215,49 @@ nse_funcs$nchar <- function(x, type = "chars", allowNA = FALSE, keepNA = NA) { } } +nse_funcs$paste <- function(..., sep = " ", collapse = NULL, recycle0 = FALSE) { + assert_that(is.null(collapse)) + arrow_string_join_function(NullHandlingBehavior$REPLACE, "NA")(..., sep) +} + +nse_funcs$paste0 <- function(..., collapse = NULL, recycle0 = FALSE) { + assert_that(is.null(collapse)) + arrow_string_join_function(NullHandlingBehavior$REPLACE, "NA")(..., "") +} + +nse_funcs$str_c <- function(..., sep = "", collapse = NULL) { + assert_that(is.null(collapse)) + arrow_string_join_function(NullHandlingBehavior$EMIT_NULL)(..., sep) +} + +arrow_string_join_function <- function(null_handling, null_replacement = NULL) { + # the binary_join_element_wise kernel takes the separator as the last argument + function(...) { + dots <- list(...) # sep is the last value in dots + for(i in seq_along(dots)) { + # handle scalar literal args, and cast all args to string for + # consistency with base::paste(), base::paste0(), and stringr::str_c() + if (!inherits(dots[[i]], "Expression")) { + assert_that(length(dots[[i]]) == 1) + # handle scalar literal NA consistent with the binary_join_element_wise + # kernel's handling of nulls in the data + if (null_handling == NullHandlingBehavior$REPLACE && is.na(dots[[i]])) { + dots[[i]] <- null_replacement + } + dots[[i]] <- Expression$scalar(as.character(dots[[i]])) + } else { + dots[[i]] <- nse_funcs$as.character(dots[[i]]) + } + } + args <- c(function_name = "binary_join_element_wise", dots) + args$options <- list( + null_handling = null_handling, + null_replacement = null_replacement + ) + do.call(Expression$create, args) + } +} + nse_funcs$str_trim <- function(string, side = c("both", "left", "right")) { side <- match.arg(side) trim_fun <- switch(side, From 655f1a081b5340cb45d8f54636bddcdfe2698b38 Mon Sep 17 00:00:00 2001 From: Ian Cook Date: Thu, 17 Jun 2021 02:10:44 -0400 Subject: [PATCH 04/14] Add missing space --- r/R/dplyr-functions.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/R/dplyr-functions.R b/r/R/dplyr-functions.R index 821f867136c..083d6e968ab 100644 --- a/r/R/dplyr-functions.R +++ b/r/R/dplyr-functions.R @@ -234,7 +234,7 @@ arrow_string_join_function <- function(null_handling, null_replacement = NULL) { # the binary_join_element_wise kernel takes the separator as the last argument function(...) { dots <- list(...) # sep is the last value in dots - for(i in seq_along(dots)) { + for (i in seq_along(dots)) { # handle scalar literal args, and cast all args to string for # consistency with base::paste(), base::paste0(), and stringr::str_c() if (!inherits(dots[[i]], "Expression")) { From f7567862be5d52da06984eba516fb6f9b91ef275 Mon Sep 17 00:00:00 2001 From: Ian Cook Date: Thu, 17 Jun 2021 18:06:26 -0400 Subject: [PATCH 05/14] Improve assertion message --- r/R/dplyr-functions.R | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/r/R/dplyr-functions.R b/r/R/dplyr-functions.R index 083d6e968ab..7faebed4f06 100644 --- a/r/R/dplyr-functions.R +++ b/r/R/dplyr-functions.R @@ -238,7 +238,10 @@ arrow_string_join_function <- function(null_handling, null_replacement = NULL) { # handle scalar literal args, and cast all args to string for # consistency with base::paste(), base::paste0(), and stringr::str_c() if (!inherits(dots[[i]], "Expression")) { - assert_that(length(dots[[i]]) == 1) + assert_that( + length(dots[[i]]) == 1, + msg = "Literal vectors of length != 1 not supported in string concatenation" + ) # handle scalar literal NA consistent with the binary_join_element_wise # kernel's handling of nulls in the data if (null_handling == NullHandlingBehavior$REPLACE && is.na(dots[[i]])) { From 822b2a0de32840ee00e241ae9436d607a5b6fc20 Mon Sep 17 00:00:00 2001 From: Ian Cook Date: Thu, 17 Jun 2021 18:08:28 -0400 Subject: [PATCH 06/14] Add tests --- .../testthat/test-dplyr-string-functions.R | 146 ++++++++++++++++++ 1 file changed, 146 insertions(+) diff --git a/r/tests/testthat/test-dplyr-string-functions.R b/r/tests/testthat/test-dplyr-string-functions.R index ea27aa14777..5e9b822f211 100644 --- a/r/tests/testthat/test-dplyr-string-functions.R +++ b/r/tests/testthat/test-dplyr-string-functions.R @@ -21,6 +21,152 @@ skip_if_not_available("utf8proc") library(dplyr) library(stringr) +test_that("paste, paste0, and str_c", { + df <- tibble( + v = c("α", "β", "γ"), + w = c("a", "b", "c"), + x = c("d", NA_character_, "f"), + y = c(NA_character_, "h", "i"), + z = c(1.1, 2.2, NA) + ) + + # non-ASCII characters + expect_dplyr_equal( + input %>% + transmute(paste(v, w)) %>% + collect(), + df + ) + expect_dplyr_equal( + input %>% + transmute(paste(v, w, sep = "፨")) %>% + collect(), + df + ) + expect_dplyr_equal( + input %>% + transmute(str_c(x, y, sep = "〷")) %>% + collect(), + df + ) + + # NAs in data + expect_dplyr_equal( + input %>% + transmute(paste(x, y)) %>% + collect(), + df + ) + expect_dplyr_equal( + input %>% + transmute(paste(x, y, sep = "-")) %>% + collect(), + df + ) + expect_dplyr_equal( + input %>% + transmute(str_c(x, y)) %>% + collect(), + df + ) + + # non-character column in dots + expect_dplyr_equal( + input %>% + transmute(paste0(x, y, z)) %>% + collect(), + df + ) + + # literal string in dots + expect_dplyr_equal( + input %>% + transmute(paste(x, "foo", y)) %>% + collect(), + df + ) + + # expressions in dots + expect_dplyr_equal( + input %>% + transmute(paste0(x, toupper(y), as.character(z))) %>% + collect(), + df + ) + + # sep passed in dots to paste0 (which doesn't take a sep argument) + expect_dplyr_equal( + input %>% + transmute(paste0(x, y, sep = "-")) %>% + collect(), + df + ) + + # sep is a column reference + expect_dplyr_equal( + input %>% + transmute(paste(x, y, sep = w)) %>% + collect(), + df + ) + expect_dplyr_equal( + input %>% + transmute(str_c(x, y, sep = w)) %>% + collect(), + df + ) + + # known differences + + # arrow allows the separator to be an array + expect_equal( + df %>% + Table$create() %>% + transmute(result = paste(x, y, sep = w)) %>% + collect(), + df %>% + transmute(result = paste(x, w, y, sep = "")) + ) + + # arrow allows the separator to be scalar literal NA + expect_equal( + df %>% + Table$create() %>% + transmute(result = paste(x, y, sep = NA_character_)) %>% + collect(), + df %>% + transmute(result = paste(x, NA_character_, y, sep = "")) + ) + + # known errors + + # collapse argument not supported + x <- Expression$field_ref("x") + y <- Expression$field_ref("x") + expect_failure( + nse_funcs$paste(x, y, collapse = ""), + "collapse" + ) + expect_failure( + nse_funcs$paste0(x, y, collapse = ""), + "collapse" + ) + expect_failure( + nse_funcs$str_c(x, y, collapse = ""), + "collapse" + ) + + # literal vectors of length != 1 not supported + expect_failure( + nse_funcs$paste(x, character(0), y), + "Literal vectors of length != 1 not supported in string concatenation" + ) + expect_failure( + nse_funcs$paste(x, c(",", ";"), y), + "Literal vectors of length != 1 not supported in string concatenation" + ) +}) + test_that("grepl with ignore.case = FALSE and fixed = TRUE", { df <- tibble(x = c("Foo", "bar")) expect_dplyr_equal( From 404ca602ed5264347bdb60da925be41b57a5b9e7 Mon Sep 17 00:00:00 2001 From: Ian Cook Date: Thu, 17 Jun 2021 18:18:25 -0400 Subject: [PATCH 07/14] Fix test errors --- .../testthat/test-dplyr-string-functions.R | 26 +++++-------------- 1 file changed, 6 insertions(+), 20 deletions(-) diff --git a/r/tests/testthat/test-dplyr-string-functions.R b/r/tests/testthat/test-dplyr-string-functions.R index 5e9b822f211..6c68c2c2311 100644 --- a/r/tests/testthat/test-dplyr-string-functions.R +++ b/r/tests/testthat/test-dplyr-string-functions.R @@ -102,20 +102,6 @@ test_that("paste, paste0, and str_c", { df ) - # sep is a column reference - expect_dplyr_equal( - input %>% - transmute(paste(x, y, sep = w)) %>% - collect(), - df - ) - expect_dplyr_equal( - input %>% - transmute(str_c(x, y, sep = w)) %>% - collect(), - df - ) - # known differences # arrow allows the separator to be an array @@ -138,30 +124,30 @@ test_that("paste, paste0, and str_c", { transmute(result = paste(x, NA_character_, y, sep = "")) ) - # known errors + # expected errors # collapse argument not supported x <- Expression$field_ref("x") y <- Expression$field_ref("x") - expect_failure( + expect_error( nse_funcs$paste(x, y, collapse = ""), "collapse" ) - expect_failure( + expect_error( nse_funcs$paste0(x, y, collapse = ""), "collapse" ) - expect_failure( + expect_error( nse_funcs$str_c(x, y, collapse = ""), "collapse" ) # literal vectors of length != 1 not supported - expect_failure( + expect_error( nse_funcs$paste(x, character(0), y), "Literal vectors of length != 1 not supported in string concatenation" ) - expect_failure( + expect_error( nse_funcs$paste(x, c(",", ";"), y), "Literal vectors of length != 1 not supported in string concatenation" ) From 209d47aaec3443066bf27dfa5ea86e848d4e435a Mon Sep 17 00:00:00 2001 From: Ian Cook Date: Thu, 17 Jun 2021 18:21:10 -0400 Subject: [PATCH 08/14] Fix typo in test --- r/tests/testthat/test-dplyr-string-functions.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/tests/testthat/test-dplyr-string-functions.R b/r/tests/testthat/test-dplyr-string-functions.R index 6c68c2c2311..24ee51755bb 100644 --- a/r/tests/testthat/test-dplyr-string-functions.R +++ b/r/tests/testthat/test-dplyr-string-functions.R @@ -45,7 +45,7 @@ test_that("paste, paste0, and str_c", { ) expect_dplyr_equal( input %>% - transmute(str_c(x, y, sep = "〷")) %>% + transmute(str_c(v, w, sep = "〷")) %>% collect(), df ) From f21fdfbdd27358f13e1a2ed374b9a12c85516e4c Mon Sep 17 00:00:00 2001 From: Ian Cook Date: Fri, 18 Jun 2021 09:29:25 -0400 Subject: [PATCH 09/14] Fix test failures --- r/tests/testthat/test-dplyr-string-functions.R | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/r/tests/testthat/test-dplyr-string-functions.R b/r/tests/testthat/test-dplyr-string-functions.R index 24ee51755bb..a06b0b39cc7 100644 --- a/r/tests/testthat/test-dplyr-string-functions.R +++ b/r/tests/testthat/test-dplyr-string-functions.R @@ -30,7 +30,7 @@ test_that("paste, paste0, and str_c", { z = c(1.1, 2.2, NA) ) - # non-ASCII characters + # no NAs in data expect_dplyr_equal( input %>% transmute(paste(v, w)) %>% @@ -39,13 +39,25 @@ test_that("paste, paste0, and str_c", { ) expect_dplyr_equal( input %>% - transmute(paste(v, w, sep = "፨")) %>% + transmute(paste(v, w, sep = "-")) %>% collect(), df ) expect_dplyr_equal( input %>% - transmute(str_c(v, w, sep = "〷")) %>% + transmute(paste0(v, w)) %>% + collect(), + df + ) + expect_dplyr_equal( + input %>% + transmute(str_c(v, w)) %>% + collect(), + df + ) + expect_dplyr_equal( + input %>% + transmute(str_c(v, w, sep = "+")) %>% collect(), df ) From 9b82c192489ec7e7b6c96928811a5d1f5d895a49 Mon Sep 17 00:00:00 2001 From: Ian Cook Date: Fri, 18 Jun 2021 09:55:48 -0400 Subject: [PATCH 10/14] Improve arrow_string_join_function --- r/R/dplyr-functions.R | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/r/R/dplyr-functions.R b/r/R/dplyr-functions.R index 7faebed4f06..78f83ef9d03 100644 --- a/r/R/dplyr-functions.R +++ b/r/R/dplyr-functions.R @@ -231,33 +231,35 @@ nse_funcs$str_c <- function(..., sep = "", collapse = NULL) { } arrow_string_join_function <- function(null_handling, null_replacement = NULL) { - # the binary_join_element_wise kernel takes the separator as the last argument + # the `binary_join_element_wise` Arrow C++ compute kernel takes the separator + # as the last argument, so pass `sep` as the last dots arg to this function function(...) { - dots <- list(...) # sep is the last value in dots - for (i in seq_along(dots)) { + args <- lapply(list(...), function(arg) { # handle scalar literal args, and cast all args to string for # consistency with base::paste(), base::paste0(), and stringr::str_c() - if (!inherits(dots[[i]], "Expression")) { + if (!inherits(arg, "Expression")) { assert_that( - length(dots[[i]]) == 1, + length(arg) == 1, msg = "Literal vectors of length != 1 not supported in string concatenation" ) # handle scalar literal NA consistent with the binary_join_element_wise # kernel's handling of nulls in the data - if (null_handling == NullHandlingBehavior$REPLACE && is.na(dots[[i]])) { - dots[[i]] <- null_replacement + if (null_handling == NullHandlingBehavior$REPLACE && is.na(arg)) { + arg <- null_replacement } - dots[[i]] <- Expression$scalar(as.character(dots[[i]])) + Expression$scalar(as.character(arg)) } else { - dots[[i]] <- nse_funcs$as.character(dots[[i]]) + nse_funcs$as.character(arg) } - } - args <- c(function_name = "binary_join_element_wise", dots) - args$options <- list( - null_handling = null_handling, - null_replacement = null_replacement + }) + Expression$create( + "binary_join_element_wise", + args = args, + options = list( + null_handling = null_handling, + null_replacement = null_replacement + ) ) - do.call(Expression$create, args) } } From 9dda54a6d076eff2b11c0544c283021bc1a2f0ff Mon Sep 17 00:00:00 2001 From: Ian Cook Date: Fri, 18 Jun 2021 11:37:54 -0400 Subject: [PATCH 11/14] Apply patch to fix null handling --- cpp/src/arrow/compute/kernels/scalar_string.cc | 10 +++++++--- r/R/dplyr-functions.R | 5 ----- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_string.cc b/cpp/src/arrow/compute/kernels/scalar_string.cc index 3f63bf2c405..7fe1fe0926c 100644 --- a/cpp/src/arrow/compute/kernels/scalar_string.cc +++ b/cpp/src/arrow/compute/kernels/scalar_string.cc @@ -3587,10 +3587,14 @@ void AddBinaryJoin(FunctionRegistry* registry) { "binary_join_element_wise", Arity::VarArgs(/*min_args=*/1), &binary_join_element_wise_doc, &kDefaultJoinOptions); for (const auto& ty : BaseBinaryTypes()) { - DCHECK_OK( - func->AddKernel({InputType(ty)}, ty, + ScalarKernel kernel{KernelSignature::Make({InputType(ty)}, ty, /*is_varargs=*/true), GenerateTypeAgnosticVarBinaryBase(ty), - BinaryJoinElementWiseState::Init)); + BinaryJoinElementWiseState::Init}; + // This is redundant but expression simplification uses this to potentially replace + // calls with null + kernel.null_handling = NullHandling::COMPUTED_NO_PREALLOCATE; + kernel.mem_allocation = MemAllocation::NO_PREALLOCATE; + DCHECK_OK(func->AddKernel(std::move(kernel))); } DCHECK_OK(registry->AddFunction(std::move(func))); } diff --git a/r/R/dplyr-functions.R b/r/R/dplyr-functions.R index 78f83ef9d03..bb21ee3f115 100644 --- a/r/R/dplyr-functions.R +++ b/r/R/dplyr-functions.R @@ -242,11 +242,6 @@ arrow_string_join_function <- function(null_handling, null_replacement = NULL) { length(arg) == 1, msg = "Literal vectors of length != 1 not supported in string concatenation" ) - # handle scalar literal NA consistent with the binary_join_element_wise - # kernel's handling of nulls in the data - if (null_handling == NullHandlingBehavior$REPLACE && is.na(arg)) { - arg <- null_replacement - } Expression$scalar(as.character(arg)) } else { nse_funcs$as.character(arg) From 77d4aae69ae06e6ef19a2219da123d9fdd4bb185 Mon Sep 17 00:00:00 2001 From: Ian Cook Date: Fri, 18 Jun 2021 12:41:06 -0400 Subject: [PATCH 12/14] Improve handling of literal NA separator and update tests --- r/R/dplyr-functions.R | 3 ++ .../testthat/test-dplyr-string-functions.R | 36 ++++++++++++------- 2 files changed, 27 insertions(+), 12 deletions(-) diff --git a/r/R/dplyr-functions.R b/r/R/dplyr-functions.R index bb21ee3f115..51520f4937e 100644 --- a/r/R/dplyr-functions.R +++ b/r/R/dplyr-functions.R @@ -217,6 +217,9 @@ nse_funcs$nchar <- function(x, type = "chars", allowNA = FALSE, keepNA = NA) { nse_funcs$paste <- function(..., sep = " ", collapse = NULL, recycle0 = FALSE) { assert_that(is.null(collapse)) + if (!inherits(sep, "Expression")) { + assert_that(!is.na(sep), msg = "Invalid separator") + } arrow_string_join_function(NullHandlingBehavior$REPLACE, "NA")(..., sep) } diff --git a/r/tests/testthat/test-dplyr-string-functions.R b/r/tests/testthat/test-dplyr-string-functions.R index a06b0b39cc7..63ffb79bc65 100644 --- a/r/tests/testthat/test-dplyr-string-functions.R +++ b/r/tests/testthat/test-dplyr-string-functions.R @@ -29,6 +29,8 @@ test_that("paste, paste0, and str_c", { y = c(NA_character_, "h", "i"), z = c(1.1, 2.2, NA) ) + x <- Expression$field_ref("x") + y <- Expression$field_ref("y") # no NAs in data expect_dplyr_equal( @@ -98,6 +100,14 @@ test_that("paste, paste0, and str_c", { df ) + # literal NA in dots + expect_dplyr_equal( + input %>% + transmute(paste(x, NA, y)) %>% + collect(), + df + ) + # expressions in dots expect_dplyr_equal( input %>% @@ -106,6 +116,20 @@ test_that("paste, paste0, and str_c", { df ) + # sep is literal NA + # errors in paste() (consistent with base::paste()) + expect_error( + nse_funcs$paste(x, y, sep = NA_character_), + "Invalid separator" + ) + # emits null in str_c() (consistent with stringr::str_c()) + expect_dplyr_equal( + input %>% + transmute(str_c(x, y, sep = NA_character_)) %>% + collect(), + df + ) + # sep passed in dots to paste0 (which doesn't take a sep argument) expect_dplyr_equal( input %>% @@ -126,21 +150,9 @@ test_that("paste, paste0, and str_c", { transmute(result = paste(x, w, y, sep = "")) ) - # arrow allows the separator to be scalar literal NA - expect_equal( - df %>% - Table$create() %>% - transmute(result = paste(x, y, sep = NA_character_)) %>% - collect(), - df %>% - transmute(result = paste(x, NA_character_, y, sep = "")) - ) - # expected errors # collapse argument not supported - x <- Expression$field_ref("x") - y <- Expression$field_ref("x") expect_error( nse_funcs$paste(x, y, collapse = ""), "collapse" From 1f24ffdedd633bd388d04ab8dca930f54ca59eef Mon Sep 17 00:00:00 2001 From: Ian Cook Date: Fri, 18 Jun 2021 17:14:41 -0400 Subject: [PATCH 13/14] Fix misleading comment Co-authored-by: David Li --- cpp/src/arrow/compute/kernels/scalar_string.cc | 2 -- 1 file changed, 2 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_string.cc b/cpp/src/arrow/compute/kernels/scalar_string.cc index 7fe1fe0926c..dbacb6bb96f 100644 --- a/cpp/src/arrow/compute/kernels/scalar_string.cc +++ b/cpp/src/arrow/compute/kernels/scalar_string.cc @@ -3590,8 +3590,6 @@ void AddBinaryJoin(FunctionRegistry* registry) { ScalarKernel kernel{KernelSignature::Make({InputType(ty)}, ty, /*is_varargs=*/true), GenerateTypeAgnosticVarBinaryBase(ty), BinaryJoinElementWiseState::Init}; - // This is redundant but expression simplification uses this to potentially replace - // calls with null kernel.null_handling = NullHandling::COMPUTED_NO_PREALLOCATE; kernel.mem_allocation = MemAllocation::NO_PREALLOCATE; DCHECK_OK(func->AddKernel(std::move(kernel))); From c8a4ab23a831675f162788b4e59c32fc941b0f93 Mon Sep 17 00:00:00 2001 From: Ian Cook Date: Wed, 23 Jun 2021 12:55:27 -0400 Subject: [PATCH 14/14] Fix issues from review --- r/R/dplyr-functions.R | 15 ++++++++++++--- r/tests/testthat/test-dplyr-string-functions.R | 2 +- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/r/R/dplyr-functions.R b/r/R/dplyr-functions.R index 51520f4937e..1cf6fabebee 100644 --- a/r/R/dplyr-functions.R +++ b/r/R/dplyr-functions.R @@ -216,7 +216,10 @@ nse_funcs$nchar <- function(x, type = "chars", allowNA = FALSE, keepNA = NA) { } nse_funcs$paste <- function(..., sep = " ", collapse = NULL, recycle0 = FALSE) { - assert_that(is.null(collapse)) + assert_that( + is.null(collapse), + msg = "paste() with the collapse argument is not yet supported in Arrow" + ) if (!inherits(sep, "Expression")) { assert_that(!is.na(sep), msg = "Invalid separator") } @@ -224,12 +227,18 @@ nse_funcs$paste <- function(..., sep = " ", collapse = NULL, recycle0 = FALSE) { } nse_funcs$paste0 <- function(..., collapse = NULL, recycle0 = FALSE) { - assert_that(is.null(collapse)) + assert_that( + is.null(collapse), + msg = "paste0() with the collapse argument is not yet supported in Arrow" + ) arrow_string_join_function(NullHandlingBehavior$REPLACE, "NA")(..., "") } nse_funcs$str_c <- function(..., sep = "", collapse = NULL) { - assert_that(is.null(collapse)) + assert_that( + is.null(collapse), + msg = "str_c() with the collapse argument is not yet supported in Arrow" + ) arrow_string_join_function(NullHandlingBehavior$EMIT_NULL)(..., sep) } diff --git a/r/tests/testthat/test-dplyr-string-functions.R b/r/tests/testthat/test-dplyr-string-functions.R index 63ffb79bc65..4afb88e5732 100644 --- a/r/tests/testthat/test-dplyr-string-functions.R +++ b/r/tests/testthat/test-dplyr-string-functions.R @@ -23,7 +23,7 @@ library(stringr) test_that("paste, paste0, and str_c", { df <- tibble( - v = c("α", "β", "γ"), + v = c("A", "B", "C"), w = c("a", "b", "c"), x = c("d", NA_character_, "f"), y = c(NA_character_, "h", "i"),