From 2a5a6c26501e60d8d80d25ae8a81a32d30e38343 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Sat, 14 May 2022 23:47:49 +0100 Subject: [PATCH 001/129] register 2 bindings (`"package_fun"` as a replacement for `package::fun` and `"fun"`) --- r/R/dplyr-funcs.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/r/R/dplyr-funcs.R b/r/R/dplyr-funcs.R index 95c1f69f4fb..a4bd08c15d2 100644 --- a/r/R/dplyr-funcs.R +++ b/r/R/dplyr-funcs.R @@ -58,7 +58,8 @@ NULL #' @keywords internal #' register_binding <- function(fun_name, fun, registry = nse_funcs) { - name <- gsub("^.*?::", "", fun_name) + # test name <- gsub("^.*?::", "", fun_name) + name <- gsub("^.*?_", "", fun_name) namespace <- gsub("::.*$", "", fun_name) previous_fun <- if (name %in% names(registry)) registry[[name]] else NULL @@ -67,6 +68,7 @@ register_binding <- function(fun_name, fun, registry = nse_funcs) { rm(list = name, envir = registry, inherits = FALSE) } else { registry[[name]] <- fun + registry[[fun_name]] <- fun } invisible(previous_fun) From b5bca42eca00565dbccb758f635b61822c0c33af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Sat, 14 May 2022 23:49:20 +0100 Subject: [PATCH 002/129] first try with `lubridate_as_datetime` and `lubridate_make_difftime` --- r/R/dplyr-funcs-datetime.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R index df830a6b66f..5fea1da9e0d 100644 --- a/r/R/dplyr-funcs-datetime.R +++ b/r/R/dplyr-funcs-datetime.R @@ -344,7 +344,7 @@ register_bindings_datetime_conversion <- function() { ) }) - register_binding("as_datetime", function(x, + register_binding("lubridate_as_datetime", function(x, origin = "1970-01-01", tz = "UTC", format = NULL) { @@ -475,7 +475,7 @@ register_bindings_duration <- function() { } register_bindings_duration_constructor <- function() { - register_binding("make_difftime", function(num = NULL, + register_binding("lubridate_make_difftime", function(num = NULL, units = "secs", ...) { if (units != "secs") { From c6e701ba06757fb14274f7af88fb5c87e7f0a976 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Sat, 14 May 2022 23:50:37 +0100 Subject: [PATCH 003/129] replace `::` in expressions with "_" --- r/R/dplyr-mutate.R | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/r/R/dplyr-mutate.R b/r/R/dplyr-mutate.R index 653c1e6f25a..67cd370f310 100644 --- a/r/R/dplyr-mutate.R +++ b/r/R/dplyr-mutate.R @@ -25,6 +25,13 @@ mutate.arrow_dplyr_query <- function(.data, .after = NULL) { call <- match.call() exprs <- ensure_named_exprs(quos(...)) + exprs2 <- exprs + for (i in seq_along(exprs)) { + exprs[[i]][[2]] <- exprs[[i]][[2]] %>% + expr_text() %>% + gsub("::", "_", .) %>% + parse_expr() + } .keep <- match.arg(.keep) .before <- enquo(.before) @@ -57,7 +64,7 @@ mutate.arrow_dplyr_query <- function(.data, if (inherits(results[[new_var]], "try-error")) { msg <- handle_arrow_not_supported( results[[new_var]], - format_expr(exprs[[i]]) + format_expr(exprs2[[i]]) ) return(abandon_ship(call, .data, msg)) } else if (!inherits(results[[new_var]], "Expression") && From 81c17dee469ea587006aff4d3d117ff09471d32b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Sat, 14 May 2022 23:52:28 +0100 Subject: [PATCH 004/129] remove redundant pipeline and add tests with namespacing --- r/tests/testthat/test-dplyr-funcs-datetime.R | 25 +++++++++++--------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R index ce804d1727f..27b6a2573c3 100644 --- a/r/tests/testthat/test-dplyr-funcs-datetime.R +++ b/r/tests/testthat/test-dplyr-funcs-datetime.R @@ -1668,17 +1668,20 @@ test_that("`as_datetime()`", { double_date = c(10.1, 25.2, NA) ) - test_df %>% - arrow_table() %>% - mutate( - ddate = as_datetime(date), - dchar_date_no_tz = as_datetime(char_date), - dchar_date_non_iso = as_datetime(char_date_non_iso, format = "%Y-%d-%m %H:%M:%S"), - dint_date = as_datetime(int_date, origin = "1970-01-02"), - dintegerish_date = as_datetime(integerish_date, origin = "1970-01-02"), - dintegerish_date2 = as_datetime(integerish_date, origin = "1970-01-01") - ) %>% - collect() + compare_dplyr_binding( + .input %>% + mutate( + ddate = lubridate::as_datetime(date), + dchar_date_no_tz = lubridate::as_datetime(char_date), + dchar_date_with_tz = lubridate::as_datetime(char_date, tz = "Pacific/Marquesas"), + dint_date = lubridate::as_datetime(int_date, origin = "1970-01-02"), + dintegerish_date = lubridate::as_datetime(integerish_date, origin = "1970-01-02"), + dintegerish_date2 = lubridate::as_datetime(integerish_date, origin = "1970-01-01"), + .keep = "used" + ) %>% + collect(), + test_df + ) compare_dplyr_binding( .input %>% From 6ec372dfe454793648264763f598554850c4b252 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Mon, 16 May 2022 09:56:23 +0100 Subject: [PATCH 005/129] no piping --- r/R/dplyr-mutate.R | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/r/R/dplyr-mutate.R b/r/R/dplyr-mutate.R index 67cd370f310..6b5d64e6961 100644 --- a/r/R/dplyr-mutate.R +++ b/r/R/dplyr-mutate.R @@ -26,11 +26,10 @@ mutate.arrow_dplyr_query <- function(.data, call <- match.call() exprs <- ensure_named_exprs(quos(...)) exprs2 <- exprs + # replace `::` with `_` in passed expressions for (i in seq_along(exprs)) { - exprs[[i]][[2]] <- exprs[[i]][[2]] %>% - expr_text() %>% - gsub("::", "_", .) %>% - parse_expr() + exprs[[i]][[2]] <- gsub("::", "_", rlang::expr_text(exprs[[i]][[2]])) + exprs[[i]][[2]] <- parse_expr(exprs[[i]][[2]]) } .keep <- match.arg(.keep) From 36d91521e27fc941b59deaf7185b9c29f64bf8f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Mon, 16 May 2022 09:56:55 +0100 Subject: [PATCH 006/129] update `dplyr-funcs` unit test to reflect new naming convention --- r/tests/testthat/test-dplyr-funcs.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/r/tests/testthat/test-dplyr-funcs.R b/r/tests/testthat/test-dplyr-funcs.R index d96b4b2cf87..5233cd92639 100644 --- a/r/tests/testthat/test-dplyr-funcs.R +++ b/r/tests/testthat/test-dplyr-funcs.R @@ -19,10 +19,10 @@ test_that("register_binding() works", { fake_registry <- new.env(parent = emptyenv()) fun1 <- function() NULL - expect_null(register_binding("some_fun", fun1, fake_registry)) + expect_null(register_binding("some.pkg_some_fun", fun1, fake_registry)) expect_identical(fake_registry$some_fun, fun1) - expect_identical(register_binding("some_fun", NULL, fake_registry), fun1) + expect_identical(register_binding("some.pkg_some_fun", NULL, fake_registry), fun1) expect_false("some_fun" %in% names(fake_registry)) expect_silent(expect_null(register_binding("some_fun", NULL, fake_registry))) From 668391335819c8136befe1c118724a1330ad9079 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Mon, 16 May 2022 10:13:30 +0100 Subject: [PATCH 007/129] updated un-registering logic in `register_binding()` + unit tests to reflect the new naming convention --- r/R/dplyr-funcs.R | 5 ++--- r/tests/testthat/test-dplyr-funcs.R | 9 ++++++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/r/R/dplyr-funcs.R b/r/R/dplyr-funcs.R index a4bd08c15d2..45608187b6f 100644 --- a/r/R/dplyr-funcs.R +++ b/r/R/dplyr-funcs.R @@ -58,14 +58,13 @@ NULL #' @keywords internal #' register_binding <- function(fun_name, fun, registry = nse_funcs) { - # test name <- gsub("^.*?::", "", fun_name) name <- gsub("^.*?_", "", fun_name) namespace <- gsub("::.*$", "", fun_name) - previous_fun <- if (name %in% names(registry)) registry[[name]] else NULL + previous_fun <- if (name %in% names(registry)) registry[[fun_name]] else NULL if (is.null(fun) && !is.null(previous_fun)) { - rm(list = name, envir = registry, inherits = FALSE) + rm(list = c(name, fun_name), envir = registry, inherits = FALSE) } else { registry[[name]] <- fun registry[[fun_name]] <- fun diff --git a/r/tests/testthat/test-dplyr-funcs.R b/r/tests/testthat/test-dplyr-funcs.R index 5233cd92639..83b567c9ee9 100644 --- a/r/tests/testthat/test-dplyr-funcs.R +++ b/r/tests/testthat/test-dplyr-funcs.R @@ -21,12 +21,13 @@ test_that("register_binding() works", { expect_null(register_binding("some.pkg_some_fun", fun1, fake_registry)) expect_identical(fake_registry$some_fun, fun1) + expect_identical(fake_registry$some.pkg_some_fun, fun1) expect_identical(register_binding("some.pkg_some_fun", NULL, fake_registry), fun1) expect_false("some_fun" %in% names(fake_registry)) - expect_silent(expect_null(register_binding("some_fun", NULL, fake_registry))) + expect_silent(expect_null(register_binding("some.pkg_some_fun", NULL, fake_registry))) - expect_null(register_binding("some_pkg::some_fun", fun1, fake_registry)) + expect_null(register_binding("somePkg_some_fun", fun1, fake_registry)) expect_identical(fake_registry$some_fun, fun1) }) @@ -34,6 +35,8 @@ test_that("register_binding_agg() works", { fake_registry <- new.env(parent = emptyenv()) fun1 <- function() NULL - expect_null(register_binding_agg("some_fun", fun1, fake_registry)) + expect_null(register_binding_agg("somePkg_some_fun", fun1, fake_registry)) + names(fake_registry) expect_identical(fake_registry$some_fun, fun1) + expect_identical(fake_registry$somePkg_some_fun, fun1) }) From 2421416632d80283d97af3855840d9c438d20599 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Tue, 17 May 2022 16:08:20 +0100 Subject: [PATCH 008/129] undo changes to `mutate` (no more syntax translation) --- r/R/dplyr-mutate.R | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/r/R/dplyr-mutate.R b/r/R/dplyr-mutate.R index 6b5d64e6961..653c1e6f25a 100644 --- a/r/R/dplyr-mutate.R +++ b/r/R/dplyr-mutate.R @@ -25,12 +25,6 @@ mutate.arrow_dplyr_query <- function(.data, .after = NULL) { call <- match.call() exprs <- ensure_named_exprs(quos(...)) - exprs2 <- exprs - # replace `::` with `_` in passed expressions - for (i in seq_along(exprs)) { - exprs[[i]][[2]] <- gsub("::", "_", rlang::expr_text(exprs[[i]][[2]])) - exprs[[i]][[2]] <- parse_expr(exprs[[i]][[2]]) - } .keep <- match.arg(.keep) .before <- enquo(.before) @@ -63,7 +57,7 @@ mutate.arrow_dplyr_query <- function(.data, if (inherits(results[[new_var]], "try-error")) { msg <- handle_arrow_not_supported( results[[new_var]], - format_expr(exprs2[[i]]) + format_expr(exprs[[i]]) ) return(abandon_ship(call, .data, msg)) } else if (!inherits(results[[new_var]], "Expression") && From 4ca6192ae74aad747530594ba1724cace90f6304 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Tue, 17 May 2022 16:13:38 +0100 Subject: [PATCH 009/129] update `register_binding` logic + register the utils bindings --- r/R/dplyr-funcs.R | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/r/R/dplyr-funcs.R b/r/R/dplyr-funcs.R index 45608187b6f..371baab62e4 100644 --- a/r/R/dplyr-funcs.R +++ b/r/R/dplyr-funcs.R @@ -58,16 +58,22 @@ NULL #' @keywords internal #' register_binding <- function(fun_name, fun, registry = nse_funcs) { - name <- gsub("^.*?_", "", fun_name) - namespace <- gsub("::.*$", "", fun_name) + if (fun_name == "::") { + name <- "::" + } else { + name <- gsub("^.*?::", "", fun_name) + } - previous_fun <- if (name %in% names(registry)) registry[[fun_name]] else NULL + previous_fun <- if (name %in% names(registry)) registry[[name]] else NULL if (is.null(fun) && !is.null(previous_fun)) { rm(list = c(name, fun_name), envir = registry, inherits = FALSE) - } else { + # register both as pkg::fun and as fun if fun name is pkg::fun + } else if (grepl("::", fun_name) && fun_name != "::") { registry[[name]] <- fun registry[[fun_name]] <- fun + } else { + registry[[name]] <- fun } invisible(previous_fun) @@ -108,6 +114,7 @@ create_binding_cache <- function() { register_bindings_math() register_bindings_string() register_bindings_type() + register_bindings_utils() # We only create the cache for nse_funcs and not agg_funcs .cache$functions <- c(as.list(nse_funcs), arrow_funcs) From 0577107de174b3d18289f97b35a7b9d45b006e73 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Tue, 17 May 2022 16:14:13 +0100 Subject: [PATCH 010/129] add the `::` binding --- r/DESCRIPTION | 1 + r/R/dplyr-funcs-utils.R | 24 ++++++++++++++++++++++++ 2 files changed, 25 insertions(+) create mode 100644 r/R/dplyr-funcs-utils.R diff --git a/r/DESCRIPTION b/r/DESCRIPTION index a7408d27d65..928955f1d1d 100644 --- a/r/DESCRIPTION +++ b/r/DESCRIPTION @@ -103,6 +103,7 @@ Collate: 'dplyr-funcs-math.R' 'dplyr-funcs-string.R' 'dplyr-funcs-type.R' + 'dplyr-funcs-utils.R' 'expression.R' 'dplyr-funcs.R' 'dplyr-glimpse.R' diff --git a/r/R/dplyr-funcs-utils.R b/r/R/dplyr-funcs-utils.R new file mode 100644 index 00000000000..fd0b93d851d --- /dev/null +++ b/r/R/dplyr-funcs-utils.R @@ -0,0 +1,24 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +register_bindings_utils <- function() { + register_binding("::", function(lhs, rhs) { + lhs_name <- as.character(substitute(lhs)) + rhs_name <- as.character(substitute(rhs)) + arrow:::nse_funcs[[paste0(lhs_name, "::", rhs_name)]] + }) +} From a5f55076bf609440dc4a2b7ec747e79ba5b6991c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Tue, 17 May 2022 16:17:42 +0100 Subject: [PATCH 011/129] replace `_` with `::` for `as_datetime` and `make_difftime` --- r/R/dplyr-funcs-datetime.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R index 5fea1da9e0d..c4ee5bb76aa 100644 --- a/r/R/dplyr-funcs-datetime.R +++ b/r/R/dplyr-funcs-datetime.R @@ -344,7 +344,7 @@ register_bindings_datetime_conversion <- function() { ) }) - register_binding("lubridate_as_datetime", function(x, + register_binding("lubridate::as_datetime", function(x, origin = "1970-01-01", tz = "UTC", format = NULL) { @@ -475,7 +475,7 @@ register_bindings_duration <- function() { } register_bindings_duration_constructor <- function() { - register_binding("lubridate_make_difftime", function(num = NULL, + register_binding("lubridate::make_difftime", function(num = NULL, units = "secs", ...) { if (units != "secs") { From a0be5f9e71427a92bbf46e0142dda7fd4ca03be4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Tue, 17 May 2022 16:24:05 +0100 Subject: [PATCH 012/129] update `register_binding()` tests --- r/tests/testthat/test-dplyr-funcs.R | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/r/tests/testthat/test-dplyr-funcs.R b/r/tests/testthat/test-dplyr-funcs.R index 83b567c9ee9..8fd13b98e21 100644 --- a/r/tests/testthat/test-dplyr-funcs.R +++ b/r/tests/testthat/test-dplyr-funcs.R @@ -19,15 +19,15 @@ test_that("register_binding() works", { fake_registry <- new.env(parent = emptyenv()) fun1 <- function() NULL - expect_null(register_binding("some.pkg_some_fun", fun1, fake_registry)) + expect_null(register_binding("some.pkg::some_fun", fun1, fake_registry)) expect_identical(fake_registry$some_fun, fun1) - expect_identical(fake_registry$some.pkg_some_fun, fun1) + expect_identical(fake_registry$`some.pkg::some_fun`, fun1) - expect_identical(register_binding("some.pkg_some_fun", NULL, fake_registry), fun1) + expect_identical(register_binding("some.pkg::some_fun", NULL, fake_registry), fun1) expect_false("some_fun" %in% names(fake_registry)) - expect_silent(expect_null(register_binding("some.pkg_some_fun", NULL, fake_registry))) + expect_silent(expect_null(register_binding("some.pkg::some_fun", NULL, fake_registry))) - expect_null(register_binding("somePkg_some_fun", fun1, fake_registry)) + expect_null(register_binding("somePkg::some_fun", fun1, fake_registry)) expect_identical(fake_registry$some_fun, fun1) }) @@ -35,8 +35,8 @@ test_that("register_binding_agg() works", { fake_registry <- new.env(parent = emptyenv()) fun1 <- function() NULL - expect_null(register_binding_agg("somePkg_some_fun", fun1, fake_registry)) + expect_null(register_binding_agg("somePkg::some_fun", fun1, fake_registry)) names(fake_registry) expect_identical(fake_registry$some_fun, fun1) - expect_identical(fake_registry$somePkg_some_fun, fun1) + expect_identical(fake_registry$`somePkg::some_fun`, fun1) }) From fbc633e97746226aa42cc7e0672509e1080de895 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Tue, 17 May 2022 16:24:58 +0100 Subject: [PATCH 013/129] update comment --- r/R/dplyr-funcs.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/R/dplyr-funcs.R b/r/R/dplyr-funcs.R index 371baab62e4..f897729b38b 100644 --- a/r/R/dplyr-funcs.R +++ b/r/R/dplyr-funcs.R @@ -68,7 +68,7 @@ register_binding <- function(fun_name, fun, registry = nse_funcs) { if (is.null(fun) && !is.null(previous_fun)) { rm(list = c(name, fun_name), envir = registry, inherits = FALSE) - # register both as pkg::fun and as fun if fun name is pkg::fun + # register both as `pkg::fun` and as `fun` if `fun_name` is prefixed } else if (grepl("::", fun_name) && fun_name != "::") { registry[[name]] <- fun registry[[fun_name]] <- fun From 380ac60d37464538a2d81a57125b877e147cd991 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Tue, 17 May 2022 21:30:15 +0100 Subject: [PATCH 014/129] moved `register_bindings_utils` in `dplyr-funcs.R` + deleted `dplyr-funcs-utils.R` --- r/DESCRIPTION | 1 - r/R/dplyr-funcs-utils.R | 24 ------------------------ r/R/dplyr-funcs.R | 8 ++++++++ 3 files changed, 8 insertions(+), 25 deletions(-) delete mode 100644 r/R/dplyr-funcs-utils.R diff --git a/r/DESCRIPTION b/r/DESCRIPTION index 928955f1d1d..a7408d27d65 100644 --- a/r/DESCRIPTION +++ b/r/DESCRIPTION @@ -103,7 +103,6 @@ Collate: 'dplyr-funcs-math.R' 'dplyr-funcs-string.R' 'dplyr-funcs-type.R' - 'dplyr-funcs-utils.R' 'expression.R' 'dplyr-funcs.R' 'dplyr-glimpse.R' diff --git a/r/R/dplyr-funcs-utils.R b/r/R/dplyr-funcs-utils.R deleted file mode 100644 index fd0b93d851d..00000000000 --- a/r/R/dplyr-funcs-utils.R +++ /dev/null @@ -1,24 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -register_bindings_utils <- function() { - register_binding("::", function(lhs, rhs) { - lhs_name <- as.character(substitute(lhs)) - rhs_name <- as.character(substitute(rhs)) - arrow:::nse_funcs[[paste0(lhs_name, "::", rhs_name)]] - }) -} diff --git a/r/R/dplyr-funcs.R b/r/R/dplyr-funcs.R index f897729b38b..a46ae2ce40a 100644 --- a/r/R/dplyr-funcs.R +++ b/r/R/dplyr-funcs.R @@ -124,3 +124,11 @@ create_binding_cache <- function() { nse_funcs <- new.env(parent = emptyenv()) agg_funcs <- new.env(parent = emptyenv()) .cache <- new.env(parent = emptyenv()) + +register_bindings_utils <- function() { + register_binding("::", function(lhs, rhs) { + lhs_name <- as.character(substitute(lhs)) + rhs_name <- as.character(substitute(rhs)) + arrow:::nse_funcs[[paste0(lhs_name, "::", rhs_name)]] + }) +} From 0a86888649a6c260e258d8b30bc7f49ffea96f9e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Tue, 17 May 2022 21:44:57 +0100 Subject: [PATCH 015/129] qualifying `string()` with `arrow::` no longer needed and typos (`if64` instead of `i64`) --- r/tests/testthat/test-dplyr-funcs-type.R | 69 ++++++++++++------------ 1 file changed, 35 insertions(+), 34 deletions(-) diff --git a/r/tests/testthat/test-dplyr-funcs-type.R b/r/tests/testthat/test-dplyr-funcs-type.R index b32fe8f7f88..539dc98f8ba 100644 --- a/r/tests/testthat/test-dplyr-funcs-type.R +++ b/r/tests/testthat/test-dplyr-funcs-type.R @@ -246,40 +246,41 @@ test_that("type checks with is() giving Arrow types", { dec256 = Array$create(pi)$cast(decimal256(3, 2)), f64 = Array$create(1.1, float64()), str = Array$create("a", arrow::string()) - ) %>% transmute( - i32_is_i32 = is(i32, int32()), - i32_is_dec = is(i32, decimal(3, 2)), - i32_is_dec128 = is(i32, decimal128(3, 2)), - i32_is_dec256 = is(i32, decimal256(3, 2)), - i32_is_i64 = is(i32, float64()), - i32_is_str = is(i32, arrow::string()), - dec_is_i32 = is(dec, int32()), - dec_is_dec = is(dec, decimal(3, 2)), - dec_is_dec128 = is(dec, decimal128(3, 2)), - dec_is_dec256 = is(dec, decimal256(3, 2)), - dec_is_i64 = is(dec, float64()), - dec_is_str = is(dec, arrow::string()), - dec128_is_i32 = is(dec128, int32()), - dec128_is_dec128 = is(dec128, decimal128(3, 2)), - dec128_is_dec256 = is(dec128, decimal256(3, 2)), - dec128_is_i64 = is(dec128, float64()), - dec128_is_str = is(dec128, arrow::string()), - dec256_is_i32 = is(dec128, int32()), - dec256_is_dec128 = is(dec128, decimal128(3, 2)), - dec256_is_dec256 = is(dec128, decimal256(3, 2)), - dec256_is_i64 = is(dec128, float64()), - dec256_is_str = is(dec128, arrow::string()), - f64_is_i32 = is(f64, int32()), - f64_is_dec = is(f64, decimal(3, 2)), - f64_is_dec128 = is(f64, decimal128(3, 2)), - f64_is_dec256 = is(f64, decimal256(3, 2)), - f64_is_i64 = is(f64, float64()), - f64_is_str = is(f64, arrow::string()), - str_is_i32 = is(str, int32()), - str_is_dec128 = is(str, decimal128(3, 2)), - str_is_dec256 = is(str, decimal256(3, 2)), - str_is_i64 = is(str, float64()), - str_is_str = is(str, arrow::string()) + ) %>% + transmute( + i32_is_i32 = is(i32, int32()), + i32_is_dec = is(i32, decimal(3, 2)), + i32_is_dec128 = is(i32, decimal128(3, 2)), + i32_is_dec256 = is(i32, decimal256(3, 2)), + i32_is_f64 = is(i32, float64()), + i32_is_str = is(i32, string()), + dec_is_i32 = is(dec, int32()), + dec_is_dec = is(dec, decimal(3, 2)), + dec_is_dec128 = is(dec, decimal128(3, 2)), + dec_is_dec256 = is(dec, decimal256(3, 2)), + dec_is_f64 = is(dec, float64()), + dec_is_str = is(dec, string()), + dec128_is_i32 = is(dec128, int32()), + dec128_is_dec128 = is(dec128, decimal128(3, 2)), + dec128_is_dec256 = is(dec128, decimal256(3, 2)), + dec128_is_f64 = is(dec128, float64()), + dec128_is_str = is(dec128, string()), + dec256_is_i32 = is(dec128, int32()), + dec256_is_dec128 = is(dec128, decimal128(3, 2)), + dec256_is_dec256 = is(dec128, decimal256(3, 2)), + dec256_is_f64 = is(dec128, float64()), + dec256_is_str = is(dec128, string()), + f64_is_i32 = is(f64, int32()), + f64_is_dec = is(f64, decimal(3, 2)), + f64_is_dec128 = is(f64, decimal128(3, 2)), + f64_is_dec256 = is(f64, decimal256(3, 2)), + f64_is_f64 = is(f64, float64()), + f64_is_str = is(f64, string()), + str_is_i32 = is(str, int32()), + str_is_dec128 = is(str, decimal128(3, 2)), + str_is_dec256 = is(str, decimal256(3, 2)), + str_is_i64 = is(str, float64()), + str_is_str = is(str, string()) ) %>% collect() %>% t() %>% From a59c3ddea7f1105ab8391ac004eaf27f13ba3fc0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Tue, 17 May 2022 22:38:15 +0100 Subject: [PATCH 016/129] replace `lubridate::ymd_hms()` with `as.POSIXct()` in the filtering tests since we do not have the `ymd_hms` binding yet --- r/tests/testthat/test-dataset-dplyr.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/r/tests/testthat/test-dataset-dplyr.R b/r/tests/testthat/test-dataset-dplyr.R index fecda56c6c2..ed3a20ff8a2 100644 --- a/r/tests/testthat/test-dataset-dplyr.R +++ b/r/tests/testthat/test-dataset-dplyr.R @@ -73,7 +73,7 @@ test_that("filter() on timestamp columns", { ds <- open_dataset(dataset_dir, partitioning = schema(part = uint8())) expect_equal( ds %>% - filter(ts >= lubridate::ymd_hms("2015-05-04 03:12:39")) %>% + filter(ts >= as.POSIXct("2015-05-04 03:12:39", tz = "UTC")) %>% filter(part == 1) %>% select(ts) %>% collect(), @@ -119,7 +119,7 @@ test_that("filter() on date32 columns", { # Also with timestamp scalar expect_equal( open_dataset(tmp) %>% - filter(date > lubridate::ymd_hms("2020-02-02 00:00:00")) %>% + filter(date > as.POSIXct("2020-02-02 00:00:00", tz = "UTC")) %>% collect() %>% nrow(), 1L From 393529aa02f5cf8bbca9044b566528c46e5fe66b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Wed, 18 May 2022 10:32:58 +0100 Subject: [PATCH 017/129] batch1 of `nse_funcs` with `pkg::` prefix + unit tests --- r/R/dplyr-funcs-conditional.R | 8 +-- r/R/dplyr-funcs-datetime.R | 6 +- r/R/dplyr-funcs-string.R | 8 +-- r/R/expression.R | 64 +++++++++---------- r/tests/testthat/test-dplyr-filter.R | 2 +- .../testthat/test-dplyr-funcs-conditional.R | 8 +-- r/tests/testthat/test-dplyr-funcs-datetime.R | 6 +- r/tests/testthat/test-dplyr-funcs-string.R | 6 +- 8 files changed, 54 insertions(+), 54 deletions(-) diff --git a/r/R/dplyr-funcs-conditional.R b/r/R/dplyr-funcs-conditional.R index 493031d2f57..74d19d85903 100644 --- a/r/R/dplyr-funcs-conditional.R +++ b/r/R/dplyr-funcs-conditional.R @@ -16,7 +16,7 @@ # under the License. register_bindings_conditional <- function() { - register_binding("coalesce", function(...) { + register_binding("dplyr::coalesce", function(...) { args <- list2(...) if (length(args) < 1) { abort("At least one argument must be supplied to coalesce()") @@ -60,14 +60,14 @@ register_bindings_conditional <- function() { build_expr("if_else", condition, true, false) } - register_binding("if_else", if_else_binding) + register_binding("dplyr::if_else", if_else_binding) # Although base R ifelse allows `yes` and `no` to be different classes - register_binding("ifelse", function(test, yes, no) { + register_binding("base::ifelse", function(test, yes, no) { if_else_binding(condition = test, true = yes, false = no) }) - register_binding("case_when", function(...) { + register_binding("dplyr::case_when", function(...) { formulas <- list2(...) n <- length(formulas) if (n == 0) { diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R index c4ee5bb76aa..08d9d0e3625 100644 --- a/r/R/dplyr-funcs-datetime.R +++ b/r/R/dplyr-funcs-datetime.R @@ -153,9 +153,9 @@ register_bindings_datetime_components <- function() { Expression$create("add", Expression$create("second", x), Expression$create("subsecond", x)) }) - register_binding("wday", function(x, label = FALSE, abbr = TRUE, - week_start = getOption("lubridate.week.start", 7), - locale = Sys.getlocale("LC_TIME")) { + register_binding("lubridate::wday", function(x, label = FALSE, abbr = TRUE, + week_start = getOption("lubridate.week.start", 7), + locale = Sys.getlocale("LC_TIME")) { if (label) { if (abbr) { format <- "%a" diff --git a/r/R/dplyr-funcs-string.R b/r/R/dplyr-funcs-string.R index 892c5175486..c3a44b688b8 100644 --- a/r/R/dplyr-funcs-string.R +++ b/r/R/dplyr-funcs-string.R @@ -400,22 +400,22 @@ register_bindings_string_other <- function() { } }) - register_binding("str_to_lower", function(string, locale = "en") { + register_binding("stringr::str_to_lower", function(string, locale = "en") { stop_if_locale_provided(locale) Expression$create("utf8_lower", string) }) - register_binding("str_to_upper", function(string, locale = "en") { + register_binding("stringr::str_to_upper", function(string, locale = "en") { stop_if_locale_provided(locale) Expression$create("utf8_upper", string) }) - register_binding("str_to_title", function(string, locale = "en") { + register_binding("stringr::str_to_title", function(string, locale = "en") { stop_if_locale_provided(locale) Expression$create("utf8_title", string) }) - register_binding("str_trim", function(string, side = c("both", "left", "right")) { + register_binding("stringr::str_trim", function(string, side = c("both", "left", "right")) { side <- match.arg(side) trim_fun <- switch(side, left = "utf8_ltrim_whitespace", diff --git a/r/R/expression.R b/r/R/expression.R index be43de01e1c..6b9eb5e89c5 100644 --- a/r/R/expression.R +++ b/r/R/expression.R @@ -26,59 +26,59 @@ # functions are arranged alphabetically by name within categories # arithmetic functions - "abs" = "abs_checked", - "ceiling" = "ceil", - "floor" = "floor", - "log10" = "log10_checked", - "log1p" = "log1p_checked", - "log2" = "log2_checked", - "sign" = "sign", + "base::abs" = "abs_checked", + "base::ceiling" = "ceil", + "base::floor" = "floor", + "base::log10" = "log10_checked", + "base::log1p" = "log1p_checked", + "base::log2" = "log2_checked", + "base::sign" = "sign", # trunc is defined in dplyr-functions.R # trigonometric functions - "acos" = "acos_checked", - "asin" = "asin_checked", - "cos" = "cos_checked", - "sin" = "sin_checked", - "tan" = "tan_checked", + "base::acos" = "acos_checked", + "base::asin" = "asin_checked", + "base::cos" = "cos_checked", + "base::sin" = "sin_checked", + "base::tan" = "tan_checked", # logical functions "!" = "invert", # string functions # nchar is defined in dplyr-functions.R - "str_length" = "utf8_length", + "stringr::str_length" = "utf8_length", # str_pad is defined in dplyr-functions.R # str_sub is defined in dplyr-functions.R # str_to_lower is defined in dplyr-functions.R # str_to_title is defined in dplyr-functions.R # str_to_upper is defined in dplyr-functions.R # str_trim is defined in dplyr-functions.R - "stri_reverse" = "utf8_reverse", + "stringi::stri_reverse" = "utf8_reverse", # substr is defined in dplyr-functions.R # substring is defined in dplyr-functions.R - "tolower" = "utf8_lower", - "toupper" = "utf8_upper", + "base::tolower" = "utf8_lower", + "base::toupper" = "utf8_upper", # date and time functions - "day" = "day", - "dst" = "is_dst", - "hour" = "hour", - "isoweek" = "iso_week", - "epiweek" = "us_week", - "isoyear" = "iso_year", - "epiyear" = "us_year", - "minute" = "minute", - "quarter" = "quarter", + "lubridate::day" = "day", + "lubridate::dst" = "is_dst", + "lubridate::hour" = "hour", + "lubridate::isoweek" = "iso_week", + "lubridate::epiweek" = "us_week", + "lubridate::isoyear" = "iso_year", + "lubridate::epiyear" = "us_year", + "lubridate::minute" = "minute", + "lubridate::quarter" = "quarter", # second is defined in dplyr-functions.R # wday is defined in dplyr-functions.R - "mday" = "day", - "yday" = "day_of_year", - "year" = "year", - "leap_year" = "is_leap_year", + "lubridate::mday" = "day", + "lubridate::yday" = "day_of_year", + "lubridate::year" = "year", + "lubridate::leap_year" = "is_leap_year", # type conversion functions - "as.factor" = "dictionary_encode" + "base::as.factor" = "dictionary_encode" ) .binary_function_map <- list( @@ -104,8 +104,8 @@ "%%" = "divide_checked", "^" = "power_checked", "%in%" = "is_in_meta_binary", - "strrep" = "binary_repeat", - "str_dup" = "binary_repeat" + "base::strrep" = "binary_repeat", + "stringr::str_dup" = "binary_repeat" ) .array_function_map <- c(.unary_function_map, .binary_function_map) diff --git a/r/tests/testthat/test-dplyr-filter.R b/r/tests/testthat/test-dplyr-filter.R index 60c740a5c1a..9b33732a97b 100644 --- a/r/tests/testthat/test-dplyr-filter.R +++ b/r/tests/testthat/test-dplyr-filter.R @@ -245,7 +245,7 @@ test_that("filter() with string ops", { skip_if_not_available("utf8proc") compare_dplyr_binding( .input %>% - filter(dbl > 2, str_length(verses) > 25) %>% + filter(dbl > 2, stringr::str_length(verses) > 25) %>% collect(), tbl ) diff --git a/r/tests/testthat/test-dplyr-funcs-conditional.R b/r/tests/testthat/test-dplyr-funcs-conditional.R index 4f5fdb0af4e..7c6f48df815 100644 --- a/r/tests/testthat/test-dplyr-funcs-conditional.R +++ b/r/tests/testthat/test-dplyr-funcs-conditional.R @@ -29,7 +29,7 @@ test_that("if_else and ifelse", { compare_dplyr_binding( .input %>% mutate( - y = if_else(int > 5, 1, 0) + y = dplyr::if_else(int > 5, 1, 0) ) %>% collect(), tbl @@ -65,7 +65,7 @@ test_that("if_else and ifelse", { compare_dplyr_binding( .input %>% mutate( - y = ifelse(int > 5, 1, 0) + y = base::ifelse(int > 5, 1, 0) ) %>% collect(), tbl @@ -165,7 +165,7 @@ test_that("if_else and ifelse", { test_that("case_when()", { compare_dplyr_binding( .input %>% - transmute(cw = case_when(lgl ~ dbl, !false ~ dbl + dbl2)) %>% + transmute(cw = dplyr::case_when(lgl ~ dbl, !false ~ dbl + dbl2)) %>% collect(), tbl ) @@ -293,7 +293,7 @@ test_that("coalesce()", { compare_dplyr_binding( .input %>% mutate( - cw = coalesce(w), + cw = dplyr::coalesce(w), cz = coalesce(z), cwx = coalesce(w, x), cwxy = coalesce(w, x, y), diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R index 27b6a2573c3..6e6edf46aca 100644 --- a/r/tests/testthat/test-dplyr-funcs-datetime.R +++ b/r/tests/testthat/test-dplyr-funcs-datetime.R @@ -196,7 +196,7 @@ test_that("strftime", { compare_dplyr_binding( .input %>% - mutate(x = strftime(datetime, format = formats)) %>% + mutate(x = base::strftime(datetime, format = formats)) %>% collect(), times ) @@ -469,7 +469,7 @@ test_that("extract week from timestamp", { test_that("extract day from timestamp", { compare_dplyr_binding( .input %>% - mutate(x = day(datetime)) %>% + mutate(x = lubridate::day(datetime)) %>% collect(), test_df ) @@ -478,7 +478,7 @@ test_that("extract day from timestamp", { test_that("extract wday from timestamp", { compare_dplyr_binding( .input %>% - mutate(x = wday(datetime)) %>% + mutate(x = lubridate::wday(datetime)) %>% collect(), test_df ) diff --git a/r/tests/testthat/test-dplyr-funcs-string.R b/r/tests/testthat/test-dplyr-funcs-string.R index c4d54d325f4..ba892e7a1fd 100644 --- a/r/tests/testthat/test-dplyr-funcs-string.R +++ b/r/tests/testthat/test-dplyr-funcs-string.R @@ -490,7 +490,7 @@ test_that("strrep and str_dup", { for (times in 0:8) { compare_dplyr_binding( .input %>% - mutate(x = strrep(x, times)) %>% + mutate(x = base::strrep(x, times)) %>% collect(), df ) @@ -509,7 +509,7 @@ test_that("str_to_lower, str_to_upper, and str_to_title", { compare_dplyr_binding( .input %>% transmute( - x_lower = str_to_lower(x), + x_lower = stringr::str_to_lower(x), x_upper = str_to_upper(x), x_title = str_to_title(x) ) %>% @@ -759,7 +759,7 @@ test_that("stri_reverse and arrow_ascii_reverse functions", { compare_dplyr_binding( .input %>% - mutate(x = stri_reverse(x)) %>% + mutate(x = stringi::stri_reverse(x)) %>% collect(), df_utf8 ) From f47108fc705743f1d9052e6f9f7cdae58e2fe38b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Wed, 18 May 2022 11:34:48 +0100 Subject: [PATCH 018/129] batch 2 - date/time functionality + undo unary namespacing (it fails for `ceiling`) --- r/R/dplyr-datetime-helpers.R | 20 +++--- r/R/dplyr-funcs-datetime.R | 52 +++++++------- r/R/expression.R | 24 +++---- r/tests/testthat/test-dplyr-funcs-datetime.R | 74 +++++++++++--------- 4 files changed, 91 insertions(+), 79 deletions(-) diff --git a/r/R/dplyr-datetime-helpers.R b/r/R/dplyr-datetime-helpers.R index af2f1deef81..9199ce0dd52 100644 --- a/r/R/dplyr-datetime-helpers.R +++ b/r/R/dplyr-datetime-helpers.R @@ -27,16 +27,16 @@ check_time_locale <- function(locale = Sys.getlocale("LC_TIME")) { } .helpers_function_map <- list( - "dminutes" = list(60, "s"), - "dhours" = list(3600, "s"), - "ddays" = list(86400, "s"), - "dweeks" = list(604800, "s"), - "dmonths" = list(2629800, "s"), - "dyears" = list(31557600, "s"), - "dseconds" = list(1, "s"), - "dmilliseconds" = list(1, "ms"), - "dmicroseconds" = list(1, "us"), - "dnanoseconds" = list(1, "ns") + "lubridate::dminutes" = list(60, "s"), + "lubridate::dhours" = list(3600, "s"), + "lubridate::ddays" = list(86400, "s"), + "lubridate::dweeks" = list(604800, "s"), + "lubridate::dmonths" = list(2629800, "s"), + "lubridate::dyears" = list(31557600, "s"), + "lubridate::dseconds" = list(1, "s"), + "lubridate::dmilliseconds" = list(1, "ms"), + "lubridate::dmicroseconds" = list(1, "us"), + "lubridate::dnanoseconds" = list(1, "ns") ) make_duration <- function(x, unit) { # TODO(ARROW-15862): remove first cast to int64 diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R index 08d9d0e3625..e1dc9749588 100644 --- a/r/R/dplyr-funcs-datetime.R +++ b/r/R/dplyr-funcs-datetime.R @@ -95,7 +95,7 @@ register_bindings_datetime_utility <- function() { Expression$create("strftime", ts, options = list(format = format, locale = check_time_locale())) }) - register_binding("format_ISO8601", function(x, usetz = FALSE, precision = NULL, ...) { + register_binding("lubridate::format_ISO8601", function(x, usetz = FALSE, precision = NULL, ...) { ISO8601_precision_map <- list( y = "%Y", @@ -126,7 +126,7 @@ register_bindings_datetime_utility <- function() { Expression$create("strftime", x, options = list(format = format, locale = "C")) }) - register_binding("is.Date", function(x) { + register_binding("lubridate::is.Date", function(x) { inherits(x, "Date") || (inherits(x, "Expression") && x$type_id() %in% Type[c("DATE32", "DATE64")]) }) @@ -135,10 +135,10 @@ register_bindings_datetime_utility <- function() { inherits(x, c("POSIXt", "POSIXct", "POSIXlt", "Date")) || (inherits(x, "Expression") && x$type_id() %in% Type[c("TIMESTAMP", "DATE32", "DATE64")]) } - register_binding("is.instant", is_instant_binding) - register_binding("is.timepoint", is_instant_binding) + register_binding("lubridate::is.instant", is_instant_binding) + register_binding("lubridate::is.timepoint", is_instant_binding) - register_binding("is.POSIXct", function(x) { + register_binding("lubridate::is.POSIXct", function(x) { inherits(x, "POSIXct") || (inherits(x, "Expression") && x$type_id() %in% Type[c("TIMESTAMP")]) }) @@ -149,7 +149,7 @@ register_bindings_datetime_utility <- function() { } register_bindings_datetime_components <- function() { - register_binding("second", function(x) { + register_binding("lubridate::second", function(x) { Expression$create("add", Expression$create("second", x), Expression$create("subsecond", x)) }) @@ -168,11 +168,11 @@ register_bindings_datetime_components <- function() { Expression$create("day_of_week", x, options = list(count_from_zero = FALSE, week_start = week_start)) }) - register_binding("week", function(x) { + register_binding("lubridate::week", function(x) { (call_binding("yday", x) - 1) %/% 7 + 1 }) - register_binding("month", function(x, + register_binding("lubridate::month", function(x, label = FALSE, abbr = TRUE, locale = Sys.getlocale("LC_TIME")) { @@ -207,14 +207,14 @@ register_bindings_datetime_components <- function() { build_expr("month", x) }) - register_binding("am", function(x) { + register_binding("lubridate::am", function(x) { hour <- Expression$create("hour", x) hour < 12 }) - register_binding("pm", function(x) { + register_binding("lubridate::pm", function(x) { !call_binding("am", x) }) - register_binding("tz", function(x) { + register_binding("lubridate::tz", function(x) { if (!call_binding("is.POSIXct", x)) { abort( paste0( @@ -227,7 +227,7 @@ register_bindings_datetime_components <- function() { x$type()$timezone() }) - register_binding("semester", function(x, with_year = FALSE) { + register_binding("lubridate::semester", function(x, with_year = FALSE) { month <- call_binding("month", x) semester <- call_binding("if_else", month <= 6, 1L, 2L) if (with_year) { @@ -240,7 +240,7 @@ register_bindings_datetime_components <- function() { } register_bindings_datetime_conversion <- function() { - register_binding("make_datetime", function(year = 1970L, + register_binding("lubridate::make_datetime", function(year = 1970L, month = 1L, day = 1L, hour = 0L, @@ -258,12 +258,12 @@ register_bindings_datetime_conversion <- function() { build_expr("strptime", x, options = list(format = "%Y-%m-%d-%H-%M-%S", unit = 0L)) }) - register_binding("make_date", function(year = 1970L, month = 1L, day = 1L) { + register_binding("lubridate::make_date", function(year = 1970L, month = 1L, day = 1L) { x <- call_binding("make_datetime", year, month, day) build_expr("cast", x, options = cast_options(to_type = date32())) }) - register_binding("ISOdatetime", function(year, + register_binding("base::ISOdatetime", function(year, month, day, hour, @@ -282,7 +282,7 @@ register_bindings_datetime_conversion <- function() { call_binding("make_datetime", year, month, day, hour, min, sec, tz) }) - register_binding("ISOdate", function(year, + register_binding("base::ISOdate", function(year, month, day, hour = 12, @@ -292,7 +292,7 @@ register_bindings_datetime_conversion <- function() { call_binding("make_datetime", year, month, day, hour, min, sec, tz) }) - register_binding("as.Date", function(x, + register_binding("base::as.Date", function(x, format = NULL, tryFormats = "%Y-%m-%d", origin = "1970-01-01", @@ -324,7 +324,7 @@ register_bindings_datetime_conversion <- function() { ) }) - register_binding("as_date", function(x, + register_binding("lubridate::as_date", function(x, format = NULL, origin = "1970-01-01", tz = NULL) { @@ -367,7 +367,7 @@ register_bindings_datetime_conversion <- function() { build_expr("assume_timezone", output, options = list(timezone = tz)) }) - register_binding("decimal_date", function(date) { + register_binding("lubridate::decimal_date", function(date) { y <- build_expr("year", date) start <- call_binding("make_datetime", year = y, tz = "UTC") sofar <- call_binding("difftime", date, start, units = "secs") @@ -380,7 +380,7 @@ register_bindings_datetime_conversion <- function() { y + sofar$cast(int64()) / total }) - register_binding("date_decimal", function(decimal, tz = "UTC") { + register_binding("lubridate::date_decimal", function(decimal, tz = "UTC") { y <- build_expr("floor", decimal) start <- call_binding("make_datetime", year = y, tz = tz) @@ -399,7 +399,7 @@ register_bindings_datetime_conversion <- function() { } register_bindings_duration <- function() { - register_binding("difftime", function(time1, + register_binding("base::difftime", function(time1, time2, tz, units = "secs") { @@ -440,7 +440,7 @@ register_bindings_duration <- function() { subtract_output <- build_expr("-", time1, time2) build_expr("cast", subtract_output, options = cast_options(to_type = duration("s"))) }) - register_binding("as.difftime", function(x, + register_binding("base::as.difftime", function(x, format = "%X", units = "secs") { # windows doesn't seem to like "%X" @@ -520,13 +520,13 @@ register_bindings_duration_helpers <- function() { ) } - register_binding("dpicoseconds", function(x = 1) { + register_binding("lubridate::dpicoseconds", function(x = 1) { abort("Duration in picoseconds not supported in Arrow.") }) } register_bindings_datetime_parsers <- function() { - register_binding("parse_date_time", function(x, + register_binding("lubridate::parse_date_time", function(x, orders, tz = "UTC", truncated = 0, @@ -584,7 +584,9 @@ register_bindings_datetime_parsers <- function() { } for (ymd_order in ymd_parser_vec) { - register_binding(ymd_order, ymd_parser_map_factory(ymd_order)) + register_binding( + paste0("lubridate::", ymd_order), + ymd_parser_map_factory(ymd_order)) } register_binding("fast_strptime", function(x, diff --git a/r/R/expression.R b/r/R/expression.R index 6b9eb5e89c5..835fdd60a26 100644 --- a/r/R/expression.R +++ b/r/R/expression.R @@ -26,21 +26,21 @@ # functions are arranged alphabetically by name within categories # arithmetic functions - "base::abs" = "abs_checked", - "base::ceiling" = "ceil", - "base::floor" = "floor", - "base::log10" = "log10_checked", - "base::log1p" = "log1p_checked", - "base::log2" = "log2_checked", - "base::sign" = "sign", + "abs" = "abs_checked", + "ceiling" = "ceil", + "floor" = "floor", + "log10" = "log10_checked", + "log1p" = "log1p_checked", + "log2" = "log2_checked", + "sign" = "sign", # trunc is defined in dplyr-functions.R # trigonometric functions - "base::acos" = "acos_checked", - "base::asin" = "asin_checked", - "base::cos" = "cos_checked", - "base::sin" = "sin_checked", - "base::tan" = "tan_checked", + "acos" = "acos_checked", + "asin" = "asin_checked", + "cos" = "cos_checked", + "sin" = "sin_checked", + "tan" = "tan_checked", # logical functions "!" = "invert", diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R index 6e6edf46aca..6594e4696a5 100644 --- a/r/tests/testthat/test-dplyr-funcs-datetime.R +++ b/r/tests/testthat/test-dplyr-funcs-datetime.R @@ -280,7 +280,7 @@ test_that("format_ISO8601", { compare_dplyr_binding( .input %>% - mutate(x = format_ISO8601(x, precision = "ymd", usetz = FALSE)) %>% + mutate(x = lubridate::format_ISO8601(x, precision = "ymd", usetz = FALSE)) %>% collect(), times ) @@ -340,14 +340,20 @@ test_that("is.* functions from lubridate", { # make sure all true and at least one false value is considered compare_dplyr_binding( .input %>% - mutate(x = is.POSIXct(datetime), y = is.POSIXct(integer)) %>% + mutate( + x = lubridate::is.POSIXct(datetime), + y = is.POSIXct(integer) + ) %>% collect(), test_df ) compare_dplyr_binding( .input %>% - mutate(x = is.Date(date), y = is.Date(integer)) %>% + mutate( + x = lubridate::is.Date(date), + y = is.Date(integer) + ) %>% collect(), test_df ) @@ -355,7 +361,7 @@ test_that("is.* functions from lubridate", { compare_dplyr_binding( .input %>% mutate( - x = is.instant(datetime), + x = lubridate::is.instant(datetime), y = is.instant(date), z = is.instant(integer) ) %>% @@ -366,7 +372,7 @@ test_that("is.* functions from lubridate", { compare_dplyr_binding( .input %>% mutate( - x = is.timepoint(datetime), + x = lubridate::is.timepoint(datetime), y = is.instant(date), z = is.timepoint(integer) ) %>% @@ -424,7 +430,7 @@ test_that("extract month from timestamp", { compare_dplyr_binding( .input %>% # R returns ordered factor whereas Arrow returns character - mutate(x = as.character(month(datetime, label = TRUE))) %>% + mutate(x = as.character(lubridate::month(datetime, label = TRUE))) %>% collect(), test_df, ignore_attr = TRUE @@ -818,7 +824,7 @@ test_that("semester works with temporal types and integers", { compare_dplyr_binding( .input %>% mutate( - sem_wo_year = semester(dates), + sem_wo_year = lubridate::semester(dates), sem_w_year = semester(dates, with_year = TRUE) ) %>% collect(), @@ -1026,7 +1032,7 @@ test_that("make_date & make_datetime", { compare_dplyr_binding( .input %>% - mutate(composed_date = make_date(year, month, day)) %>% + mutate(composed_date = lubridate::make_date(year, month, day)) %>% collect(), test_df ) @@ -1040,7 +1046,10 @@ test_that("make_date & make_datetime", { compare_dplyr_binding( .input %>% - mutate(composed_datetime = make_datetime(year, month, day, hour, min, sec)) %>% + mutate( + composed_datetime = + lubridate::make_datetime(year, month, day, hour, min, sec) + ) %>% collect(), test_df, # the make_datetime binding uses strptime which does not support tz, hence @@ -1074,7 +1083,7 @@ test_that("ISO_datetime & ISOdate", { compare_dplyr_binding( .input %>% - mutate(composed_date = ISOdate(year, month, day)) %>% + mutate(composed_date = base::ISOdate(year, month, day)) %>% collect(), test_df, # the make_datetime binding uses strptime which does not support tz, hence @@ -1096,7 +1105,8 @@ test_that("ISO_datetime & ISOdate", { compare_dplyr_binding( .input %>% mutate( - composed_datetime = ISOdatetime(year, month, day, hour, min, sec, tz = "UTC") + composed_datetime = + base::ISOdatetime(year, month, day, hour, min, sec, tz = "UTC") ) %>% collect(), test_df, @@ -1118,7 +1128,7 @@ test_that("ISO_datetime & ISOdate", { ) }) -test_that("difftime works correctly", { +test_that("difftime works", { test_df <- tibble( time1 = as.POSIXct( c("2021-02-20", "2021-07-31 0:0:0", "2021-10-30", "2021-01-31 0:0:0") @@ -1132,7 +1142,7 @@ test_that("difftime works correctly", { compare_dplyr_binding( .input %>% mutate( - secs2 = difftime(time1, time2, units = "secs") + secs2 = base::difftime(time1, time2, units = "secs") ) %>% collect(), test_df, @@ -1204,7 +1214,7 @@ test_that("as.difftime()", { compare_dplyr_binding( .input %>% - mutate(hms_difftime = as.difftime(hms_string, units = "secs")) %>% + mutate(hms_difftime = base::as.difftime(hms_string, units = "secs")) %>% collect(), test_df ) @@ -1274,11 +1284,11 @@ test_that("`decimal_date()` and `date_decimal()`", { compare_dplyr_binding( .input %>% mutate( - decimal_date_from_POSIXct = decimal_date(b), + decimal_date_from_POSIXct = lubridate::decimal_date(b), decimal_date_from_r_POSIXct_obj = decimal_date(as.POSIXct("2022-03-25 15:37:01")), decimal_date_from_r_date_obj = decimal_date(as.Date("2022-03-25")), decimal_date_from_date = decimal_date(c), - date_from_decimal = date_decimal(a), + date_from_decimal = lubridate::date_decimal(a), date_from_decimal_r_obj = date_decimal(2022.178) ) %>% collect(), @@ -1299,12 +1309,12 @@ test_that("dminutes, dhours, ddays, dweeks, dmonths, dyears", { compare_dplyr_binding( .input %>% mutate( - dminutes = dminutes(x), - dhours = dhours(x), - ddays = ddays(x), - dweeks = dweeks(x), - dmonths = dmonths(x), - dyears = dyears(x) + dminutes = lubridate::dminutes(x), + dhours = lubridate::dhours(x), + ddays = lubridate::ddays(x), + dweeks = lubridate::dweeks(x), + dmonths = lubridate::dmonths(x), + dyears = lubridate::dyears(x) ) %>% collect(), example_d, @@ -1317,7 +1327,7 @@ test_that("dminutes, dhours, ddays, dweeks, dmonths, dyears", { dhours = dhours(x), ddays = ddays(x), new_date_1 = date_to_add + ddays, - new_date_2 = date_to_add + ddays - dhours(3), + new_date_2 = date_to_add + ddays - lubridate::dhours(3), new_duration = dhours - ddays ) %>% collect(), @@ -1362,8 +1372,8 @@ test_that("dseconds, dmilliseconds, dmicroseconds, dnanoseconds, dpicoseconds", compare_dplyr_binding( .input %>% mutate( - dseconds = dseconds(x), - dmilliseconds = dmilliseconds(x), + dseconds = lubridate::dseconds(x), + dmilliseconds = lubridate::dmilliseconds(x), dmicroseconds = dmicroseconds(x), dnanoseconds = dnanoseconds(x), ) %>% @@ -1525,7 +1535,7 @@ test_that("`as.Date()` and `as_date()`", { compare_dplyr_binding( .input %>% mutate( - date_dv1 = as.Date(date_var), + date_dv1 = base::as.Date(date_var), date_pv1 = as.Date(posixct_var), date_pv_tz1 = as.Date(posixct_var, tz = "Pacific/Marquesas"), date_utc1 = as.Date(dt_utc), @@ -1535,7 +1545,7 @@ test_that("`as.Date()` and `as_date()`", { date_int1 = as.Date(integer_var, origin = "1970-01-01"), date_int_origin1 = as.Date(integer_var, origin = "1970-01-03"), date_integerish1 = as.Date(integerish_var, origin = "1970-01-01"), - date_dv2 = as_date(date_var), + date_dv2 = lubridate::as_date(date_var), date_pv2 = as_date(posixct_var), date_pv_tz2 = as_date(posixct_var, tz = "Pacific/Marquesas"), date_utc2 = as_date(dt_utc), @@ -1717,7 +1727,7 @@ test_that("parse_date_time() works with year, month, and date components", { compare_dplyr_binding( .input %>% mutate( - parsed_date_ymd = parse_date_time(string_ymd, orders = "ymd"), + parsed_date_ymd = lubridate::parse_date_time(string_ymd, orders = "ymd"), parsed_date_dmy = parse_date_time(string_dmy, orders = "dmy"), parsed_date_mdy = parse_date_time(string_mdy, orders = "mdy") ) %>% @@ -1805,8 +1815,8 @@ test_that("year, month, day date/time parsers", { compare_dplyr_binding( .input %>% mutate( - ymd_date = ymd(ymd_string), - ydm_date = ydm(ydm_string), + ymd_date = lubridate::ymd(ymd_string), + ydm_date = lubridate::ydm(ydm_string), mdy_date = mdy(mdy_string), myd_date = myd(myd_string), dmy_date = dmy(dmy_string), @@ -1819,8 +1829,8 @@ test_that("year, month, day date/time parsers", { compare_dplyr_binding( .input %>% mutate( - ymd_date = ymd(ymd_string, tz = "Pacific/Marquesas"), - ydm_date = ydm(ydm_string, tz = "Pacific/Marquesas"), + ymd_date = lubridate::ymd(ymd_string, tz = "Pacific/Marquesas"), + ydm_date = lubridate::ydm(ydm_string, tz = "Pacific/Marquesas"), mdy_date = mdy(mdy_string, tz = "Pacific/Marquesas"), myd_date = myd(myd_string, tz = "Pacific/Marquesas"), dmy_date = dmy(dmy_string, tz = "Pacific/Marquesas"), From 6b8402a001037992e4a5dfd6878d5e7044cd61f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Wed, 18 May 2022 13:26:07 +0100 Subject: [PATCH 019/129] updated registrations and unit tests for math functions --- r/R/dplyr-funcs-math.R | 12 +++++------ r/tests/testthat/test-dplyr-funcs-math.R | 26 ++++++++++++++++++------ 2 files changed, 26 insertions(+), 12 deletions(-) diff --git a/r/R/dplyr-funcs-math.R b/r/R/dplyr-funcs-math.R index 0ba2ddc856e..a6b47a9738d 100644 --- a/r/R/dplyr-funcs-math.R +++ b/r/R/dplyr-funcs-math.R @@ -49,10 +49,10 @@ register_bindings_math <- function() { Expression$create("logb_checked", x, Expression$scalar(base)) } - register_binding("log", log_binding) - register_binding("logb", log_binding) + register_binding("base::log", log_binding) + register_binding("base::logb", log_binding) - register_binding("pmin", function(..., na.rm = FALSE) { + register_binding("base::pmin", function(..., na.rm = FALSE) { build_expr( "min_element_wise", ..., @@ -60,7 +60,7 @@ register_bindings_math <- function() { ) }) - register_binding("pmax", function(..., na.rm = FALSE) { + register_binding("base::pmax", function(..., na.rm = FALSE) { build_expr( "max_element_wise", ..., @@ -68,12 +68,12 @@ register_bindings_math <- function() { ) }) - register_binding("trunc", function(x, ...) { + register_binding("base::trunc", function(x, ...) { # accepts and ignores ... for consistency with base::trunc() build_expr("trunc", x) }) - register_binding("round", function(x, digits = 0) { + register_binding("base::round", function(x, digits = 0) { build_expr( "round", x, diff --git a/r/tests/testthat/test-dplyr-funcs-math.R b/r/tests/testthat/test-dplyr-funcs-math.R index 47a9f0b7c02..0a0c18ae0d8 100644 --- a/r/tests/testthat/test-dplyr-funcs-math.R +++ b/r/tests/testthat/test-dplyr-funcs-math.R @@ -51,7 +51,9 @@ test_that("ceiling(), floor(), trunc(), round()", { c = ceiling(x), f = floor(x), t = trunc(x), - r = round(x) + t2 = base::trunc(x), + r = round(x), + r2 = base::round(x) ) %>% collect(), df @@ -141,7 +143,7 @@ test_that("log functions", { compare_dplyr_binding( .input %>% - mutate(y = log(x)) %>% + mutate(y = base::log(x)) %>% collect(), df ) @@ -223,28 +225,40 @@ test_that("log functions", { compare_dplyr_binding( .input %>% - mutate(y = logb(x)) %>% + mutate( + y = logb(x), + z = base::logb(x) + ) %>% collect(), df ) compare_dplyr_binding( .input %>% - mutate(y = log1p(x)) %>% + mutate( + y = log1p(x), + z = base::log1p(x) + ) %>% collect(), df ) compare_dplyr_binding( .input %>% - mutate(y = log2(x)) %>% + mutate( + y = log2(x), + z = base::log2(x) + ) %>% collect(), df ) compare_dplyr_binding( .input %>% - mutate(y = log10(x)) %>% + mutate( + y = log10(x), + z = base::log10(x) + ) %>% collect(), df ) From 6fe340f78162816247206c1d38c4caf246a20955 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Wed, 18 May 2022 13:55:00 +0100 Subject: [PATCH 020/129] reverse changes in expressions and math tests --- r/R/expression.R | 8 ++++---- r/tests/testthat/test-dplyr-funcs-math.R | 12 ++++-------- 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/r/R/expression.R b/r/R/expression.R index 835fdd60a26..51c0f37a126 100644 --- a/r/R/expression.R +++ b/r/R/expression.R @@ -54,11 +54,11 @@ # str_to_title is defined in dplyr-functions.R # str_to_upper is defined in dplyr-functions.R # str_trim is defined in dplyr-functions.R - "stringi::stri_reverse" = "utf8_reverse", + "stri_reverse" = "utf8_reverse", # substr is defined in dplyr-functions.R # substring is defined in dplyr-functions.R - "base::tolower" = "utf8_lower", - "base::toupper" = "utf8_upper", + "tolower" = "utf8_lower", + "toupper" = "utf8_upper", # date and time functions "lubridate::day" = "day", @@ -78,7 +78,7 @@ "lubridate::leap_year" = "is_leap_year", # type conversion functions - "base::as.factor" = "dictionary_encode" + "as.factor" = "dictionary_encode" ) .binary_function_map <- list( diff --git a/r/tests/testthat/test-dplyr-funcs-math.R b/r/tests/testthat/test-dplyr-funcs-math.R index 0a0c18ae0d8..ac9fea65568 100644 --- a/r/tests/testthat/test-dplyr-funcs-math.R +++ b/r/tests/testthat/test-dplyr-funcs-math.R @@ -226,8 +226,7 @@ test_that("log functions", { compare_dplyr_binding( .input %>% mutate( - y = logb(x), - z = base::logb(x) + y = logb(x) ) %>% collect(), df @@ -236,8 +235,7 @@ test_that("log functions", { compare_dplyr_binding( .input %>% mutate( - y = log1p(x), - z = base::log1p(x) + y = log1p(x) ) %>% collect(), df @@ -246,8 +244,7 @@ test_that("log functions", { compare_dplyr_binding( .input %>% mutate( - y = log2(x), - z = base::log2(x) + y = log2(x) ) %>% collect(), df @@ -256,8 +253,7 @@ test_that("log functions", { compare_dplyr_binding( .input %>% mutate( - y = log10(x), - z = base::log10(x) + y = log10(x) ) %>% collect(), df From e5ff46ffa0b56e2988fd7d239ddf98527936f709 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Wed, 18 May 2022 13:55:30 +0100 Subject: [PATCH 021/129] test no longer needed since we suppor namespacing --- r/tests/testthat/test-dplyr-mutate.R | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/r/tests/testthat/test-dplyr-mutate.R b/r/tests/testthat/test-dplyr-mutate.R index beb893afec7..f46907855b9 100644 --- a/r/tests/testthat/test-dplyr-mutate.R +++ b/r/tests/testthat/test-dplyr-mutate.R @@ -140,17 +140,6 @@ test_that("transmute() with unsupported arguments", { ) }) -test_that("transmute() defuses dots arguments (ARROW-13262)", { - expect_warning( - tbl %>% - Table$create() %>% - transmute(stringr::str_c(chr, chr)) %>% - collect(), - "Expression stringr::str_c(chr, chr) not supported in Arrow; pulling data into R", - fixed = TRUE - ) -}) - test_that("mutate and refer to previous mutants", { compare_dplyr_binding( .input %>% @@ -525,9 +514,9 @@ test_that("mutate and pmin/pmax", { compare_dplyr_binding( .input %>% mutate( - max_val_1 = pmax(val1, val2, val3), + max_val_1 = base::pmax(val1, val2, val3), max_val_2 = pmax(val1, val2, val3, na.rm = TRUE), - min_val_1 = pmin(val1, val2, val3), + min_val_1 = base::pmin(val1, val2, val3), min_val_2 = pmin(val1, val2, val3, na.rm = TRUE) ) %>% collect(), From 06ad99fd164bac2ccc2d48903a190b80bdc45ee2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Wed, 18 May 2022 14:18:06 +0100 Subject: [PATCH 022/129] update names and tests for string bindings --- r/R/dplyr-funcs-string.R | 55 +++++++++++++--------- r/tests/testthat/test-dplyr-funcs-string.R | 55 ++++++++++++---------- r/tests/testthat/test-dplyr-mutate.R | 2 +- 3 files changed, 63 insertions(+), 49 deletions(-) diff --git a/r/R/dplyr-funcs-string.R b/r/R/dplyr-funcs-string.R index c3a44b688b8..b300d7c439e 100644 --- a/r/R/dplyr-funcs-string.R +++ b/r/R/dplyr-funcs-string.R @@ -161,7 +161,7 @@ register_bindings_string_join <- function() { } } - register_binding("paste", function(..., sep = " ", collapse = NULL, recycle0 = FALSE) { + register_binding("base::paste", function(..., sep = " ", collapse = NULL, recycle0 = FALSE) { assert_that( is.null(collapse), msg = "paste() with the collapse argument is not yet supported in Arrow" @@ -172,7 +172,7 @@ register_bindings_string_join <- function() { arrow_string_join_function(NullHandlingBehavior$REPLACE, "NA")(..., sep) }) - register_binding("paste0", function(..., collapse = NULL, recycle0 = FALSE) { + register_binding("base::paste0", function(..., collapse = NULL, recycle0 = FALSE) { assert_that( is.null(collapse), msg = "paste0() with the collapse argument is not yet supported in Arrow" @@ -180,7 +180,7 @@ register_bindings_string_join <- function() { arrow_string_join_function(NullHandlingBehavior$REPLACE, "NA")(..., "") }) - register_binding("str_c", function(..., sep = "", collapse = NULL) { + register_binding("stringr::str_c", function(..., sep = "", collapse = NULL) { assert_that( is.null(collapse), msg = "str_c() with the collapse argument is not yet supported in Arrow" @@ -198,7 +198,10 @@ register_bindings_string_regex <- function() { ) } - register_binding("grepl", function(pattern, x, ignore.case = FALSE, fixed = FALSE) { + register_binding("base::grepl", function(pattern, + x, + ignore.case = FALSE, + fixed = FALSE) { arrow_fun <- ifelse(fixed, "match_substring", "match_substring_regex") out <- create_string_match_expr( arrow_fun, @@ -210,7 +213,7 @@ register_bindings_string_regex <- function() { }) - register_binding("str_detect", function(string, pattern, negate = FALSE) { + register_binding("stringr::str_detect", function(string, pattern, negate = FALSE) { opts <- get_stringr_pattern_options(enexpr(pattern)) arrow_fun <- ifelse(opts$fixed, "match_substring", "match_substring_regex") out <- create_string_match_expr(arrow_fun, @@ -224,7 +227,9 @@ register_bindings_string_regex <- function() { out }) - register_binding("str_like", function(string, pattern, ignore_case = TRUE) { + register_binding("stringr::str_like", function(string, + pattern, + ignore_case = TRUE) { Expression$create( "match_like", string, @@ -232,7 +237,7 @@ register_bindings_string_regex <- function() { ) }) - register_binding("str_count", function(string, pattern) { + register_binding("stringr::str_count", function(string, pattern) { opts <- get_stringr_pattern_options(enexpr(pattern)) if (!is.string(pattern)) { arrow_not_supported("`pattern` must be a length 1 character vector; other values") @@ -245,7 +250,7 @@ register_bindings_string_regex <- function() { ) }) - register_binding("startsWith", function(x, prefix) { + register_binding("base::startsWith", function(x, prefix) { Expression$create( "starts_with", x, @@ -253,7 +258,7 @@ register_bindings_string_regex <- function() { ) }) - register_binding("endsWith", function(x, suffix) { + register_binding("base::endsWith", function(x, suffix) { Expression$create( "ends_with", x, @@ -261,7 +266,7 @@ register_bindings_string_regex <- function() { ) }) - register_binding("str_starts", function(string, pattern, negate = FALSE) { + register_binding("stringr::str_starts", function(string, pattern, negate = FALSE) { opts <- get_stringr_pattern_options(enexpr(pattern)) if (opts$fixed) { out <- call_binding("startsWith", x = string, prefix = opts$pattern) @@ -279,7 +284,7 @@ register_bindings_string_regex <- function() { out }) - register_binding("str_ends", function(string, pattern, negate = FALSE) { + register_binding("stringr::str_ends", function(string, pattern, negate = FALSE) { opts <- get_stringr_pattern_options(enexpr(pattern)) if (opts$fixed) { out <- call_binding("endsWith", x = string, suffix = opts$pattern) @@ -326,12 +331,12 @@ register_bindings_string_regex <- function() { } } - register_binding("sub", arrow_r_string_replace_function(1L)) - register_binding("gsub", arrow_r_string_replace_function(-1L)) - register_binding("str_replace", arrow_stringr_string_replace_function(1L)) - register_binding("str_replace_all", arrow_stringr_string_replace_function(-1L)) + register_binding("base::sub", arrow_r_string_replace_function(1L)) + register_binding("base::gsub", arrow_r_string_replace_function(-1L)) + register_binding("stringr::str_replace", arrow_stringr_string_replace_function(1L)) + register_binding("stringr::str_replace_all", arrow_stringr_string_replace_function(-1L)) - register_binding("strsplit", function(x, split, fixed = FALSE, perl = FALSE, + register_binding("base::strsplit", function(x, split, fixed = FALSE, perl = FALSE, useBytes = FALSE) { assert_that(is.string(split)) @@ -350,7 +355,10 @@ register_bindings_string_regex <- function() { ) }) - register_binding("str_split", function(string, pattern, n = Inf, simplify = FALSE) { + register_binding("stringr::str_split", function(string, + pattern, + n = Inf, + simplify = FALSE) { opts <- get_stringr_pattern_options(enexpr(pattern)) arrow_fun <- ifelse(opts$fixed, "split_pattern", "split_pattern_regex") if (opts$ignore_case) { @@ -382,7 +390,7 @@ register_bindings_string_regex <- function() { } register_bindings_string_other <- function() { - register_binding("nchar", function(x, type = "chars", allowNA = FALSE, keepNA = NA) { + register_binding("base::nchar", function(x, type = "chars", allowNA = FALSE, keepNA = NA) { if (allowNA) { arrow_not_supported("allowNA = TRUE") } @@ -425,7 +433,7 @@ register_bindings_string_other <- function() { Expression$create(trim_fun, string) }) - register_binding("substr", function(x, start, stop) { + register_binding("base::substr", function(x, start, stop) { assert_that( length(start) == 1, msg = "`start` must be length 1 - other lengths are not supported in Arrow" @@ -457,11 +465,11 @@ register_bindings_string_other <- function() { ) }) - register_binding("substring", function(text, first, last) { + register_binding("base::substring", function(text, first, last) { call_binding("substr", x = text, start = first, stop = last) }) - register_binding("str_sub", function(string, start = 1L, end = -1L) { + register_binding("stringr::str_sub", function(string, start = 1L, end = -1L) { assert_that( length(start) == 1, msg = "`start` must be length 1 - other lengths are not supported in Arrow" @@ -498,7 +506,10 @@ register_bindings_string_other <- function() { }) - register_binding("str_pad", function(string, width, side = c("left", "right", "both"), pad = " ") { + register_binding("stringr::str_pad", function(string, + width, + side = c("left", "right", "both"), + pad = " ") { assert_that(is_integerish(width)) side <- match.arg(side) assert_that(is.string(pad)) diff --git a/r/tests/testthat/test-dplyr-funcs-string.R b/r/tests/testthat/test-dplyr-funcs-string.R index ba892e7a1fd..a3e90d92f73 100644 --- a/r/tests/testthat/test-dplyr-funcs-string.R +++ b/r/tests/testthat/test-dplyr-funcs-string.R @@ -55,7 +55,7 @@ test_that("paste, paste0, and str_c", { ) compare_dplyr_binding( .input %>% - transmute(str_c(v, w)) %>% + transmute(stringr::str_c(v, w)) %>% collect(), df ) @@ -89,7 +89,7 @@ test_that("paste, paste0, and str_c", { # non-character column in dots compare_dplyr_binding( .input %>% - transmute(paste0(x, y, z)) %>% + transmute(base::paste0(x, y, z)) %>% collect(), df ) @@ -105,7 +105,7 @@ test_that("paste, paste0, and str_c", { # literal NA in dots compare_dplyr_binding( .input %>% - transmute(paste(x, NA, y)) %>% + transmute(base::paste(x, NA, y)) %>% collect(), df ) @@ -183,13 +183,13 @@ test_that("grepl with ignore.case = FALSE and fixed = TRUE", { df <- tibble(x = c("Foo", "bar", NA_character_)) compare_dplyr_binding( .input %>% - filter(grepl("o", x, fixed = TRUE)) %>% + filter(base::grepl("o", x, fixed = TRUE)) %>% collect(), df ) compare_dplyr_binding( .input %>% - mutate(x = grepl("o", x, fixed = TRUE)) %>% + mutate(x = base::grepl("o", x, fixed = TRUE)) %>% collect(), df ) @@ -277,13 +277,13 @@ test_that("str_detect", { compare_dplyr_binding( .input %>% - filter(str_detect(x, regex("^F"))) %>% + filter(stringr::str_detect(x, regex("^F"))) %>% collect(), df ) compare_dplyr_binding( .input %>% - transmute(x = str_detect(x, regex("^f[A-Z]{2}", ignore_case = TRUE))) %>% + transmute(x = stringr::str_detect(x, regex("^f[A-Z]{2}", ignore_case = TRUE))) %>% collect(), df ) @@ -325,7 +325,7 @@ test_that("sub and gsub", { for (fixed in c(TRUE, FALSE)) { compare_dplyr_binding( .input %>% - transmute(x = sub("Foo", "baz", x, fixed = fixed)) %>% + transmute(x = base::sub("Foo", "baz", x, fixed = fixed)) %>% collect(), df ) @@ -359,7 +359,7 @@ test_that("sub and gsub with ignore.case = TRUE and fixed = TRUE", { expect_equal( df %>% Table$create() %>% - transmute(x = gsub("o", "u", x, ignore.case = TRUE, fixed = TRUE)) %>% + transmute(x = base::gsub("o", "u", x, ignore.case = TRUE, fixed = TRUE)) %>% collect(), tibble(x = c("Fuu", "bar")) ) @@ -377,14 +377,14 @@ test_that("str_replace and str_replace_all", { compare_dplyr_binding( .input %>% - transmute(x = str_replace_all(x, "^F", "baz")) %>% + transmute(x = stringr::str_replace_all(x, "^F", "baz")) %>% collect(), df ) compare_dplyr_binding( .input %>% - transmute(x = str_replace_all(x, regex("^F"), "baz")) %>% + transmute(x = stringr::str_replace_all(x, regex("^F"), "baz")) %>% collect(), df ) @@ -427,7 +427,7 @@ test_that("strsplit and str_split", { compare_dplyr_binding( .input %>% - mutate(x = strsplit(x, "and")) %>% + mutate(x = base::strsplit(x, "and")) %>% collect(), df, # `ignore_attr = TRUE` because the vctr coming back from arrow (ListArray) @@ -450,7 +450,7 @@ test_that("strsplit and str_split", { ) compare_dplyr_binding( .input %>% - mutate(x = str_split(x, "and")) %>% + mutate(x = stringr::str_split(x, "and")) %>% collect(), df, ignore_attr = TRUE @@ -759,7 +759,7 @@ test_that("stri_reverse and arrow_ascii_reverse functions", { compare_dplyr_binding( .input %>% - mutate(x = stringi::stri_reverse(x)) %>% + mutate(x = stri_reverse(x)) %>% collect(), df_utf8 ) @@ -798,7 +798,7 @@ test_that("str_like", { expect_equal( df %>% Table$create() %>% - mutate(x = str_like(x, "baz")) %>% + mutate(x = stringr::str_like(x, "baz")) %>% collect(), tibble(x = c(FALSE, FALSE)) ) @@ -854,7 +854,7 @@ test_that("str_pad", { compare_dplyr_binding( .input %>% - mutate(x = str_pad(x, width = 31)) %>% + mutate(x = stringr::str_pad(x, width = 31)) %>% collect(), df ) @@ -893,7 +893,7 @@ test_that("substr", { compare_dplyr_binding( .input %>% - mutate(y = substr(x, 1, 6)) %>% + mutate(y = base::substr(x, 1, 6)) %>% collect(), df ) @@ -972,7 +972,10 @@ test_that("substring", { compare_dplyr_binding( .input %>% - mutate(y = substring(x, 1, 6)) %>% + mutate( + y = substring(x, 1, 6), + z = base::substring(x, 1, 6) + ) %>% collect(), df ) @@ -983,7 +986,7 @@ test_that("str_sub", { compare_dplyr_binding( .input %>% - mutate(y = str_sub(x, 1, 6)) %>% + mutate(y = stringr::str_sub(x, 1, 6)) %>% collect(), df ) @@ -1067,7 +1070,7 @@ test_that("str_starts, str_ends, startsWith, endsWith", { compare_dplyr_binding( .input %>% - filter(str_starts(x, "b.*")) %>% + filter(stringr::str_starts(x, "b.*")) %>% collect(), df ) @@ -1096,7 +1099,7 @@ test_that("str_starts, str_ends, startsWith, endsWith", { compare_dplyr_binding( .input %>% transmute( - a = str_starts(x, "b.*"), + a = stringr::str_starts(x, "b.*"), b = str_starts(x, "b.*", negate = TRUE), c = str_starts(x, fixed("b")), d = str_starts(x, fixed("b"), negate = TRUE) @@ -1107,7 +1110,7 @@ test_that("str_starts, str_ends, startsWith, endsWith", { compare_dplyr_binding( .input %>% - filter(str_ends(x, "r")) %>% + filter(stringr::str_ends(x, "r")) %>% collect(), df ) @@ -1136,7 +1139,7 @@ test_that("str_starts, str_ends, startsWith, endsWith", { compare_dplyr_binding( .input %>% transmute( - a = str_ends(x, "r"), + a = stringr::str_ends(x, "r"), b = str_ends(x, "r", negate = TRUE), c = str_ends(x, fixed("r")), d = str_ends(x, fixed("r"), negate = TRUE) @@ -1146,14 +1149,14 @@ test_that("str_starts, str_ends, startsWith, endsWith", { ) compare_dplyr_binding( .input %>% - filter(startsWith(x, "b")) %>% + filter(base::startsWith(x, "b")) %>% collect(), df ) compare_dplyr_binding( .input %>% - filter(endsWith(x, "r")) %>% + filter(base::endsWith(x, "r")) %>% collect(), df ) @@ -1191,7 +1194,7 @@ test_that("str_count", { compare_dplyr_binding( .input %>% - mutate(a_count = str_count(cities, pattern = "a")) %>% + mutate(a_count = stringr::str_count(cities, pattern = "a")) %>% collect(), df ) diff --git a/r/tests/testthat/test-dplyr-mutate.R b/r/tests/testthat/test-dplyr-mutate.R index f46907855b9..cb30f831e33 100644 --- a/r/tests/testthat/test-dplyr-mutate.R +++ b/r/tests/testthat/test-dplyr-mutate.R @@ -159,7 +159,7 @@ test_that("nchar() arguments", { .input %>% select(int, verses) %>% mutate( - line_lengths = nchar(verses, type = "bytes"), + line_lengths = base::nchar(verses, type = "bytes"), longer = line_lengths * 10 ) %>% filter(line_lengths > 15) %>% From d1917c3496f0bbbae23ad33d2210a0088981cdec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Wed, 18 May 2022 14:18:28 +0100 Subject: [PATCH 023/129] small datetime styling update --- r/R/dplyr-funcs-datetime.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R index e1dc9749588..24ec8ddbfd9 100644 --- a/r/R/dplyr-funcs-datetime.R +++ b/r/R/dplyr-funcs-datetime.R @@ -153,7 +153,9 @@ register_bindings_datetime_components <- function() { Expression$create("add", Expression$create("second", x), Expression$create("subsecond", x)) }) - register_binding("lubridate::wday", function(x, label = FALSE, abbr = TRUE, + register_binding("lubridate::wday", function(x, + label = FALSE, + abbr = TRUE, week_start = getOption("lubridate.week.start", 7), locale = Sys.getlocale("LC_TIME")) { if (label) { From d89cb0475387997c7364f41bfee14ac23d9ec67b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Wed, 18 May 2022 15:09:39 +0100 Subject: [PATCH 024/129] renaming type bindings + unit test update --- r/R/dplyr-funcs-type.R | 63 +++++++++++++----------- r/tests/testthat/test-dplyr-filter.R | 2 +- r/tests/testthat/test-dplyr-funcs-type.R | 58 ++++++++++++---------- 3 files changed, 66 insertions(+), 57 deletions(-) diff --git a/r/R/dplyr-funcs-type.R b/r/R/dplyr-funcs-type.R index 6c409c6c7e8..657dbcbb683 100644 --- a/r/R/dplyr-funcs-type.R +++ b/r/R/dplyr-funcs-type.R @@ -43,13 +43,13 @@ register_bindings_type_cast <- function() { # as.* type casting functions # as.factor() is mapped in expression.R - register_binding("as.character", function(x) { + register_binding("base::as.character", function(x) { build_expr("cast", x, options = cast_options(to_type = string())) }) - register_binding("as.double", function(x) { + register_binding("base::as.double", function(x) { build_expr("cast", x, options = cast_options(to_type = float64())) }) - register_binding("as.integer", function(x) { + register_binding("base::as.integer", function(x) { build_expr( "cast", x, @@ -60,7 +60,7 @@ register_bindings_type_cast <- function() { ) ) }) - register_binding("as.integer64", function(x) { + register_binding("bit64::as.integer64", function(x) { build_expr( "cast", x, @@ -71,14 +71,14 @@ register_bindings_type_cast <- function() { ) ) }) - register_binding("as.logical", function(x) { + register_binding("base::as.logical", function(x) { build_expr("cast", x, options = cast_options(to_type = boolean())) }) - register_binding("as.numeric", function(x) { + register_binding("base::as.numeric", function(x) { build_expr("cast", x, options = cast_options(to_type = float64())) }) - register_binding("is", function(object, class2) { + register_binding("methods::is", function(object, class2) { if (is.string(class2)) { switch(class2, # for R data types, pass off to is.*() functions @@ -103,7 +103,9 @@ register_bindings_type_cast <- function() { }) # Create a data frame/tibble/struct column - register_binding("tibble", function(..., .rows = NULL, .name_repair = NULL) { + register_binding("tibble::tibble", function(..., + .rows = NULL, + .name_repair = NULL) { if (!is.null(.rows)) arrow_not_supported(".rows") if (!is.null(.name_repair)) arrow_not_supported(".name_repair") @@ -122,8 +124,11 @@ register_bindings_type_cast <- function() { ) }) - register_binding("data.frame", function(..., row.names = NULL, - check.rows = NULL, check.names = TRUE, fix.empty.names = TRUE, + register_binding("base::data.frame", function(..., + row.names = NULL, + check.rows = NULL, + check.names = TRUE, + fix.empty.names = TRUE, stringsAsFactors = FALSE) { # we need a specific value of stringsAsFactors because the default was # TRUE in R <= 3.6 @@ -159,70 +164,70 @@ register_bindings_type_cast <- function() { register_bindings_type_inspect <- function() { # is.* type functions - register_binding("is.character", function(x) { + register_binding("base::is.character", function(x) { is.character(x) || (inherits(x, "Expression") && x$type_id() %in% Type[c("STRING", "LARGE_STRING")]) }) - register_binding("is.numeric", function(x) { + register_binding("base::is.numeric", function(x) { is.numeric(x) || (inherits(x, "Expression") && x$type_id() %in% Type[c( "UINT8", "INT8", "UINT16", "INT16", "UINT32", "INT32", "UINT64", "INT64", "HALF_FLOAT", "FLOAT", "DOUBLE", "DECIMAL128", "DECIMAL256" )]) }) - register_binding("is.double", function(x) { + register_binding("base::is.double", function(x) { is.double(x) || (inherits(x, "Expression") && x$type_id() == Type["DOUBLE"]) }) - register_binding("is.integer", function(x) { + register_binding("base::is.integer", function(x) { is.integer(x) || (inherits(x, "Expression") && x$type_id() %in% Type[c( "UINT8", "INT8", "UINT16", "INT16", "UINT32", "INT32", "UINT64", "INT64" )]) }) - register_binding("is.integer64", function(x) { + register_binding("bit64::is.integer64", function(x) { inherits(x, "integer64") || (inherits(x, "Expression") && x$type_id() == Type["INT64"]) }) - register_binding("is.logical", function(x) { + register_binding("base::is.logical", function(x) { is.logical(x) || (inherits(x, "Expression") && x$type_id() == Type["BOOL"]) }) - register_binding("is.factor", function(x) { + register_binding("base::is.factor", function(x) { is.factor(x) || (inherits(x, "Expression") && x$type_id() == Type["DICTIONARY"]) }) - register_binding("is.list", function(x) { + register_binding("base::is.list", function(x) { is.list(x) || (inherits(x, "Expression") && x$type_id() %in% Type[c( "LIST", "FIXED_SIZE_LIST", "LARGE_LIST" )]) }) # rlang::is_* type functions - register_binding("is_character", function(x, n = NULL) { + register_binding("rlang::is_character", function(x, n = NULL) { assert_that(is.null(n)) call_binding("is.character", x) }) - register_binding("is_double", function(x, n = NULL, finite = NULL) { + register_binding("rlang::is_double", function(x, n = NULL, finite = NULL) { assert_that(is.null(n) && is.null(finite)) call_binding("is.double", x) }) - register_binding("is_integer", function(x, n = NULL) { + register_binding("rlang::is_integer", function(x, n = NULL) { assert_that(is.null(n)) call_binding("is.integer", x) }) - register_binding("is_list", function(x, n = NULL) { + register_binding("rlang::is_list", function(x, n = NULL) { assert_that(is.null(n)) call_binding("is.list", x) }) - register_binding("is_logical", function(x, n = NULL) { + register_binding("rlang::is_logical", function(x, n = NULL) { assert_that(is.null(n)) call_binding("is.logical", x) }) } register_bindings_type_elementwise <- function() { - register_binding("is.na", function(x) { + register_binding("base::is.na", function(x) { build_expr("is_null", x, options = list(nan_is_null = TRUE)) }) - register_binding("is.nan", function(x) { + register_binding("base::is.nan", function(x) { if (is.double(x) || (inherits(x, "Expression") && x$type_id() %in% TYPES_WITH_NAN)) { # TODO: if an option is added to the is_nan kernel to treat NA as NaN, @@ -233,17 +238,17 @@ register_bindings_type_elementwise <- function() { } }) - register_binding("between", function(x, left, right) { + register_binding("dplyr::between", function(x, left, right) { x >= left & x <= right }) - register_binding("is.finite", function(x) { + register_binding("base::is.finite", function(x) { is_fin <- Expression$create("is_finite", x) # for compatibility with base::is.finite(), return FALSE for NA_real_ is_fin & !call_binding("is.na", is_fin) }) - register_binding("is.infinite", function(x) { + register_binding("base::is.infinite", function(x) { is_inf <- Expression$create("is_inf", x) # for compatibility with base::is.infinite(), return FALSE for NA_real_ is_inf & !call_binding("is.na", is_inf) @@ -251,7 +256,7 @@ register_bindings_type_elementwise <- function() { } register_bindings_type_format <- function() { - register_binding("format", function(x, ...) { + register_binding("base::format", function(x, ...) { # We use R's format if we get a single R object here since we don't (yet) # support all of the possible options for casting to string if (!inherits(x, "Expression")) { diff --git a/r/tests/testthat/test-dplyr-filter.R b/r/tests/testthat/test-dplyr-filter.R index 9b33732a97b..5efb5a37a7f 100644 --- a/r/tests/testthat/test-dplyr-filter.R +++ b/r/tests/testthat/test-dplyr-filter.R @@ -198,7 +198,7 @@ test_that("Negative scalar values", { test_that("filter() with between()", { compare_dplyr_binding( .input %>% - filter(between(dbl, 1, 2)) %>% + filter(dplyr::between(dbl, 1, 2)) %>% collect(), tbl ) diff --git a/r/tests/testthat/test-dplyr-funcs-type.R b/r/tests/testthat/test-dplyr-funcs-type.R index 539dc98f8ba..01334e55e8b 100644 --- a/r/tests/testthat/test-dplyr-funcs-type.R +++ b/r/tests/testthat/test-dplyr-funcs-type.R @@ -92,10 +92,10 @@ test_that("explicit type conversions with as.*()", { compare_dplyr_binding( .input %>% transmute( - int2chr = as.character(int), - int2dbl = as.double(int), - int2int = as.integer(int), - int2num = as.numeric(int), + int2chr = base::as.character(int), + int2dbl = base::as.double(int), + int2int = base::as.integer(int), + int2num = base::as.numeric(int), dbl2chr = as.character(dbl), dbl2dbl = as.double(dbl), dbl2int = as.integer(dbl), @@ -130,7 +130,7 @@ test_that("explicit type conversions with as.*()", { compare_dplyr_binding( .input %>% transmute( - chr2i64 = as.integer64(chr), + chr2i64 = bit64::as.integer64(chr), dbl2i64 = as.integer64(dbl), i642i64 = as.integer64(i64), rchr2i64 = as.integer64("10000000000"), @@ -143,8 +143,8 @@ test_that("explicit type conversions with as.*()", { compare_dplyr_binding( .input %>% transmute( - chr2lgl = as.logical(chr), - dbl2lgl = as.logical(dbl), + chr2lgl = base::as.logical(chr), + dbl2lgl = base::as.logical(dbl), int2lgl = as.logical(int), rchr2lgl = as.logical("TRUE"), rdbl2lgl = as.logical(0), @@ -208,7 +208,9 @@ test_that("is.finite(), is.infinite(), is.nan()", { .input %>% transmute( is_fin = is.finite(x), - is_inf = is.infinite(x) + namespaced_is_fin = base::is.finite(x), + is_inf = is.infinite(x), + namespaced_is_inf = base::is.infinite(x) ) %>% collect(), df @@ -217,7 +219,8 @@ test_that("is.finite(), is.infinite(), is.nan()", { compare_dplyr_binding( .input %>% transmute( - is_nan = is.nan(x) + is_nan = is.nan(x), + namespaced_is_nan = base::is.nan(x) ) %>% collect(), df @@ -229,7 +232,8 @@ test_that("is.na() evaluates to TRUE on NaN (ARROW-12055)", { compare_dplyr_binding( .input %>% transmute( - is_na = is.na(x) + is_na = is.na(x), + namespaced_is_na = base::is.na(x) ) %>% collect(), df @@ -248,7 +252,7 @@ test_that("type checks with is() giving Arrow types", { str = Array$create("a", arrow::string()) ) %>% transmute( - i32_is_i32 = is(i32, int32()), + i32_is_i32 = methods::is(i32, int32()), i32_is_dec = is(i32, decimal(3, 2)), i32_is_dec128 = is(i32, decimal128(3, 2)), i32_is_dec256 = is(i32, decimal256(3, 2)), @@ -436,14 +440,14 @@ test_that("type checks with is.*()", { compare_dplyr_binding( .input %>% transmute( - chr_is_chr = is.character(chr), - chr_is_dbl = is.double(chr), - chr_is_fct = is.factor(chr), - chr_is_int = is.integer(chr), - chr_is_i64 = is.integer64(chr), - chr_is_lst = is.list(chr), - chr_is_lgl = is.logical(chr), - chr_is_num = is.numeric(chr), + chr_is_chr = base::is.character(chr), + chr_is_dbl = base::is.double(chr), + chr_is_fct = base::is.factor(chr), + chr_is_int = base::is.integer(chr), + chr_is_i64 = bit64::is.integer64(chr), + chr_is_lst = base::is.list(chr), + chr_is_lgl = base::is.logical(chr), + chr_is_num = base::is.numeric(chr), dbl_is_chr = is.character(dbl), dbl_is_dbl = is.double(dbl), dbl_is_fct = is.factor(dbl), @@ -515,11 +519,11 @@ test_that("type checks with is_*()", { compare_dplyr_binding( .input %>% transmute( - chr_is_chr = is_character(chr), - chr_is_dbl = is_double(chr), - chr_is_int = is_integer(chr), - chr_is_lst = is_list(chr), - chr_is_lgl = is_logical(chr), + chr_is_chr = rlang::is_character(chr), + chr_is_dbl = rlang::is_double(chr), + chr_is_int = rlang::is_integer(chr), + chr_is_lst = rlang::is_list(chr), + chr_is_lgl = rlang::is_logical(chr), dbl_is_chr = is_character(dbl), dbl_is_dbl = is_double(dbl), dbl_is_int = is_integer(dbl), @@ -735,7 +739,7 @@ test_that("structs/nested data frames/tibbles can be created", { compare_dplyr_binding( .input %>% transmute( - df_col = data.frame(regular_col1, fix.empty.names = FALSE) + df_col = base::data.frame(regular_col1, fix.empty.names = FALSE) ) %>% collect() %>% mutate(df_col = as.data.frame(df_col)), @@ -787,7 +791,7 @@ test_that("nested structs can be created from scalars and existing data frames", compare_dplyr_binding( .input %>% transmute( - df_col = tibble(b = 3) + df_col = tibble::tibble(b = 3) ) %>% collect(), tibble(a = 1:2) @@ -823,7 +827,7 @@ test_that("format date/time", { compare_dplyr_binding( .input %>% - mutate(x = format(datetime, format = formats)) %>% + mutate(x = base::format(datetime, format = formats)) %>% collect(), times ) From dc5902fd49b95411e95b7600538302909ae31f90 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Wed, 18 May 2022 15:27:24 +0100 Subject: [PATCH 025/129] undo before merge --- r/R/dplyr-funcs.R | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/r/R/dplyr-funcs.R b/r/R/dplyr-funcs.R index a46ae2ce40a..b3b1b81a7a5 100644 --- a/r/R/dplyr-funcs.R +++ b/r/R/dplyr-funcs.R @@ -64,6 +64,29 @@ register_binding <- function(fun_name, fun, registry = nse_funcs) { name <- gsub("^.*?::", "", fun_name) } + operators <- c( + "!", "==", "!=", ">", ">=", "<", "<=", "&", "|", "+", "-", "*", "/", "%/%", + "%%", "^", "%in%" + ) + + # aggregating functions need some special attention + agg_functions <- c( + "sum", "any", "all", "mean", "sd", "var", "quantile", "median", + "n_distinct", "n", "min", "max" + ) + + unary_exprs <- c( + "abs", "ceiling", "floor", "sign", "acos", "asin", "cos", "sin", "tan", + "stri_reverse" + ) + + if (!grepl("::", fun_name) && + !(fun_name %in% operators) && + !(fun_name %in% agg_functions) && + !(fun_name %in% unary_exprs)) { + print(fun_name) + } + previous_fun <- if (name %in% names(registry)) registry[[name]] else NULL if (is.null(fun) && !is.null(previous_fun)) { From d4bf12028e8bd6f181536cb0e0742530a26d48ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Wed, 18 May 2022 15:28:44 +0100 Subject: [PATCH 026/129] remove before merge --- r/R/dplyr-funcs.R | 1 + 1 file changed, 1 insertion(+) diff --git a/r/R/dplyr-funcs.R b/r/R/dplyr-funcs.R index b3b1b81a7a5..794cda9af67 100644 --- a/r/R/dplyr-funcs.R +++ b/r/R/dplyr-funcs.R @@ -69,6 +69,7 @@ register_binding <- function(fun_name, fun, registry = nse_funcs) { "%%", "^", "%in%" ) + # TODO remove before merge # aggregating functions need some special attention agg_functions <- c( "sum", "any", "all", "mean", "sd", "var", "quantile", "median", From 8e1d11dd8440b40950863bb55a74fa27700fd9b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Fri, 20 May 2022 14:46:34 +0100 Subject: [PATCH 027/129] clean dyplyr-funcs --- r/R/dplyr-funcs.R | 24 ------------------------ 1 file changed, 24 deletions(-) diff --git a/r/R/dplyr-funcs.R b/r/R/dplyr-funcs.R index 794cda9af67..a46ae2ce40a 100644 --- a/r/R/dplyr-funcs.R +++ b/r/R/dplyr-funcs.R @@ -64,30 +64,6 @@ register_binding <- function(fun_name, fun, registry = nse_funcs) { name <- gsub("^.*?::", "", fun_name) } - operators <- c( - "!", "==", "!=", ">", ">=", "<", "<=", "&", "|", "+", "-", "*", "/", "%/%", - "%%", "^", "%in%" - ) - - # TODO remove before merge - # aggregating functions need some special attention - agg_functions <- c( - "sum", "any", "all", "mean", "sd", "var", "quantile", "median", - "n_distinct", "n", "min", "max" - ) - - unary_exprs <- c( - "abs", "ceiling", "floor", "sign", "acos", "asin", "cos", "sin", "tan", - "stri_reverse" - ) - - if (!grepl("::", fun_name) && - !(fun_name %in% operators) && - !(fun_name %in% agg_functions) && - !(fun_name %in% unary_exprs)) { - print(fun_name) - } - previous_fun <- if (name %in% names(registry)) registry[[name]] else NULL if (is.null(fun) && !is.null(previous_fun)) { From ee29ec3f86cb5d3d00db457a0840cbcdbee95b89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Fri, 20 May 2022 15:46:40 +0100 Subject: [PATCH 028/129] unit tests for maths functions --- r/tests/testthat/test-dplyr-funcs-math.R | 39 ++++++++++++++++++------ 1 file changed, 29 insertions(+), 10 deletions(-) diff --git a/r/tests/testthat/test-dplyr-funcs-math.R b/r/tests/testthat/test-dplyr-funcs-math.R index ac9fea65568..abba0edabbf 100644 --- a/r/tests/testthat/test-dplyr-funcs-math.R +++ b/r/tests/testthat/test-dplyr-funcs-math.R @@ -198,7 +198,7 @@ test_that("log functions", { filter(x != 1) %>% mutate( y = log(x, base = x), - z = log(2, base = x) + z = base::log(2, base = x) ) %>% collect(), df @@ -226,7 +226,8 @@ test_that("log functions", { compare_dplyr_binding( .input %>% mutate( - y = logb(x) + y = logb(x), + z = base::logb(x) ) %>% collect(), df @@ -235,7 +236,8 @@ test_that("log functions", { compare_dplyr_binding( .input %>% mutate( - y = log1p(x) + y = log1p(x), + z = base::log1p(x) ) %>% collect(), df @@ -244,7 +246,8 @@ test_that("log functions", { compare_dplyr_binding( .input %>% mutate( - y = log2(x) + y = log2(x), + z = base::log2(x) ) %>% collect(), df @@ -253,7 +256,8 @@ test_that("log functions", { compare_dplyr_binding( .input %>% mutate( - y = log10(x) + y = log10(x), + z = base::log10(x) ) %>% collect(), df @@ -265,35 +269,50 @@ test_that("trig functions", { compare_dplyr_binding( .input %>% - mutate(y = sin(x)) %>% + mutate( + y = sin(x), + z = base::sin(x) + ) %>% collect(), df ) compare_dplyr_binding( .input %>% - mutate(y = cos(x)) %>% + mutate( + y = cos(x), + z = base::cos(x) + ) %>% collect(), df ) compare_dplyr_binding( .input %>% - mutate(y = tan(x)) %>% + mutate( + y = tan(x), + z = base::tan(x) + ) %>% collect(), df ) compare_dplyr_binding( .input %>% - mutate(y = asin(x)) %>% + mutate( + y = asin(x), + z = base::asin(x) + ) %>% collect(), df ) compare_dplyr_binding( .input %>% - mutate(y = acos(x)) %>% + mutate( + y = acos(x), + z = base::acos(x) + ) %>% collect(), df ) From 8bed6e94b367bb65832a10d5b1b5f00e764a8c89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Fri, 20 May 2022 15:46:53 +0100 Subject: [PATCH 029/129] `lubridate::date` --- r/tests/testthat/test-dplyr-funcs-datetime.R | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R index 6594e4696a5..2aa8d275cad 100644 --- a/r/tests/testthat/test-dplyr-funcs-datetime.R +++ b/r/tests/testthat/test-dplyr-funcs-datetime.R @@ -943,15 +943,9 @@ test_that("date works in arrow", { r_date_object <- lubridate::ymd_hms("2012-03-26 23:12:13") - # we can't (for now) use namespacing, so we need to make sure lubridate::date() - # and not base::date() is being used. This is due to the way testthat runs and - # normal use of arrow would not have to do this explicitly. - # TODO: remove after ARROW-14575 - date <- lubridate::date - compare_dplyr_binding( .input %>% - mutate(a_date = date(posixct_date)) %>% + mutate(a_date = lubridate::date(posixct_date)) %>% collect(), test_df ) From 4f67040720af66fdf7d90c6756c1accb8979a9e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Fri, 20 May 2022 15:47:24 +0100 Subject: [PATCH 030/129] added unit tests for `base::tolower()` and `base::toupper()` --- r/tests/testthat/test-dplyr-funcs-string.R | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/r/tests/testthat/test-dplyr-funcs-string.R b/r/tests/testthat/test-dplyr-funcs-string.R index a3e90d92f73..1ffe2e8b73b 100644 --- a/r/tests/testthat/test-dplyr-funcs-string.R +++ b/r/tests/testthat/test-dplyr-funcs-string.R @@ -504,14 +504,16 @@ test_that("strrep and str_dup", { } }) -test_that("str_to_lower, str_to_upper, and str_to_title", { +test_that("str_to_lower, str_to_upper, and str_to_title tolower toupper", { df <- tibble(x = c("foo1", " \tB a R\n", "!apACHe aRroW!")) compare_dplyr_binding( .input %>% transmute( x_lower = stringr::str_to_lower(x), x_upper = str_to_upper(x), - x_title = str_to_title(x) + x_title = str_to_title(x), + x_tolower = base::tolower(x), + x_toupper = toupper(x) ) %>% collect(), df From 1baaea612c8c3bfa901ededf9302059c0813784e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Fri, 20 May 2022 15:47:39 +0100 Subject: [PATCH 031/129] `lubridate::date` --- r/R/dplyr-funcs-datetime.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R index 24ec8ddbfd9..084a82afe06 100644 --- a/r/R/dplyr-funcs-datetime.R +++ b/r/R/dplyr-funcs-datetime.R @@ -143,7 +143,7 @@ register_bindings_datetime_utility <- function() { (inherits(x, "Expression") && x$type_id() %in% Type[c("TIMESTAMP")]) }) - register_binding("date", function(x) { + register_binding("lubridate::date", function(x) { build_expr("cast", x, options = list(to_type = date32())) }) } From c8a5a4e64bb6046b7cee1f8b50f3408054e270cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Fri, 20 May 2022 15:48:19 +0100 Subject: [PATCH 032/129] `pkg::` prefix for maths functions & `tolower` + `toupper` --- r/R/expression.R | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/r/R/expression.R b/r/R/expression.R index 51c0f37a126..35459d17d94 100644 --- a/r/R/expression.R +++ b/r/R/expression.R @@ -29,18 +29,18 @@ "abs" = "abs_checked", "ceiling" = "ceil", "floor" = "floor", - "log10" = "log10_checked", - "log1p" = "log1p_checked", - "log2" = "log2_checked", + "base::log10" = "log10_checked", + "base::log1p" = "log1p_checked", + "base::log2" = "log2_checked", "sign" = "sign", # trunc is defined in dplyr-functions.R # trigonometric functions - "acos" = "acos_checked", - "asin" = "asin_checked", - "cos" = "cos_checked", - "sin" = "sin_checked", - "tan" = "tan_checked", + "base::acos" = "acos_checked", + "base::asin" = "asin_checked", + "base::cos" = "cos_checked", + "base::sin" = "sin_checked", + "base::tan" = "tan_checked", # logical functions "!" = "invert", @@ -57,8 +57,8 @@ "stri_reverse" = "utf8_reverse", # substr is defined in dplyr-functions.R # substring is defined in dplyr-functions.R - "tolower" = "utf8_lower", - "toupper" = "utf8_upper", + "base::tolower" = "utf8_lower", + "base::toupper" = "utf8_upper", # date and time functions "lubridate::day" = "day", From 691711e76a561c0f4eb330d8dff35903cfd530d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Fri, 20 May 2022 16:02:39 +0100 Subject: [PATCH 033/129] `abs` and `as.factor` --- r/R/expression.R | 4 ++-- r/tests/testthat/test-dplyr-funcs-math.R | 5 ++++- r/tests/testthat/test-dplyr-funcs-type.R | 2 +- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/r/R/expression.R b/r/R/expression.R index 35459d17d94..a87f277eb5e 100644 --- a/r/R/expression.R +++ b/r/R/expression.R @@ -26,7 +26,7 @@ # functions are arranged alphabetically by name within categories # arithmetic functions - "abs" = "abs_checked", + "base::abs" = "abs_checked", "ceiling" = "ceil", "floor" = "floor", "base::log10" = "log10_checked", @@ -78,7 +78,7 @@ "lubridate::leap_year" = "is_leap_year", # type conversion functions - "as.factor" = "dictionary_encode" + "base::as.factor" = "dictionary_encode" ) .binary_function_map <- list( diff --git a/r/tests/testthat/test-dplyr-funcs-math.R b/r/tests/testthat/test-dplyr-funcs-math.R index abba0edabbf..1a15e916f61 100644 --- a/r/tests/testthat/test-dplyr-funcs-math.R +++ b/r/tests/testthat/test-dplyr-funcs-math.R @@ -25,7 +25,10 @@ test_that("abs()", { compare_dplyr_binding( .input %>% - transmute(abs = abs(x)) %>% + transmute( + abs = abs(x), + abs_base = base::abs(x) + ) %>% collect(), df ) diff --git a/r/tests/testthat/test-dplyr-funcs-type.R b/r/tests/testthat/test-dplyr-funcs-type.R index 01334e55e8b..fafeaf2c8f2 100644 --- a/r/tests/testthat/test-dplyr-funcs-type.R +++ b/r/tests/testthat/test-dplyr-funcs-type.R @@ -612,7 +612,7 @@ test_that("as.factor()/dictionary_encode()", { expect_warning( compare_dplyr_binding( .input %>% - transmute(x = as.factor(x)) %>% + transmute(x = base::as.factor(x)) %>% collect(), df2 ), From fa814676f51b363fc79d43fe29af2b0a22886f3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Fri, 20 May 2022 16:19:08 +0100 Subject: [PATCH 034/129] undo `lubridate::date` --- r/R/dplyr-funcs-datetime.R | 2 +- r/tests/testthat/test-dplyr-funcs-datetime.R | 8 +++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R index 084a82afe06..24ec8ddbfd9 100644 --- a/r/R/dplyr-funcs-datetime.R +++ b/r/R/dplyr-funcs-datetime.R @@ -143,7 +143,7 @@ register_bindings_datetime_utility <- function() { (inherits(x, "Expression") && x$type_id() %in% Type[c("TIMESTAMP")]) }) - register_binding("lubridate::date", function(x) { + register_binding("date", function(x) { build_expr("cast", x, options = list(to_type = date32())) }) } diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R index 2aa8d275cad..988ab987e35 100644 --- a/r/tests/testthat/test-dplyr-funcs-datetime.R +++ b/r/tests/testthat/test-dplyr-funcs-datetime.R @@ -943,9 +943,15 @@ test_that("date works in arrow", { r_date_object <- lubridate::ymd_hms("2012-03-26 23:12:13") + # we can't (for now) use namespacing, so we need to make sure lubridate::date() + # and not base::date() is being used. This is due to the way testthat runs and + # normal use of arrow would not have to do this explicitly. + # TODO remove once https://issues.apache.org/jira/browse/ARROW-14575 is done + date <- lubridate::date + compare_dplyr_binding( .input %>% - mutate(a_date = lubridate::date(posixct_date)) %>% + mutate(a_date = date(posixct_date)) %>% collect(), test_df ) From e28a2518c401696998d7a6f160ffe129dd69cce6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Fri, 20 May 2022 16:19:59 +0100 Subject: [PATCH 035/129] `sign` and `stringi::stri_reverse` --- r/R/expression.R | 4 ++-- r/tests/testthat/test-dplyr-funcs-math.R | 5 ++++- r/tests/testthat/test-dplyr-funcs-string.R | 2 +- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/r/R/expression.R b/r/R/expression.R index a87f277eb5e..914b6477ac3 100644 --- a/r/R/expression.R +++ b/r/R/expression.R @@ -32,7 +32,7 @@ "base::log10" = "log10_checked", "base::log1p" = "log1p_checked", "base::log2" = "log2_checked", - "sign" = "sign", + "base::sign" = "sign", # trunc is defined in dplyr-functions.R # trigonometric functions @@ -54,7 +54,7 @@ # str_to_title is defined in dplyr-functions.R # str_to_upper is defined in dplyr-functions.R # str_trim is defined in dplyr-functions.R - "stri_reverse" = "utf8_reverse", + "stringi::stri_reverse" = "utf8_reverse", # substr is defined in dplyr-functions.R # substring is defined in dplyr-functions.R "base::tolower" = "utf8_lower", diff --git a/r/tests/testthat/test-dplyr-funcs-math.R b/r/tests/testthat/test-dplyr-funcs-math.R index 1a15e916f61..b9b73d48957 100644 --- a/r/tests/testthat/test-dplyr-funcs-math.R +++ b/r/tests/testthat/test-dplyr-funcs-math.R @@ -39,7 +39,10 @@ test_that("sign()", { compare_dplyr_binding( .input %>% - transmute(sign = sign(x)) %>% + transmute( + sign = sign(x), + sign2 = base::sign(x) + ) %>% collect(), df ) diff --git a/r/tests/testthat/test-dplyr-funcs-string.R b/r/tests/testthat/test-dplyr-funcs-string.R index 1ffe2e8b73b..e8afe5b687d 100644 --- a/r/tests/testthat/test-dplyr-funcs-string.R +++ b/r/tests/testthat/test-dplyr-funcs-string.R @@ -761,7 +761,7 @@ test_that("stri_reverse and arrow_ascii_reverse functions", { compare_dplyr_binding( .input %>% - mutate(x = stri_reverse(x)) %>% + mutate(x = stringi::stri_reverse(x)) %>% collect(), df_utf8 ) From cc0e86c2bd597eef67ecedb1a9ac5fe1b5873c71 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Fri, 20 May 2022 16:28:07 +0100 Subject: [PATCH 036/129] `ceiling` and `floor` --- r/R/expression.R | 4 ++-- r/tests/testthat/test-dplyr-funcs-math.R | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/r/R/expression.R b/r/R/expression.R index 914b6477ac3..6b9eb5e89c5 100644 --- a/r/R/expression.R +++ b/r/R/expression.R @@ -27,8 +27,8 @@ # arithmetic functions "base::abs" = "abs_checked", - "ceiling" = "ceil", - "floor" = "floor", + "base::ceiling" = "ceil", + "base::floor" = "floor", "base::log10" = "log10_checked", "base::log1p" = "log1p_checked", "base::log2" = "log2_checked", diff --git a/r/tests/testthat/test-dplyr-funcs-math.R b/r/tests/testthat/test-dplyr-funcs-math.R index b9b73d48957..9af39416f96 100644 --- a/r/tests/testthat/test-dplyr-funcs-math.R +++ b/r/tests/testthat/test-dplyr-funcs-math.R @@ -55,7 +55,9 @@ test_that("ceiling(), floor(), trunc(), round()", { .input %>% mutate( c = ceiling(x), + c2 = base::ceiling(x), f = floor(x), + f2 = base::floor(x), t = trunc(x), t2 = base::trunc(x), r = round(x), From 5bb3bb1621826175050eefe1f5f140b366e9d287 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Fri, 20 May 2022 18:34:45 +0100 Subject: [PATCH 037/129] `lubridate::date` --- r/R/dplyr-funcs-datetime.R | 2 +- r/tests/testthat/test-dplyr-funcs-datetime.R | 10 ++-------- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R index 24ec8ddbfd9..084a82afe06 100644 --- a/r/R/dplyr-funcs-datetime.R +++ b/r/R/dplyr-funcs-datetime.R @@ -143,7 +143,7 @@ register_bindings_datetime_utility <- function() { (inherits(x, "Expression") && x$type_id() %in% Type[c("TIMESTAMP")]) }) - register_binding("date", function(x) { + register_binding("lubridate::date", function(x) { build_expr("cast", x, options = list(to_type = date32())) }) } diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R index 988ab987e35..dcc0e2819d9 100644 --- a/r/tests/testthat/test-dplyr-funcs-datetime.R +++ b/r/tests/testthat/test-dplyr-funcs-datetime.R @@ -943,15 +943,9 @@ test_that("date works in arrow", { r_date_object <- lubridate::ymd_hms("2012-03-26 23:12:13") - # we can't (for now) use namespacing, so we need to make sure lubridate::date() - # and not base::date() is being used. This is due to the way testthat runs and - # normal use of arrow would not have to do this explicitly. - # TODO remove once https://issues.apache.org/jira/browse/ARROW-14575 is done - date <- lubridate::date - compare_dplyr_binding( .input %>% - mutate(a_date = date(posixct_date)) %>% + mutate(a_date = lubridate::date(posixct_date)) %>% collect(), test_df ) @@ -965,7 +959,7 @@ test_that("date works in arrow", { compare_dplyr_binding( .input %>% - mutate(date_from_r_object = date(r_date_object)) %>% + mutate(date_from_r_object = lubridate::date(r_date_object)) %>% collect(), test_df ) From c46efb6d75b7719f65a43ab2d4a65710aea1a326 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Fri, 20 May 2022 18:38:24 +0100 Subject: [PATCH 038/129] remove `arrow:::` prefix --- r/R/dplyr-funcs.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/R/dplyr-funcs.R b/r/R/dplyr-funcs.R index a46ae2ce40a..8c5a96edff4 100644 --- a/r/R/dplyr-funcs.R +++ b/r/R/dplyr-funcs.R @@ -129,6 +129,6 @@ register_bindings_utils <- function() { register_binding("::", function(lhs, rhs) { lhs_name <- as.character(substitute(lhs)) rhs_name <- as.character(substitute(rhs)) - arrow:::nse_funcs[[paste0(lhs_name, "::", rhs_name)]] + nse_funcs[[paste0(lhs_name, "::", rhs_name)]] }) } From ba645c7411d59a95b4cf1a3733550def802f2400 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Mon, 23 May 2022 14:58:16 +0100 Subject: [PATCH 039/129] point to the correct expression --- r/R/arrow-datum.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/R/arrow-datum.R b/r/R/arrow-datum.R index 8632ca3053d..3eda1307cb7 100644 --- a/r/R/arrow-datum.R +++ b/r/R/arrow-datum.R @@ -124,7 +124,7 @@ Math.ArrowDatum <- function(x, ..., base = exp(1), digits = 0) { cos = , sin = , tan = { - eval_array_expression(.Generic, x) + eval_array_expression(paste0("base::", .Generic), x) }, log = eval_array_expression("logb_checked", x, base), log10 = eval_array_expression("log10_checked", x), From 1e96d09c655d4d6f922bf43c36534f9048fd4f8e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Mon, 4 Jul 2022 15:25:42 +0100 Subject: [PATCH 040/129] `base::strptime()` and `base::strftime()` --- r/R/dplyr-funcs-datetime.R | 4 ++-- r/tests/testthat/test-dplyr-funcs-datetime.R | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R index 084a82afe06..6d270b7da9d 100644 --- a/r/R/dplyr-funcs-datetime.R +++ b/r/R/dplyr-funcs-datetime.R @@ -28,7 +28,7 @@ register_bindings_datetime <- function() { } register_bindings_datetime_utility <- function() { - register_binding("strptime", function(x, + register_binding("base::strptime", function(x, format = "%Y-%m-%d %H:%M:%S", tz = "", unit = "ms") { @@ -75,7 +75,7 @@ register_bindings_datetime_utility <- function() { output }) - register_binding("strftime", function(x, + register_binding("base::strftime", function(x, format = "", tz = "", usetz = FALSE) { diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R index dcc0e2819d9..66bb916dbec 100644 --- a/r/tests/testthat/test-dplyr-funcs-datetime.R +++ b/r/tests/testthat/test-dplyr-funcs-datetime.R @@ -90,7 +90,7 @@ test_that("strptime", { t_string %>% Table$create() %>% mutate( - x = strptime(x, tz = "UTC") + x = base::strptime(x, tz = "UTC") ) %>% collect(), t_stamp_with_utc_tz From 91c4b99f7e3571b1b6812121bc23aa705d6dbe61 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Mon, 4 Jul 2022 17:55:48 +0100 Subject: [PATCH 041/129] undo --- r/R/arrow-datum.R | 2 +- r/R/dplyr-funcs-math.R | 2 +- r/R/expression.R | 2 +- r/tests/testthat/test-dplyr-funcs-math.R | 6 +----- 4 files changed, 4 insertions(+), 8 deletions(-) diff --git a/r/R/arrow-datum.R b/r/R/arrow-datum.R index 3eda1307cb7..8632ca3053d 100644 --- a/r/R/arrow-datum.R +++ b/r/R/arrow-datum.R @@ -124,7 +124,7 @@ Math.ArrowDatum <- function(x, ..., base = exp(1), digits = 0) { cos = , sin = , tan = { - eval_array_expression(paste0("base::", .Generic), x) + eval_array_expression(.Generic, x) }, log = eval_array_expression("logb_checked", x, base), log10 = eval_array_expression("log10_checked", x), diff --git a/r/R/dplyr-funcs-math.R b/r/R/dplyr-funcs-math.R index a6b47a9738d..142a634a9c4 100644 --- a/r/R/dplyr-funcs-math.R +++ b/r/R/dplyr-funcs-math.R @@ -68,7 +68,7 @@ register_bindings_math <- function() { ) }) - register_binding("base::trunc", function(x, ...) { + register_binding("trunc", function(x, ...) { # accepts and ignores ... for consistency with base::trunc() build_expr("trunc", x) }) diff --git a/r/R/expression.R b/r/R/expression.R index 6b9eb5e89c5..28afead9ec2 100644 --- a/r/R/expression.R +++ b/r/R/expression.R @@ -27,7 +27,7 @@ # arithmetic functions "base::abs" = "abs_checked", - "base::ceiling" = "ceil", + "ceiling" = "ceil", "base::floor" = "floor", "base::log10" = "log10_checked", "base::log1p" = "log1p_checked", diff --git a/r/tests/testthat/test-dplyr-funcs-math.R b/r/tests/testthat/test-dplyr-funcs-math.R index 9af39416f96..5be41e45988 100644 --- a/r/tests/testthat/test-dplyr-funcs-math.R +++ b/r/tests/testthat/test-dplyr-funcs-math.R @@ -55,13 +55,9 @@ test_that("ceiling(), floor(), trunc(), round()", { .input %>% mutate( c = ceiling(x), - c2 = base::ceiling(x), f = floor(x), - f2 = base::floor(x), t = trunc(x), - t2 = base::trunc(x), - r = round(x), - r2 = base::round(x) + r = round(x) ) %>% collect(), df From 8da1365dc62a32354d7e64258d658ed401fb8d51 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Mon, 4 Jul 2022 18:44:40 +0100 Subject: [PATCH 042/129] register namespaced `fast_strptime()` --- r/R/dplyr-funcs-datetime.R | 2 +- r/tests/testthat/test-dplyr-funcs-datetime.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R index 6d270b7da9d..bcffe7cf759 100644 --- a/r/R/dplyr-funcs-datetime.R +++ b/r/R/dplyr-funcs-datetime.R @@ -591,7 +591,7 @@ register_bindings_datetime_parsers <- function() { ymd_parser_map_factory(ymd_order)) } - register_binding("fast_strptime", function(x, + register_binding("lubridate::fast_strptime", function(x, format, tz = "UTC", lt = FALSE, diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R index 66bb916dbec..fcc59ac6b15 100644 --- a/r/tests/testthat/test-dplyr-funcs-datetime.R +++ b/r/tests/testthat/test-dplyr-funcs-datetime.R @@ -1932,7 +1932,7 @@ test_that("lubridate's fast_strptime", { .input %>% mutate( date_multi_formats = - fast_strptime( + lubridate::fast_strptime( x, format = c("%Y-%m-%d %H:%M:%S", "%m-%d-%Y %H:%M:%S"), lt = FALSE From f534590e61736f18e88f2eae19509a57c7a7de1c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Mon, 4 Jul 2022 19:24:36 +0100 Subject: [PATCH 043/129] math unary bindings --- r/R/dplyr-funcs-math.R | 2 +- r/R/expression.R | 2 +- r/tests/testthat/test-dplyr-funcs-math.R | 20 ++++++++++++-------- 3 files changed, 14 insertions(+), 10 deletions(-) diff --git a/r/R/dplyr-funcs-math.R b/r/R/dplyr-funcs-math.R index 142a634a9c4..a6b47a9738d 100644 --- a/r/R/dplyr-funcs-math.R +++ b/r/R/dplyr-funcs-math.R @@ -68,7 +68,7 @@ register_bindings_math <- function() { ) }) - register_binding("trunc", function(x, ...) { + register_binding("base::trunc", function(x, ...) { # accepts and ignores ... for consistency with base::trunc() build_expr("trunc", x) }) diff --git a/r/R/expression.R b/r/R/expression.R index 28afead9ec2..6b9eb5e89c5 100644 --- a/r/R/expression.R +++ b/r/R/expression.R @@ -27,7 +27,7 @@ # arithmetic functions "base::abs" = "abs_checked", - "ceiling" = "ceil", + "base::ceiling" = "ceil", "base::floor" = "floor", "base::log10" = "log10_checked", "base::log1p" = "log1p_checked", diff --git a/r/tests/testthat/test-dplyr-funcs-math.R b/r/tests/testthat/test-dplyr-funcs-math.R index 5be41e45988..9935fec619a 100644 --- a/r/tests/testthat/test-dplyr-funcs-math.R +++ b/r/tests/testthat/test-dplyr-funcs-math.R @@ -27,7 +27,7 @@ test_that("abs()", { .input %>% transmute( abs = abs(x), - abs_base = base::abs(x) + abs_namespace = base::abs(x) ) %>% collect(), df @@ -41,7 +41,7 @@ test_that("sign()", { .input %>% transmute( sign = sign(x), - sign2 = base::sign(x) + sign_namespace = base::sign(x) ) %>% collect(), df @@ -55,9 +55,13 @@ test_that("ceiling(), floor(), trunc(), round()", { .input %>% mutate( c = ceiling(x), + c_namespace = base::ceiling(x), f = floor(x), + f_namespace = base::floor(x), t = trunc(x), - r = round(x) + t_namespace = base::trunc(x), + r = round(x), + r_namespace = base::round(x) ) %>% collect(), df @@ -275,7 +279,7 @@ test_that("trig functions", { .input %>% mutate( y = sin(x), - z = base::sin(x) + y_namespace = base::sin(x) ) %>% collect(), df @@ -285,7 +289,7 @@ test_that("trig functions", { .input %>% mutate( y = cos(x), - z = base::cos(x) + y_namespace = base::cos(x) ) %>% collect(), df @@ -295,7 +299,7 @@ test_that("trig functions", { .input %>% mutate( y = tan(x), - z = base::tan(x) + y_namespace = base::tan(x) ) %>% collect(), df @@ -305,7 +309,7 @@ test_that("trig functions", { .input %>% mutate( y = asin(x), - z = base::asin(x) + y_namespace = base::asin(x) ) %>% collect(), df @@ -315,7 +319,7 @@ test_that("trig functions", { .input %>% mutate( y = acos(x), - z = base::acos(x) + y_namespace = base::acos(x) ) %>% collect(), df From 8ffe53cf82600dde8aa842e8efc2e6218dce6f89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Mon, 4 Jul 2022 20:09:31 +0100 Subject: [PATCH 044/129] math functions for ArrowDatum --- r/R/arrow-datum.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/r/R/arrow-datum.R b/r/R/arrow-datum.R index 8632ca3053d..ff3ad1a00fe 100644 --- a/r/R/arrow-datum.R +++ b/r/R/arrow-datum.R @@ -113,10 +113,10 @@ Ops.ArrowDatum <- function(e1, e2) { #' @export Math.ArrowDatum <- function(x, ..., base = exp(1), digits = 0) { switch(.Generic, - abs = , - sign = , - floor = , - ceiling = , + abs = eval_array_expression("abs_checked", x), + sign = eval_array_expression("sign", x), + floor = eval_array_expression("floor", x), + ceiling = eval_array_expression("ceil", x), trunc = , acos = , asin = , From f61f7760e210c69a25414ec24680d26d537d8735 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Mon, 4 Jul 2022 21:06:17 +0100 Subject: [PATCH 045/129] bump ci From 5157403153f6bdbe09434859459073b2cfd9ddc6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Wed, 6 Jul 2022 11:22:29 +0100 Subject: [PATCH 046/129] make `filter` fail --- r/tests/testthat/test-dataset-dplyr.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/r/tests/testthat/test-dataset-dplyr.R b/r/tests/testthat/test-dataset-dplyr.R index ed3a20ff8a2..b2ac7e8f125 100644 --- a/r/tests/testthat/test-dataset-dplyr.R +++ b/r/tests/testthat/test-dataset-dplyr.R @@ -73,7 +73,7 @@ test_that("filter() on timestamp columns", { ds <- open_dataset(dataset_dir, partitioning = schema(part = uint8())) expect_equal( ds %>% - filter(ts >= as.POSIXct("2015-05-04 03:12:39", tz = "UTC")) %>% + filter(ts >= lubridate::ymd_hms("2015-05-04 03:12:39", tz = "UTC")) %>% filter(part == 1) %>% select(ts) %>% collect(), @@ -119,7 +119,7 @@ test_that("filter() on date32 columns", { # Also with timestamp scalar expect_equal( open_dataset(tmp) %>% - filter(date > as.POSIXct("2020-02-02 00:00:00", tz = "UTC")) %>% + filter(date > lubridate::ymd_hms("2020-02-02 00:00:00", tz = "UTC")) %>% collect() %>% nrow(), 1L From 163ea2eb615ae6bcac7146cb9ab032ed6a499b29 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Wed, 6 Jul 2022 14:51:52 +0100 Subject: [PATCH 047/129] remove unrelated change --- r/tests/testthat/test-dataset-dplyr.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/tests/testthat/test-dataset-dplyr.R b/r/tests/testthat/test-dataset-dplyr.R index b2ac7e8f125..73dbd228e38 100644 --- a/r/tests/testthat/test-dataset-dplyr.R +++ b/r/tests/testthat/test-dataset-dplyr.R @@ -73,7 +73,7 @@ test_that("filter() on timestamp columns", { ds <- open_dataset(dataset_dir, partitioning = schema(part = uint8())) expect_equal( ds %>% - filter(ts >= lubridate::ymd_hms("2015-05-04 03:12:39", tz = "UTC")) %>% + filter(ts >= lubridate::ymd_hms("2015-05-04 03:12:39")) %>% filter(part == 1) %>% select(ts) %>% collect(), From d7573533a727c8b5aa91e2d23be9782d60723feb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Wed, 6 Jul 2022 15:05:20 +0100 Subject: [PATCH 048/129] removed residual code --- r/tests/testthat/test-dplyr-funcs.R | 1 - 1 file changed, 1 deletion(-) diff --git a/r/tests/testthat/test-dplyr-funcs.R b/r/tests/testthat/test-dplyr-funcs.R index 8fd13b98e21..28a3ac2579a 100644 --- a/r/tests/testthat/test-dplyr-funcs.R +++ b/r/tests/testthat/test-dplyr-funcs.R @@ -36,7 +36,6 @@ test_that("register_binding_agg() works", { fun1 <- function() NULL expect_null(register_binding_agg("somePkg::some_fun", fun1, fake_registry)) - names(fake_registry) expect_identical(fake_registry$some_fun, fun1) expect_identical(fake_registry$`somePkg::some_fun`, fun1) }) From 8b400990f5b6a5698f50e3c543c9ee4d20293f48 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Wed, 6 Jul 2022 15:13:47 +0100 Subject: [PATCH 049/129] add namepace qualified tests and remove `pkg::` prefixes from other tests --- r/tests/testthat/test-dplyr-filter.R | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/r/tests/testthat/test-dplyr-filter.R b/r/tests/testthat/test-dplyr-filter.R index 5efb5a37a7f..0315989b2d5 100644 --- a/r/tests/testthat/test-dplyr-filter.R +++ b/r/tests/testthat/test-dplyr-filter.R @@ -198,7 +198,7 @@ test_that("Negative scalar values", { test_that("filter() with between()", { compare_dplyr_binding( .input %>% - filter(dplyr::between(dbl, 1, 2)) %>% + filter(between(dbl, 1, 2)) %>% collect(), tbl ) @@ -245,7 +245,7 @@ test_that("filter() with string ops", { skip_if_not_available("utf8proc") compare_dplyr_binding( .input %>% - filter(dbl > 2, stringr::str_length(verses) > 25) %>% + filter(dbl > 2, str_length(verses) > 25) %>% collect(), tbl ) @@ -400,3 +400,20 @@ test_that("filter() with .data pronoun", { tbl ) }) + +test_that("filter() with namespacing", { + compare_dplyr_binding( + .input %>% + filter(dplyr::between(dbl, 1, 2)) %>% + collect(), + tbl + ) + + skip_if_not_available("utf8proc") + compare_dplyr_binding( + .input %>% + filter(dbl > 2, stringr::str_length(verses) > 25) %>% + collect(), + tbl + ) +}) From 33a74ea01dd3cfa2344a92665e0601a3c07be265 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Wed, 6 Jul 2022 15:21:21 +0100 Subject: [PATCH 050/129] fallback to `pkg::fun` when the binding does not exist --- r/R/dplyr-funcs.R | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/r/R/dplyr-funcs.R b/r/R/dplyr-funcs.R index 8c5a96edff4..9a2600c64b5 100644 --- a/r/R/dplyr-funcs.R +++ b/r/R/dplyr-funcs.R @@ -129,6 +129,15 @@ register_bindings_utils <- function() { register_binding("::", function(lhs, rhs) { lhs_name <- as.character(substitute(lhs)) rhs_name <- as.character(substitute(rhs)) - nse_funcs[[paste0(lhs_name, "::", rhs_name)]] + + fun_name <- paste0(lhs_name, "::", rhs_name) + + # if we do not have a binding for pkg::name, then fall back on to the + # regular pkg::fun function + if (!is.null(nse_funcs[[fun_name]])) { + nse_funcs[[fun_name]] + } else { + asNamespace(lhs_name)[[rhs_name]] + } }) } From ecd2a8105b04ab8fc2678dfcd0d3c5f850ebab0a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Thu, 7 Jul 2022 08:29:41 +0100 Subject: [PATCH 051/129] use rlang's `%||%` operator --- r/R/dplyr-funcs.R | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/r/R/dplyr-funcs.R b/r/R/dplyr-funcs.R index 9a2600c64b5..3465c85835a 100644 --- a/r/R/dplyr-funcs.R +++ b/r/R/dplyr-funcs.R @@ -132,12 +132,8 @@ register_bindings_utils <- function() { fun_name <- paste0(lhs_name, "::", rhs_name) - # if we do not have a binding for pkg::name, then fall back on to the + # if we do not have a binding for pkg::fun, then fall back on to the # regular pkg::fun function - if (!is.null(nse_funcs[[fun_name]])) { - nse_funcs[[fun_name]] - } else { - asNamespace(lhs_name)[[rhs_name]] - } + nse_funcs[[fun_name]] %||% asNamespace(lhs_name)[[rhs_name]] }) } From 8d2ae15a8d06d756c6dbce355cca25c56572856c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Thu, 7 Jul 2022 10:48:03 +0100 Subject: [PATCH 052/129] add namespacing test with `group_by()` --- r/tests/testthat/test-dplyr-group-by.R | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/r/tests/testthat/test-dplyr-group-by.R b/r/tests/testthat/test-dplyr-group-by.R index a4e558a80b8..ba3d472543c 100644 --- a/r/tests/testthat/test-dplyr-group-by.R +++ b/r/tests/testthat/test-dplyr-group-by.R @@ -156,3 +156,16 @@ test_that("group_by with .drop", { example_with_logical_factors ) }) + +test_that("group_by() and namespacing", { + compare_dplyr_binding( + .input %>% + group_by(int > base::sqrt(25)) %>% + summarise(mean(dbl, na.rm = TRUE)) %>% + # group order is different from dplyr, hence reordering + arrange(`int > base::sqrt(25)`) %>% + collect(), + tbl + ) + +}) From 0a022125f9b38a44e4ea12c84313d680ebb7aa37 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Thu, 7 Jul 2022 19:56:35 +0100 Subject: [PATCH 053/129] remove `tz = "UTC"` (unrelated to the scope of the PR) --- r/tests/testthat/test-dataset-dplyr.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/tests/testthat/test-dataset-dplyr.R b/r/tests/testthat/test-dataset-dplyr.R index 73dbd228e38..fecda56c6c2 100644 --- a/r/tests/testthat/test-dataset-dplyr.R +++ b/r/tests/testthat/test-dataset-dplyr.R @@ -119,7 +119,7 @@ test_that("filter() on date32 columns", { # Also with timestamp scalar expect_equal( open_dataset(tmp) %>% - filter(date > lubridate::ymd_hms("2020-02-02 00:00:00", tz = "UTC")) %>% + filter(date > lubridate::ymd_hms("2020-02-02 00:00:00")) %>% collect() %>% nrow(), 1L From 7f82028e089becf0866494df0d02c09511d091cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Thu, 7 Jul 2022 20:50:34 +0100 Subject: [PATCH 054/129] remove `pkg::` prefixes from test-dplyr-funcs-conditional.R --- r/tests/testthat/test-dplyr-funcs-conditional.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/r/tests/testthat/test-dplyr-funcs-conditional.R b/r/tests/testthat/test-dplyr-funcs-conditional.R index 7c6f48df815..4f5fdb0af4e 100644 --- a/r/tests/testthat/test-dplyr-funcs-conditional.R +++ b/r/tests/testthat/test-dplyr-funcs-conditional.R @@ -29,7 +29,7 @@ test_that("if_else and ifelse", { compare_dplyr_binding( .input %>% mutate( - y = dplyr::if_else(int > 5, 1, 0) + y = if_else(int > 5, 1, 0) ) %>% collect(), tbl @@ -65,7 +65,7 @@ test_that("if_else and ifelse", { compare_dplyr_binding( .input %>% mutate( - y = base::ifelse(int > 5, 1, 0) + y = ifelse(int > 5, 1, 0) ) %>% collect(), tbl @@ -165,7 +165,7 @@ test_that("if_else and ifelse", { test_that("case_when()", { compare_dplyr_binding( .input %>% - transmute(cw = dplyr::case_when(lgl ~ dbl, !false ~ dbl + dbl2)) %>% + transmute(cw = case_when(lgl ~ dbl, !false ~ dbl + dbl2)) %>% collect(), tbl ) @@ -293,7 +293,7 @@ test_that("coalesce()", { compare_dplyr_binding( .input %>% mutate( - cw = dplyr::coalesce(w), + cw = coalesce(w), cz = coalesce(z), cwx = coalesce(w, x), cwxy = coalesce(w, x, y), From 2e477a37ca8ff9a84e9e0835bff423438bd1ada6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Thu, 7 Jul 2022 20:58:47 +0100 Subject: [PATCH 055/129] remove `pkg::` prefixes from test-dplyr-funcs-string.R --- r/tests/testthat/test-dplyr-funcs-string.R | 65 ++++++++++------------ 1 file changed, 30 insertions(+), 35 deletions(-) diff --git a/r/tests/testthat/test-dplyr-funcs-string.R b/r/tests/testthat/test-dplyr-funcs-string.R index e8afe5b687d..c4d54d325f4 100644 --- a/r/tests/testthat/test-dplyr-funcs-string.R +++ b/r/tests/testthat/test-dplyr-funcs-string.R @@ -55,7 +55,7 @@ test_that("paste, paste0, and str_c", { ) compare_dplyr_binding( .input %>% - transmute(stringr::str_c(v, w)) %>% + transmute(str_c(v, w)) %>% collect(), df ) @@ -89,7 +89,7 @@ test_that("paste, paste0, and str_c", { # non-character column in dots compare_dplyr_binding( .input %>% - transmute(base::paste0(x, y, z)) %>% + transmute(paste0(x, y, z)) %>% collect(), df ) @@ -105,7 +105,7 @@ test_that("paste, paste0, and str_c", { # literal NA in dots compare_dplyr_binding( .input %>% - transmute(base::paste(x, NA, y)) %>% + transmute(paste(x, NA, y)) %>% collect(), df ) @@ -183,13 +183,13 @@ test_that("grepl with ignore.case = FALSE and fixed = TRUE", { df <- tibble(x = c("Foo", "bar", NA_character_)) compare_dplyr_binding( .input %>% - filter(base::grepl("o", x, fixed = TRUE)) %>% + filter(grepl("o", x, fixed = TRUE)) %>% collect(), df ) compare_dplyr_binding( .input %>% - mutate(x = base::grepl("o", x, fixed = TRUE)) %>% + mutate(x = grepl("o", x, fixed = TRUE)) %>% collect(), df ) @@ -277,13 +277,13 @@ test_that("str_detect", { compare_dplyr_binding( .input %>% - filter(stringr::str_detect(x, regex("^F"))) %>% + filter(str_detect(x, regex("^F"))) %>% collect(), df ) compare_dplyr_binding( .input %>% - transmute(x = stringr::str_detect(x, regex("^f[A-Z]{2}", ignore_case = TRUE))) %>% + transmute(x = str_detect(x, regex("^f[A-Z]{2}", ignore_case = TRUE))) %>% collect(), df ) @@ -325,7 +325,7 @@ test_that("sub and gsub", { for (fixed in c(TRUE, FALSE)) { compare_dplyr_binding( .input %>% - transmute(x = base::sub("Foo", "baz", x, fixed = fixed)) %>% + transmute(x = sub("Foo", "baz", x, fixed = fixed)) %>% collect(), df ) @@ -359,7 +359,7 @@ test_that("sub and gsub with ignore.case = TRUE and fixed = TRUE", { expect_equal( df %>% Table$create() %>% - transmute(x = base::gsub("o", "u", x, ignore.case = TRUE, fixed = TRUE)) %>% + transmute(x = gsub("o", "u", x, ignore.case = TRUE, fixed = TRUE)) %>% collect(), tibble(x = c("Fuu", "bar")) ) @@ -377,14 +377,14 @@ test_that("str_replace and str_replace_all", { compare_dplyr_binding( .input %>% - transmute(x = stringr::str_replace_all(x, "^F", "baz")) %>% + transmute(x = str_replace_all(x, "^F", "baz")) %>% collect(), df ) compare_dplyr_binding( .input %>% - transmute(x = stringr::str_replace_all(x, regex("^F"), "baz")) %>% + transmute(x = str_replace_all(x, regex("^F"), "baz")) %>% collect(), df ) @@ -427,7 +427,7 @@ test_that("strsplit and str_split", { compare_dplyr_binding( .input %>% - mutate(x = base::strsplit(x, "and")) %>% + mutate(x = strsplit(x, "and")) %>% collect(), df, # `ignore_attr = TRUE` because the vctr coming back from arrow (ListArray) @@ -450,7 +450,7 @@ test_that("strsplit and str_split", { ) compare_dplyr_binding( .input %>% - mutate(x = stringr::str_split(x, "and")) %>% + mutate(x = str_split(x, "and")) %>% collect(), df, ignore_attr = TRUE @@ -490,7 +490,7 @@ test_that("strrep and str_dup", { for (times in 0:8) { compare_dplyr_binding( .input %>% - mutate(x = base::strrep(x, times)) %>% + mutate(x = strrep(x, times)) %>% collect(), df ) @@ -504,16 +504,14 @@ test_that("strrep and str_dup", { } }) -test_that("str_to_lower, str_to_upper, and str_to_title tolower toupper", { +test_that("str_to_lower, str_to_upper, and str_to_title", { df <- tibble(x = c("foo1", " \tB a R\n", "!apACHe aRroW!")) compare_dplyr_binding( .input %>% transmute( - x_lower = stringr::str_to_lower(x), + x_lower = str_to_lower(x), x_upper = str_to_upper(x), - x_title = str_to_title(x), - x_tolower = base::tolower(x), - x_toupper = toupper(x) + x_title = str_to_title(x) ) %>% collect(), df @@ -761,7 +759,7 @@ test_that("stri_reverse and arrow_ascii_reverse functions", { compare_dplyr_binding( .input %>% - mutate(x = stringi::stri_reverse(x)) %>% + mutate(x = stri_reverse(x)) %>% collect(), df_utf8 ) @@ -800,7 +798,7 @@ test_that("str_like", { expect_equal( df %>% Table$create() %>% - mutate(x = stringr::str_like(x, "baz")) %>% + mutate(x = str_like(x, "baz")) %>% collect(), tibble(x = c(FALSE, FALSE)) ) @@ -856,7 +854,7 @@ test_that("str_pad", { compare_dplyr_binding( .input %>% - mutate(x = stringr::str_pad(x, width = 31)) %>% + mutate(x = str_pad(x, width = 31)) %>% collect(), df ) @@ -895,7 +893,7 @@ test_that("substr", { compare_dplyr_binding( .input %>% - mutate(y = base::substr(x, 1, 6)) %>% + mutate(y = substr(x, 1, 6)) %>% collect(), df ) @@ -974,10 +972,7 @@ test_that("substring", { compare_dplyr_binding( .input %>% - mutate( - y = substring(x, 1, 6), - z = base::substring(x, 1, 6) - ) %>% + mutate(y = substring(x, 1, 6)) %>% collect(), df ) @@ -988,7 +983,7 @@ test_that("str_sub", { compare_dplyr_binding( .input %>% - mutate(y = stringr::str_sub(x, 1, 6)) %>% + mutate(y = str_sub(x, 1, 6)) %>% collect(), df ) @@ -1072,7 +1067,7 @@ test_that("str_starts, str_ends, startsWith, endsWith", { compare_dplyr_binding( .input %>% - filter(stringr::str_starts(x, "b.*")) %>% + filter(str_starts(x, "b.*")) %>% collect(), df ) @@ -1101,7 +1096,7 @@ test_that("str_starts, str_ends, startsWith, endsWith", { compare_dplyr_binding( .input %>% transmute( - a = stringr::str_starts(x, "b.*"), + a = str_starts(x, "b.*"), b = str_starts(x, "b.*", negate = TRUE), c = str_starts(x, fixed("b")), d = str_starts(x, fixed("b"), negate = TRUE) @@ -1112,7 +1107,7 @@ test_that("str_starts, str_ends, startsWith, endsWith", { compare_dplyr_binding( .input %>% - filter(stringr::str_ends(x, "r")) %>% + filter(str_ends(x, "r")) %>% collect(), df ) @@ -1141,7 +1136,7 @@ test_that("str_starts, str_ends, startsWith, endsWith", { compare_dplyr_binding( .input %>% transmute( - a = stringr::str_ends(x, "r"), + a = str_ends(x, "r"), b = str_ends(x, "r", negate = TRUE), c = str_ends(x, fixed("r")), d = str_ends(x, fixed("r"), negate = TRUE) @@ -1151,14 +1146,14 @@ test_that("str_starts, str_ends, startsWith, endsWith", { ) compare_dplyr_binding( .input %>% - filter(base::startsWith(x, "b")) %>% + filter(startsWith(x, "b")) %>% collect(), df ) compare_dplyr_binding( .input %>% - filter(base::endsWith(x, "r")) %>% + filter(endsWith(x, "r")) %>% collect(), df ) @@ -1196,7 +1191,7 @@ test_that("str_count", { compare_dplyr_binding( .input %>% - mutate(a_count = stringr::str_count(cities, pattern = "a")) %>% + mutate(a_count = str_count(cities, pattern = "a")) %>% collect(), df ) From 8054f9647c7c8c43f85c2a4da6ea6980c47c2ece Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Thu, 7 Jul 2022 21:11:35 +0100 Subject: [PATCH 056/129] removed `pkg::` prefixes from test-dplyr-funcs-datetime.R --- r/tests/testthat/test-dplyr-funcs-datetime.R | 99 ++++++++------------ 1 file changed, 37 insertions(+), 62 deletions(-) diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R index fcc59ac6b15..d522786a1b2 100644 --- a/r/tests/testthat/test-dplyr-funcs-datetime.R +++ b/r/tests/testthat/test-dplyr-funcs-datetime.R @@ -90,7 +90,7 @@ test_that("strptime", { t_string %>% Table$create() %>% mutate( - x = base::strptime(x, tz = "UTC") + x = strptime(x, tz = "UTC") ) %>% collect(), t_stamp_with_utc_tz @@ -196,7 +196,7 @@ test_that("strftime", { compare_dplyr_binding( .input %>% - mutate(x = base::strftime(datetime, format = formats)) %>% + mutate(x = strftime(datetime, format = formats)) %>% collect(), times ) @@ -280,7 +280,7 @@ test_that("format_ISO8601", { compare_dplyr_binding( .input %>% - mutate(x = lubridate::format_ISO8601(x, precision = "ymd", usetz = FALSE)) %>% + mutate(x = format_ISO8601(x, precision = "ymd", usetz = FALSE)) %>% collect(), times ) @@ -340,20 +340,14 @@ test_that("is.* functions from lubridate", { # make sure all true and at least one false value is considered compare_dplyr_binding( .input %>% - mutate( - x = lubridate::is.POSIXct(datetime), - y = is.POSIXct(integer) - ) %>% + mutate(x = is.POSIXct(datetime), y = is.POSIXct(integer)) %>% collect(), test_df ) compare_dplyr_binding( .input %>% - mutate( - x = lubridate::is.Date(date), - y = is.Date(integer) - ) %>% + mutate(x = is.Date(date), y = is.Date(integer)) %>% collect(), test_df ) @@ -361,7 +355,7 @@ test_that("is.* functions from lubridate", { compare_dplyr_binding( .input %>% mutate( - x = lubridate::is.instant(datetime), + x = is.instant(datetime), y = is.instant(date), z = is.instant(integer) ) %>% @@ -372,7 +366,7 @@ test_that("is.* functions from lubridate", { compare_dplyr_binding( .input %>% mutate( - x = lubridate::is.timepoint(datetime), + x = is.timepoint(datetime), y = is.instant(date), z = is.timepoint(integer) ) %>% @@ -430,7 +424,7 @@ test_that("extract month from timestamp", { compare_dplyr_binding( .input %>% # R returns ordered factor whereas Arrow returns character - mutate(x = as.character(lubridate::month(datetime, label = TRUE))) %>% + mutate(x = as.character(month(datetime, label = TRUE))) %>% collect(), test_df, ignore_attr = TRUE @@ -475,7 +469,7 @@ test_that("extract week from timestamp", { test_that("extract day from timestamp", { compare_dplyr_binding( .input %>% - mutate(x = lubridate::day(datetime)) %>% + mutate(x = day(datetime)) %>% collect(), test_df ) @@ -484,7 +478,7 @@ test_that("extract day from timestamp", { test_that("extract wday from timestamp", { compare_dplyr_binding( .input %>% - mutate(x = lubridate::wday(datetime)) %>% + mutate(x = wday(datetime)) %>% collect(), test_df ) @@ -824,7 +818,7 @@ test_that("semester works with temporal types and integers", { compare_dplyr_binding( .input %>% mutate( - sem_wo_year = lubridate::semester(dates), + sem_wo_year = semester(dates), sem_w_year = semester(dates, with_year = TRUE) ) %>% collect(), @@ -1026,7 +1020,7 @@ test_that("make_date & make_datetime", { compare_dplyr_binding( .input %>% - mutate(composed_date = lubridate::make_date(year, month, day)) %>% + mutate(composed_date = make_date(year, month, day)) %>% collect(), test_df ) @@ -1040,10 +1034,7 @@ test_that("make_date & make_datetime", { compare_dplyr_binding( .input %>% - mutate( - composed_datetime = - lubridate::make_datetime(year, month, day, hour, min, sec) - ) %>% + mutate(composed_datetime = make_datetime(year, month, day, hour, min, sec)) %>% collect(), test_df, # the make_datetime binding uses strptime which does not support tz, hence @@ -1077,7 +1068,7 @@ test_that("ISO_datetime & ISOdate", { compare_dplyr_binding( .input %>% - mutate(composed_date = base::ISOdate(year, month, day)) %>% + mutate(composed_date = ISOdate(year, month, day)) %>% collect(), test_df, # the make_datetime binding uses strptime which does not support tz, hence @@ -1099,8 +1090,7 @@ test_that("ISO_datetime & ISOdate", { compare_dplyr_binding( .input %>% mutate( - composed_datetime = - base::ISOdatetime(year, month, day, hour, min, sec, tz = "UTC") + composed_datetime = ISOdatetime(year, month, day, hour, min, sec, tz = "UTC") ) %>% collect(), test_df, @@ -1122,7 +1112,7 @@ test_that("ISO_datetime & ISOdate", { ) }) -test_that("difftime works", { +test_that("difftime works correctly", { test_df <- tibble( time1 = as.POSIXct( c("2021-02-20", "2021-07-31 0:0:0", "2021-10-30", "2021-01-31 0:0:0") @@ -1136,7 +1126,7 @@ test_that("difftime works", { compare_dplyr_binding( .input %>% mutate( - secs2 = base::difftime(time1, time2, units = "secs") + secs2 = difftime(time1, time2, units = "secs") ) %>% collect(), test_df, @@ -1208,7 +1198,7 @@ test_that("as.difftime()", { compare_dplyr_binding( .input %>% - mutate(hms_difftime = base::as.difftime(hms_string, units = "secs")) %>% + mutate(hms_difftime = as.difftime(hms_string, units = "secs")) %>% collect(), test_df ) @@ -1278,11 +1268,11 @@ test_that("`decimal_date()` and `date_decimal()`", { compare_dplyr_binding( .input %>% mutate( - decimal_date_from_POSIXct = lubridate::decimal_date(b), + decimal_date_from_POSIXct = decimal_date(b), decimal_date_from_r_POSIXct_obj = decimal_date(as.POSIXct("2022-03-25 15:37:01")), decimal_date_from_r_date_obj = decimal_date(as.Date("2022-03-25")), decimal_date_from_date = decimal_date(c), - date_from_decimal = lubridate::date_decimal(a), + date_from_decimal = date_decimal(a), date_from_decimal_r_obj = date_decimal(2022.178) ) %>% collect(), @@ -1303,12 +1293,12 @@ test_that("dminutes, dhours, ddays, dweeks, dmonths, dyears", { compare_dplyr_binding( .input %>% mutate( - dminutes = lubridate::dminutes(x), - dhours = lubridate::dhours(x), - ddays = lubridate::ddays(x), - dweeks = lubridate::dweeks(x), - dmonths = lubridate::dmonths(x), - dyears = lubridate::dyears(x) + dminutes = dminutes(x), + dhours = dhours(x), + ddays = ddays(x), + dweeks = dweeks(x), + dmonths = dmonths(x), + dyears = dyears(x) ) %>% collect(), example_d, @@ -1321,7 +1311,7 @@ test_that("dminutes, dhours, ddays, dweeks, dmonths, dyears", { dhours = dhours(x), ddays = ddays(x), new_date_1 = date_to_add + ddays, - new_date_2 = date_to_add + ddays - lubridate::dhours(3), + new_date_2 = date_to_add + ddays - dhours(3), new_duration = dhours - ddays ) %>% collect(), @@ -1366,8 +1356,8 @@ test_that("dseconds, dmilliseconds, dmicroseconds, dnanoseconds, dpicoseconds", compare_dplyr_binding( .input %>% mutate( - dseconds = lubridate::dseconds(x), - dmilliseconds = lubridate::dmilliseconds(x), + dseconds = dseconds(x), + dmilliseconds = dmilliseconds(x), dmicroseconds = dmicroseconds(x), dnanoseconds = dnanoseconds(x), ) %>% @@ -1529,7 +1519,7 @@ test_that("`as.Date()` and `as_date()`", { compare_dplyr_binding( .input %>% mutate( - date_dv1 = base::as.Date(date_var), + date_dv1 = as.Date(date_var), date_pv1 = as.Date(posixct_var), date_pv_tz1 = as.Date(posixct_var, tz = "Pacific/Marquesas"), date_utc1 = as.Date(dt_utc), @@ -1539,7 +1529,7 @@ test_that("`as.Date()` and `as_date()`", { date_int1 = as.Date(integer_var, origin = "1970-01-01"), date_int_origin1 = as.Date(integer_var, origin = "1970-01-03"), date_integerish1 = as.Date(integerish_var, origin = "1970-01-01"), - date_dv2 = lubridate::as_date(date_var), + date_dv2 = as_date(date_var), date_pv2 = as_date(posixct_var), date_pv_tz2 = as_date(posixct_var, tz = "Pacific/Marquesas"), date_utc2 = as_date(dt_utc), @@ -1672,21 +1662,6 @@ test_that("`as_datetime()`", { double_date = c(10.1, 25.2, NA) ) - compare_dplyr_binding( - .input %>% - mutate( - ddate = lubridate::as_datetime(date), - dchar_date_no_tz = lubridate::as_datetime(char_date), - dchar_date_with_tz = lubridate::as_datetime(char_date, tz = "Pacific/Marquesas"), - dint_date = lubridate::as_datetime(int_date, origin = "1970-01-02"), - dintegerish_date = lubridate::as_datetime(integerish_date, origin = "1970-01-02"), - dintegerish_date2 = lubridate::as_datetime(integerish_date, origin = "1970-01-01"), - .keep = "used" - ) %>% - collect(), - test_df - ) - compare_dplyr_binding( .input %>% mutate( @@ -1721,7 +1696,7 @@ test_that("parse_date_time() works with year, month, and date components", { compare_dplyr_binding( .input %>% mutate( - parsed_date_ymd = lubridate::parse_date_time(string_ymd, orders = "ymd"), + parsed_date_ymd = parse_date_time(string_ymd, orders = "ymd"), parsed_date_dmy = parse_date_time(string_dmy, orders = "dmy"), parsed_date_mdy = parse_date_time(string_mdy, orders = "mdy") ) %>% @@ -1809,8 +1784,8 @@ test_that("year, month, day date/time parsers", { compare_dplyr_binding( .input %>% mutate( - ymd_date = lubridate::ymd(ymd_string), - ydm_date = lubridate::ydm(ydm_string), + ymd_date = ymd(ymd_string), + ydm_date = ydm(ydm_string), mdy_date = mdy(mdy_string), myd_date = myd(myd_string), dmy_date = dmy(dmy_string), @@ -1823,8 +1798,8 @@ test_that("year, month, day date/time parsers", { compare_dplyr_binding( .input %>% mutate( - ymd_date = lubridate::ymd(ymd_string, tz = "Pacific/Marquesas"), - ydm_date = lubridate::ydm(ydm_string, tz = "Pacific/Marquesas"), + ymd_date = ymd(ymd_string, tz = "Pacific/Marquesas"), + ydm_date = ydm(ydm_string, tz = "Pacific/Marquesas"), mdy_date = mdy(mdy_string, tz = "Pacific/Marquesas"), myd_date = myd(myd_string, tz = "Pacific/Marquesas"), dmy_date = dmy(dmy_string, tz = "Pacific/Marquesas"), @@ -1932,7 +1907,7 @@ test_that("lubridate's fast_strptime", { .input %>% mutate( date_multi_formats = - lubridate::fast_strptime( + fast_strptime( x, format = c("%Y-%m-%d %H:%M:%S", "%m-%d-%Y %H:%M:%S"), lt = FALSE From f9e3a0985e42c841bd6cb843605d5695769ae6f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Fri, 8 Jul 2022 09:06:45 +0100 Subject: [PATCH 057/129] remove `pkg::` prefixes from test-dplyr-funcs-type.R --- r/tests/testthat/test-dplyr-funcs-type.R | 60 +++++++++++------------- 1 file changed, 28 insertions(+), 32 deletions(-) diff --git a/r/tests/testthat/test-dplyr-funcs-type.R b/r/tests/testthat/test-dplyr-funcs-type.R index fafeaf2c8f2..539dc98f8ba 100644 --- a/r/tests/testthat/test-dplyr-funcs-type.R +++ b/r/tests/testthat/test-dplyr-funcs-type.R @@ -92,10 +92,10 @@ test_that("explicit type conversions with as.*()", { compare_dplyr_binding( .input %>% transmute( - int2chr = base::as.character(int), - int2dbl = base::as.double(int), - int2int = base::as.integer(int), - int2num = base::as.numeric(int), + int2chr = as.character(int), + int2dbl = as.double(int), + int2int = as.integer(int), + int2num = as.numeric(int), dbl2chr = as.character(dbl), dbl2dbl = as.double(dbl), dbl2int = as.integer(dbl), @@ -130,7 +130,7 @@ test_that("explicit type conversions with as.*()", { compare_dplyr_binding( .input %>% transmute( - chr2i64 = bit64::as.integer64(chr), + chr2i64 = as.integer64(chr), dbl2i64 = as.integer64(dbl), i642i64 = as.integer64(i64), rchr2i64 = as.integer64("10000000000"), @@ -143,8 +143,8 @@ test_that("explicit type conversions with as.*()", { compare_dplyr_binding( .input %>% transmute( - chr2lgl = base::as.logical(chr), - dbl2lgl = base::as.logical(dbl), + chr2lgl = as.logical(chr), + dbl2lgl = as.logical(dbl), int2lgl = as.logical(int), rchr2lgl = as.logical("TRUE"), rdbl2lgl = as.logical(0), @@ -208,9 +208,7 @@ test_that("is.finite(), is.infinite(), is.nan()", { .input %>% transmute( is_fin = is.finite(x), - namespaced_is_fin = base::is.finite(x), - is_inf = is.infinite(x), - namespaced_is_inf = base::is.infinite(x) + is_inf = is.infinite(x) ) %>% collect(), df @@ -219,8 +217,7 @@ test_that("is.finite(), is.infinite(), is.nan()", { compare_dplyr_binding( .input %>% transmute( - is_nan = is.nan(x), - namespaced_is_nan = base::is.nan(x) + is_nan = is.nan(x) ) %>% collect(), df @@ -232,8 +229,7 @@ test_that("is.na() evaluates to TRUE on NaN (ARROW-12055)", { compare_dplyr_binding( .input %>% transmute( - is_na = is.na(x), - namespaced_is_na = base::is.na(x) + is_na = is.na(x) ) %>% collect(), df @@ -252,7 +248,7 @@ test_that("type checks with is() giving Arrow types", { str = Array$create("a", arrow::string()) ) %>% transmute( - i32_is_i32 = methods::is(i32, int32()), + i32_is_i32 = is(i32, int32()), i32_is_dec = is(i32, decimal(3, 2)), i32_is_dec128 = is(i32, decimal128(3, 2)), i32_is_dec256 = is(i32, decimal256(3, 2)), @@ -440,14 +436,14 @@ test_that("type checks with is.*()", { compare_dplyr_binding( .input %>% transmute( - chr_is_chr = base::is.character(chr), - chr_is_dbl = base::is.double(chr), - chr_is_fct = base::is.factor(chr), - chr_is_int = base::is.integer(chr), - chr_is_i64 = bit64::is.integer64(chr), - chr_is_lst = base::is.list(chr), - chr_is_lgl = base::is.logical(chr), - chr_is_num = base::is.numeric(chr), + chr_is_chr = is.character(chr), + chr_is_dbl = is.double(chr), + chr_is_fct = is.factor(chr), + chr_is_int = is.integer(chr), + chr_is_i64 = is.integer64(chr), + chr_is_lst = is.list(chr), + chr_is_lgl = is.logical(chr), + chr_is_num = is.numeric(chr), dbl_is_chr = is.character(dbl), dbl_is_dbl = is.double(dbl), dbl_is_fct = is.factor(dbl), @@ -519,11 +515,11 @@ test_that("type checks with is_*()", { compare_dplyr_binding( .input %>% transmute( - chr_is_chr = rlang::is_character(chr), - chr_is_dbl = rlang::is_double(chr), - chr_is_int = rlang::is_integer(chr), - chr_is_lst = rlang::is_list(chr), - chr_is_lgl = rlang::is_logical(chr), + chr_is_chr = is_character(chr), + chr_is_dbl = is_double(chr), + chr_is_int = is_integer(chr), + chr_is_lst = is_list(chr), + chr_is_lgl = is_logical(chr), dbl_is_chr = is_character(dbl), dbl_is_dbl = is_double(dbl), dbl_is_int = is_integer(dbl), @@ -612,7 +608,7 @@ test_that("as.factor()/dictionary_encode()", { expect_warning( compare_dplyr_binding( .input %>% - transmute(x = base::as.factor(x)) %>% + transmute(x = as.factor(x)) %>% collect(), df2 ), @@ -739,7 +735,7 @@ test_that("structs/nested data frames/tibbles can be created", { compare_dplyr_binding( .input %>% transmute( - df_col = base::data.frame(regular_col1, fix.empty.names = FALSE) + df_col = data.frame(regular_col1, fix.empty.names = FALSE) ) %>% collect() %>% mutate(df_col = as.data.frame(df_col)), @@ -791,7 +787,7 @@ test_that("nested structs can be created from scalars and existing data frames", compare_dplyr_binding( .input %>% transmute( - df_col = tibble::tibble(b = 3) + df_col = tibble(b = 3) ) %>% collect(), tibble(a = 1:2) @@ -827,7 +823,7 @@ test_that("format date/time", { compare_dplyr_binding( .input %>% - mutate(x = base::format(datetime, format = formats)) %>% + mutate(x = format(datetime, format = formats)) %>% collect(), times ) From de1b18616d06ee59774cc857671ba456b2c51e62 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Fri, 8 Jul 2022 09:08:41 +0100 Subject: [PATCH 058/129] remove `pkg::` prefix from test-dplyr-mutate.R --- r/tests/testthat/test-dplyr-mutate.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/r/tests/testthat/test-dplyr-mutate.R b/r/tests/testthat/test-dplyr-mutate.R index cb30f831e33..72352885b33 100644 --- a/r/tests/testthat/test-dplyr-mutate.R +++ b/r/tests/testthat/test-dplyr-mutate.R @@ -159,7 +159,7 @@ test_that("nchar() arguments", { .input %>% select(int, verses) %>% mutate( - line_lengths = base::nchar(verses, type = "bytes"), + line_lengths = nchar(verses, type = "bytes"), longer = line_lengths * 10 ) %>% filter(line_lengths > 15) %>% @@ -514,9 +514,9 @@ test_that("mutate and pmin/pmax", { compare_dplyr_binding( .input %>% mutate( - max_val_1 = base::pmax(val1, val2, val3), + max_val_1 = pmax(val1, val2, val3), max_val_2 = pmax(val1, val2, val3, na.rm = TRUE), - min_val_1 = base::pmin(val1, val2, val3), + min_val_1 = pmin(val1, val2, val3), min_val_2 = pmin(val1, val2, val3, na.rm = TRUE) ) %>% collect(), From 65f1e22ef23acc54512108ba00369f891481bc05 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Fri, 8 Jul 2022 13:32:25 +0100 Subject: [PATCH 059/129] add namespacing support in `summarise()` + unit test --- r/R/dplyr-summarize.R | 12 +++++++++++- r/R/util.R | 7 +++++++ r/tests/testthat/test-dplyr-summarize.R | 9 +++++++++ 3 files changed, 27 insertions(+), 1 deletion(-) diff --git a/r/R/dplyr-summarize.R b/r/R/dplyr-summarize.R index 9226c476cb9..d12019ebb4d 100644 --- a/r/R/dplyr-summarize.R +++ b/r/R/dplyr-summarize.R @@ -56,7 +56,7 @@ agg_fun_output_type <- function(fun, input_type, hash) { } register_bindings_aggregate <- function() { - register_binding_agg("sum", function(..., na.rm = FALSE) { + register_binding_agg("base::sum", function(..., na.rm = FALSE) { list( fun = "sum", data = ensure_one_arg(list2(...), "sum"), @@ -159,6 +159,16 @@ register_bindings_aggregate <- function() { options = list(skip_nulls = na.rm, min_count = 0L) ) }) + register_binding_agg("::", function(lhs, rhs) { + lhs_name <- as.character(substitute(lhs)) + rhs_name <- as.character(substitute(rhs)) + + fun_name <- paste0(lhs_name, "::", rhs_name) + + # if we do not have a binding for pkg::fun, then fall back on to the + # regular pkg::fun function + agg_funcs[[fun_name]] %||% asNamespace(lhs_name)[[rhs_name]] + }) } # The following S3 methods are registered on load if dplyr is present diff --git a/r/R/util.R b/r/R/util.R index a51fde0c2d6..970cb249e97 100644 --- a/r/R/util.R +++ b/r/R/util.R @@ -93,6 +93,13 @@ all_funs <- function(expr) { expr <- quo_get_expr(expr) } names <- all.names(expr) + # if we have namespace-qualified functions rebuild the function name with the + # pkg:: prefix + for (i in seq_along(names)) { + if (names[i] == "::") { + names[i] <- paste0(names[i+1], names[i], names[i+2]) + } + } names[map_lgl(names, ~ is_function(expr, .))] } diff --git a/r/tests/testthat/test-dplyr-summarize.R b/r/tests/testthat/test-dplyr-summarize.R index 5ad7425ee87..e4de0ccc563 100644 --- a/r/tests/testthat/test-dplyr-summarize.R +++ b/r/tests/testthat/test-dplyr-summarize.R @@ -999,3 +999,12 @@ test_that("summarise() can handle scalars and literal values", { tibble(y = 2L) ) }) + +test_that("summarise() supports namespacing", { + compare_dplyr_binding( + .input %>% + summarize(total = base::sum(int, na.rm = TRUE)) %>% + collect(), + tbl + ) +}) From 07a1948c97ceb46ecb2247dcd5f3fde75fbc4fff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Fri, 8 Jul 2022 14:08:23 +0100 Subject: [PATCH 060/129] lint + `if()` statement --- r/R/util.R | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/r/R/util.R b/r/R/util.R index 970cb249e97..affcb3ff8d6 100644 --- a/r/R/util.R +++ b/r/R/util.R @@ -95,9 +95,11 @@ all_funs <- function(expr) { names <- all.names(expr) # if we have namespace-qualified functions rebuild the function name with the # pkg:: prefix - for (i in seq_along(names)) { - if (names[i] == "::") { - names[i] <- paste0(names[i+1], names[i], names[i+2]) + if ("::" %in% names) { + for (i in seq_along(names)) { + if (names[i] == "::") { + names[i] <- paste0(names[i + 1], names[i], names[i + 2]) + } } } names[map_lgl(names, ~ is_function(expr, .))] From 607691893433ceb46c92bc273a0599dfef8a3b8e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Fri, 8 Jul 2022 14:49:01 +0100 Subject: [PATCH 061/129] rename `name` to `unqualified_name` and `fun_name` to `qualified_name` --- r/R/dplyr-funcs.R | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/r/R/dplyr-funcs.R b/r/R/dplyr-funcs.R index 3465c85835a..7ea552075ce 100644 --- a/r/R/dplyr-funcs.R +++ b/r/R/dplyr-funcs.R @@ -57,23 +57,23 @@ NULL #' registered function existed. #' @keywords internal #' -register_binding <- function(fun_name, fun, registry = nse_funcs) { - if (fun_name == "::") { - name <- "::" +register_binding <- function(qualified_name, fun, registry = nse_funcs) { + if (qualified_name == "::") { + unqualified_name <- "::" } else { - name <- gsub("^.*?::", "", fun_name) + unqualified_name <- gsub("^.*?::", "", qualified_name) } - previous_fun <- if (name %in% names(registry)) registry[[name]] else NULL + previous_fun <- if (unqualified_name %in% names(registry)) registry[[unqualified_name]] else NULL if (is.null(fun) && !is.null(previous_fun)) { - rm(list = c(name, fun_name), envir = registry, inherits = FALSE) - # register both as `pkg::fun` and as `fun` if `fun_name` is prefixed - } else if (grepl("::", fun_name) && fun_name != "::") { - registry[[name]] <- fun - registry[[fun_name]] <- fun + rm(list = c(unqualified_name, qualified_name), envir = registry, inherits = FALSE) + # register both as `pkg::fun` and as `fun` if `qualified_name` is prefixed + } else if (grepl("::", qualified_name) && qualified_name != "::") { + registry[[unqualified_name]] <- fun + registry[[qualified_name]] <- fun } else { - registry[[name]] <- fun + registry[[unqualified_name]] <- fun } invisible(previous_fun) From b1bf744a2c6bbf2850afa019e3c292467a94bdb7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Fri, 8 Jul 2022 14:50:28 +0100 Subject: [PATCH 062/129] change --- r/R/dplyr-funcs.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/r/R/dplyr-funcs.R b/r/R/dplyr-funcs.R index 7ea552075ce..c20d759a4e7 100644 --- a/r/R/dplyr-funcs.R +++ b/r/R/dplyr-funcs.R @@ -57,7 +57,8 @@ NULL #' registered function existed. #' @keywords internal #' -register_binding <- function(qualified_name, fun, registry = nse_funcs) { +register_binding <- function(fun_name, fun, registry = nse_funcs) { + qualified_name <- fun_name if (qualified_name == "::") { unqualified_name <- "::" } else { From 11d5ab177bf7429c3b5f8c81236e366973aea2b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Fri, 8 Jul 2022 15:53:17 +0100 Subject: [PATCH 063/129] deal with nested function calls + unit tests for summarise with nested calls --- r/R/dplyr-summarize.R | 5 +++-- r/tests/testthat/test-dplyr-summarize.R | 16 ++++++++++++++++ 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/r/R/dplyr-summarize.R b/r/R/dplyr-summarize.R index d12019ebb4d..43695c3400a 100644 --- a/r/R/dplyr-summarize.R +++ b/r/R/dplyr-summarize.R @@ -166,8 +166,9 @@ register_bindings_aggregate <- function() { fun_name <- paste0(lhs_name, "::", rhs_name) # if we do not have a binding for pkg::fun, then fall back on to the - # regular pkg::fun function - agg_funcs[[fun_name]] %||% asNamespace(lhs_name)[[rhs_name]] + # nse_funcs (useful when we have a regular function inside an aggregating one) + # and then regular pkg::fun function + agg_funcs[[fun_name]] %||% nse_funcs[[fun_name]] %||% asNamespace(lhs_name)[[rhs_name]] }) } diff --git a/r/tests/testthat/test-dplyr-summarize.R b/r/tests/testthat/test-dplyr-summarize.R index e4de0ccc563..665ace93c5d 100644 --- a/r/tests/testthat/test-dplyr-summarize.R +++ b/r/tests/testthat/test-dplyr-summarize.R @@ -1007,4 +1007,20 @@ test_that("summarise() supports namespacing", { collect(), tbl ) + compare_dplyr_binding( + .input %>% + summarise( + log_total = sum(base::log(int) + 1, na.rm = TRUE) + ) %>% + collect(), + tbl + ) + compare_dplyr_binding( + .input %>% + summarise( + log_total = base::round(base::sum(base::log(int) + dbl, na.rm = TRUE)) + ) %>% + collect(), + tbl + ) }) From 7e2f1378baf9d2fbcad5198caa48433ff504f1ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Fri, 8 Jul 2022 18:36:02 +0100 Subject: [PATCH 064/129] `all_funs()` unit tests --- r/tests/testthat/test-util.R | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/r/tests/testthat/test-util.R b/r/tests/testthat/test-util.R index 20fdedf3e12..59d8cd8b824 100644 --- a/r/tests/testthat/test-util.R +++ b/r/tests/testthat/test-util.R @@ -39,3 +39,34 @@ test_that("as_writable_table() errors for invalid input", { # make sure other errors make it through expect_snapshot_error(wrapper_fun(data.frame(x = I(list(1, "a"))))) }) + +test_that("all_funs() identifies namespace-qualified and unqualified functions", { + expect_equal( + arrow:::all_funs(rlang::quo(pkg::fun())), + "pkg::fun" + ) + expect_equal( + arrow:::all_funs(rlang::quo(pkg::fun(other_pkg::obj))), + "pkg::fun" + ) + expect_equal( + arrow:::all_funs(rlang::quo(other_fun(pkg::fun()))), + c("other_fun","pkg::fun") + ) + expect_equal( + arrow:::all_funs(rlang::quo(other_pkg::other_fun(pkg::fun()))), + c("other_pkg::other_fun", "pkg::fun") + ) + expect_equal( + arrow:::all_funs(rlang::quo(other_pkg::other_fun(pkg::fun(sum(base::log()))))), + c("other_pkg::other_fun", "pkg::fun", "sum", "base::log") + ) + expect_equal( + arrow:::all_funs(rlang::quo(other_fun(fun(sum(log()))))), + c("other_fun", "fun", "sum", "log") + ) + expect_equal( + arrow:::all_funs(rlang::quo(other_fun(fun(sum(base::log()))))), + c("other_fun", "fun", "sum", "base::log") + ) +}) From 52ec424309a1fb41b912a8ba05145ce3df4e69b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Fri, 8 Jul 2022 20:06:31 +0100 Subject: [PATCH 065/129] lint --- r/tests/testthat/test-util.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/tests/testthat/test-util.R b/r/tests/testthat/test-util.R index 59d8cd8b824..13710863268 100644 --- a/r/tests/testthat/test-util.R +++ b/r/tests/testthat/test-util.R @@ -51,7 +51,7 @@ test_that("all_funs() identifies namespace-qualified and unqualified functions", ) expect_equal( arrow:::all_funs(rlang::quo(other_fun(pkg::fun()))), - c("other_fun","pkg::fun") + c("other_fun", "pkg::fun") ) expect_equal( arrow:::all_funs(rlang::quo(other_pkg::other_fun(pkg::fun()))), From 4ee6884080402e705956485870a21170b260c0c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Fri, 8 Jul 2022 20:20:19 +0100 Subject: [PATCH 066/129] cleaner test-dplyr-funcs-math.R --- r/tests/testthat/test-dplyr-funcs-math.R | 120 ++++++++++++++--------- 1 file changed, 73 insertions(+), 47 deletions(-) diff --git a/r/tests/testthat/test-dplyr-funcs-math.R b/r/tests/testthat/test-dplyr-funcs-math.R index 9935fec619a..f4a3fb7358e 100644 --- a/r/tests/testthat/test-dplyr-funcs-math.R +++ b/r/tests/testthat/test-dplyr-funcs-math.R @@ -25,10 +25,15 @@ test_that("abs()", { compare_dplyr_binding( .input %>% - transmute( - abs = abs(x), - abs_namespace = base::abs(x) - ) %>% + transmute(abs = abs(x)) %>% + collect(), + df + ) + + # with namespacing + compare_dplyr_binding( + .input %>% + transmute(abs = base::abs(x)) %>% collect(), df ) @@ -39,10 +44,15 @@ test_that("sign()", { compare_dplyr_binding( .input %>% - transmute( - sign = sign(x), - sign_namespace = base::sign(x) - ) %>% + transmute(sign = sign(x)) %>% + collect(), + df + ) + + # with namespacing + compare_dplyr_binding( + .input %>% + transmute(sign = base::sign(x)) %>% collect(), df ) @@ -55,13 +65,22 @@ test_that("ceiling(), floor(), trunc(), round()", { .input %>% mutate( c = ceiling(x), - c_namespace = base::ceiling(x), f = floor(x), - f_namespace = base::floor(x), t = trunc(x), - t_namespace = base::trunc(x), - r = round(x), - r_namespace = base::round(x) + r = round(x) + ) %>% + collect(), + df + ) + + # with namespacing + compare_dplyr_binding( + .input %>% + mutate( + c = base::ceiling(x), + f = base::floor(x), + t = base::trunc(x), + r = base::round(x) ) %>% collect(), df @@ -151,7 +170,7 @@ test_that("log functions", { compare_dplyr_binding( .input %>% - mutate(y = base::log(x)) %>% + mutate(y = log(x)) %>% collect(), df ) @@ -206,7 +225,7 @@ test_that("log functions", { filter(x != 1) %>% mutate( y = log(x, base = x), - z = base::log(2, base = x) + z = log(2, base = x) ) %>% collect(), df @@ -233,39 +252,47 @@ test_that("log functions", { compare_dplyr_binding( .input %>% - mutate( - y = logb(x), - z = base::logb(x) - ) %>% + mutate(y = logb(x)) %>% collect(), df ) compare_dplyr_binding( .input %>% - mutate( - y = log1p(x), - z = base::log1p(x) - ) %>% + mutate(y = log1p(x)) %>% collect(), df ) compare_dplyr_binding( .input %>% - mutate( - y = log2(x), - z = base::log2(x) - ) %>% + mutate(y = log2(x)) %>% + collect(), + df + ) + + compare_dplyr_binding( + .input %>% + mutate(y = log10(x)) %>% collect(), df ) + # with namespacing + compare_dplyr_binding( + .input %>% + mutate(a = base::log(x, base = y)) %>% + collect(), + tibble(x = 10, y = 1) + ) + compare_dplyr_binding( .input %>% mutate( - y = log10(x), - z = base::log10(x) + a = base::logb(x), + b = base::log1p(x), + c = base::log2(x), + d = base::log10(x) ) %>% collect(), df @@ -277,49 +304,48 @@ test_that("trig functions", { compare_dplyr_binding( .input %>% - mutate( - y = sin(x), - y_namespace = base::sin(x) - ) %>% + mutate(y = sin(x)) %>% collect(), df ) compare_dplyr_binding( .input %>% - mutate( - y = cos(x), - y_namespace = base::cos(x) - ) %>% + mutate(y = cos(x)) %>% collect(), df ) compare_dplyr_binding( .input %>% - mutate( - y = tan(x), - y_namespace = base::tan(x) - ) %>% + mutate(y = tan(x)) %>% collect(), df ) compare_dplyr_binding( .input %>% - mutate( - y = asin(x), - y_namespace = base::asin(x) - ) %>% + mutate(y = asin(x)) %>% + collect(), + df + ) + + compare_dplyr_binding( + .input %>% + mutate(y = acos(x)) %>% collect(), df ) + # with namespacing compare_dplyr_binding( .input %>% mutate( - y = acos(x), - y_namespace = base::acos(x) + a = base::sin(x), + b = base::cos(x), + c = base::tan(x), + d = base::asin(x), + e = base::acos(x) ) %>% collect(), df From c7e82d04348067b4fdbcee944da878edb608bbc8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Fri, 8 Jul 2022 20:29:17 +0100 Subject: [PATCH 067/129] clean-up `Math.ArrowDatum` --- r/R/arrow-datum.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/r/R/arrow-datum.R b/r/R/arrow-datum.R index ff3ad1a00fe..33c67a52854 100644 --- a/r/R/arrow-datum.R +++ b/r/R/arrow-datum.R @@ -114,9 +114,9 @@ Ops.ArrowDatum <- function(e1, e2) { Math.ArrowDatum <- function(x, ..., base = exp(1), digits = 0) { switch(.Generic, abs = eval_array_expression("abs_checked", x), - sign = eval_array_expression("sign", x), - floor = eval_array_expression("floor", x), ceiling = eval_array_expression("ceil", x), + sign = , + floor = , trunc = , acos = , asin = , From ab4439120430472791cd69dff8fa01c78c144036 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Fri, 8 Jul 2022 20:41:32 +0100 Subject: [PATCH 068/129] style dplyr-funcs-datetime.R --- r/R/dplyr-funcs-datetime.R | 83 ++++++++++++++++++++------------------ 1 file changed, 43 insertions(+), 40 deletions(-) diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R index bcffe7cf759..c948e62109f 100644 --- a/r/R/dplyr-funcs-datetime.R +++ b/r/R/dplyr-funcs-datetime.R @@ -29,9 +29,9 @@ register_bindings_datetime <- function() { register_bindings_datetime_utility <- function() { register_binding("base::strptime", function(x, - format = "%Y-%m-%d %H:%M:%S", - tz = "", - unit = "ms") { + format = "%Y-%m-%d %H:%M:%S", + tz = "", + unit = "ms") { # Arrow uses unit for time parsing, strptime() does not. # Arrow has no default option for strptime (format, unit), # we suggest following format = "%Y-%m-%d %H:%M:%S", unit = MILLI/1L/"ms", @@ -260,18 +260,20 @@ register_bindings_datetime_conversion <- function() { build_expr("strptime", x, options = list(format = "%Y-%m-%d-%H-%M-%S", unit = 0L)) }) - register_binding("lubridate::make_date", function(year = 1970L, month = 1L, day = 1L) { + register_binding("lubridate::make_date", function(year = 1970L, + month = 1L, + day = 1L) { x <- call_binding("make_datetime", year, month, day) build_expr("cast", x, options = cast_options(to_type = date32())) }) register_binding("base::ISOdatetime", function(year, - month, - day, - hour, - min, - sec, - tz = "UTC") { + month, + day, + hour, + min, + sec, + tz = "UTC") { # NAs for seconds aren't propagated (but treated as 0) in the base version sec <- call_binding( @@ -285,20 +287,20 @@ register_bindings_datetime_conversion <- function() { }) register_binding("base::ISOdate", function(year, - month, - day, - hour = 12, - min = 0, - sec = 0, - tz = "UTC") { + month, + day, + hour = 12, + min = 0, + sec = 0, + tz = "UTC") { call_binding("make_datetime", year, month, day, hour, min, sec, tz) }) register_binding("base::as.Date", function(x, - format = NULL, - tryFormats = "%Y-%m-%d", - origin = "1970-01-01", - tz = "UTC") { + format = NULL, + tryFormats = "%Y-%m-%d", + origin = "1970-01-01", + tz = "UTC") { if (is.null(format) && length(tryFormats) > 1) { abort( paste( @@ -347,9 +349,9 @@ register_bindings_datetime_conversion <- function() { }) register_binding("lubridate::as_datetime", function(x, - origin = "1970-01-01", - tz = "UTC", - format = NULL) { + origin = "1970-01-01", + tz = "UTC", + format = NULL) { if (call_binding("is.numeric", x)) { delta <- call_binding("difftime", origin, "1970-01-01") delta <- build_expr("cast", delta, options = cast_options(to_type = int64())) @@ -382,7 +384,8 @@ register_bindings_datetime_conversion <- function() { y + sofar$cast(int64()) / total }) - register_binding("lubridate::date_decimal", function(decimal, tz = "UTC") { + register_binding("lubridate::date_decimal", function(decimal, + tz = "UTC") { y <- build_expr("floor", decimal) start <- call_binding("make_datetime", year = y, tz = tz) @@ -402,9 +405,9 @@ register_bindings_datetime_conversion <- function() { register_bindings_duration <- function() { register_binding("base::difftime", function(time1, - time2, - tz, - units = "secs") { + time2, + tz, + units = "secs") { if (units != "secs") { abort("`difftime()` with units other than `secs` not supported in Arrow") } @@ -443,8 +446,8 @@ register_bindings_duration <- function() { build_expr("cast", subtract_output, options = cast_options(to_type = duration("s"))) }) register_binding("base::as.difftime", function(x, - format = "%X", - units = "secs") { + format = "%X", + units = "secs") { # windows doesn't seem to like "%X" if (format == "%X" & tolower(Sys.info()[["sysname"]]) == "windows") { format <- "%H:%M:%S" @@ -478,8 +481,8 @@ register_bindings_duration <- function() { register_bindings_duration_constructor <- function() { register_binding("lubridate::make_difftime", function(num = NULL, - units = "secs", - ...) { + units = "secs", + ...) { if (units != "secs") { abort("`make_difftime()` with units other than 'secs' not supported in Arrow") } @@ -529,11 +532,11 @@ register_bindings_duration_helpers <- function() { register_bindings_datetime_parsers <- function() { register_binding("lubridate::parse_date_time", function(x, - orders, - tz = "UTC", - truncated = 0, - quiet = TRUE, - exact = FALSE) { + orders, + tz = "UTC", + truncated = 0, + quiet = TRUE, + exact = FALSE) { if (!quiet) { arrow_not_supported("`quiet = FALSE`") } @@ -592,10 +595,10 @@ register_bindings_datetime_parsers <- function() { } register_binding("lubridate::fast_strptime", function(x, - format, - tz = "UTC", - lt = FALSE, - cutoff_2000 = 68L) { + format, + tz = "UTC", + lt = FALSE, + cutoff_2000 = 68L) { # `lt` controls the output `lt = TRUE` returns a POSIXlt (which doesn't play # well with mutate, for example) if (lt) { From 33183de806e653e4b5467a488d7caa8dfb4ddbbf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Fri, 8 Jul 2022 21:03:17 +0100 Subject: [PATCH 069/129] minor improvements --- r/R/util.R | 4 ++-- r/tests/testthat/test-dplyr-filter.R | 2 +- r/tests/testthat/test-dplyr-group-by.R | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/r/R/util.R b/r/R/util.R index affcb3ff8d6..55ff29db73a 100644 --- a/r/R/util.R +++ b/r/R/util.R @@ -93,8 +93,8 @@ all_funs <- function(expr) { expr <- quo_get_expr(expr) } names <- all.names(expr) - # if we have namespace-qualified functions rebuild the function name with the - # pkg:: prefix + # if we have namespace-qualified functions, we rebuild the function name with + # the `pkg::` prefix if ("::" %in% names) { for (i in seq_along(names)) { if (names[i] == "::") { diff --git a/r/tests/testthat/test-dplyr-filter.R b/r/tests/testthat/test-dplyr-filter.R index 0315989b2d5..aed46d801ce 100644 --- a/r/tests/testthat/test-dplyr-filter.R +++ b/r/tests/testthat/test-dplyr-filter.R @@ -401,7 +401,7 @@ test_that("filter() with .data pronoun", { ) }) -test_that("filter() with namespacing", { +test_that("filter() with namespaced functions", { compare_dplyr_binding( .input %>% filter(dplyr::between(dbl, 1, 2)) %>% diff --git a/r/tests/testthat/test-dplyr-group-by.R b/r/tests/testthat/test-dplyr-group-by.R index ba3d472543c..5698d4c6ced 100644 --- a/r/tests/testthat/test-dplyr-group-by.R +++ b/r/tests/testthat/test-dplyr-group-by.R @@ -157,7 +157,7 @@ test_that("group_by with .drop", { ) }) -test_that("group_by() and namespacing", { +test_that("group_by() and namespaced functions", { compare_dplyr_binding( .input %>% group_by(int > base::sqrt(25)) %>% From 8edc4c8af39dbbe165e8dd03f5cb0ab29cd09e97 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Fri, 8 Jul 2022 21:19:12 +0100 Subject: [PATCH 070/129] `mutate` and `transmute` namespacing tests --- r/tests/testthat/test-dplyr-mutate.R | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/r/tests/testthat/test-dplyr-mutate.R b/r/tests/testthat/test-dplyr-mutate.R index 72352885b33..0718ed9d0de 100644 --- a/r/tests/testthat/test-dplyr-mutate.R +++ b/r/tests/testthat/test-dplyr-mutate.R @@ -533,3 +533,24 @@ test_that("mutate and pmin/pmax", { df ) }) + +test_that("mutate() and transmute() with namespaced functions", { + compare_dplyr_binding( + .input %>% + mutate( + a = base::round(dbl) + base::log(int), + b = stringr::str_detect(verses, "ur") + ) %>% + collect(), + tbl + ) + compare_dplyr_binding( + .input %>% + transmute( + a = base::round(dbl) + base::log(int), + b = stringr::str_detect(verses, "ur") + ) %>% + collect(), + tbl + ) +}) From 067c73981444a95b6350187790dd2229f436c596 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Fri, 8 Jul 2022 21:19:31 +0100 Subject: [PATCH 071/129] register all agg bindings with namespace --- r/R/dplyr-summarize.R | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/r/R/dplyr-summarize.R b/r/R/dplyr-summarize.R index 43695c3400a..99f6acb7a92 100644 --- a/r/R/dplyr-summarize.R +++ b/r/R/dplyr-summarize.R @@ -63,42 +63,42 @@ register_bindings_aggregate <- function() { options = list(skip_nulls = na.rm, min_count = 0L) ) }) - register_binding_agg("any", function(..., na.rm = FALSE) { + register_binding_agg("base::any", function(..., na.rm = FALSE) { list( fun = "any", data = ensure_one_arg(list2(...), "any"), options = list(skip_nulls = na.rm, min_count = 0L) ) }) - register_binding_agg("all", function(..., na.rm = FALSE) { + register_binding_agg("base::all", function(..., na.rm = FALSE) { list( fun = "all", data = ensure_one_arg(list2(...), "all"), options = list(skip_nulls = na.rm, min_count = 0L) ) }) - register_binding_agg("mean", function(x, na.rm = FALSE) { + register_binding_agg("base::mean", function(x, na.rm = FALSE) { list( fun = "mean", data = x, options = list(skip_nulls = na.rm, min_count = 0L) ) }) - register_binding_agg("sd", function(x, na.rm = FALSE, ddof = 1) { + register_binding_agg("stats::sd", function(x, na.rm = FALSE, ddof = 1) { list( fun = "stddev", data = x, options = list(skip_nulls = na.rm, min_count = 0L, ddof = ddof) ) }) - register_binding_agg("var", function(x, na.rm = FALSE, ddof = 1) { + register_binding_agg("stats::var", function(x, na.rm = FALSE, ddof = 1) { list( fun = "variance", data = x, options = list(skip_nulls = na.rm, min_count = 0L, ddof = ddof) ) }) - register_binding_agg("quantile", function(x, probs, na.rm = FALSE) { + register_binding_agg("stats::quantile", function(x, probs, na.rm = FALSE) { if (length(probs) != 1) { arrow_not_supported("quantile() with length(probs) != 1") } @@ -116,7 +116,7 @@ register_bindings_aggregate <- function() { options = list(skip_nulls = na.rm, q = probs) ) }) - register_binding_agg("median", function(x, na.rm = FALSE) { + register_binding_agg("stats::median", function(x, na.rm = FALSE) { # TODO: Bind to the Arrow function that returns an exact median and remove # this warning (ARROW-14021) warn( @@ -131,28 +131,28 @@ register_bindings_aggregate <- function() { options = list(skip_nulls = na.rm) ) }) - register_binding_agg("n_distinct", function(..., na.rm = FALSE) { + register_binding_agg("dplyr::n_distinct", function(..., na.rm = FALSE) { list( fun = "count_distinct", data = ensure_one_arg(list2(...), "n_distinct"), options = list(na.rm = na.rm) ) }) - register_binding_agg("n", function() { + register_binding_agg("dplyr::n", function() { list( fun = "sum", data = Expression$scalar(1L), options = list() ) }) - register_binding_agg("min", function(..., na.rm = FALSE) { + register_binding_agg("base::min", function(..., na.rm = FALSE) { list( fun = "min", data = ensure_one_arg(list2(...), "min"), options = list(skip_nulls = na.rm, min_count = 0L) ) }) - register_binding_agg("max", function(..., na.rm = FALSE) { + register_binding_agg("base::max", function(..., na.rm = FALSE) { list( fun = "max", data = ensure_one_arg(list2(...), "max"), From 48187c33955bf207547f6dd6c2f02e2a99cb7080 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Fri, 8 Jul 2022 21:25:12 +0100 Subject: [PATCH 072/129] merge conflict --- r/R/dplyr-funcs-datetime.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R index c948e62109f..c611dd5c74d 100644 --- a/r/R/dplyr-funcs-datetime.R +++ b/r/R/dplyr-funcs-datetime.R @@ -297,10 +297,10 @@ register_bindings_datetime_conversion <- function() { }) register_binding("base::as.Date", function(x, - format = NULL, - tryFormats = "%Y-%m-%d", - origin = "1970-01-01", - tz = "UTC") { + format = NULL, + tryFormats = "%Y-%m-%d", + origin = "1970-01-01", + tz = "UTC") { if (is.null(format) && length(tryFormats) > 1) { abort( paste( From 09e76af022b5584e24e06c1527fa74c252f7b8bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Fri, 8 Jul 2022 21:27:09 +0100 Subject: [PATCH 073/129] unit tests for `quantile()` with namespacing --- r/tests/testthat/test-dplyr-summarize.R | 53 +++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/r/tests/testthat/test-dplyr-summarize.R b/r/tests/testthat/test-dplyr-summarize.R index 665ace93c5d..2cae7d15823 100644 --- a/r/tests/testthat/test-dplyr-summarize.R +++ b/r/tests/testthat/test-dplyr-summarize.R @@ -459,6 +459,59 @@ test_that("quantile()", { ) }) +test_that("quantile() with namespacing", { + suppressWarnings( + expect_warning( + expect_equal( + tbl %>% + group_by(some_grouping) %>% + summarize( + q_dbl = quantile(dbl, probs = 0.5, na.rm = TRUE, names = FALSE), + q_int = as.double( + quantile(int, probs = 0.5, na.rm = TRUE, names = FALSE) + ) + ) %>% + arrange(some_grouping), + Table$create(tbl) %>% + group_by(some_grouping) %>% + summarize( + q_dbl = stats::quantile(dbl, probs = 0.5, na.rm = TRUE), + q_int = as.double(quantile(int, probs = 0.5, na.rm = TRUE)) + ) %>% + arrange(some_grouping) %>% + collect() + ), + "quantile() currently returns an approximate quantile in Arrow", + fixed = TRUE + ), + classes = "arrow.quantile.approximate" + ) + + # without groups + suppressWarnings( + expect_warning( + expect_equal( + tbl %>% + summarize( + q_dbl = quantile(dbl, probs = 0.5, na.rm = TRUE, names = FALSE), + q_int = as.double( + quantile(int, probs = 0.5, na.rm = TRUE, names = FALSE) + ) + ), + Table$create(tbl) %>% + summarize( + q_dbl = stats::quantile(dbl, probs = 0.5, na.rm = TRUE), + q_int = as.double(quantile(int, probs = 0.5, na.rm = TRUE)) + ) %>% + collect() + ), + "quantile() currently returns an approximate quantile in Arrow", + fixed = TRUE + ), + classes = "arrow.quantile.approximate" + ) +}) + test_that("summarize() with min() and max()", { compare_dplyr_binding( .input %>% From 66d2dddd8e8bf51b3c4ed332c899f67571f88ec3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Fri, 8 Jul 2022 21:29:55 +0100 Subject: [PATCH 074/129] style --- r/R/dplyr-funcs-datetime.R | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R index c611dd5c74d..080ac082e17 100644 --- a/r/R/dplyr-funcs-datetime.R +++ b/r/R/dplyr-funcs-datetime.R @@ -76,9 +76,9 @@ register_bindings_datetime_utility <- function() { }) register_binding("base::strftime", function(x, - format = "", - tz = "", - usetz = FALSE) { + format = "", + tz = "", + usetz = FALSE) { if (usetz) { format <- paste(format, "%Z") } @@ -175,9 +175,9 @@ register_bindings_datetime_components <- function() { }) register_binding("lubridate::month", function(x, - label = FALSE, - abbr = TRUE, - locale = Sys.getlocale("LC_TIME")) { + label = FALSE, + abbr = TRUE, + locale = Sys.getlocale("LC_TIME")) { if (call_binding("is.integer", x)) { x <- call_binding( "if_else", @@ -243,12 +243,12 @@ register_bindings_datetime_components <- function() { register_bindings_datetime_conversion <- function() { register_binding("lubridate::make_datetime", function(year = 1970L, - month = 1L, - day = 1L, - hour = 0L, - min = 0L, - sec = 0, - tz = "UTC") { + month = 1L, + day = 1L, + hour = 0L, + min = 0L, + sec = 0, + tz = "UTC") { # ParseTimestampStrptime currently ignores the timezone information (ARROW-12820). # Stop if tz other than 'UTC' is provided. @@ -329,9 +329,9 @@ register_bindings_datetime_conversion <- function() { }) register_binding("lubridate::as_date", function(x, - format = NULL, - origin = "1970-01-01", - tz = NULL) { + format = NULL, + origin = "1970-01-01", + tz = NULL) { # base::as.Date() and lubridate::as_date() differ in the way they use the # `tz` argument. Both cast to the desired timezone, if present. The # difference appears when the `tz` argument is not set: `as.Date()` uses the @@ -591,7 +591,8 @@ register_bindings_datetime_parsers <- function() { for (ymd_order in ymd_parser_vec) { register_binding( paste0("lubridate::", ymd_order), - ymd_parser_map_factory(ymd_order)) + ymd_parser_map_factory(ymd_order) + ) } register_binding("lubridate::fast_strptime", function(x, From d34af62a1d71bb1847f273961af425a37e8742ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Sun, 10 Jul 2022 17:17:02 +0100 Subject: [PATCH 075/129] bump ci From 27e43e2d6d3bbb286f5799f058b685633660b935 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Mon, 11 Jul 2022 16:20:42 +0100 Subject: [PATCH 076/129] solve `quantile` issue + test for `median()` --- r/R/dplyr-summarize.R | 4 ++-- r/tests/testthat/test-dplyr-summarize.R | 16 ++++++++++++++++ 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/r/R/dplyr-summarize.R b/r/R/dplyr-summarize.R index 99f6acb7a92..68059be5fe0 100644 --- a/r/R/dplyr-summarize.R +++ b/r/R/dplyr-summarize.R @@ -359,7 +359,7 @@ summarize_eval <- function(name, quosure, ctx, hash) { # the list output from the Arrow hash_tdigest kernel to flatten it into a # column of type float64. We do that by modifying the unevaluated expression # to replace quantile(...) with arrow_list_element(quantile(...), 0L) - if (hash && "quantile" %in% funs_in_expr) { + if (hash && ("quantile" %in% funs_in_expr || "stats::quantile" %in% funs_in_expr)) { expr <- wrap_hash_quantile(expr) funs_in_expr <- all_funs(expr) } @@ -475,7 +475,7 @@ wrap_hash_quantile <- function(expr) { if (length(expr) == 1) { return(expr) } else { - if (is.call(expr) && expr[[1]] == quote(quantile)) { + if (is.call(expr) && (expr[[1]] == quote(quantile) || expr[[1]] == quote(stats::quantile))) { return(str2lang(paste0("arrow_list_element(", deparse1(expr), ", 0L)"))) } else { return(as.call(lapply(expr, wrap_hash_quantile))) diff --git a/r/tests/testthat/test-dplyr-summarize.R b/r/tests/testthat/test-dplyr-summarize.R index 2cae7d15823..543ea470a1b 100644 --- a/r/tests/testthat/test-dplyr-summarize.R +++ b/r/tests/testthat/test-dplyr-summarize.R @@ -354,6 +354,22 @@ test_that("median()", { ) }) +test_that("median() with namespacing", { + suppressWarnings( + compare_dplyr_binding( + .input %>% + summarize( + med_dbl_narmt = stats::median(dbl, na.rm = TRUE), + med_int_narmt = base::as.double(stats::median(int, TRUE)) + ) %>% + collect(), + tbl, + warning = "median\\(\\) currently returns an approximate median in Arrow" + ), + classes = "arrow.median.approximate" + ) +}) + test_that("quantile()", { # The default method for stats::quantile() throws an error when na.rm = FALSE # and the input contains NA or NaN, whereas the Arrow tdigest kernels return From eaf1852ec46a8f6b769830eb9e6e37f5d336284c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Mon, 11 Jul 2022 18:22:32 +0100 Subject: [PATCH 077/129] split test and skip when RE2 not available --- r/tests/testthat/test-dplyr-mutate.R | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/r/tests/testthat/test-dplyr-mutate.R b/r/tests/testthat/test-dplyr-mutate.R index 0718ed9d0de..4d325b4e068 100644 --- a/r/tests/testthat/test-dplyr-mutate.R +++ b/r/tests/testthat/test-dplyr-mutate.R @@ -538,7 +538,25 @@ test_that("mutate() and transmute() with namespaced functions", { compare_dplyr_binding( .input %>% mutate( - a = base::round(dbl) + base::log(int), + a = base::round(dbl) + base::log(int) + ) %>% + collect(), + tbl + ) + compare_dplyr_binding( + .input %>% + transmute( + a = base::round(dbl) + base::log(int) + ) %>% + collect(), + tbl + ) + + # stringr::str_detect binding depends on RE2 + skip_if_not_available("re2") + compare_dplyr_binding( + .input %>% + mutate( b = stringr::str_detect(verses, "ur") ) %>% collect(), @@ -547,7 +565,6 @@ test_that("mutate() and transmute() with namespaced functions", { compare_dplyr_binding( .input %>% transmute( - a = base::round(dbl) + base::log(int), b = stringr::str_detect(verses, "ur") ) %>% collect(), From a96879c4d931efe515a0ba37a0db7540615fea34 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Tue, 12 Jul 2022 13:46:40 +0100 Subject: [PATCH 078/129] minor change to test-dplyr-mutate.R --- r/tests/testthat/test-dplyr-mutate.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/tests/testthat/test-dplyr-mutate.R b/r/tests/testthat/test-dplyr-mutate.R index 4d325b4e068..a961f3b6c40 100644 --- a/r/tests/testthat/test-dplyr-mutate.R +++ b/r/tests/testthat/test-dplyr-mutate.R @@ -552,7 +552,7 @@ test_that("mutate() and transmute() with namespaced functions", { tbl ) - # stringr::str_detect binding depends on RE2 + # str_detect binding depends on RE2 skip_if_not_available("re2") compare_dplyr_binding( .input %>% From 03152d88217d853d83dd43dfaa17d0986aed3053 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Tue, 12 Jul 2022 13:47:11 +0100 Subject: [PATCH 079/129] tests for unary and binary string bindings --- r/tests/testthat/test-dplyr-funcs-string.R | 78 ++++++++++++++++++++++ 1 file changed, 78 insertions(+) diff --git a/r/tests/testthat/test-dplyr-funcs-string.R b/r/tests/testthat/test-dplyr-funcs-string.R index c4d54d325f4..b26a62c7039 100644 --- a/r/tests/testthat/test-dplyr-funcs-string.R +++ b/r/tests/testthat/test-dplyr-funcs-string.R @@ -23,6 +23,14 @@ library(lubridate) library(stringr) library(stringi) +tbl <- example_data +# Add some better string data +tbl$verses <- verses[[1]] +# c(" a ", " b ", " c ", ...) increasing padding +# nchar = 3 5 7 9 11 13 15 17 19 21 +tbl$padded_strings <- stringr::str_pad(letters[1:10], width = 2 * (1:10) + 1, side = "both") +tbl$some_grouping <- rep(c(1, 2), 5) + test_that("paste, paste0, and str_c", { df <- tibble( v = c("A", "B", "C"), @@ -1242,3 +1250,73 @@ test_that("str_count", { df ) }) + +test_that("stringi::stri_reverse", { + compare_dplyr_binding( + .input %>% + mutate( + verse_length = stri_reverse(verses) + ) %>% + collect(), + tbl + ) +}) + +test_that("base::tolower and base::toupper", { + compare_dplyr_binding( + .input %>% + mutate( + verse_to_upper = toupper(verses), + verse_to_lower = tolower(verses) + ) %>% + collect(), + tbl + ) +}) + +test_that("stringr::str_dup and base::strrep", { + compare_dplyr_binding( + .input %>% + mutate( + duped_verses_stringr = str_dup(verses, times = 2L), + duped_verses_base = strrep(verses, times = 3L) + ) %>% + collect(), + tbl + ) +}) + +test_that("namespaced unary and binary string functions", { + # str_length and stringi::stri_reverse + compare_dplyr_binding( + .input %>% + mutate( + verse_length = stringr::str_length(verses), + reverses_verse = stringi::stri_reverse(verses) + ) %>% + collect(), + tbl + ) + + # base::tolower and base::toupper + compare_dplyr_binding( + .input %>% + mutate( + verse_to_upper = base::toupper(verses), + verse_to_lower = base::tolower(verses) + ) %>% + collect(), + tbl + ) + + # stringr::str_dup and base::strrep + compare_dplyr_binding( + .input %>% + mutate( + duped_verses_stringr = stringr::str_dup(verses, times = 2L), + duped_verses_base = base::strrep(verses, times = 3L) + ) %>% + collect(), + tbl + ) +}) From 6125527bd727058f236f627faf0f6837f1971e87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Tue, 12 Jul 2022 13:55:54 +0100 Subject: [PATCH 080/129] removed duplicate tests --- r/tests/testthat/test-dplyr-funcs-string.R | 37 ++++++++++------------ 1 file changed, 16 insertions(+), 21 deletions(-) diff --git a/r/tests/testthat/test-dplyr-funcs-string.R b/r/tests/testthat/test-dplyr-funcs-string.R index b26a62c7039..0f0ae57882e 100644 --- a/r/tests/testthat/test-dplyr-funcs-string.R +++ b/r/tests/testthat/test-dplyr-funcs-string.R @@ -1274,18 +1274,6 @@ test_that("base::tolower and base::toupper", { ) }) -test_that("stringr::str_dup and base::strrep", { - compare_dplyr_binding( - .input %>% - mutate( - duped_verses_stringr = str_dup(verses, times = 2L), - duped_verses_base = strrep(verses, times = 3L) - ) %>% - collect(), - tbl - ) -}) - test_that("namespaced unary and binary string functions", { # str_length and stringi::stri_reverse compare_dplyr_binding( @@ -1310,13 +1298,20 @@ test_that("namespaced unary and binary string functions", { ) # stringr::str_dup and base::strrep - compare_dplyr_binding( - .input %>% - mutate( - duped_verses_stringr = stringr::str_dup(verses, times = 2L), - duped_verses_base = base::strrep(verses, times = 3L) - ) %>% - collect(), - tbl - ) + df <- tibble(x = c("foo1", " \tB a R\n", "!apACHe aRroW!")) + for (times in 0:8) { + compare_dplyr_binding( + .input %>% + mutate(x = base::strrep(x, times)) %>% + collect(), + df + ) + + compare_dplyr_binding( + .input %>% + mutate(x = stringr::str_dup(x, times)) %>% + collect(), + df + ) + } }) From b5a8ecf365742d6412266a84faed2820ba2a62d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Tue, 12 Jul 2022 14:15:57 +0100 Subject: [PATCH 081/129] added tests for namespaced versions of the datetime bindings --- r/tests/testthat/test-dplyr-funcs-datetime.R | 104 +++++++++++++++++++ 1 file changed, 104 insertions(+) diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R index d522786a1b2..f961857f21d 100644 --- a/r/tests/testthat/test-dplyr-funcs-datetime.R +++ b/r/tests/testthat/test-dplyr-funcs-datetime.R @@ -402,6 +402,14 @@ test_that("extract epiyear from timestamp", { collect(), test_df ) + + # namespaced epiyear + compare_dplyr_binding( + .input %>% + mutate(x = lubridate::epiyear(datetime)) %>% + collect(), + test_df + ) }) test_that("extract quarter from timestamp", { @@ -446,6 +454,14 @@ test_that("extract isoweek from timestamp", { collect(), test_df ) + + # namespaced isoweek + compare_dplyr_binding( + .input %>% + mutate(x = lubridate::isoweek(datetime)) %>% + collect(), + test_df + ) }) test_that("extract epiweek from timestamp", { @@ -530,6 +546,14 @@ test_that("extract yday from timestamp", { collect(), test_df ) + + # namespaced yday + compare_dplyr_binding( + .input %>% + mutate(x = lubridate::yday(datetime)) %>% + collect(), + test_df + ) }) test_that("extract hour from timestamp", { @@ -539,6 +563,14 @@ test_that("extract hour from timestamp", { collect(), test_df ) + + # namespaced hour + compare_dplyr_binding( + .input %>% + mutate(x = lubridate::hour(datetime)) %>% + collect(), + test_df + ) }) test_that("extract minute from timestamp", { @@ -548,6 +580,14 @@ test_that("extract minute from timestamp", { collect(), test_df ) + + # namespaced minute + compare_dplyr_binding( + .input %>% + mutate(x = lubridate::minute(datetime)) %>% + collect(), + test_df + ) }) test_that("extract second from timestamp", { @@ -570,6 +610,14 @@ test_that("extract year from date", { collect(), test_df ) + + # namespaced year + compare_dplyr_binding( + .input %>% + mutate(x = lubridate::year(date)) %>% + collect(), + test_df + ) }) test_that("extract isoyear from date", { @@ -579,6 +627,14 @@ test_that("extract isoyear from date", { collect(), test_df ) + + # namespaced isoyear + compare_dplyr_binding( + .input %>% + mutate(x = lubridate::isoyear(date)) %>% + collect(), + test_df + ) }) test_that("extract epiyear from date", { @@ -597,6 +653,14 @@ test_that("extract quarter from date", { collect(), test_df ) + + # namespaced quarter + compare_dplyr_binding( + .input %>% + mutate(x = lubridate::quarter(date)) %>% + collect(), + test_df + ) }) test_that("extract isoweek from date", { @@ -615,6 +679,14 @@ test_that("extract epiweek from date", { collect(), test_df ) + + # namespaced epiweek + compare_dplyr_binding( + .input %>% + mutate(x = lubridate::epiweek(date)) %>% + collect(), + test_df + ) }) test_that("extract week from date", { @@ -659,6 +731,14 @@ test_that("extract day from date", { collect(), test_df ) + + # namespaced day + compare_dplyr_binding( + .input %>% + mutate(x = lubridate::day(date)) %>% + collect(), + test_df + ) }) test_that("extract wday from date", { @@ -707,6 +787,14 @@ test_that("extract mday from date", { collect(), test_df ) + + # namespaced mday + compare_dplyr_binding( + .input %>% + mutate(x = lubridate::mday(date)) %>% + collect(), + test_df + ) }) test_that("extract yday from date", { @@ -746,6 +834,14 @@ test_that("leap_year mirror lubridate", { )) ) ) + + # namespaced leap_year + compare_dplyr_binding( + .input %>% + mutate(x = lubridate::leap_year(date)) %>% + collect(), + test_df + ) }) test_that("am/pm mirror lubridate", { @@ -853,6 +949,14 @@ test_that("dst extracts daylight savings time correctly", { collect(), test_df ) + + # namespaced dst + compare_dplyr_binding( + .input %>% + mutate(dst = lubridate::dst(dates)) %>% + collect(), + test_df + ) }) test_that("month() supports integer input", { From 0ef791b9d2b7aac3fa71c415c29b71440981fabd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Tue, 12 Jul 2022 14:16:14 +0100 Subject: [PATCH 082/129] test for `base::as.factor()` --- r/tests/testthat/test-dplyr-funcs-type.R | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/r/tests/testthat/test-dplyr-funcs-type.R b/r/tests/testthat/test-dplyr-funcs-type.R index 539dc98f8ba..b0ced52901a 100644 --- a/r/tests/testthat/test-dplyr-funcs-type.R +++ b/r/tests/testthat/test-dplyr-funcs-type.R @@ -605,6 +605,14 @@ test_that("as.factor()/dictionary_encode()", { df1 ) + # namespaced as.factor + compare_dplyr_binding( + .input %>% + transmute(x = base::as.factor(x)) %>% + collect(), + df1 + ) + expect_warning( compare_dplyr_binding( .input %>% From 73b0390e1866443a690b50235ad6c4731c10a584 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Tue, 12 Jul 2022 15:00:56 +0100 Subject: [PATCH 083/129] unit tests for conditional bindings --- .../testthat/test-dplyr-funcs-conditional.R | 53 +++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/r/tests/testthat/test-dplyr-funcs-conditional.R b/r/tests/testthat/test-dplyr-funcs-conditional.R index 4f5fdb0af4e..54d47b0bfab 100644 --- a/r/tests/testthat/test-dplyr-funcs-conditional.R +++ b/r/tests/testthat/test-dplyr-funcs-conditional.R @@ -44,6 +44,16 @@ test_that("if_else and ifelse", { tbl ) + # namespaced if_else + compare_dplyr_binding( + .input %>% + mutate( + y = dplyr::if_else(int > 5, 1, 0) + ) %>% + collect(), + tbl + ) + expect_error( Table$create(tbl) %>% mutate( @@ -71,6 +81,16 @@ test_that("if_else and ifelse", { tbl ) + # namespaced ifelse + compare_dplyr_binding( + .input %>% + mutate( + y = base::ifelse(int > 5, 1, 0) + ) %>% + collect(), + tbl + ) + compare_dplyr_binding( .input %>% mutate( @@ -192,6 +212,25 @@ test_that("case_when()", { tbl ) + # namespaced case_when + compare_dplyr_binding( + .input %>% + transmute(cw = dplyr::case_when(chr %in% letters[1:3] ~ 1L) + 41L) %>% + collect(), + tbl + ) + + compare_dplyr_binding( + .input %>% + filter(dplyr::case_when( + dbl + int - 1.1 == dbl2 ~ TRUE, + NA ~ NA, + TRUE ~ FALSE + ) & !is.na(dbl2)) %>% + collect(), + tbl + ) + # dplyr::case_when() errors if values on right side of formulas do not have # exactly the same type, but the Arrow case_when kernel allows compatible types expect_equal( @@ -303,6 +342,20 @@ test_that("coalesce()", { df ) + # namespaced coalesce + compare_dplyr_binding( + .input %>% + mutate( + cw = dplyr::coalesce(w), + cz = dplyr::coalesce(z), + cwx = dplyr::coalesce(w, x), + cwxy = dplyr::coalesce(w, x, y), + cwxyz = dplyr::coalesce(w, x, y, z) + ) %>% + collect(), + df + ) + # factor df_fct <- df %>% transmute(across(everything(), ~ factor(.x, levels = c("a", "b", "c")))) From ca7ae5792f71a68d0c58059b7584f5b4e1edb5a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Tue, 12 Jul 2022 15:21:19 +0100 Subject: [PATCH 084/129] unit tests for namespaced date/time bindings - part 1 --- r/tests/testthat/test-dplyr-funcs-datetime.R | 153 +++++++++++++++++++ 1 file changed, 153 insertions(+) diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R index f961857f21d..a162b474386 100644 --- a/r/tests/testthat/test-dplyr-funcs-datetime.R +++ b/r/tests/testthat/test-dplyr-funcs-datetime.R @@ -70,6 +70,17 @@ test_that("strptime", { collect(), t_stamp_with_pm_tz ) + + # namespaced strptime + expect_equal( + t_string %>% + record_batch() %>% + mutate( + x = base::strptime(x, format = "%Y-%m-%d %H:%M:%S") + ) %>% + collect(), + t_stamp_with_pm_tz + ) }) # adding a timezone to a timezone-naive timestamp works @@ -201,6 +212,14 @@ test_that("strftime", { times ) + # namespaced strftime + compare_dplyr_binding( + .input %>% + mutate(x = base::strftime(datetime, format = formats)) %>% + collect(), + times + ) + compare_dplyr_binding( .input %>% mutate(x = strftime(date, format = formats_date)) %>% @@ -285,6 +304,14 @@ test_that("format_ISO8601", { times ) + # namespaced format_ISO8601 + compare_dplyr_binding( + .input %>% + mutate(x = lubridate::format_ISO8601(x, precision = "ymd", usetz = FALSE)) %>% + collect(), + times + ) + if (getRversion() < "3.5") { # before 3.5, times$x will have no timezone attribute, so Arrow faithfully # errors that there is no timezone to format: @@ -345,6 +372,17 @@ test_that("is.* functions from lubridate", { test_df ) + # namespaced is.POSIXct + compare_dplyr_binding( + .input %>% + mutate( + x = lubridate::is.POSIXct(datetime), + y = lubridate::is.POSIXct(integer) + ) %>% + collect(), + test_df + ) + compare_dplyr_binding( .input %>% mutate(x = is.Date(date), y = is.Date(integer)) %>% @@ -352,6 +390,17 @@ test_that("is.* functions from lubridate", { test_df ) + # namespaced is.Date + compare_dplyr_binding( + .input %>% + mutate( + x = lubridate::is.Date(date), + y = lubridate::is.Date(integer) + ) %>% + collect(), + test_df + ) + compare_dplyr_binding( .input %>% mutate( @@ -373,6 +422,18 @@ test_that("is.* functions from lubridate", { collect(), test_df ) + + # namespaced is.timepoint and is.instant + compare_dplyr_binding( + .input %>% + mutate( + x = lubridate::is.timepoint(datetime), + y = lubridate::is.instant(date), + z = lubridate::is.timepoint(integer) + ) %>% + collect(), + test_df + ) }) # These tests test component extraction from timestamp objects @@ -429,6 +490,14 @@ test_that("extract month from timestamp", { test_df ) + # namespaced month + compare_dplyr_binding( + .input %>% + mutate(x = lubridate::month(datetime)) %>% + collect(), + test_df + ) + compare_dplyr_binding( .input %>% # R returns ordered factor whereas Arrow returns character @@ -480,6 +549,14 @@ test_that("extract week from timestamp", { collect(), test_df ) + + # namespaced week + compare_dplyr_binding( + .input %>% + mutate(x = lubridate::week(datetime)) %>% + collect(), + test_df + ) }) test_that("extract day from timestamp", { @@ -599,6 +676,16 @@ test_that("extract second from timestamp", { # arrow supports nanosecond resolution but lubridate does not tolerance = 1e-6 ) + + # namespaced second + compare_dplyr_binding( + .input %>% + mutate(x = lubridate::second(datetime)) %>% + collect(), + test_df, + # arrow supports nanosecond resolution but lubridate does not + tolerance = 1e-6 + ) }) # These tests test extraction of components from date32 objects @@ -756,6 +843,14 @@ test_that("extract wday from date", { test_df ) + # namespaced wday + compare_dplyr_binding( + .input %>% + mutate(x = lubridate::wday(date, week_start = 3)) %>% + collect(), + test_df + ) + compare_dplyr_binding( .input %>% mutate(x = wday(date, week_start = 1)) %>% @@ -863,6 +958,26 @@ test_that("am/pm mirror lubridate", { ) ) ) + + # namespaced am and pm + compare_dplyr_binding( + .input %>% + mutate( + am = lubridate::am(test_time), + pm = lubridate::pm(test_time) + ) %>% + collect(), + data.frame( + test_time = strptime( + x = c( + "2022-01-25 11:50:59", + "2022-01-25 12:00:00", + "2022-01-25 00:00:00" + ), + format = "%Y-%m-%d %H:%M:%S" + ) + ) + ) }) test_that("extract tz", { @@ -877,6 +992,14 @@ test_that("extract tz", { df ) + # namespaced tz + compare_dplyr_binding( + .input %>% + mutate(timezone_posixct_date = lubridate::tz(posixct_date)) %>% + collect(), + df + ) + # test a few types directly from R objects expect_error( call_binding("tz", "2020-10-01"), @@ -921,6 +1044,17 @@ test_that("semester works with temporal types and integers", { test_df ) + # namespaced semester + compare_dplyr_binding( + .input %>% + mutate( + sem_wo_year = lubridate::semester(dates), + sem_w_year = lubridate::semester(dates, with_year = TRUE) + ) %>% + collect(), + test_df + ) + compare_dplyr_binding( .input %>% mutate(sem_month_as_int = semester(month_as_int)) %>% @@ -1129,6 +1263,14 @@ test_that("make_date & make_datetime", { test_df ) + # namespaced make_date + compare_dplyr_binding( + .input %>% + mutate(composed_date = lubridate::make_date(year, month, day)) %>% + collect(), + test_df + ) + compare_dplyr_binding( .input %>% mutate(composed_date_r_obj = make_date(1999, 12, 31)) %>% @@ -1146,6 +1288,17 @@ test_that("make_date & make_datetime", { ignore_attr = TRUE ) + # namespaced make_datetime + compare_dplyr_binding( + .input %>% + mutate(composed_datetime = lubridate::make_datetime(year, month, day, hour, min, sec)) %>% + collect(), + test_df, + # the make_datetime binding uses strptime which does not support tz, hence + # a mismatch in tzone attribute (ARROW-12820) + ignore_attr = TRUE + ) + compare_dplyr_binding( .input %>% mutate( From 6c13a4ecaa31466f91dee14705159011d76a9157 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Tue, 12 Jul 2022 15:55:58 +0100 Subject: [PATCH 085/129] unit tests for namespaced date/time bindings - part 2 --- r/tests/testthat/test-dplyr-funcs-datetime.R | 190 ++++++++++++++++++- 1 file changed, 189 insertions(+), 1 deletion(-) diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R index a162b474386..c6d4016725e 100644 --- a/r/tests/testthat/test-dplyr-funcs-datetime.R +++ b/r/tests/testthat/test-dplyr-funcs-datetime.R @@ -1333,6 +1333,17 @@ test_that("ISO_datetime & ISOdate", { ignore_attr = TRUE ) + # namespaced ISOdate + compare_dplyr_binding( + .input %>% + mutate(composed_date = base::ISOdate(year, month, day)) %>% + collect(), + test_df, + # the make_datetime binding uses strptime which does not support tz, hence + # a mismatch in tzone attribute (ARROW-12820) + ignore_attr = TRUE + ) + compare_dplyr_binding( .input %>% mutate(composed_date_r_obj = ISOdate(1999, 12, 31)) %>% @@ -1356,6 +1367,19 @@ test_that("ISO_datetime & ISOdate", { ignore_attr = TRUE ) + # namespaced ISOdatetime + compare_dplyr_binding( + .input %>% + mutate( + composed_datetime = base::ISOdatetime(year, month, day, hour, min, sec, tz = "UTC") + ) %>% + collect(), + test_df, + # the make_datetime binding uses strptime which does not support tz, hence + # a mismatch in tzone attribute (ARROW-12820) + ignore_attr = TRUE + ) + compare_dplyr_binding( .input %>% mutate( @@ -1369,7 +1393,7 @@ test_that("ISO_datetime & ISOdate", { ) }) -test_that("difftime works correctly", { +test_that("difftime()", { test_df <- tibble( time1 = as.POSIXct( c("2021-02-20", "2021-07-31 0:0:0", "2021-10-30", "2021-01-31 0:0:0") @@ -1390,6 +1414,17 @@ test_that("difftime works correctly", { ignore_attr = TRUE ) + # namespaced difftime + compare_dplyr_binding( + .input %>% + mutate( + secs2 = base::difftime(time1, time2, units = "secs") + ) %>% + collect(), + test_df, + ignore_attr = TRUE + ) + # units other than "secs" not supported in arrow compare_dplyr_binding( .input %>% @@ -1460,6 +1495,14 @@ test_that("as.difftime()", { test_df ) + # namespaced as.difftime + compare_dplyr_binding( + .input %>% + mutate(hms_difftime = base::as.difftime(hms_string, units = "secs")) %>% + collect(), + test_df + ) + # TODO add test with `format` mismatch returning NA once # https://issues.apache.org/jira/browse/ARROW-15659 is solved # for example: as.difftime("07:", format = "%H:%M") should return NA @@ -1536,6 +1579,22 @@ test_that("`decimal_date()` and `date_decimal()`", { test_df, ignore_attr = "tzone" ) + + # namespaced tests + compare_dplyr_binding( + .input %>% + mutate( + decimal_date_from_POSIXct = lubridate::decimal_date(b), + decimal_date_from_r_POSIXct_obj = lubridate::decimal_date(as.POSIXct("2022-03-25 15:37:01")), + decimal_date_from_r_date_obj = lubridate::decimal_date(as.Date("2022-03-25")), + decimal_date_from_date = lubridate::decimal_date(c), + date_from_decimal = lubridate::date_decimal(a), + date_from_decimal_r_obj = lubridate::date_decimal(2022.178) + ) %>% + collect(), + test_df, + ignore_attr = "tzone" + ) }) test_that("dminutes, dhours, ddays, dweeks, dmonths, dyears", { @@ -1591,6 +1650,22 @@ test_that("dminutes, dhours, ddays, dweeks, dmonths, dyears", { ignore_attr = TRUE ) + # namespaced dminutes, dhours, ddays, dweeks, dyears + compare_dplyr_binding( + .input %>% + mutate( + r_obj_dminutes = lubridate::dminutes(1), + r_obj_dhours = lubridate::dhours(2), + r_obj_ddays = lubridate::ddays(3), + r_obj_dweeks = lubridate::dweeks(4), + r_obj_dmonths = lubridate::dmonths(5), + r_obj_dyears = lubridate::dyears(6) + ) %>% + collect(), + tibble(), + ignore_attr = TRUE + ) + # double -> duration not supported in Arrow. # Error is generated in the C++ code expect_error( @@ -1623,6 +1698,21 @@ test_that("dseconds, dmilliseconds, dmicroseconds, dnanoseconds, dpicoseconds", ignore_attr = TRUE ) + # namespaced dseconds, dmillisecodns, dmicroseconds, dnanoseconds + compare_dplyr_binding( + .input %>% + mutate( + dseconds = lubridate::dseconds(x), + dmilliseconds = lubridate::dmilliseconds(x), + dmicroseconds = lubridate::dmicroseconds(x), + dnanoseconds = lubridate::dnanoseconds(x), + ) %>% + collect(), + example_d, + ignore_attr = TRUE + ) + + compare_dplyr_binding( .input %>% mutate( @@ -1655,6 +1745,11 @@ test_that("dseconds, dmilliseconds, dmicroseconds, dnanoseconds, dpicoseconds", "Duration in picoseconds not supported in Arrow" ) + expect_error( + call_binding("lubridate::dpicoseconds"), + "Duration in picoseconds not supported in Arrow" + ) + # double -> duration not supported in Arrow. # Error is generated in the C++ code expect_error( @@ -1705,6 +1800,23 @@ test_that("make_difftime()", { test_df ) + # namespaced make_difftime + compare_dplyr_binding( + .input %>% + mutate( + duration_from_parts = lubridate::make_difftime( + second = seconds, + minute = minutes, + hour = hours, + day = days, + week = weeks, + units = "secs" + ) + ) %>% + collect(), + test_df + ) + # named difftime parts other than `second`, `minute`, `hour`, `day` and `week` # are not supported expect_error( @@ -1801,6 +1913,21 @@ test_that("`as.Date()` and `as_date()`", { test_df ) + # namespaced as.Date() and as_date + compare_dplyr_binding( + .input %>% + mutate( + date_dv1 = base::as.Date(date_var), + date_pv1 = base::as.Date(posixct_var), + date_pv_tz1 = base::as.Date(posixct_var, tz = "Pacific/Marquesas"), + date_dv2 = lubridate::as_date(date_var), + date_pv2 = lubridate::as_date(posixct_var), + date_pv_tz2 = lubridate::as_date(posixct_var, tz = "Pacific/Marquesas") + ) %>% + collect(), + test_df + ) + # we do not support multiple tryFormats # this is not a simple warning, therefore we cannot use compare_dplyr_binding() # with `warning = TRUE` @@ -1933,6 +2060,21 @@ test_that("`as_datetime()`", { test_df ) + # namespaced as_datetime + compare_dplyr_binding( + .input %>% + mutate( + ddate = lubridate::as_datetime(date), + dchar_date_no_tz = lubridate::as_datetime(char_date), + dchar_date_with_tz = lubridate::as_datetime(char_date, tz = "Pacific/Marquesas"), + dint_date = lubridate::as_datetime(int_date, origin = "1970-01-02"), + dintegerish_date = lubridate::as_datetime(integerish_date, origin = "1970-01-02"), + dintegerish_date2 = lubridate::as_datetime(integerish_date, origin = "1970-01-01") + ) %>% + collect(), + test_df + ) + # Arrow does not support conversion of double to date # the below should error with an error message originating in the C++ code expect_error( @@ -1977,6 +2119,22 @@ test_that("parse_date_time() works with year, month, and date components", { ) ) + # namespaced parse_date_time + compare_dplyr_binding( + .input %>% + mutate( + parsed_date_ymd = lubridate::parse_date_time(string_ymd, orders = "ymd"), + ) %>% + collect(), + tibble::tibble( + string_ymd = c( + "2021-09-1", "2021/09///2", "2021.09.03", "2021,09,4", "2021:09::5", + "2021 09 6", "21-09-07", "21/09/08", "21.09.9", "21,09,10", "21:09:11", + "20210912", "210913", NA + ) + ) + ) + # TODO(ARROW-16443): locale (affecting "%b% and "%B") does not work on Windows skip_on_os("windows") compare_dplyr_binding( @@ -2052,6 +2210,21 @@ test_that("year, month, day date/time parsers", { test_df ) + # namespaced individual ymd parsers + compare_dplyr_binding( + .input %>% + mutate( + ymd_date = lubridate::ymd(ymd_string), + ydm_date = lubridate::ydm(ydm_string), + mdy_date = lubridate::mdy(mdy_string), + myd_date = lubridate::myd(myd_string), + dmy_date = lubridate::dmy(dmy_string), + dym_date = lubridate::dym(dym_string) + ) %>% + collect(), + test_df + ) + compare_dplyr_binding( .input %>% mutate( @@ -2124,6 +2297,21 @@ test_that("ym, my & yq parsers", { collect(), test_df ) + + # namespaced ym, mq and yq parsers + compare_dplyr_binding( + .input %>% + mutate( + ym_date = lubridate::ym(ym_string), + ym_datetime = lubridate::ym(ym_string, tz = "Pacific/Marquesas"), + my_date = lubridate::my(my_string), + my_datetime = lubridate::my(my_string, tz = "Pacific/Marquesas"), + yq_date_from_string = lubridate::yq(yq_string), + yq_datetime_from_string = lubridate::yq(yq_string, tz = "Pacific/Marquesas") + ) %>% + collect(), + test_df + ) }) test_that("lubridate's fast_strptime", { From 46247d7ae83e13d99c2d410ef7abd9732f41dcdc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Tue, 12 Jul 2022 16:34:40 +0100 Subject: [PATCH 086/129] register namespaced `sqrt()` and `exp()` bindings + add math unit tests --- r/R/dplyr-funcs-math.R | 4 +-- r/tests/testthat/test-dplyr-funcs-math.R | 32 ++++++++++++++++++++++++ r/tests/testthat/test-dplyr-mutate.R | 13 ++++++++++ 3 files changed, 47 insertions(+), 2 deletions(-) diff --git a/r/R/dplyr-funcs-math.R b/r/R/dplyr-funcs-math.R index a6b47a9738d..e7667532000 100644 --- a/r/R/dplyr-funcs-math.R +++ b/r/R/dplyr-funcs-math.R @@ -81,14 +81,14 @@ register_bindings_math <- function() { ) }) - register_binding("sqrt", function(x) { + register_binding("base::sqrt", function(x) { build_expr( "sqrt_checked", x ) }) - register_binding("exp", function(x) { + register_binding("base::exp", function(x) { build_expr( "power_checked", exp(1), diff --git a/r/tests/testthat/test-dplyr-funcs-math.R b/r/tests/testthat/test-dplyr-funcs-math.R index f4a3fb7358e..cfcba5da244 100644 --- a/r/tests/testthat/test-dplyr-funcs-math.R +++ b/r/tests/testthat/test-dplyr-funcs-math.R @@ -175,6 +175,14 @@ test_that("log functions", { df ) + # namespaced log + compare_dplyr_binding( + .input %>% + mutate(y = base::log(x)) %>% + collect(), + df + ) + compare_dplyr_binding( .input %>% mutate(y = log(x, base = exp(1))) %>% @@ -257,6 +265,14 @@ test_that("log functions", { df ) + # namespaced logb + compare_dplyr_binding( + .input %>% + mutate(y = base::logb(x)) %>% + collect(), + df + ) + compare_dplyr_binding( .input %>% mutate(y = log1p(x)) %>% @@ -403,6 +419,14 @@ test_that("exp()", { collect(), df ) + + # with namespacing + compare_dplyr_binding( + .input %>% + mutate(y = base::exp(x)) %>% + collect(), + df + ) }) test_that("sqrt()", { @@ -414,4 +438,12 @@ test_that("sqrt()", { collect(), df ) + + # with namespacing + compare_dplyr_binding( + .input %>% + mutate(y = base::sqrt(x)) %>% + collect(), + df + ) }) diff --git a/r/tests/testthat/test-dplyr-mutate.R b/r/tests/testthat/test-dplyr-mutate.R index a961f3b6c40..550cd66a409 100644 --- a/r/tests/testthat/test-dplyr-mutate.R +++ b/r/tests/testthat/test-dplyr-mutate.R @@ -523,6 +523,19 @@ test_that("mutate and pmin/pmax", { df ) + # namespaced pmin/pmax + compare_dplyr_binding( + .input %>% + mutate( + max_val_1 = base::pmax(val1, val2, val3), + max_val_2 = base::pmax(val1, val2, val3, na.rm = TRUE), + min_val_1 = base::pmin(val1, val2, val3), + min_val_2 = base::pmin(val1, val2, val3, na.rm = TRUE) + ) %>% + collect(), + df + ) + compare_dplyr_binding( .input %>% mutate( From 6bc27d65d9e12eec0cbe84eefec75791788b554e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Tue, 12 Jul 2022 16:35:03 +0100 Subject: [PATCH 087/129] unit test for namespaced `fast_strptime` --- r/tests/testthat/test-dplyr-funcs-datetime.R | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R index c6d4016725e..5b7b9d10fb7 100644 --- a/r/tests/testthat/test-dplyr-funcs-datetime.R +++ b/r/tests/testthat/test-dplyr-funcs-datetime.R @@ -2331,6 +2331,23 @@ test_that("lubridate's fast_strptime", { ) ) + # namespaced fast_strptime + compare_dplyr_binding( + .input %>% + mutate( + y = + lubridate::fast_strptime( + x, + format = "%Y-%m-%d %H:%M:%S", + lt = FALSE + ) + ) %>% + collect(), + tibble( + x = c("2018-10-07 19:04:05", "2022-05-17 21:23:45", NA) + ) + ) + # R object compare_dplyr_binding( .input %>% From e1c202c126e74937e1e521ad0f5ece340e533016 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Wed, 13 Jul 2022 06:56:30 +0100 Subject: [PATCH 088/129] bump ci --- r/tests/testthat/test-dplyr-funcs-string.R | 89 ++++++++++++++++++++++ 1 file changed, 89 insertions(+) diff --git a/r/tests/testthat/test-dplyr-funcs-string.R b/r/tests/testthat/test-dplyr-funcs-string.R index 0f0ae57882e..15c2c078b06 100644 --- a/r/tests/testthat/test-dplyr-funcs-string.R +++ b/r/tests/testthat/test-dplyr-funcs-string.R @@ -49,6 +49,13 @@ test_that("paste, paste0, and str_c", { collect(), df ) + # with namespacing + compare_dplyr_binding( + .input %>% + transmute(base::paste(v, w)) %>% + collect(), + df + ) compare_dplyr_binding( .input %>% transmute(paste(v, w, sep = "-")) %>% @@ -61,12 +68,26 @@ test_that("paste, paste0, and str_c", { collect(), df ) + # with namespacing + compare_dplyr_binding( + .input %>% + transmute(base::paste0(v, w)) %>% + collect(), + df + ) compare_dplyr_binding( .input %>% transmute(str_c(v, w)) %>% collect(), df ) + # with namespacing + compare_dplyr_binding( + .input %>% + transmute(stringr::str_c(v, w)) %>% + collect(), + df + ) compare_dplyr_binding( .input %>% transmute(str_c(v, w, sep = "+")) %>% @@ -244,6 +265,13 @@ test_that("grepl", { collect(), df ) + # with namespacing + compare_dplyr_binding( + .input %>% + filter(base::grepl("Foo", x, fixed = fixed)) %>% + collect(), + df + ) } }) @@ -295,6 +323,13 @@ test_that("str_detect", { collect(), df ) + # with namespacing + compare_dplyr_binding( + .input %>% + transmute(x = stringr::str_detect(x, regex("^f[A-Z]{2}", ignore_case = TRUE))) %>% + collect(), + df + ) compare_dplyr_binding( .input %>% transmute(x = str_detect(x, regex("^f[A-Z]{2}", ignore_case = TRUE), negate = TRUE)) %>% @@ -810,6 +845,14 @@ test_that("str_like", { collect(), tibble(x = c(FALSE, FALSE)) ) + # with namespacing + expect_equal( + df %>% + Table$create() %>% + mutate(x = stringr::str_like(x, "baz")) %>% + collect(), + tibble(x = c(FALSE, FALSE)) + ) # Match - entire string expect_equal( @@ -1113,6 +1156,19 @@ test_that("str_starts, str_ends, startsWith, endsWith", { df ) + # with namespacing + compare_dplyr_binding( + .input %>% + transmute( + a = stringr::str_starts(x, "b.*"), + b = stringr::str_starts(x, "b.*", negate = TRUE), + c = stringr::str_starts(x, fixed("b")), + d = stringr::str_starts(x, fixed("b"), negate = TRUE) + ) %>% + collect(), + df + ) + compare_dplyr_binding( .input %>% filter(str_ends(x, "r")) %>% @@ -1152,6 +1208,20 @@ test_that("str_starts, str_ends, startsWith, endsWith", { collect(), df ) + + # with namespacing + compare_dplyr_binding( + .input %>% + transmute( + a = stringr::str_ends(x, "r"), + b = stringr::str_ends(x, "r", negate = TRUE), + c = stringr::str_ends(x, fixed("r")), + d = stringr::str_ends(x, fixed("r"), negate = TRUE) + ) %>% + collect(), + df + ) + compare_dplyr_binding( .input %>% filter(startsWith(x, "b")) %>% @@ -1189,6 +1259,17 @@ test_that("str_starts, str_ends, startsWith, endsWith", { collect(), df ) + + # with namespacing + compare_dplyr_binding( + .input %>% + transmute( + a = base::startsWith(x, "b"), + b = base::endsWith(x, "r") + ) %>% + collect(), + df + ) }) test_that("str_count", { @@ -1204,6 +1285,14 @@ test_that("str_count", { df ) + # with namespacing + compare_dplyr_binding( + .input %>% + mutate(a_count = str_count(cities, pattern = "a")) %>% + collect(), + df + ) + compare_dplyr_binding( .input %>% mutate(p_count = str_count(cities, pattern = "d")) %>% From 6e70f0f8abae8202d95f728ce32114374929d3c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Wed, 13 Jul 2022 11:34:35 +0100 Subject: [PATCH 089/129] bump ci From 83a504030262e859eb85e18b27795445276d2c6a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Thu, 14 Jul 2022 09:42:13 +0100 Subject: [PATCH 090/129] unit tests for namespaced string bindings --- r/tests/testthat/test-dplyr-funcs-string.R | 124 +++++++++++++++++++++ 1 file changed, 124 insertions(+) diff --git a/r/tests/testthat/test-dplyr-funcs-string.R b/r/tests/testthat/test-dplyr-funcs-string.R index 15c2c078b06..ba535366d76 100644 --- a/r/tests/testthat/test-dplyr-funcs-string.R +++ b/r/tests/testthat/test-dplyr-funcs-string.R @@ -415,6 +415,22 @@ test_that("sub and gsub with ignore.case = TRUE and fixed = TRUE", { ) }) +test_that("sub and gsub with namespacing", { + compare_dplyr_binding( + .input %>% + mutate(verses_new = base::gsub("o", "u", verses, fixed = TRUE)) %>% + collect(), + tbl + ) + + compare_dplyr_binding( + .input %>% + mutate(verses_new = base::sub("o", "u", verses, fixed = TRUE)) %>% + collect(), + tbl + ) +}) + test_that("str_replace and str_replace_all", { df <- tibble(x = c("Foo", "bar")) @@ -463,6 +479,20 @@ test_that("str_replace and str_replace_all", { collect(), df ) + + # with namespacing + compare_dplyr_binding( + .input %>% + transmute(x = stringr::str_replace_all(x, fixed("o"), "u")) %>% + collect(), + df + ) + compare_dplyr_binding( + .input %>% + transmute(x = stringr::str_replace(x, fixed("O"), "u")) %>% + collect(), + df + ) }) test_that("strsplit and str_split", { @@ -526,6 +556,22 @@ test_that("strsplit and str_split", { df, ignore_attr = TRUE ) + + # with namespacing + compare_dplyr_binding( + .input %>% + mutate(x = base::strsplit(x, " +and +")) %>% + collect(), + df, + ignore_attr = TRUE + ) + compare_dplyr_binding( + .input %>% + mutate(x = stringr::str_split(x, "and")) %>% + collect(), + df, + ignore_attr = TRUE + ) }) test_that("strrep and str_dup", { @@ -560,6 +606,18 @@ test_that("str_to_lower, str_to_upper, and str_to_title", { df ) + # with namespacing + compare_dplyr_binding( + .input %>% + transmute( + x_lower = stringr::str_to_lower(x), + x_upper = stringr::str_to_upper(x), + x_title = stringr::str_to_title(x) + ) %>% + collect(), + df + ) + # Error checking a single function because they all use the same code path. expect_error( call_binding("str_to_lower", "Apache Arrow", locale = "sp"), @@ -937,6 +995,14 @@ test_that("str_pad", { collect(), df ) + + # with namespacing + compare_dplyr_binding( + .input %>% + mutate(x = stringr::str_pad(x, width = 31, side = "both")) %>% + collect(), + df + ) }) test_that("substr", { @@ -1014,6 +1080,14 @@ test_that("substr", { call_binding("substr", "Apache Arrow", 1, c(2, 3)), "`stop` must be length 1 - other lengths are not supported in Arrow" ) + + # with namespacing + compare_dplyr_binding( + .input %>% + mutate(y = base::substr(x, -5, -1)) %>% + collect(), + df + ) }) test_that("substring", { @@ -1027,6 +1101,14 @@ test_that("substring", { collect(), df ) + + # with namespacing + compare_dplyr_binding( + .input %>% + mutate(y = base::substring(x, 1, 6)) %>% + collect(), + df + ) }) test_that("str_sub", { @@ -1102,6 +1184,14 @@ test_that("str_sub", { df ) + # with namespacing + compare_dplyr_binding( + .input %>% + mutate(y = stringr::str_sub(x, -5, -1)) %>% + collect(), + df + ) + expect_error( call_binding("str_sub", "Apache Arrow", c(1, 2), 3), "`start` must be length 1 - other lengths are not supported in Arrow" @@ -1404,3 +1494,37 @@ test_that("namespaced unary and binary string functions", { ) } }) + +test_that("nchar with namespacing", { + compare_dplyr_binding( + .input %>% + mutate(verses_nchar = base::nchar(verses)) %>% + collect(), + tbl + ) +}) + +test_that("str_trim", { + compare_dplyr_binding( + .input %>% + mutate( + left_trimmed_padded_string = str_trim(padded_strings, "left"), + right_trimmed_padded_string = str_trim(padded_strings, "right"), + trimmed_padded_string = str_trim(padded_strings, "both") + ) %>% + collect(), + tbl + ) + + # with namespacing + compare_dplyr_binding( + .input %>% + mutate( + left_trimmed_padded_string = stringr::str_trim(padded_strings, "left"), + right_trimmed_padded_string = stringr::str_trim(padded_strings, "right"), + trimmed_padded_string = stringr::str_trim(padded_strings, "both") + ) %>% + collect(), + tbl + ) +}) From b2f81da9668a7bb044af5c6088e7eb78f10f2ab2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Thu, 14 Jul 2022 10:26:27 +0100 Subject: [PATCH 091/129] unit tests for type bindings --- r/tests/testthat/test-dplyr-filter.R | 8 + r/tests/testthat/test-dplyr-funcs-type.R | 204 +++++++++++++++++++++++ 2 files changed, 212 insertions(+) diff --git a/r/tests/testthat/test-dplyr-filter.R b/r/tests/testthat/test-dplyr-filter.R index aed46d801ce..3a45c5fd420 100644 --- a/r/tests/testthat/test-dplyr-filter.R +++ b/r/tests/testthat/test-dplyr-filter.R @@ -239,6 +239,14 @@ test_that("filter() with between()", { filter(between(chr, 1, 2)) %>% collect() ) + + # with namespacing + expect_error( + tbl %>% + record_batch() %>% + filter(dplyr::between(chr, 1, 2)) %>% + collect() + ) }) test_that("filter() with string ops", { diff --git a/r/tests/testthat/test-dplyr-funcs-type.R b/r/tests/testthat/test-dplyr-funcs-type.R index b0ced52901a..88691bfbc14 100644 --- a/r/tests/testthat/test-dplyr-funcs-type.R +++ b/r/tests/testthat/test-dplyr-funcs-type.R @@ -127,6 +127,22 @@ test_that("explicit type conversions with as.*()", { collect(), tibble(chr = c("1", "2", "3")) ) + # with namespacing + compare_dplyr_binding( + .input %>% + transmute( + chr2chr = base::as.character(chr), + chr2dbl = base::as.double(chr), + chr2int = base::as.integer(chr), + chr2num = base::as.numeric(chr), + rchr2chr = base::as.character("string"), + rchr2dbl = base::as.double("1.5"), + rchr2int = base::as.integer("1"), + rchr2num = base::as.numeric("1.5") + ) %>% + collect(), + tibble(chr = c("1", "2", "3")) + ) compare_dplyr_binding( .input %>% transmute( @@ -140,6 +156,20 @@ test_that("explicit type conversions with as.*()", { collect(), tibble(chr = "10000000000", dbl = 10000000000, i64 = as.integer64(1e10)) ) + # with namespacing + compare_dplyr_binding( + .input %>% + transmute( + chr2i64 = bit64::as.integer64(chr), + dbl2i64 = bit64::as.integer64(dbl), + i642i64 = bit64::as.integer64(i64), + rchr2i64 = bit64::as.integer64("10000000000"), + rdbl2i64 = bit64::as.integer64(10000000000), + ri642i64 = bit64::as.integer64(as.integer64(1e10)) + ) %>% + collect(), + tibble(chr = "10000000000", dbl = 10000000000, i64 = as.integer64(1e10)) + ) compare_dplyr_binding( .input %>% transmute( @@ -157,6 +187,24 @@ test_that("explicit type conversions with as.*()", { int = c(1L, 0L, -99L, 0L) ) ) + # with namespacing + compare_dplyr_binding( + .input %>% + transmute( + chr2lgl = base::as.logical(chr), + dbl2lgl = base::as.logical(dbl), + int2lgl = base::as.logical(int), + rchr2lgl = base::as.logical("TRUE"), + rdbl2lgl = base::as.logical(0), + rint2lgl = base::as.logical(1L) + ) %>% + collect(), + tibble( + chr = c("TRUE", "FALSE", "true", "false"), + dbl = c(1, 0, -99, 0), + int = c(1L, 0L, -99L, 0L) + ) + ) compare_dplyr_binding( .input %>% transmute( @@ -213,6 +261,16 @@ test_that("is.finite(), is.infinite(), is.nan()", { collect(), df ) + # with namespacing + compare_dplyr_binding( + .input %>% + transmute( + is_fin = base::is.finite(x), + is_inf = base::is.infinite(x) + ) %>% + collect(), + df + ) # is.nan() evaluates to FALSE on NA_real_ (ARROW-12850) compare_dplyr_binding( .input %>% @@ -222,6 +280,15 @@ test_that("is.finite(), is.infinite(), is.nan()", { collect(), df ) + # with namespacing + compare_dplyr_binding( + .input %>% + transmute( + is_nan = base::is.nan(x) + ) %>% + collect(), + df + ) }) test_that("is.na() evaluates to TRUE on NaN (ARROW-12055)", { @@ -234,6 +301,15 @@ test_that("is.na() evaluates to TRUE on NaN (ARROW-12055)", { collect(), df ) + # with namespacing + compare_dplyr_binding( + .input %>% + transmute( + is_na = base::is.na(x) + ) %>% + collect(), + df + ) }) test_that("type checks with is() giving Arrow types", { @@ -313,6 +389,28 @@ test_that("type checks with is() giving Arrow types", { as.vector(), c(TRUE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, TRUE) ) + # with class2=string and namespacing + expect_equal( + Table$create( + i32 = Array$create(1, int32()), + f64 = Array$create(1.1, float64()), + str = Array$create("a", arrow::string()) + ) %>% transmute( + i32_is_i32 = methods::is(i32, "int32"), + i32_is_i64 = methods::is(i32, "double"), + i32_is_str = methods::is(i32, "string"), + f64_is_i32 = methods::is(f64, "int32"), + f64_is_i64 = methods::is(f64, "double"), + f64_is_str = methods::is(f64, "string"), + str_is_i32 = methods::is(str, "int32"), + str_is_i64 = methods::is(str, "double"), + str_is_str = methods::is(str, "string") + ) %>% + collect() %>% + t() %>% + as.vector(), + c(TRUE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, TRUE) + ) # with class2=string alias expect_equal( Table$create( @@ -480,6 +578,54 @@ test_that("type checks with is.*()", { collect(), tbl ) + # with namespacing + compare_dplyr_binding( + .input %>% + transmute( + chr_is_chr = base::is.character(chr), + chr_is_dbl = base::is.double(chr), + chr_is_fct = base::is.factor(chr), + chr_is_int = base::is.integer(chr), + chr_is_i64 = bit64::is.integer64(chr), + chr_is_lst = base::is.list(chr), + chr_is_lgl = base::is.logical(chr), + chr_is_num = base::is.numeric(chr), + dbl_is_chr = base::is.character(dbl), + dbl_is_dbl = base::is.double(dbl), + dbl_is_fct = base::is.factor(dbl), + dbl_is_int = base::is.integer(dbl), + dbl_is_i64 = bit64::is.integer64(dbl), + dbl_is_lst = base::is.list(dbl), + dbl_is_lgl = base::is.logical(dbl), + dbl_is_num = base::is.numeric(dbl), + fct_is_chr = base::is.character(fct), + fct_is_dbl = base::is.double(fct), + fct_is_fct = base::is.factor(fct), + fct_is_int = base::is.integer(fct), + fct_is_i64 = bit64::is.integer64(fct), + fct_is_lst = base::is.list(fct), + fct_is_lgl = base::is.logical(fct), + fct_is_num = base::is.numeric(fct), + int_is_chr = base::is.character(int), + int_is_dbl = base::is.double(int), + int_is_fct = base::is.factor(int), + int_is_int = base::is.integer(int), + int_is_i64 = bit64::is.integer64(int), + int_is_lst = base::is.list(int), + int_is_lgl = base::is.logical(int), + int_is_num = base::is.numeric(int), + lgl_is_chr = base::is.character(lgl), + lgl_is_dbl = base::is.double(lgl), + lgl_is_fct = base::is.factor(lgl), + lgl_is_int = base::is.integer(lgl), + lgl_is_i64 = bit64::is.integer64(lgl), + lgl_is_lst = base::is.list(lgl), + lgl_is_lgl = base::is.logical(lgl), + lgl_is_num = base::is.numeric(lgl) + ) %>% + collect(), + tbl + ) compare_dplyr_binding( .input %>% transmute( @@ -539,6 +685,34 @@ test_that("type checks with is_*()", { collect(), tbl ) + # with namespacing + compare_dplyr_binding( + .input %>% + transmute( + chr_is_chr = rlang::is_character(chr), + chr_is_dbl = rlang::is_double(chr), + chr_is_int = rlang::is_integer(chr), + chr_is_lst = rlang::is_list(chr), + chr_is_lgl = rlang::is_logical(chr), + dbl_is_chr = rlang::is_character(dbl), + dbl_is_dbl = rlang::is_double(dbl), + dbl_is_int = rlang::is_integer(dbl), + dbl_is_lst = rlang::is_list(dbl), + dbl_is_lgl = rlang::is_logical(dbl), + int_is_chr = rlang::is_character(int), + int_is_dbl = rlang::is_double(int), + int_is_int = rlang::is_integer(int), + int_is_lst = rlang::is_list(int), + int_is_lgl = rlang::is_logical(int), + lgl_is_chr = rlang::is_character(lgl), + lgl_is_dbl = rlang::is_double(lgl), + lgl_is_int = rlang::is_integer(lgl), + lgl_is_lst = rlang::is_list(lgl), + lgl_is_lgl = rlang::is_logical(lgl) + ) %>% + collect(), + tbl + ) }) test_that("type checks on expressions", { @@ -703,6 +877,18 @@ test_that("structs/nested data frames/tibbles can be created", { collect(), df ) + # with namespacing + compare_dplyr_binding( + .input %>% + transmute( + df_col = tibble::tibble( + regular_col1 = regular_col1, + regular_col2 = regular_col2 + ) + ) %>% + collect(), + df + ) # check auto column naming compare_dplyr_binding( @@ -770,6 +956,16 @@ test_that("structs/nested data frames/tibbles can be created", { mutate(df_col = as.data.frame(df_col)), df ) + # with namespacing + compare_dplyr_binding( + .input %>% + transmute( + df_col = base::data.frame(regular_col1, regular_col1, check.names = FALSE) + ) %>% + collect() %>% + mutate(df_col = as.data.frame(df_col)), + df + ) # ...and that other arguments are not supported expect_warning( @@ -836,6 +1032,14 @@ test_that("format date/time", { times ) + # with namespacing + compare_dplyr_binding( + .input %>% + mutate(x = base::format(datetime, format = formats)) %>% + collect(), + times + ) + compare_dplyr_binding( .input %>% mutate(x = format(date, format = formats_date)) %>% From 3d9bad947336311c97800918d18532bce279d82d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Thu, 14 Jul 2022 11:02:07 +0100 Subject: [PATCH 092/129] style --- r/R/dplyr-funcs-datetime.R | 3 +-- r/R/dplyr-funcs-type.R | 10 +++++----- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R index 080ac082e17..7d11cdc1134 100644 --- a/r/R/dplyr-funcs-datetime.R +++ b/r/R/dplyr-funcs-datetime.R @@ -384,8 +384,7 @@ register_bindings_datetime_conversion <- function() { y + sofar$cast(int64()) / total }) - register_binding("lubridate::date_decimal", function(decimal, - tz = "UTC") { + register_binding("lubridate::date_decimal", function(decimal, tz = "UTC") { y <- build_expr("floor", decimal) start <- call_binding("make_datetime", year = y, tz = tz) diff --git a/r/R/dplyr-funcs-type.R b/r/R/dplyr-funcs-type.R index 657dbcbb683..9925d0347f7 100644 --- a/r/R/dplyr-funcs-type.R +++ b/r/R/dplyr-funcs-type.R @@ -125,11 +125,11 @@ register_bindings_type_cast <- function() { }) register_binding("base::data.frame", function(..., - row.names = NULL, - check.rows = NULL, - check.names = TRUE, - fix.empty.names = TRUE, - stringsAsFactors = FALSE) { + row.names = NULL, + check.rows = NULL, + check.names = TRUE, + fix.empty.names = TRUE, + stringsAsFactors = FALSE) { # we need a specific value of stringsAsFactors because the default was # TRUE in R <= 3.6 if (!identical(stringsAsFactors, FALSE)) { From f1dfda491d2370b7bca73a12693eebd1507ed5ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Thu, 14 Jul 2022 11:35:33 +0100 Subject: [PATCH 093/129] comment --- r/R/dplyr-funcs.R | 2 ++ r/R/dplyr-summarize.R | 5 ++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/r/R/dplyr-funcs.R b/r/R/dplyr-funcs.R index c20d759a4e7..49022c9a06b 100644 --- a/r/R/dplyr-funcs.R +++ b/r/R/dplyr-funcs.R @@ -126,6 +126,8 @@ nse_funcs <- new.env(parent = emptyenv()) agg_funcs <- new.env(parent = emptyenv()) .cache <- new.env(parent = emptyenv()) +# we register 2 version of the "::" binding - one for use with nse_funcs (below) +# and another one for use with agg_funcs (in dplyr-summarize.R) register_bindings_utils <- function() { register_binding("::", function(lhs, rhs) { lhs_name <- as.character(substitute(lhs)) diff --git a/r/R/dplyr-summarize.R b/r/R/dplyr-summarize.R index 68059be5fe0..4ac97993b89 100644 --- a/r/R/dplyr-summarize.R +++ b/r/R/dplyr-summarize.R @@ -159,6 +159,8 @@ register_bindings_aggregate <- function() { options = list(skip_nulls = na.rm, min_count = 0L) ) }) + # we register 2 version of the "::" binding - one for use with nse_funcs + # and another one for use with agg_funcs (below) register_binding_agg("::", function(lhs, rhs) { lhs_name <- as.character(substitute(lhs)) rhs_name <- as.character(substitute(rhs)) @@ -167,7 +169,8 @@ register_bindings_aggregate <- function() { # if we do not have a binding for pkg::fun, then fall back on to the # nse_funcs (useful when we have a regular function inside an aggregating one) - # and then regular pkg::fun function + # and then, if searching nse_funcs fails too, fall back to the + # regular `pkg::fun()` function agg_funcs[[fun_name]] %||% nse_funcs[[fun_name]] %||% asNamespace(lhs_name)[[rhs_name]] }) } From be7beb8fdd263894530388ead4eaed8ef556791c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Thu, 14 Jul 2022 11:36:06 +0100 Subject: [PATCH 094/129] comment + remove duplicate test --- r/tests/testthat/test-dplyr-funcs-datetime.R | 84 ++++++++++---------- r/tests/testthat/test-dplyr-funcs-math.R | 4 +- r/tests/testthat/test-dplyr-funcs-string.R | 13 +-- 3 files changed, 45 insertions(+), 56 deletions(-) diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R index 5b7b9d10fb7..cd52b455241 100644 --- a/r/tests/testthat/test-dplyr-funcs-datetime.R +++ b/r/tests/testthat/test-dplyr-funcs-datetime.R @@ -71,7 +71,7 @@ test_that("strptime", { t_stamp_with_pm_tz ) - # namespaced strptime + # with namespacing expect_equal( t_string %>% record_batch() %>% @@ -212,7 +212,7 @@ test_that("strftime", { times ) - # namespaced strftime + # with namespacing compare_dplyr_binding( .input %>% mutate(x = base::strftime(datetime, format = formats)) %>% @@ -304,7 +304,7 @@ test_that("format_ISO8601", { times ) - # namespaced format_ISO8601 + # with namespacing compare_dplyr_binding( .input %>% mutate(x = lubridate::format_ISO8601(x, precision = "ymd", usetz = FALSE)) %>% @@ -372,7 +372,7 @@ test_that("is.* functions from lubridate", { test_df ) - # namespaced is.POSIXct + # with namespacing compare_dplyr_binding( .input %>% mutate( @@ -390,7 +390,7 @@ test_that("is.* functions from lubridate", { test_df ) - # namespaced is.Date + # with namespacing compare_dplyr_binding( .input %>% mutate( @@ -423,7 +423,7 @@ test_that("is.* functions from lubridate", { test_df ) - # namespaced is.timepoint and is.instant + # with namespacing compare_dplyr_binding( .input %>% mutate( @@ -464,7 +464,7 @@ test_that("extract epiyear from timestamp", { test_df ) - # namespaced epiyear + # with namespacing compare_dplyr_binding( .input %>% mutate(x = lubridate::epiyear(datetime)) %>% @@ -490,7 +490,7 @@ test_that("extract month from timestamp", { test_df ) - # namespaced month + # with namespacing compare_dplyr_binding( .input %>% mutate(x = lubridate::month(datetime)) %>% @@ -524,7 +524,7 @@ test_that("extract isoweek from timestamp", { test_df ) - # namespaced isoweek + # with namespacing compare_dplyr_binding( .input %>% mutate(x = lubridate::isoweek(datetime)) %>% @@ -550,7 +550,7 @@ test_that("extract week from timestamp", { test_df ) - # namespaced week + # with namespacing compare_dplyr_binding( .input %>% mutate(x = lubridate::week(datetime)) %>% @@ -624,7 +624,7 @@ test_that("extract yday from timestamp", { test_df ) - # namespaced yday + # with namespacing compare_dplyr_binding( .input %>% mutate(x = lubridate::yday(datetime)) %>% @@ -641,7 +641,7 @@ test_that("extract hour from timestamp", { test_df ) - # namespaced hour + # with namespacing compare_dplyr_binding( .input %>% mutate(x = lubridate::hour(datetime)) %>% @@ -658,7 +658,7 @@ test_that("extract minute from timestamp", { test_df ) - # namespaced minute + # with namespacing compare_dplyr_binding( .input %>% mutate(x = lubridate::minute(datetime)) %>% @@ -677,7 +677,7 @@ test_that("extract second from timestamp", { tolerance = 1e-6 ) - # namespaced second + # with namespacing compare_dplyr_binding( .input %>% mutate(x = lubridate::second(datetime)) %>% @@ -698,7 +698,7 @@ test_that("extract year from date", { test_df ) - # namespaced year + # with namespacing compare_dplyr_binding( .input %>% mutate(x = lubridate::year(date)) %>% @@ -715,7 +715,7 @@ test_that("extract isoyear from date", { test_df ) - # namespaced isoyear + # with namespacing compare_dplyr_binding( .input %>% mutate(x = lubridate::isoyear(date)) %>% @@ -741,7 +741,7 @@ test_that("extract quarter from date", { test_df ) - # namespaced quarter + # with namespacing compare_dplyr_binding( .input %>% mutate(x = lubridate::quarter(date)) %>% @@ -767,7 +767,7 @@ test_that("extract epiweek from date", { test_df ) - # namespaced epiweek + # with namespacing compare_dplyr_binding( .input %>% mutate(x = lubridate::epiweek(date)) %>% @@ -819,7 +819,7 @@ test_that("extract day from date", { test_df ) - # namespaced day + # with namespacing compare_dplyr_binding( .input %>% mutate(x = lubridate::day(date)) %>% @@ -843,7 +843,7 @@ test_that("extract wday from date", { test_df ) - # namespaced wday + # with namespacing compare_dplyr_binding( .input %>% mutate(x = lubridate::wday(date, week_start = 3)) %>% @@ -883,7 +883,7 @@ test_that("extract mday from date", { test_df ) - # namespaced mday + # with namespacing compare_dplyr_binding( .input %>% mutate(x = lubridate::mday(date)) %>% @@ -930,7 +930,7 @@ test_that("leap_year mirror lubridate", { ) ) - # namespaced leap_year + # with namespacing compare_dplyr_binding( .input %>% mutate(x = lubridate::leap_year(date)) %>% @@ -959,7 +959,7 @@ test_that("am/pm mirror lubridate", { ) ) - # namespaced am and pm + # with namespacing compare_dplyr_binding( .input %>% mutate( @@ -992,7 +992,7 @@ test_that("extract tz", { df ) - # namespaced tz + # with namespacing compare_dplyr_binding( .input %>% mutate(timezone_posixct_date = lubridate::tz(posixct_date)) %>% @@ -1044,7 +1044,7 @@ test_that("semester works with temporal types and integers", { test_df ) - # namespaced semester + # with namespacing compare_dplyr_binding( .input %>% mutate( @@ -1084,7 +1084,7 @@ test_that("dst extracts daylight savings time correctly", { test_df ) - # namespaced dst + # with namespacing compare_dplyr_binding( .input %>% mutate(dst = lubridate::dst(dates)) %>% @@ -1263,7 +1263,7 @@ test_that("make_date & make_datetime", { test_df ) - # namespaced make_date + # with namespacing compare_dplyr_binding( .input %>% mutate(composed_date = lubridate::make_date(year, month, day)) %>% @@ -1288,7 +1288,7 @@ test_that("make_date & make_datetime", { ignore_attr = TRUE ) - # namespaced make_datetime + # with namespacing compare_dplyr_binding( .input %>% mutate(composed_datetime = lubridate::make_datetime(year, month, day, hour, min, sec)) %>% @@ -1333,7 +1333,7 @@ test_that("ISO_datetime & ISOdate", { ignore_attr = TRUE ) - # namespaced ISOdate + # with namespacing compare_dplyr_binding( .input %>% mutate(composed_date = base::ISOdate(year, month, day)) %>% @@ -1367,7 +1367,7 @@ test_that("ISO_datetime & ISOdate", { ignore_attr = TRUE ) - # namespaced ISOdatetime + # with namespacing compare_dplyr_binding( .input %>% mutate( @@ -1414,7 +1414,7 @@ test_that("difftime()", { ignore_attr = TRUE ) - # namespaced difftime + # with namespacing compare_dplyr_binding( .input %>% mutate( @@ -1495,7 +1495,7 @@ test_that("as.difftime()", { test_df ) - # namespaced as.difftime + # with namespacing compare_dplyr_binding( .input %>% mutate(hms_difftime = base::as.difftime(hms_string, units = "secs")) %>% @@ -1580,7 +1580,7 @@ test_that("`decimal_date()` and `date_decimal()`", { ignore_attr = "tzone" ) - # namespaced tests + # with namespacing compare_dplyr_binding( .input %>% mutate( @@ -1650,7 +1650,7 @@ test_that("dminutes, dhours, ddays, dweeks, dmonths, dyears", { ignore_attr = TRUE ) - # namespaced dminutes, dhours, ddays, dweeks, dyears + # with namespacing compare_dplyr_binding( .input %>% mutate( @@ -1698,7 +1698,7 @@ test_that("dseconds, dmilliseconds, dmicroseconds, dnanoseconds, dpicoseconds", ignore_attr = TRUE ) - # namespaced dseconds, dmillisecodns, dmicroseconds, dnanoseconds + # with namespacing compare_dplyr_binding( .input %>% mutate( @@ -1800,7 +1800,7 @@ test_that("make_difftime()", { test_df ) - # namespaced make_difftime + # with namespacing compare_dplyr_binding( .input %>% mutate( @@ -1913,7 +1913,7 @@ test_that("`as.Date()` and `as_date()`", { test_df ) - # namespaced as.Date() and as_date + # with namespacing compare_dplyr_binding( .input %>% mutate( @@ -2060,7 +2060,7 @@ test_that("`as_datetime()`", { test_df ) - # namespaced as_datetime + # with namespacing compare_dplyr_binding( .input %>% mutate( @@ -2119,7 +2119,7 @@ test_that("parse_date_time() works with year, month, and date components", { ) ) - # namespaced parse_date_time + # with namespacing compare_dplyr_binding( .input %>% mutate( @@ -2210,7 +2210,7 @@ test_that("year, month, day date/time parsers", { test_df ) - # namespaced individual ymd parsers + # with namespacing compare_dplyr_binding( .input %>% mutate( @@ -2298,7 +2298,7 @@ test_that("ym, my & yq parsers", { test_df ) - # namespaced ym, mq and yq parsers + # with namespacing compare_dplyr_binding( .input %>% mutate( @@ -2331,7 +2331,7 @@ test_that("lubridate's fast_strptime", { ) ) - # namespaced fast_strptime + # with namespacing compare_dplyr_binding( .input %>% mutate( diff --git a/r/tests/testthat/test-dplyr-funcs-math.R b/r/tests/testthat/test-dplyr-funcs-math.R index cfcba5da244..ae3ca5e10b5 100644 --- a/r/tests/testthat/test-dplyr-funcs-math.R +++ b/r/tests/testthat/test-dplyr-funcs-math.R @@ -175,7 +175,7 @@ test_that("log functions", { df ) - # namespaced log + # with namespacing compare_dplyr_binding( .input %>% mutate(y = base::log(x)) %>% @@ -265,7 +265,7 @@ test_that("log functions", { df ) - # namespaced logb + # with namespacing compare_dplyr_binding( .input %>% mutate(y = base::logb(x)) %>% diff --git a/r/tests/testthat/test-dplyr-funcs-string.R b/r/tests/testthat/test-dplyr-funcs-string.R index ba535366d76..84a4d7c8718 100644 --- a/r/tests/testthat/test-dplyr-funcs-string.R +++ b/r/tests/testthat/test-dplyr-funcs-string.R @@ -1378,7 +1378,7 @@ test_that("str_count", { # with namespacing compare_dplyr_binding( .input %>% - mutate(a_count = str_count(cities, pattern = "a")) %>% + mutate(a_count = stringr::str_count(cities, pattern = "a")) %>% collect(), df ) @@ -1430,17 +1430,6 @@ test_that("str_count", { ) }) -test_that("stringi::stri_reverse", { - compare_dplyr_binding( - .input %>% - mutate( - verse_length = stri_reverse(verses) - ) %>% - collect(), - tbl - ) -}) - test_that("base::tolower and base::toupper", { compare_dplyr_binding( .input %>% From 36eb48566280407dcc69b200f9f7d65bdb9104d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Thu, 14 Jul 2022 11:46:46 +0100 Subject: [PATCH 095/129] unit tests for namespaced aggregating bindings --- r/tests/testthat/test-dplyr-summarize.R | 74 ++++++++++++++++++++++++- 1 file changed, 73 insertions(+), 1 deletion(-) diff --git a/r/tests/testthat/test-dplyr-summarize.R b/r/tests/testthat/test-dplyr-summarize.R index 543ea470a1b..3f945db94a8 100644 --- a/r/tests/testthat/test-dplyr-summarize.R +++ b/r/tests/testthat/test-dplyr-summarize.R @@ -107,6 +107,15 @@ test_that("Group by mean on dataset", { collect(), tbl ) + + # with namespacing + compare_dplyr_binding( + .input %>% + group_by(some_grouping) %>% + summarize(mean = base::mean(int, na.rm = TRUE)) %>% + collect(), + tbl + ) }) test_that("Group by sd on dataset", { @@ -125,6 +134,15 @@ test_that("Group by sd on dataset", { collect(), tbl ) + + # with namespacing + compare_dplyr_binding( + .input %>% + group_by(some_grouping) %>% + summarize(sd = stats::sd(int, na.rm = TRUE)) %>% + collect(), + tbl + ) }) test_that("Group by var on dataset", { @@ -143,6 +161,15 @@ test_that("Group by var on dataset", { collect(), tbl ) + + # with namespacing + compare_dplyr_binding( + .input %>% + group_by(some_grouping) %>% + summarize(var = stats::var(int, na.rm = TRUE)) %>% + collect(), + tbl + ) }) test_that("n()", { @@ -161,6 +188,16 @@ test_that("n()", { collect(), tbl ) + + # with namespacing + compare_dplyr_binding( + .input %>% + group_by(some_grouping) %>% + summarize(counts = dplyr::n()) %>% + arrange(some_grouping) %>% + collect(), + tbl + ) }) test_that("Group by any/all", { @@ -216,10 +253,26 @@ test_that("Group by any/all", { collect(), tbl ) + + # with namespacing + compare_dplyr_binding( + .input %>% + group_by(some_grouping) %>% + summarize(base::any(lgl, na.rm = TRUE)) %>% + collect(), + tbl + ) + compare_dplyr_binding( + .input %>% + group_by(some_grouping) %>% + summarize(base::all(lgl, na.rm = TRUE)) %>% + collect(), + tbl + ) }) test_that("n_distinct() on dataset", { - # With groupby + # With group_by compare_dplyr_binding( .input %>% group_by(some_grouping) %>% @@ -247,6 +300,13 @@ test_that("n_distinct() on dataset", { collect(), tbl ) + # with namespacing + compare_dplyr_binding( + .input %>% + summarize(distinct = dplyr::n_distinct(lgl, na.rm = TRUE)) %>% + collect(), + tbl + ) compare_dplyr_binding( .input %>% @@ -605,6 +665,18 @@ test_that("summarize() with min() and max()", { collect(), tbl, ) + + # with namespacing + compare_dplyr_binding( + .input %>% + select(int) %>% + summarize( + min_int = base::min(int, na.rm = TRUE), + max_int = base::max(int, na.rm = TRUE) + ) %>% + collect(), + tbl, + ) }) test_that("min() and max() on character strings", { From f115ef6a14a49b47b1eca2cebd7f27dc27cdf60a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Thu, 14 Jul 2022 12:53:00 +0100 Subject: [PATCH 096/129] comments --- r/tests/testthat/test-dplyr-funcs-conditional.R | 8 ++++---- r/tests/testthat/test-dplyr-funcs-type.R | 2 +- r/tests/testthat/test-dplyr-group-by.R | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/r/tests/testthat/test-dplyr-funcs-conditional.R b/r/tests/testthat/test-dplyr-funcs-conditional.R index 54d47b0bfab..c582678865d 100644 --- a/r/tests/testthat/test-dplyr-funcs-conditional.R +++ b/r/tests/testthat/test-dplyr-funcs-conditional.R @@ -44,7 +44,7 @@ test_that("if_else and ifelse", { tbl ) - # namespaced if_else + # with namespacing compare_dplyr_binding( .input %>% mutate( @@ -81,7 +81,7 @@ test_that("if_else and ifelse", { tbl ) - # namespaced ifelse + # with namespacing compare_dplyr_binding( .input %>% mutate( @@ -212,7 +212,7 @@ test_that("case_when()", { tbl ) - # namespaced case_when + # with namespacing compare_dplyr_binding( .input %>% transmute(cw = dplyr::case_when(chr %in% letters[1:3] ~ 1L) + 41L) %>% @@ -342,7 +342,7 @@ test_that("coalesce()", { df ) - # namespaced coalesce + # with namespacing compare_dplyr_binding( .input %>% mutate( diff --git a/r/tests/testthat/test-dplyr-funcs-type.R b/r/tests/testthat/test-dplyr-funcs-type.R index 88691bfbc14..1c8a5dd8bfb 100644 --- a/r/tests/testthat/test-dplyr-funcs-type.R +++ b/r/tests/testthat/test-dplyr-funcs-type.R @@ -779,7 +779,7 @@ test_that("as.factor()/dictionary_encode()", { df1 ) - # namespaced as.factor + # with namespacing compare_dplyr_binding( .input %>% transmute(x = base::as.factor(x)) %>% diff --git a/r/tests/testthat/test-dplyr-group-by.R b/r/tests/testthat/test-dplyr-group-by.R index 5698d4c6ced..3484e45ebc2 100644 --- a/r/tests/testthat/test-dplyr-group-by.R +++ b/r/tests/testthat/test-dplyr-group-by.R @@ -157,7 +157,7 @@ test_that("group_by with .drop", { ) }) -test_that("group_by() and namespaced functions", { +test_that("group_by() with namespaced functions", { compare_dplyr_binding( .input %>% group_by(int > base::sqrt(25)) %>% From 086529c76ff7d79f32605f42e0261e0a3c2c7e8a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Thu, 14 Jul 2022 14:04:20 +0100 Subject: [PATCH 097/129] comments --- r/tests/testthat/test-dplyr-mutate.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/tests/testthat/test-dplyr-mutate.R b/r/tests/testthat/test-dplyr-mutate.R index 550cd66a409..86322a90590 100644 --- a/r/tests/testthat/test-dplyr-mutate.R +++ b/r/tests/testthat/test-dplyr-mutate.R @@ -523,7 +523,7 @@ test_that("mutate and pmin/pmax", { df ) - # namespaced pmin/pmax + # with namespacing compare_dplyr_binding( .input %>% mutate( From 8760c34ae355519a5677d985b3bdcff177e00263 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Thu, 14 Jul 2022 14:39:13 +0100 Subject: [PATCH 098/129] warn and test for duplicate bindings --- r/R/dplyr-funcs.R | 11 +++++++++++ r/tests/testthat/test-dplyr-funcs.R | 6 ++++++ 2 files changed, 17 insertions(+) diff --git a/r/R/dplyr-funcs.R b/r/R/dplyr-funcs.R index 49022c9a06b..594bd42c5a6 100644 --- a/r/R/dplyr-funcs.R +++ b/r/R/dplyr-funcs.R @@ -67,6 +67,17 @@ register_binding <- function(fun_name, fun, registry = nse_funcs) { previous_fun <- if (unqualified_name %in% names(registry)) registry[[unqualified_name]] else NULL + # if th unqualified name exists in the register, warn + if(!is.null(fun) && !is.null(previous_fun)) { + warn( + paste0( + "A \"", + unqualified_name, + "\" binding already exists in the register and will be overwritten.") + ) + } + + # if fun is NULL remove entries from the function registry if (is.null(fun) && !is.null(previous_fun)) { rm(list = c(unqualified_name, qualified_name), envir = registry, inherits = FALSE) # register both as `pkg::fun` and as `fun` if `qualified_name` is prefixed diff --git a/r/tests/testthat/test-dplyr-funcs.R b/r/tests/testthat/test-dplyr-funcs.R index 28a3ac2579a..95b2e87b9ca 100644 --- a/r/tests/testthat/test-dplyr-funcs.R +++ b/r/tests/testthat/test-dplyr-funcs.R @@ -18,6 +18,7 @@ test_that("register_binding() works", { fake_registry <- new.env(parent = emptyenv()) fun1 <- function() NULL + fun2 <- function() "Hello" expect_null(register_binding("some.pkg::some_fun", fun1, fake_registry)) expect_identical(fake_registry$some_fun, fun1) @@ -29,6 +30,11 @@ test_that("register_binding() works", { expect_null(register_binding("somePkg::some_fun", fun1, fake_registry)) expect_identical(fake_registry$some_fun, fun1) + + expect_warning( + register_binding("some.pkg2::some_fun", fun2, fake_registry), + "A \"some_fun\" binding already exists in the register and will be overwritten." + ) }) test_that("register_binding_agg() works", { From e3b1e8ff47d20c832aa647db5dd1d42b0bc667b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Thu, 14 Jul 2022 14:52:52 +0100 Subject: [PATCH 099/129] short paragraph in "Writing bindings" --- r/vignettes/developers/bindings.Rmd | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/r/vignettes/developers/bindings.Rmd b/r/vignettes/developers/bindings.Rmd index 95dc5c9f61e..ce253390492 100644 --- a/r/vignettes/developers/bindings.Rmd +++ b/r/vignettes/developers/bindings.Rmd @@ -191,11 +191,11 @@ As `startsWith()` requires options, direct mapping is not appropriate. If the function cannot be mapped directly, some extra work may be needed to ensure that calling the arrow version of the function results in the same result as calling the R version of the function. In this case, the function will need -adding to the `nse_funcs` list in `arrow/r/R/dplyr-functions.R`. Here is how -this might look for `startsWith()`: +adding to the `nse_funcs` function register. Here is how this might look for +`startsWith()`: ```{r, eval = FALSE} -register_binding("startsWith", function(x, prefix) { +register_binding("base::startsWith", function(x, prefix) { Expression$create( "starts_with", x, @@ -211,6 +211,15 @@ closest analog to the function whose binding is being defined and define the new binding in a similar location. For example, the binding for `startsWith()` is registered in `dplyr-funcs-string.R` next to the binding for `endsWith()`. +Note: we use the namespace-qualified name (i.e. `"base::startsWith"`) for a +binding. This will register the same binding both as `startsWith()` and as +`base::startsWith()`, which will allow us to use the `pkg::` prefix in a call. + +```{r} +arrow_table(starwars) %>% + filter(stringr::str_detect(name, "Darth")) +``` + Hint: you can use `call_function()` to call a compute function directly from R. This might be useful if you want to experiment with a compute function while you're writing bindings for it, e.g. From 34bf4b61cf339e22f7d5c2860a3fcaf5e9ef8b0e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Thu, 14 Jul 2022 15:15:59 +0100 Subject: [PATCH 100/129] lint --- r/R/dplyr-funcs.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/R/dplyr-funcs.R b/r/R/dplyr-funcs.R index 594bd42c5a6..cfe2085b463 100644 --- a/r/R/dplyr-funcs.R +++ b/r/R/dplyr-funcs.R @@ -68,7 +68,7 @@ register_binding <- function(fun_name, fun, registry = nse_funcs) { previous_fun <- if (unqualified_name %in% names(registry)) registry[[unqualified_name]] else NULL # if th unqualified name exists in the register, warn - if(!is.null(fun) && !is.null(previous_fun)) { + if (!is.null(fun) && !is.null(previous_fun)) { warn( paste0( "A \"", From 46f6f6a72f221213de75d80c442d9cc31201b3dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Thu, 14 Jul 2022 21:09:46 +0100 Subject: [PATCH 101/129] remove `arrow:::` from the `all_funs()` test --- r/tests/testthat/test-util.R | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/r/tests/testthat/test-util.R b/r/tests/testthat/test-util.R index 13710863268..15aece7c3fe 100644 --- a/r/tests/testthat/test-util.R +++ b/r/tests/testthat/test-util.R @@ -42,31 +42,31 @@ test_that("as_writable_table() errors for invalid input", { test_that("all_funs() identifies namespace-qualified and unqualified functions", { expect_equal( - arrow:::all_funs(rlang::quo(pkg::fun())), + all_funs(rlang::quo(pkg::fun())), "pkg::fun" ) expect_equal( - arrow:::all_funs(rlang::quo(pkg::fun(other_pkg::obj))), + all_funs(rlang::quo(pkg::fun(other_pkg::obj))), "pkg::fun" ) expect_equal( - arrow:::all_funs(rlang::quo(other_fun(pkg::fun()))), + all_funs(rlang::quo(other_fun(pkg::fun()))), c("other_fun", "pkg::fun") ) expect_equal( - arrow:::all_funs(rlang::quo(other_pkg::other_fun(pkg::fun()))), + all_funs(rlang::quo(other_pkg::other_fun(pkg::fun()))), c("other_pkg::other_fun", "pkg::fun") ) expect_equal( - arrow:::all_funs(rlang::quo(other_pkg::other_fun(pkg::fun(sum(base::log()))))), + all_funs(rlang::quo(other_pkg::other_fun(pkg::fun(sum(base::log()))))), c("other_pkg::other_fun", "pkg::fun", "sum", "base::log") ) expect_equal( - arrow:::all_funs(rlang::quo(other_fun(fun(sum(log()))))), + all_funs(rlang::quo(other_fun(fun(sum(log()))))), c("other_fun", "fun", "sum", "log") ) expect_equal( - arrow:::all_funs(rlang::quo(other_fun(fun(sum(base::log()))))), + all_funs(rlang::quo(other_fun(fun(sum(base::log()))))), c("other_fun", "fun", "sum", "base::log") ) }) From 7cd221bf8a5b1e32695db7756dc7990e47abcf46 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Thu, 14 Jul 2022 21:11:13 +0100 Subject: [PATCH 102/129] typo --- r/vignettes/developers/bindings.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/vignettes/developers/bindings.Rmd b/r/vignettes/developers/bindings.Rmd index ce253390492..efe729c5f5c 100644 --- a/r/vignettes/developers/bindings.Rmd +++ b/r/vignettes/developers/bindings.Rmd @@ -191,7 +191,7 @@ As `startsWith()` requires options, direct mapping is not appropriate. If the function cannot be mapped directly, some extra work may be needed to ensure that calling the arrow version of the function results in the same result as calling the R version of the function. In this case, the function will need -adding to the `nse_funcs` function register. Here is how this might look for +adding to the `nse_funcs` function registry. Here is how this might look for `startsWith()`: ```{r, eval = FALSE} From b23561785c551e31bec042b463d127891d16a11d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Thu, 14 Jul 2022 21:18:35 +0100 Subject: [PATCH 103/129] simplify `stats::quantile` if statement --- r/R/dplyr-summarize.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/R/dplyr-summarize.R b/r/R/dplyr-summarize.R index 4ac97993b89..cde4936f9c7 100644 --- a/r/R/dplyr-summarize.R +++ b/r/R/dplyr-summarize.R @@ -362,7 +362,7 @@ summarize_eval <- function(name, quosure, ctx, hash) { # the list output from the Arrow hash_tdigest kernel to flatten it into a # column of type float64. We do that by modifying the unevaluated expression # to replace quantile(...) with arrow_list_element(quantile(...), 0L) - if (hash && ("quantile" %in% funs_in_expr || "stats::quantile" %in% funs_in_expr)) { + if (hash && any(c("quantile", "stats::quantile") %in% funs_in_expr)) { expr <- wrap_hash_quantile(expr) funs_in_expr <- all_funs(expr) } From d05ac0c39f96d233cdd26983cdf914a379117fc5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Thu, 14 Jul 2022 21:33:11 +0100 Subject: [PATCH 104/129] keep a single `dplyr::case_when()` test --- r/tests/testthat/test-dplyr-funcs-conditional.R | 7 ------- 1 file changed, 7 deletions(-) diff --git a/r/tests/testthat/test-dplyr-funcs-conditional.R b/r/tests/testthat/test-dplyr-funcs-conditional.R index c582678865d..ce7543b8050 100644 --- a/r/tests/testthat/test-dplyr-funcs-conditional.R +++ b/r/tests/testthat/test-dplyr-funcs-conditional.R @@ -213,13 +213,6 @@ test_that("case_when()", { ) # with namespacing - compare_dplyr_binding( - .input %>% - transmute(cw = dplyr::case_when(chr %in% letters[1:3] ~ 1L) + 41L) %>% - collect(), - tbl - ) - compare_dplyr_binding( .input %>% filter(dplyr::case_when( From 707fbd215b56d3306dfb55144bb1b29f094cb4e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Thu, 14 Jul 2022 21:33:57 +0100 Subject: [PATCH 105/129] simplify the registration of the `"::"` binding and the `register_binding()` function --- r/R/dplyr-funcs.R | 27 ++++++++++----------------- r/R/dplyr-summarize.R | 29 +++++++++++++++-------------- 2 files changed, 25 insertions(+), 31 deletions(-) diff --git a/r/R/dplyr-funcs.R b/r/R/dplyr-funcs.R index cfe2085b463..c6462e1413a 100644 --- a/r/R/dplyr-funcs.R +++ b/r/R/dplyr-funcs.R @@ -59,13 +59,9 @@ NULL #' register_binding <- function(fun_name, fun, registry = nse_funcs) { qualified_name <- fun_name - if (qualified_name == "::") { - unqualified_name <- "::" - } else { - unqualified_name <- gsub("^.*?::", "", qualified_name) - } + unqualified_name <- gsub("^.*?::", "", qualified_name) - previous_fun <- if (unqualified_name %in% names(registry)) registry[[unqualified_name]] else NULL + previous_fun <- registry[[unqualified_name]] # if th unqualified name exists in the register, warn if (!is.null(fun) && !is.null(previous_fun)) { @@ -126,7 +122,6 @@ create_binding_cache <- function() { register_bindings_math() register_bindings_string() register_bindings_type() - register_bindings_utils() # We only create the cache for nse_funcs and not agg_funcs .cache$functions <- c(as.list(nse_funcs), arrow_funcs) @@ -137,17 +132,15 @@ nse_funcs <- new.env(parent = emptyenv()) agg_funcs <- new.env(parent = emptyenv()) .cache <- new.env(parent = emptyenv()) -# we register 2 version of the "::" binding - one for use with nse_funcs (below) +# we register 2 versions of the "::" binding - one for use with nse_funcs (below) # and another one for use with agg_funcs (in dplyr-summarize.R) -register_bindings_utils <- function() { - register_binding("::", function(lhs, rhs) { - lhs_name <- as.character(substitute(lhs)) - rhs_name <- as.character(substitute(rhs)) +nse_funcs[["::"]] <-function(lhs, rhs) { + lhs_name <- as.character(substitute(lhs)) + rhs_name <- as.character(substitute(rhs)) - fun_name <- paste0(lhs_name, "::", rhs_name) + fun_name <- paste0(lhs_name, "::", rhs_name) - # if we do not have a binding for pkg::fun, then fall back on to the - # regular pkg::fun function - nse_funcs[[fun_name]] %||% asNamespace(lhs_name)[[rhs_name]] - }) + # if we do not have a binding for pkg::fun, then fall back on to the + # regular pkg::fun function + nse_funcs[[fun_name]] %||% asNamespace(lhs_name)[[rhs_name]] } diff --git a/r/R/dplyr-summarize.R b/r/R/dplyr-summarize.R index cde4936f9c7..2380b15b2df 100644 --- a/r/R/dplyr-summarize.R +++ b/r/R/dplyr-summarize.R @@ -159,20 +159,21 @@ register_bindings_aggregate <- function() { options = list(skip_nulls = na.rm, min_count = 0L) ) }) - # we register 2 version of the "::" binding - one for use with nse_funcs - # and another one for use with agg_funcs (below) - register_binding_agg("::", function(lhs, rhs) { - lhs_name <- as.character(substitute(lhs)) - rhs_name <- as.character(substitute(rhs)) - - fun_name <- paste0(lhs_name, "::", rhs_name) - - # if we do not have a binding for pkg::fun, then fall back on to the - # nse_funcs (useful when we have a regular function inside an aggregating one) - # and then, if searching nse_funcs fails too, fall back to the - # regular `pkg::fun()` function - agg_funcs[[fun_name]] %||% nse_funcs[[fun_name]] %||% asNamespace(lhs_name)[[rhs_name]] - }) +} + +# we register 2 version of the "::" binding - one for use with nse_funcs +# and another one for use with agg_funcs (below) +agg_funcs[["::"]]<- function(lhs, rhs) { + lhs_name <- as.character(substitute(lhs)) + rhs_name <- as.character(substitute(rhs)) + + fun_name <- paste0(lhs_name, "::", rhs_name) + + # if we do not have a binding for pkg::fun, then fall back on to the + # nse_funcs (useful when we have a regular function inside an aggregating one) + # and then, if searching nse_funcs fails too, fall back to the + # regular `pkg::fun()` function + agg_funcs[[fun_name]] %||% nse_funcs[[fun_name]] %||% asNamespace(lhs_name)[[rhs_name]] } # The following S3 methods are registered on load if dplyr is present From 75f32463a2acbd4aece9fb6f78bf8909bb693739 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Thu, 14 Jul 2022 21:34:50 +0100 Subject: [PATCH 106/129] typo --- r/R/dplyr-funcs.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/R/dplyr-funcs.R b/r/R/dplyr-funcs.R index c6462e1413a..294bdbbef81 100644 --- a/r/R/dplyr-funcs.R +++ b/r/R/dplyr-funcs.R @@ -63,7 +63,7 @@ register_binding <- function(fun_name, fun, registry = nse_funcs) { previous_fun <- registry[[unqualified_name]] - # if th unqualified name exists in the register, warn + # if the unqualified name exists in the register, warn if (!is.null(fun) && !is.null(previous_fun)) { warn( paste0( From 4a3a8b74c8b1bdffa6a6c0f278be0bfc9af1ef15 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Thu, 14 Jul 2022 21:39:21 +0100 Subject: [PATCH 107/129] change to a `compare_dplyr_binding()` test for `dplyr::between()` --- r/tests/testthat/test-dplyr-filter.R | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/r/tests/testthat/test-dplyr-filter.R b/r/tests/testthat/test-dplyr-filter.R index 3a45c5fd420..ca21ca4ca3a 100644 --- a/r/tests/testthat/test-dplyr-filter.R +++ b/r/tests/testthat/test-dplyr-filter.R @@ -241,11 +241,11 @@ test_that("filter() with between()", { ) # with namespacing - expect_error( - tbl %>% - record_batch() %>% - filter(dplyr::between(chr, 1, 2)) %>% - collect() + compare_dplyr_binding( + .input %>% + filter(dplyr::between(dbl, 1, 2)) %>% + collect(), + tbl ) }) From d68d173f55d12d6cdbe6e4925bbe4c985537f1a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Thu, 14 Jul 2022 22:30:43 +0100 Subject: [PATCH 108/129] shrink datetime unit tests --- r/tests/testthat/test-dplyr-funcs-datetime.R | 616 +++++-------------- 1 file changed, 148 insertions(+), 468 deletions(-) diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R index cd52b455241..edb393d9b56 100644 --- a/r/tests/testthat/test-dplyr-funcs-datetime.R +++ b/r/tests/testthat/test-dplyr-funcs-datetime.R @@ -71,7 +71,6 @@ test_that("strptime", { t_stamp_with_pm_tz ) - # with namespacing expect_equal( t_string %>% record_batch() %>% @@ -207,15 +206,10 @@ test_that("strftime", { compare_dplyr_binding( .input %>% - mutate(x = strftime(datetime, format = formats)) %>% - collect(), - times - ) - - # with namespacing - compare_dplyr_binding( - .input %>% - mutate(x = base::strftime(datetime, format = formats)) %>% + mutate( + x = strftime(datetime, format = formats), + x2 = base::strftime(datetime, format = formats) + ) %>% collect(), times ) @@ -299,15 +293,18 @@ test_that("format_ISO8601", { compare_dplyr_binding( .input %>% - mutate(x = format_ISO8601(x, precision = "ymd", usetz = FALSE)) %>% + mutate( + x = format_ISO8601(x, precision = "ymd", usetz = FALSE) + ) %>% collect(), times ) - # with namespacing compare_dplyr_binding( .input %>% - mutate(x = lubridate::format_ISO8601(x, precision = "ymd", usetz = FALSE)) %>% + mutate( + x = lubridate::format_ISO8601(x, precision = "ymd", usetz = FALSE) + ) %>% collect(), times ) @@ -365,37 +362,25 @@ test_that("format_ISO8601", { test_that("is.* functions from lubridate", { # make sure all true and at least one false value is considered - compare_dplyr_binding( - .input %>% - mutate(x = is.POSIXct(datetime), y = is.POSIXct(integer)) %>% - collect(), - test_df - ) - - # with namespacing compare_dplyr_binding( .input %>% mutate( - x = lubridate::is.POSIXct(datetime), - y = lubridate::is.POSIXct(integer) + x = is.POSIXct(datetime), + y = is.POSIXct(integer), + x2 = lubridate::is.POSIXct(datetime), + y2 = lubridate::is.POSIXct(integer) ) %>% collect(), test_df ) - compare_dplyr_binding( - .input %>% - mutate(x = is.Date(date), y = is.Date(integer)) %>% - collect(), - test_df - ) - - # with namespacing compare_dplyr_binding( .input %>% mutate( - x = lubridate::is.Date(date), - y = lubridate::is.Date(integer) + x = is.Date(date), + y = is.Date(integer), + x2 = lubridate::is.Date(date), + y2 = lubridate::is.Date(integer) ) %>% collect(), test_df @@ -417,19 +402,10 @@ test_that("is.* functions from lubridate", { mutate( x = is.timepoint(datetime), y = is.instant(date), - z = is.timepoint(integer) - ) %>% - collect(), - test_df - ) - - # with namespacing - compare_dplyr_binding( - .input %>% - mutate( - x = lubridate::is.timepoint(datetime), - y = lubridate::is.instant(date), - z = lubridate::is.timepoint(integer) + z = is.timepoint(integer), + x2 = lubridate::is.timepoint(datetime), + y2 = lubridate::is.instant(date), + z2 = lubridate::is.timepoint(integer) ) %>% collect(), test_df @@ -459,15 +435,10 @@ test_that("extract isoyear from timestamp", { test_that("extract epiyear from timestamp", { compare_dplyr_binding( .input %>% - mutate(x = epiyear(datetime)) %>% - collect(), - test_df - ) - - # with namespacing - compare_dplyr_binding( - .input %>% - mutate(x = lubridate::epiyear(datetime)) %>% + mutate( + x = epiyear(datetime), + x2 = lubridate::epiyear(datetime) + ) %>% collect(), test_df ) @@ -485,15 +456,10 @@ test_that("extract quarter from timestamp", { test_that("extract month from timestamp", { compare_dplyr_binding( .input %>% - mutate(x = month(datetime)) %>% - collect(), - test_df - ) - - # with namespacing - compare_dplyr_binding( - .input %>% - mutate(x = lubridate::month(datetime)) %>% + mutate( + x = month(datetime), + x2 = lubridate::month(datetime) + ) %>% collect(), test_df ) @@ -519,15 +485,10 @@ test_that("extract month from timestamp", { test_that("extract isoweek from timestamp", { compare_dplyr_binding( .input %>% - mutate(x = isoweek(datetime)) %>% - collect(), - test_df - ) - - # with namespacing - compare_dplyr_binding( - .input %>% - mutate(x = lubridate::isoweek(datetime)) %>% + mutate( + x = isoweek(datetime), + x2 = lubridate::isoweek(datetime) + ) %>% collect(), test_df ) @@ -545,15 +506,10 @@ test_that("extract epiweek from timestamp", { test_that("extract week from timestamp", { compare_dplyr_binding( .input %>% - mutate(x = week(datetime)) %>% - collect(), - test_df - ) - - # with namespacing - compare_dplyr_binding( - .input %>% - mutate(x = lubridate::week(datetime)) %>% + mutate( + x = week(datetime), + x = lubridate::week(datetime) + ) %>% collect(), test_df ) @@ -619,15 +575,10 @@ test_that("extract mday from timestamp", { test_that("extract yday from timestamp", { compare_dplyr_binding( .input %>% - mutate(x = yday(datetime)) %>% - collect(), - test_df - ) - - # with namespacing - compare_dplyr_binding( - .input %>% - mutate(x = lubridate::yday(datetime)) %>% + mutate( + x = yday(datetime), + x2 = lubridate::yday(datetime) + ) %>% collect(), test_df ) @@ -636,15 +587,10 @@ test_that("extract yday from timestamp", { test_that("extract hour from timestamp", { compare_dplyr_binding( .input %>% - mutate(x = hour(datetime)) %>% - collect(), - test_df - ) - - # with namespacing - compare_dplyr_binding( - .input %>% - mutate(x = lubridate::hour(datetime)) %>% + mutate( + x = hour(datetime), + x2 = lubridate::hour(datetime) + ) %>% collect(), test_df ) @@ -653,15 +599,10 @@ test_that("extract hour from timestamp", { test_that("extract minute from timestamp", { compare_dplyr_binding( .input %>% - mutate(x = minute(datetime)) %>% - collect(), - test_df - ) - - # with namespacing - compare_dplyr_binding( - .input %>% - mutate(x = lubridate::minute(datetime)) %>% + mutate( + x = minute(datetime), + x2 = lubridate::minute(datetime) + ) %>% collect(), test_df ) @@ -670,17 +611,10 @@ test_that("extract minute from timestamp", { test_that("extract second from timestamp", { compare_dplyr_binding( .input %>% - mutate(x = second(datetime)) %>% - collect(), - test_df, - # arrow supports nanosecond resolution but lubridate does not - tolerance = 1e-6 - ) - - # with namespacing - compare_dplyr_binding( - .input %>% - mutate(x = lubridate::second(datetime)) %>% + mutate( + x = second(datetime), + x2 = lubridate::second(datetime) + ) %>% collect(), test_df, # arrow supports nanosecond resolution but lubridate does not @@ -693,15 +627,10 @@ test_that("extract second from timestamp", { test_that("extract year from date", { compare_dplyr_binding( .input %>% - mutate(x = year(date)) %>% - collect(), - test_df - ) - - # with namespacing - compare_dplyr_binding( - .input %>% - mutate(x = lubridate::year(date)) %>% + mutate( + x = year(date), + x2 = lubridate::year(date) + ) %>% collect(), test_df ) @@ -710,15 +639,10 @@ test_that("extract year from date", { test_that("extract isoyear from date", { compare_dplyr_binding( .input %>% - mutate(x = isoyear(date)) %>% - collect(), - test_df - ) - - # with namespacing - compare_dplyr_binding( - .input %>% - mutate(x = lubridate::isoyear(date)) %>% + mutate( + x = isoyear(date), + x2 = lubridate::isoyear(date) + ) %>% collect(), test_df ) @@ -736,15 +660,10 @@ test_that("extract epiyear from date", { test_that("extract quarter from date", { compare_dplyr_binding( .input %>% - mutate(x = quarter(date)) %>% - collect(), - test_df - ) - - # with namespacing - compare_dplyr_binding( - .input %>% - mutate(x = lubridate::quarter(date)) %>% + mutate( + x = quarter(date), + x2 = lubridate::quarter(date) + ) %>% collect(), test_df ) @@ -762,15 +681,10 @@ test_that("extract isoweek from date", { test_that("extract epiweek from date", { compare_dplyr_binding( .input %>% - mutate(x = epiweek(date)) %>% - collect(), - test_df - ) - - # with namespacing - compare_dplyr_binding( - .input %>% - mutate(x = lubridate::epiweek(date)) %>% + mutate( + x = epiweek(date), + x2 = lubridate::epiweek(date) + ) %>% collect(), test_df ) @@ -814,15 +728,10 @@ test_that("extract month from date", { test_that("extract day from date", { compare_dplyr_binding( .input %>% - mutate(x = day(date)) %>% - collect(), - test_df - ) - - # with namespacing - compare_dplyr_binding( - .input %>% - mutate(x = lubridate::day(date)) %>% + mutate( + x = day(date), + x2 = lubridate::day(date) + ) %>% collect(), test_df ) @@ -838,15 +747,10 @@ test_that("extract wday from date", { compare_dplyr_binding( .input %>% - mutate(x = wday(date, week_start = 3)) %>% - collect(), - test_df - ) - - # with namespacing - compare_dplyr_binding( - .input %>% - mutate(x = lubridate::wday(date, week_start = 3)) %>% + mutate( + x = wday(date, week_start = 3), + x2 = lubridate::wday(date, week_start = 3) + ) %>% collect(), test_df ) @@ -878,15 +782,10 @@ test_that("extract wday from date", { test_that("extract mday from date", { compare_dplyr_binding( .input %>% - mutate(x = mday(date)) %>% - collect(), - test_df - ) - - # with namespacing - compare_dplyr_binding( - .input %>% - mutate(x = lubridate::mday(date)) %>% + mutate( + x = mday(date), + x2 = lubridate::mday(date) + ) %>% collect(), test_df ) @@ -904,7 +803,10 @@ test_that("extract yday from date", { test_that("leap_year mirror lubridate", { compare_dplyr_binding( .input %>% - mutate(x = leap_year(date)) %>% + mutate( + x = leap_year(date), + x2 = lubridate::leap_year(date) + ) %>% collect(), test_df ) @@ -929,14 +831,6 @@ test_that("leap_year mirror lubridate", { )) ) ) - - # with namespacing - compare_dplyr_binding( - .input %>% - mutate(x = lubridate::leap_year(date)) %>% - collect(), - test_df - ) }) test_that("am/pm mirror lubridate", { @@ -944,27 +838,9 @@ test_that("am/pm mirror lubridate", { .input %>% mutate( am = am(test_time), - pm = pm(test_time) - ) %>% - collect(), - data.frame( - test_time = strptime( - x = c( - "2022-01-25 11:50:59", - "2022-01-25 12:00:00", - "2022-01-25 00:00:00" - ), - format = "%Y-%m-%d %H:%M:%S" - ) - ) - ) - - # with namespacing - compare_dplyr_binding( - .input %>% - mutate( - am = lubridate::am(test_time), - pm = lubridate::pm(test_time) + pm = pm(test_time), + am2 = lubridate::am(test_time), + pm2 = lubridate::pm(test_time) ) %>% collect(), data.frame( @@ -987,15 +863,10 @@ test_that("extract tz", { compare_dplyr_binding( .input %>% - mutate(timezone_posixct_date = tz(posixct_date)) %>% - collect(), - df - ) - - # with namespacing - compare_dplyr_binding( - .input %>% - mutate(timezone_posixct_date = lubridate::tz(posixct_date)) %>% + mutate( + timezone_posixct_date = tz(posixct_date), + timezone_posixct_date2 = lubridate::tz(posixct_date) + ) %>% collect(), df ) @@ -1038,23 +909,13 @@ test_that("semester works with temporal types and integers", { .input %>% mutate( sem_wo_year = semester(dates), + sem_wo_year2 = lubridate::semester(dates), sem_w_year = semester(dates, with_year = TRUE) ) %>% collect(), test_df ) - # with namespacing - compare_dplyr_binding( - .input %>% - mutate( - sem_wo_year = lubridate::semester(dates), - sem_w_year = lubridate::semester(dates, with_year = TRUE) - ) %>% - collect(), - test_df - ) - compare_dplyr_binding( .input %>% mutate(sem_month_as_int = semester(month_as_int)) %>% @@ -1079,15 +940,10 @@ test_that("dst extracts daylight savings time correctly", { compare_dplyr_binding( .input %>% - mutate(dst = dst(dates)) %>% - collect(), - test_df - ) - - # with namespacing - compare_dplyr_binding( - .input %>% - mutate(dst = lubridate::dst(dates)) %>% + mutate( + dst = dst(dates), + dst2 = lubridate::dst(dates) + ) %>% collect(), test_df ) @@ -1258,15 +1114,10 @@ test_that("make_date & make_datetime", { compare_dplyr_binding( .input %>% - mutate(composed_date = make_date(year, month, day)) %>% - collect(), - test_df - ) - - # with namespacing - compare_dplyr_binding( - .input %>% - mutate(composed_date = lubridate::make_date(year, month, day)) %>% + mutate( + composed_date = make_date(year, month, day), + composed_date2 = lubridate::make_date(year, month, day) + ) %>% collect(), test_df ) @@ -1280,18 +1131,10 @@ test_that("make_date & make_datetime", { compare_dplyr_binding( .input %>% - mutate(composed_datetime = make_datetime(year, month, day, hour, min, sec)) %>% - collect(), - test_df, - # the make_datetime binding uses strptime which does not support tz, hence - # a mismatch in tzone attribute (ARROW-12820) - ignore_attr = TRUE - ) - - # with namespacing - compare_dplyr_binding( - .input %>% - mutate(composed_datetime = lubridate::make_datetime(year, month, day, hour, min, sec)) %>% + mutate( + composed_datetime = make_datetime(year, month, day, hour, min, sec), + composed_datetime2 = lubridate::make_datetime(year, month, day, hour, min, sec) + ) %>% collect(), test_df, # the make_datetime binding uses strptime which does not support tz, hence @@ -1325,18 +1168,10 @@ test_that("ISO_datetime & ISOdate", { compare_dplyr_binding( .input %>% - mutate(composed_date = ISOdate(year, month, day)) %>% - collect(), - test_df, - # the make_datetime binding uses strptime which does not support tz, hence - # a mismatch in tzone attribute (ARROW-12820) - ignore_attr = TRUE - ) - - # with namespacing - compare_dplyr_binding( - .input %>% - mutate(composed_date = base::ISOdate(year, month, day)) %>% + mutate( + composed_date = ISOdate(year, month, day), + composed_date2 = base::ISOdate(year, month, day) + ) %>% collect(), test_df, # the make_datetime binding uses strptime which does not support tz, hence @@ -1358,20 +1193,8 @@ test_that("ISO_datetime & ISOdate", { compare_dplyr_binding( .input %>% mutate( - composed_datetime = ISOdatetime(year, month, day, hour, min, sec, tz = "UTC") - ) %>% - collect(), - test_df, - # the make_datetime binding uses strptime which does not support tz, hence - # a mismatch in tzone attribute (ARROW-12820) - ignore_attr = TRUE - ) - - # with namespacing - compare_dplyr_binding( - .input %>% - mutate( - composed_datetime = base::ISOdatetime(year, month, day, hour, min, sec, tz = "UTC") + composed_datetime = ISOdatetime(year, month, day, hour, min, sec, tz = "UTC"), + composed_datetime2 = base::ISOdatetime(year, month, day, hour, min, sec, tz = "UTC") ) %>% collect(), test_df, @@ -1407,17 +1230,7 @@ test_that("difftime()", { compare_dplyr_binding( .input %>% mutate( - secs2 = difftime(time1, time2, units = "secs") - ) %>% - collect(), - test_df, - ignore_attr = TRUE - ) - - # with namespacing - compare_dplyr_binding( - .input %>% - mutate( + secs = difftime(time1, time2, units = "secs"), secs2 = base::difftime(time1, time2, units = "secs") ) %>% collect(), @@ -1490,15 +1303,10 @@ test_that("as.difftime()", { compare_dplyr_binding( .input %>% - mutate(hms_difftime = as.difftime(hms_string, units = "secs")) %>% - collect(), - test_df - ) - - # with namespacing - compare_dplyr_binding( - .input %>% - mutate(hms_difftime = base::as.difftime(hms_string, units = "secs")) %>% + mutate( + hms_difftime = as.difftime(hms_string, units = "secs"), + hms_difftime2 = base::as.difftime(hms_string, units = "secs") + ) %>% collect(), test_df ) @@ -1569,32 +1377,18 @@ test_that("`decimal_date()` and `date_decimal()`", { .input %>% mutate( decimal_date_from_POSIXct = decimal_date(b), + decimal_date_from_POSIXct2 = lubridate::decimal_date(b), decimal_date_from_r_POSIXct_obj = decimal_date(as.POSIXct("2022-03-25 15:37:01")), decimal_date_from_r_date_obj = decimal_date(as.Date("2022-03-25")), decimal_date_from_date = decimal_date(c), date_from_decimal = date_decimal(a), + date_from_decimal2 = lubridate::date_decimal(a), date_from_decimal_r_obj = date_decimal(2022.178) ) %>% collect(), test_df, ignore_attr = "tzone" ) - - # with namespacing - compare_dplyr_binding( - .input %>% - mutate( - decimal_date_from_POSIXct = lubridate::decimal_date(b), - decimal_date_from_r_POSIXct_obj = lubridate::decimal_date(as.POSIXct("2022-03-25 15:37:01")), - decimal_date_from_r_date_obj = lubridate::decimal_date(as.Date("2022-03-25")), - decimal_date_from_date = lubridate::decimal_date(c), - date_from_decimal = lubridate::date_decimal(a), - date_from_decimal_r_obj = lubridate::date_decimal(2022.178) - ) %>% - collect(), - test_df, - ignore_attr = "tzone" - ) }) test_that("dminutes, dhours, ddays, dweeks, dmonths, dyears", { @@ -1643,23 +1437,13 @@ test_that("dminutes, dhours, ddays, dweeks, dmonths, dyears", { r_obj_ddays = ddays(3), r_obj_dweeks = dweeks(4), r_obj_dmonths = dmonths(5), - r_obj_dyears = dyears(6) - ) %>% - collect(), - tibble(), - ignore_attr = TRUE - ) - - # with namespacing - compare_dplyr_binding( - .input %>% - mutate( - r_obj_dminutes = lubridate::dminutes(1), - r_obj_dhours = lubridate::dhours(2), - r_obj_ddays = lubridate::ddays(3), - r_obj_dweeks = lubridate::dweeks(4), - r_obj_dmonths = lubridate::dmonths(5), - r_obj_dyears = lubridate::dyears(6) + r_obj_dyears = dyears(6), + r_obj_dminutes2 = lubridate::dminutes(1), + r_obj_dhours2 = lubridate::dhours(2), + r_obj_ddays2 = lubridate::ddays(3), + r_obj_dweeks2 = lubridate::dweeks(4), + r_obj_dmonths2 = lubridate::dmonths(5), + r_obj_dyears2 = lubridate::dyears(6) ) %>% collect(), tibble(), @@ -1692,27 +1476,16 @@ test_that("dseconds, dmilliseconds, dmicroseconds, dnanoseconds, dpicoseconds", dmilliseconds = dmilliseconds(x), dmicroseconds = dmicroseconds(x), dnanoseconds = dnanoseconds(x), + dseconds2 = lubridate::dseconds(x), + dmilliseconds2 = lubridate::dmilliseconds(x), + dmicroseconds2 = lubridate::dmicroseconds(x), + dnanoseconds2 = lubridate::dnanoseconds(x), ) %>% collect(), example_d, ignore_attr = TRUE ) - # with namespacing - compare_dplyr_binding( - .input %>% - mutate( - dseconds = lubridate::dseconds(x), - dmilliseconds = lubridate::dmilliseconds(x), - dmicroseconds = lubridate::dmicroseconds(x), - dnanoseconds = lubridate::dnanoseconds(x), - ) %>% - collect(), - example_d, - ignore_attr = TRUE - ) - - compare_dplyr_binding( .input %>% mutate( @@ -1794,17 +1567,8 @@ test_that("make_difftime()", { day = 2, week = 4, units = "secs" - ) - ) %>% - collect(), - test_df - ) - - # with namespacing - compare_dplyr_binding( - .input %>% - mutate( - duration_from_parts = lubridate::make_difftime( + ), + duration_from_parts2 = lubridate::make_difftime( second = seconds, minute = minutes, hour = hours, @@ -1889,6 +1653,7 @@ test_that("`as.Date()` and `as_date()`", { .input %>% mutate( date_dv1 = as.Date(date_var), + date_dv1_nmspc = base::as.Date(date_var), date_pv1 = as.Date(posixct_var), date_pv_tz1 = as.Date(posixct_var, tz = "Pacific/Marquesas"), date_utc1 = as.Date(dt_utc), @@ -1899,6 +1664,7 @@ test_that("`as.Date()` and `as_date()`", { date_int_origin1 = as.Date(integer_var, origin = "1970-01-03"), date_integerish1 = as.Date(integerish_var, origin = "1970-01-01"), date_dv2 = as_date(date_var), + date_dv2_nmspc = lubridate::as_date(date_var), date_pv2 = as_date(posixct_var), date_pv_tz2 = as_date(posixct_var, tz = "Pacific/Marquesas"), date_utc2 = as_date(dt_utc), @@ -1913,21 +1679,6 @@ test_that("`as.Date()` and `as_date()`", { test_df ) - # with namespacing - compare_dplyr_binding( - .input %>% - mutate( - date_dv1 = base::as.Date(date_var), - date_pv1 = base::as.Date(posixct_var), - date_pv_tz1 = base::as.Date(posixct_var, tz = "Pacific/Marquesas"), - date_dv2 = lubridate::as_date(date_var), - date_pv2 = lubridate::as_date(posixct_var), - date_pv_tz2 = lubridate::as_date(posixct_var, tz = "Pacific/Marquesas") - ) %>% - collect(), - test_df - ) - # we do not support multiple tryFormats # this is not a simple warning, therefore we cannot use compare_dplyr_binding() # with `warning = TRUE` @@ -2050,6 +1801,7 @@ test_that("`as_datetime()`", { .input %>% mutate( ddate = as_datetime(date), + ddate2 = lubridate::as_datetime(date), dchar_date_no_tz = as_datetime(char_date), dchar_date_with_tz = as_datetime(char_date, tz = "Pacific/Marquesas"), dint_date = as_datetime(int_date, origin = "1970-01-02"), @@ -2060,21 +1812,6 @@ test_that("`as_datetime()`", { test_df ) - # with namespacing - compare_dplyr_binding( - .input %>% - mutate( - ddate = lubridate::as_datetime(date), - dchar_date_no_tz = lubridate::as_datetime(char_date), - dchar_date_with_tz = lubridate::as_datetime(char_date, tz = "Pacific/Marquesas"), - dint_date = lubridate::as_datetime(int_date, origin = "1970-01-02"), - dintegerish_date = lubridate::as_datetime(integerish_date, origin = "1970-01-02"), - dintegerish_date2 = lubridate::as_datetime(integerish_date, origin = "1970-01-01") - ) %>% - collect(), - test_df - ) - # Arrow does not support conversion of double to date # the below should error with an error message originating in the C++ code expect_error( @@ -2096,6 +1833,7 @@ test_that("parse_date_time() works with year, month, and date components", { .input %>% mutate( parsed_date_ymd = parse_date_time(string_ymd, orders = "ymd"), + parsed_date_ymd2 = lubridate::parse_date_time(string_ymd, orders = "ymd"), parsed_date_dmy = parse_date_time(string_dmy, orders = "dmy"), parsed_date_mdy = parse_date_time(string_mdy, orders = "mdy") ) %>% @@ -2119,22 +1857,6 @@ test_that("parse_date_time() works with year, month, and date components", { ) ) - # with namespacing - compare_dplyr_binding( - .input %>% - mutate( - parsed_date_ymd = lubridate::parse_date_time(string_ymd, orders = "ymd"), - ) %>% - collect(), - tibble::tibble( - string_ymd = c( - "2021-09-1", "2021/09///2", "2021.09.03", "2021,09,4", "2021:09::5", - "2021 09 6", "21-09-07", "21/09/08", "21.09.9", "21,09,10", "21:09:11", - "20210912", "210913", NA - ) - ) - ) - # TODO(ARROW-16443): locale (affecting "%b% and "%B") does not work on Windows skip_on_os("windows") compare_dplyr_binding( @@ -2204,22 +1926,13 @@ test_that("year, month, day date/time parsers", { mdy_date = mdy(mdy_string), myd_date = myd(myd_string), dmy_date = dmy(dmy_string), - dym_date = dym(dym_string) - ) %>% - collect(), - test_df - ) - - # with namespacing - compare_dplyr_binding( - .input %>% - mutate( - ymd_date = lubridate::ymd(ymd_string), - ydm_date = lubridate::ydm(ydm_string), - mdy_date = lubridate::mdy(mdy_string), - myd_date = lubridate::myd(myd_string), - dmy_date = lubridate::dmy(dmy_string), - dym_date = lubridate::dym(dym_string) + dym_date = dym(dym_string), + ymd_date2 = lubridate::ymd(ymd_string), + ydm_date2 = lubridate::ydm(ydm_string), + mdy_date2 = lubridate::mdy(mdy_string), + myd_date2 = lubridate::myd(myd_string), + dmy_date2 = lubridate::dmy(dmy_string), + dym_date2 = lubridate::dym(dym_string) ) %>% collect(), test_df @@ -2261,14 +1974,17 @@ test_that("ym, my & yq parsers", { .input %>% mutate( ym_date = ym(ym_string), + ym_date2 = lubridate::ym(ym_string), ym_datetime = ym(ym_string, tz = "Pacific/Marquesas"), Ym_date = ym(Ym_string), Ym_datetime = ym(Ym_string, tz = "Pacific/Marquesas"), my_date = my(my_string), + my_date2 = lubridate::my(my_string), my_datetime = my(my_string, tz = "Pacific/Marquesas"), mY_date = my(mY_string), mY_datetime = my(mY_string, tz = "Pacific/Marquesas"), yq_date_from_string = yq(yq_string), + yq_date_from_string2 = lubridate::yq(yq_string), yq_datetime_from_string = yq(yq_string, tz = "Pacific/Marquesas"), yq_date_from_numeric = yq(yq_numeric), yq_datetime_from_numeric = yq(yq_numeric, tz = "Pacific/Marquesas"), @@ -2297,50 +2013,14 @@ test_that("ym, my & yq parsers", { collect(), test_df ) - - # with namespacing - compare_dplyr_binding( - .input %>% - mutate( - ym_date = lubridate::ym(ym_string), - ym_datetime = lubridate::ym(ym_string, tz = "Pacific/Marquesas"), - my_date = lubridate::my(my_string), - my_datetime = lubridate::my(my_string, tz = "Pacific/Marquesas"), - yq_date_from_string = lubridate::yq(yq_string), - yq_datetime_from_string = lubridate::yq(yq_string, tz = "Pacific/Marquesas") - ) %>% - collect(), - test_df - ) }) test_that("lubridate's fast_strptime", { compare_dplyr_binding( .input %>% mutate( - y = - fast_strptime( - x, - format = "%Y-%m-%d %H:%M:%S", - lt = FALSE - ) - ) %>% - collect(), - tibble( - x = c("2018-10-07 19:04:05", "2022-05-17 21:23:45", NA) - ) - ) - - # with namespacing - compare_dplyr_binding( - .input %>% - mutate( - y = - lubridate::fast_strptime( - x, - format = "%Y-%m-%d %H:%M:%S", - lt = FALSE - ) + y = fast_strptime(x, format = "%Y-%m-%d %H:%M:%S", lt = FALSE), + y2 = lubridate::fast_strptime(x, format = "%Y-%m-%d %H:%M:%S", lt = FALSE) ) %>% collect(), tibble( From 627b52037d733af65c3eaa7a9035ac175edbc244 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Thu, 14 Jul 2022 22:46:15 +0100 Subject: [PATCH 109/129] shrink type unit tests --- r/tests/testthat/test-dplyr-funcs-type.R | 264 ++++------------------- 1 file changed, 46 insertions(+), 218 deletions(-) diff --git a/r/tests/testthat/test-dplyr-funcs-type.R b/r/tests/testthat/test-dplyr-funcs-type.R index 1c8a5dd8bfb..3f274b97f7f 100644 --- a/r/tests/testthat/test-dplyr-funcs-type.R +++ b/r/tests/testthat/test-dplyr-funcs-type.R @@ -119,6 +119,10 @@ test_that("explicit type conversions with as.*()", { chr2dbl = as.double(chr), chr2int = as.integer(chr), chr2num = as.numeric(chr), + chr2chr2 = base::as.character(chr), + chr2dbl2 = base::as.double(chr), + chr2int2 = base::as.integer(chr), + chr2num2 = base::as.numeric(chr), rchr2chr = as.character("string"), rchr2dbl = as.double("1.5"), rchr2int = as.integer("1"), @@ -127,26 +131,11 @@ test_that("explicit type conversions with as.*()", { collect(), tibble(chr = c("1", "2", "3")) ) - # with namespacing - compare_dplyr_binding( - .input %>% - transmute( - chr2chr = base::as.character(chr), - chr2dbl = base::as.double(chr), - chr2int = base::as.integer(chr), - chr2num = base::as.numeric(chr), - rchr2chr = base::as.character("string"), - rchr2dbl = base::as.double("1.5"), - rchr2int = base::as.integer("1"), - rchr2num = base::as.numeric("1.5") - ) %>% - collect(), - tibble(chr = c("1", "2", "3")) - ) compare_dplyr_binding( .input %>% transmute( chr2i64 = as.integer64(chr), + chr2i64_nmspc = bit64::as.integer64(chr), dbl2i64 = as.integer64(dbl), i642i64 = as.integer64(i64), rchr2i64 = as.integer64("10000000000"), @@ -156,24 +145,11 @@ test_that("explicit type conversions with as.*()", { collect(), tibble(chr = "10000000000", dbl = 10000000000, i64 = as.integer64(1e10)) ) - # with namespacing - compare_dplyr_binding( - .input %>% - transmute( - chr2i64 = bit64::as.integer64(chr), - dbl2i64 = bit64::as.integer64(dbl), - i642i64 = bit64::as.integer64(i64), - rchr2i64 = bit64::as.integer64("10000000000"), - rdbl2i64 = bit64::as.integer64(10000000000), - ri642i64 = bit64::as.integer64(as.integer64(1e10)) - ) %>% - collect(), - tibble(chr = "10000000000", dbl = 10000000000, i64 = as.integer64(1e10)) - ) compare_dplyr_binding( .input %>% transmute( chr2lgl = as.logical(chr), + chr2lgl2 = base::as.logical(chr), dbl2lgl = as.logical(dbl), int2lgl = as.logical(int), rchr2lgl = as.logical("TRUE"), @@ -187,24 +163,6 @@ test_that("explicit type conversions with as.*()", { int = c(1L, 0L, -99L, 0L) ) ) - # with namespacing - compare_dplyr_binding( - .input %>% - transmute( - chr2lgl = base::as.logical(chr), - dbl2lgl = base::as.logical(dbl), - int2lgl = base::as.logical(int), - rchr2lgl = base::as.logical("TRUE"), - rdbl2lgl = base::as.logical(0), - rint2lgl = base::as.logical(1L) - ) %>% - collect(), - tibble( - chr = c("TRUE", "FALSE", "true", "false"), - dbl = c(1, 0, -99, 0), - int = c(1L, 0L, -99L, 0L) - ) - ) compare_dplyr_binding( .input %>% transmute( @@ -256,17 +214,9 @@ test_that("is.finite(), is.infinite(), is.nan()", { .input %>% transmute( is_fin = is.finite(x), - is_inf = is.infinite(x) - ) %>% - collect(), - df - ) - # with namespacing - compare_dplyr_binding( - .input %>% - transmute( - is_fin = base::is.finite(x), - is_inf = base::is.infinite(x) + is_inf = is.infinite(x), + is_fin2 = base::is.finite(x), + is_inf2 = base::is.infinite(x) ) %>% collect(), df @@ -275,16 +225,8 @@ test_that("is.finite(), is.infinite(), is.nan()", { compare_dplyr_binding( .input %>% transmute( - is_nan = is.nan(x) - ) %>% - collect(), - df - ) - # with namespacing - compare_dplyr_binding( - .input %>% - transmute( - is_nan = base::is.nan(x) + is_nan = is.nan(x), + is_nan2 = base::is.nan(x) ) %>% collect(), df @@ -296,16 +238,8 @@ test_that("is.na() evaluates to TRUE on NaN (ARROW-12055)", { compare_dplyr_binding( .input %>% transmute( - is_na = is.na(x) - ) %>% - collect(), - df - ) - # with namespacing - compare_dplyr_binding( - .input %>% - transmute( - is_na = base::is.na(x) + is_na = is.na(x), + is_na2 = base::is.na(x) ) %>% collect(), df @@ -377,6 +311,9 @@ test_that("type checks with is() giving Arrow types", { i32_is_i32 = is(i32, "int32"), i32_is_i64 = is(i32, "double"), i32_is_str = is(i32, "string"), + i32_is_i32_nmspc = methods::is(i32, "int32"), + i32_is_i64_nmspc = methods::is(i32, "double"), + i32_is_str_nmspc = methods::is(i32, "string"), f64_is_i32 = is(f64, "int32"), f64_is_i64 = is(f64, "double"), f64_is_str = is(f64, "string"), @@ -387,29 +324,7 @@ test_that("type checks with is() giving Arrow types", { collect() %>% t() %>% as.vector(), - c(TRUE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, TRUE) - ) - # with class2=string and namespacing - expect_equal( - Table$create( - i32 = Array$create(1, int32()), - f64 = Array$create(1.1, float64()), - str = Array$create("a", arrow::string()) - ) %>% transmute( - i32_is_i32 = methods::is(i32, "int32"), - i32_is_i64 = methods::is(i32, "double"), - i32_is_str = methods::is(i32, "string"), - f64_is_i32 = methods::is(f64, "int32"), - f64_is_i64 = methods::is(f64, "double"), - f64_is_str = methods::is(f64, "string"), - str_is_i32 = methods::is(str, "int32"), - str_is_i64 = methods::is(str, "double"), - str_is_str = methods::is(str, "string") - ) %>% - collect() %>% - t() %>% - as.vector(), - c(TRUE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, TRUE) + c(TRUE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, TRUE) ) # with class2=string alias expect_equal( @@ -542,6 +457,14 @@ test_that("type checks with is.*()", { chr_is_lst = is.list(chr), chr_is_lgl = is.logical(chr), chr_is_num = is.numeric(chr), + chr_is_chr2 = base::is.character(chr), + chr_is_dbl2 = base::is.double(chr), + chr_is_fct2 = base::is.factor(chr), + chr_is_int2 = base::is.integer(chr), + chr_is_i642 = bit64::is.integer64(chr), + chr_is_lst2 = base::is.list(chr), + chr_is_lgl2 = base::is.logical(chr), + chr_is_num2 = base::is.numeric(chr), dbl_is_chr = is.character(dbl), dbl_is_dbl = is.double(dbl), dbl_is_fct = is.factor(dbl), @@ -578,54 +501,6 @@ test_that("type checks with is.*()", { collect(), tbl ) - # with namespacing - compare_dplyr_binding( - .input %>% - transmute( - chr_is_chr = base::is.character(chr), - chr_is_dbl = base::is.double(chr), - chr_is_fct = base::is.factor(chr), - chr_is_int = base::is.integer(chr), - chr_is_i64 = bit64::is.integer64(chr), - chr_is_lst = base::is.list(chr), - chr_is_lgl = base::is.logical(chr), - chr_is_num = base::is.numeric(chr), - dbl_is_chr = base::is.character(dbl), - dbl_is_dbl = base::is.double(dbl), - dbl_is_fct = base::is.factor(dbl), - dbl_is_int = base::is.integer(dbl), - dbl_is_i64 = bit64::is.integer64(dbl), - dbl_is_lst = base::is.list(dbl), - dbl_is_lgl = base::is.logical(dbl), - dbl_is_num = base::is.numeric(dbl), - fct_is_chr = base::is.character(fct), - fct_is_dbl = base::is.double(fct), - fct_is_fct = base::is.factor(fct), - fct_is_int = base::is.integer(fct), - fct_is_i64 = bit64::is.integer64(fct), - fct_is_lst = base::is.list(fct), - fct_is_lgl = base::is.logical(fct), - fct_is_num = base::is.numeric(fct), - int_is_chr = base::is.character(int), - int_is_dbl = base::is.double(int), - int_is_fct = base::is.factor(int), - int_is_int = base::is.integer(int), - int_is_i64 = bit64::is.integer64(int), - int_is_lst = base::is.list(int), - int_is_lgl = base::is.logical(int), - int_is_num = base::is.numeric(int), - lgl_is_chr = base::is.character(lgl), - lgl_is_dbl = base::is.double(lgl), - lgl_is_fct = base::is.factor(lgl), - lgl_is_int = base::is.integer(lgl), - lgl_is_i64 = bit64::is.integer64(lgl), - lgl_is_lst = base::is.list(lgl), - lgl_is_lgl = base::is.logical(lgl), - lgl_is_num = base::is.numeric(lgl) - ) %>% - collect(), - tbl - ) compare_dplyr_binding( .input %>% transmute( @@ -666,6 +541,11 @@ test_that("type checks with is_*()", { chr_is_int = is_integer(chr), chr_is_lst = is_list(chr), chr_is_lgl = is_logical(chr), + chr_is_chr2 = rlang::is_character(chr), + chr_is_dbl2 = rlang::is_double(chr), + chr_is_int2 = rlang::is_integer(chr), + chr_is_lst2 = rlang::is_list(chr), + chr_is_lgl2 = rlang::is_logical(chr), dbl_is_chr = is_character(dbl), dbl_is_dbl = is_double(dbl), dbl_is_int = is_integer(dbl), @@ -685,34 +565,6 @@ test_that("type checks with is_*()", { collect(), tbl ) - # with namespacing - compare_dplyr_binding( - .input %>% - transmute( - chr_is_chr = rlang::is_character(chr), - chr_is_dbl = rlang::is_double(chr), - chr_is_int = rlang::is_integer(chr), - chr_is_lst = rlang::is_list(chr), - chr_is_lgl = rlang::is_logical(chr), - dbl_is_chr = rlang::is_character(dbl), - dbl_is_dbl = rlang::is_double(dbl), - dbl_is_int = rlang::is_integer(dbl), - dbl_is_lst = rlang::is_list(dbl), - dbl_is_lgl = rlang::is_logical(dbl), - int_is_chr = rlang::is_character(int), - int_is_dbl = rlang::is_double(int), - int_is_int = rlang::is_integer(int), - int_is_lst = rlang::is_list(int), - int_is_lgl = rlang::is_logical(int), - lgl_is_chr = rlang::is_character(lgl), - lgl_is_dbl = rlang::is_double(lgl), - lgl_is_int = rlang::is_integer(lgl), - lgl_is_lst = rlang::is_list(lgl), - lgl_is_lgl = rlang::is_logical(lgl) - ) %>% - collect(), - tbl - ) }) test_that("type checks on expressions", { @@ -774,15 +626,10 @@ test_that("as.factor()/dictionary_encode()", { compare_dplyr_binding( .input %>% - transmute(x = as.factor(x)) %>% - collect(), - df1 - ) - - # with namespacing - compare_dplyr_binding( - .input %>% - transmute(x = base::as.factor(x)) %>% + transmute( + x = as.factor(x), + x2 = base::as.factor(x) + ) %>% collect(), df1 ) @@ -872,16 +719,8 @@ test_that("structs/nested data frames/tibbles can be created", { df_col = tibble( regular_col1 = regular_col1, regular_col2 = regular_col2 - ) - ) %>% - collect(), - df - ) - # with namespacing - compare_dplyr_binding( - .input %>% - transmute( - df_col = tibble::tibble( + ), + df_col2 = tibble::tibble( regular_col1 = regular_col1, regular_col2 = regular_col2 ) @@ -950,20 +789,14 @@ test_that("structs/nested data frames/tibbles can be created", { compare_dplyr_binding( .input %>% transmute( - df_col = data.frame(regular_col1, regular_col1, check.names = FALSE) - ) %>% - collect() %>% - mutate(df_col = as.data.frame(df_col)), - df - ) - # with namespacing - compare_dplyr_binding( - .input %>% - transmute( - df_col = base::data.frame(regular_col1, regular_col1, check.names = FALSE) + df_col = data.frame(regular_col1, regular_col1, check.names = FALSE), + df_col2 = base::data.frame(regular_col1, regular_col1, check.names = FALSE) ) %>% collect() %>% - mutate(df_col = as.data.frame(df_col)), + mutate( + df_col = as.data.frame(df_col), + df_col2 = as.data.frame(df_col2) + ), df ) @@ -1027,15 +860,10 @@ test_that("format date/time", { compare_dplyr_binding( .input %>% - mutate(x = format(datetime, format = formats)) %>% - collect(), - times - ) - - # with namespacing - compare_dplyr_binding( - .input %>% - mutate(x = base::format(datetime, format = formats)) %>% + mutate( + x = format(datetime, format = formats), + x2 = base::format(datetime, format = formats) + ) %>% collect(), times ) From 0085cd50ffba90af1a85af31cc23e9289b8804c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Thu, 14 Jul 2022 22:55:40 +0100 Subject: [PATCH 110/129] shrink summarise unit tests --- r/tests/testthat/test-dplyr-summarize.R | 153 +++++------------------- 1 file changed, 33 insertions(+), 120 deletions(-) diff --git a/r/tests/testthat/test-dplyr-summarize.R b/r/tests/testthat/test-dplyr-summarize.R index 3f945db94a8..c2207a1f273 100644 --- a/r/tests/testthat/test-dplyr-summarize.R +++ b/r/tests/testthat/test-dplyr-summarize.R @@ -103,16 +103,10 @@ test_that("Group by mean on dataset", { compare_dplyr_binding( .input %>% group_by(some_grouping) %>% - summarize(mean = mean(int, na.rm = FALSE)) %>% - collect(), - tbl - ) - - # with namespacing - compare_dplyr_binding( - .input %>% - group_by(some_grouping) %>% - summarize(mean = base::mean(int, na.rm = TRUE)) %>% + summarize( + mean = mean(int, na.rm = FALSE), + mean2 = base::mean(int, na.rm = TRUE) + ) %>% collect(), tbl ) @@ -130,16 +124,10 @@ test_that("Group by sd on dataset", { compare_dplyr_binding( .input %>% group_by(some_grouping) %>% - summarize(sd = sd(int, na.rm = FALSE)) %>% - collect(), - tbl - ) - - # with namespacing - compare_dplyr_binding( - .input %>% - group_by(some_grouping) %>% - summarize(sd = stats::sd(int, na.rm = TRUE)) %>% + summarize( + sd = sd(int, na.rm = FALSE), + sd2 = stats::sd(int, na.rm = TRUE) + ) %>% collect(), tbl ) @@ -157,16 +145,10 @@ test_that("Group by var on dataset", { compare_dplyr_binding( .input %>% group_by(some_grouping) %>% - summarize(var = var(int, na.rm = FALSE)) %>% - collect(), - tbl - ) - - # with namespacing - compare_dplyr_binding( - .input %>% - group_by(some_grouping) %>% - summarize(var = stats::var(int, na.rm = TRUE)) %>% + summarize( + var = var(int, na.rm = FALSE), + var2 = stats::var(int, na.rm = TRUE) + ) %>% collect(), tbl ) @@ -183,17 +165,10 @@ test_that("n()", { compare_dplyr_binding( .input %>% group_by(some_grouping) %>% - summarize(counts = n()) %>% - arrange(some_grouping) %>% - collect(), - tbl - ) - - # with namespacing - compare_dplyr_binding( - .input %>% - group_by(some_grouping) %>% - summarize(counts = dplyr::n()) %>% + summarize( + counts = n(), + counts2 = dplyr::n() + ) %>% arrange(some_grouping) %>% collect(), tbl @@ -204,14 +179,20 @@ test_that("Group by any/all", { compare_dplyr_binding( .input %>% group_by(some_grouping) %>% - summarize(any(lgl, na.rm = TRUE)) %>% + summarize( + any(lgl, na.rm = TRUE), + base::any(lgl, na.rm = TRUE) + ) %>% collect(), tbl ) compare_dplyr_binding( .input %>% group_by(some_grouping) %>% - summarize(all(lgl, na.rm = TRUE)) %>% + summarize( + all(lgl, na.rm = TRUE), + base::all(lgl, na.rm = TRUE) + ) %>% collect(), tbl ) @@ -253,22 +234,6 @@ test_that("Group by any/all", { collect(), tbl ) - - # with namespacing - compare_dplyr_binding( - .input %>% - group_by(some_grouping) %>% - summarize(base::any(lgl, na.rm = TRUE)) %>% - collect(), - tbl - ) - compare_dplyr_binding( - .input %>% - group_by(some_grouping) %>% - summarize(base::all(lgl, na.rm = TRUE)) %>% - collect(), - tbl - ) }) test_that("n_distinct() on dataset", { @@ -296,14 +261,10 @@ test_that("n_distinct() on dataset", { ) compare_dplyr_binding( .input %>% - summarize(distinct = n_distinct(lgl, na.rm = TRUE)) %>% - collect(), - tbl - ) - # with namespacing - compare_dplyr_binding( - .input %>% - summarize(distinct = dplyr::n_distinct(lgl, na.rm = TRUE)) %>% + summarize( + distinct = n_distinct(lgl, na.rm = TRUE), + distinct2 = dplyr::n_distinct(lgl, na.rm = TRUE) + ) %>% collect(), tbl ) @@ -403,6 +364,8 @@ test_that("median()", { summarize( med_dbl = median(dbl), med_int = as.double(median(int)), + med_dbl2 = stats::median(dbl), + med_int2 = base::as.double(stats::median(int)), med_dbl_narmf = median(dbl, FALSE), med_int_narmf = as.double(median(int, na.rm = FALSE)) ) %>% @@ -414,22 +377,6 @@ test_that("median()", { ) }) -test_that("median() with namespacing", { - suppressWarnings( - compare_dplyr_binding( - .input %>% - summarize( - med_dbl_narmt = stats::median(dbl, na.rm = TRUE), - med_int_narmt = base::as.double(stats::median(int, TRUE)) - ) %>% - collect(), - tbl, - warning = "median\\(\\) currently returns an approximate median in Arrow" - ), - classes = "arrow.median.approximate" - ) -}) - test_that("quantile()", { # The default method for stats::quantile() throws an error when na.rm = FALSE # and the input contains NA or NaN, whereas the Arrow tdigest kernels return @@ -562,30 +509,6 @@ test_that("quantile() with namespacing", { ), classes = "arrow.quantile.approximate" ) - - # without groups - suppressWarnings( - expect_warning( - expect_equal( - tbl %>% - summarize( - q_dbl = quantile(dbl, probs = 0.5, na.rm = TRUE, names = FALSE), - q_int = as.double( - quantile(int, probs = 0.5, na.rm = TRUE, names = FALSE) - ) - ), - Table$create(tbl) %>% - summarize( - q_dbl = stats::quantile(dbl, probs = 0.5, na.rm = TRUE), - q_int = as.double(quantile(int, probs = 0.5, na.rm = TRUE)) - ) %>% - collect() - ), - "quantile() currently returns an approximate quantile in Arrow", - fixed = TRUE - ), - classes = "arrow.quantile.approximate" - ) }) test_that("summarize() with min() and max()", { @@ -620,7 +543,9 @@ test_that("summarize() with min() and max()", { select(int) %>% summarize( min_int = min(int, na.rm = TRUE), - max_int = max(int, na.rm = TRUE) + max_int = max(int, na.rm = TRUE), + min_int2 = base::min(int, na.rm = TRUE), + max_int2 = base::max(int, na.rm = TRUE) ) %>% collect(), tbl, @@ -665,18 +590,6 @@ test_that("summarize() with min() and max()", { collect(), tbl, ) - - # with namespacing - compare_dplyr_binding( - .input %>% - select(int) %>% - summarize( - min_int = base::min(int, na.rm = TRUE), - max_int = base::max(int, na.rm = TRUE) - ) %>% - collect(), - tbl, - ) }) test_that("min() and max() on character strings", { From 0c74e871073b804b092f0128c32b506b8cb58acf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Thu, 14 Jul 2022 22:55:50 +0100 Subject: [PATCH 111/129] lints --- r/R/dplyr-funcs.R | 2 +- r/R/dplyr-summarize.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/r/R/dplyr-funcs.R b/r/R/dplyr-funcs.R index 294bdbbef81..f7610d82686 100644 --- a/r/R/dplyr-funcs.R +++ b/r/R/dplyr-funcs.R @@ -134,7 +134,7 @@ agg_funcs <- new.env(parent = emptyenv()) # we register 2 versions of the "::" binding - one for use with nse_funcs (below) # and another one for use with agg_funcs (in dplyr-summarize.R) -nse_funcs[["::"]] <-function(lhs, rhs) { +nse_funcs[["::"]] <- function(lhs, rhs) { lhs_name <- as.character(substitute(lhs)) rhs_name <- as.character(substitute(rhs)) diff --git a/r/R/dplyr-summarize.R b/r/R/dplyr-summarize.R index 2380b15b2df..9c8be1beacf 100644 --- a/r/R/dplyr-summarize.R +++ b/r/R/dplyr-summarize.R @@ -163,7 +163,7 @@ register_bindings_aggregate <- function() { # we register 2 version of the "::" binding - one for use with nse_funcs # and another one for use with agg_funcs (below) -agg_funcs[["::"]]<- function(lhs, rhs) { +agg_funcs[["::"]] <- function(lhs, rhs) { lhs_name <- as.character(substitute(lhs)) rhs_name <- as.character(substitute(rhs)) From 5368c39b2b44a04c90a2f7cc39f3f09f860da9f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Thu, 14 Jul 2022 22:57:14 +0100 Subject: [PATCH 112/129] shrink mutate unit tests --- r/tests/testthat/test-dplyr-mutate.R | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/r/tests/testthat/test-dplyr-mutate.R b/r/tests/testthat/test-dplyr-mutate.R index 86322a90590..2a79fc0c454 100644 --- a/r/tests/testthat/test-dplyr-mutate.R +++ b/r/tests/testthat/test-dplyr-mutate.R @@ -517,20 +517,11 @@ test_that("mutate and pmin/pmax", { max_val_1 = pmax(val1, val2, val3), max_val_2 = pmax(val1, val2, val3, na.rm = TRUE), min_val_1 = pmin(val1, val2, val3), - min_val_2 = pmin(val1, val2, val3, na.rm = TRUE) - ) %>% - collect(), - df - ) - - # with namespacing - compare_dplyr_binding( - .input %>% - mutate( - max_val_1 = base::pmax(val1, val2, val3), - max_val_2 = base::pmax(val1, val2, val3, na.rm = TRUE), - min_val_1 = base::pmin(val1, val2, val3), - min_val_2 = base::pmin(val1, val2, val3, na.rm = TRUE) + min_val_2 = pmin(val1, val2, val3, na.rm = TRUE), + max_val_1_nmspc = base::pmax(val1, val2, val3), + max_val_2_nmspc = base::pmax(val1, val2, val3, na.rm = TRUE), + min_val_1_nmspc = base::pmin(val1, val2, val3), + min_val_2_nmspc = base::pmin(val1, val2, val3, na.rm = TRUE) ) %>% collect(), df From 62786ceb4a51c42c57b1dc2813cebe8bb37992fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Fri, 15 Jul 2022 08:37:52 +0100 Subject: [PATCH 113/129] shrink string unit tests --- r/tests/testthat/test-dplyr-funcs-string.R | 245 ++++++--------------- 1 file changed, 67 insertions(+), 178 deletions(-) diff --git a/r/tests/testthat/test-dplyr-funcs-string.R b/r/tests/testthat/test-dplyr-funcs-string.R index 84a4d7c8718..27720fd3c3b 100644 --- a/r/tests/testthat/test-dplyr-funcs-string.R +++ b/r/tests/testthat/test-dplyr-funcs-string.R @@ -45,14 +45,10 @@ test_that("paste, paste0, and str_c", { # no NAs in data compare_dplyr_binding( .input %>% - transmute(paste(v, w)) %>% - collect(), - df - ) - # with namespacing - compare_dplyr_binding( - .input %>% - transmute(base::paste(v, w)) %>% + transmute( + a = paste(v, w), + a2 = base::paste(v, w) + ) %>% collect(), df ) @@ -64,27 +60,18 @@ test_that("paste, paste0, and str_c", { ) compare_dplyr_binding( .input %>% - transmute(paste0(v, w)) %>% - collect(), - df - ) - # with namespacing - compare_dplyr_binding( - .input %>% - transmute(base::paste0(v, w)) %>% + transmute( + a = paste0(v, w), + a2 = base::paste0(v, w)) %>% collect(), df ) compare_dplyr_binding( .input %>% - transmute(str_c(v, w)) %>% - collect(), - df - ) - # with namespacing - compare_dplyr_binding( - .input %>% - transmute(stringr::str_c(v, w)) %>% + transmute( + a = str_c(v, w), + a2 = stringr::str_c(v, w) + ) %>% collect(), df ) @@ -319,14 +306,10 @@ test_that("str_detect", { ) compare_dplyr_binding( .input %>% - transmute(x = str_detect(x, regex("^f[A-Z]{2}", ignore_case = TRUE))) %>% - collect(), - df - ) - # with namespacing - compare_dplyr_binding( - .input %>% - transmute(x = stringr::str_detect(x, regex("^f[A-Z]{2}", ignore_case = TRUE))) %>% + transmute( + a = str_detect(x, regex("^f[A-Z]{2}", ignore_case = TRUE)), + a2 = stringr::str_detect(x, regex("^f[A-Z]{2}", ignore_case = TRUE)) + ) %>% collect(), df ) @@ -463,13 +446,19 @@ test_that("str_replace and str_replace_all", { ) compare_dplyr_binding( .input %>% - transmute(x = str_replace_all(x, fixed("o"), "u")) %>% + transmute( + x = str_replace_all(x, fixed("o"), "u"), + x2 = stringr::str_replace_all(x, fixed("o"), "u") + ) %>% collect(), df ) compare_dplyr_binding( .input %>% - transmute(x = str_replace(x, fixed("O"), "u")) %>% + transmute( + x = str_replace(x, fixed("O"), "u"), + x2 = stringr::str_replace(x, fixed("O"), "u") + ) %>% collect(), df ) @@ -479,20 +468,6 @@ test_that("str_replace and str_replace_all", { collect(), df ) - - # with namespacing - compare_dplyr_binding( - .input %>% - transmute(x = stringr::str_replace_all(x, fixed("o"), "u")) %>% - collect(), - df - ) - compare_dplyr_binding( - .input %>% - transmute(x = stringr::str_replace(x, fixed("O"), "u")) %>% - collect(), - df - ) }) test_that("strsplit and str_split", { @@ -516,14 +491,20 @@ test_that("strsplit and str_split", { ) compare_dplyr_binding( .input %>% - mutate(x = strsplit(x, " +and +")) %>% + mutate( + a = strsplit(x, " +and +"), + a2 = base::strsplit(x, " +and +") + ) %>% collect(), df, ignore_attr = TRUE ) compare_dplyr_binding( .input %>% - mutate(x = str_split(x, "and")) %>% + mutate( + a = str_split(x, "and"), + a2 = stringr::str_split(x, "and") + ) %>% collect(), df, ignore_attr = TRUE @@ -556,22 +537,6 @@ test_that("strsplit and str_split", { df, ignore_attr = TRUE ) - - # with namespacing - compare_dplyr_binding( - .input %>% - mutate(x = base::strsplit(x, " +and +")) %>% - collect(), - df, - ignore_attr = TRUE - ) - compare_dplyr_binding( - .input %>% - mutate(x = stringr::str_split(x, "and")) %>% - collect(), - df, - ignore_attr = TRUE - ) }) test_that("strrep and str_dup", { @@ -600,19 +565,10 @@ test_that("str_to_lower, str_to_upper, and str_to_title", { transmute( x_lower = str_to_lower(x), x_upper = str_to_upper(x), - x_title = str_to_title(x) - ) %>% - collect(), - df - ) - - # with namespacing - compare_dplyr_binding( - .input %>% - transmute( - x_lower = stringr::str_to_lower(x), - x_upper = stringr::str_to_upper(x), - x_title = stringr::str_to_title(x) + x_title = str_to_title(x), + x_lower_nmspc = stringr::str_to_lower(x), + x_upper_nmspc = stringr::str_to_upper(x), + x_title_nmspc = stringr::str_to_title(x) ) %>% collect(), df @@ -991,15 +947,10 @@ test_that("str_pad", { compare_dplyr_binding( .input %>% - mutate(x = str_pad(x, width = 31, side = "both")) %>% - collect(), - df - ) - - # with namespacing - compare_dplyr_binding( - .input %>% - mutate(x = stringr::str_pad(x, width = 31, side = "both")) %>% + mutate( + a = str_pad(x, width = 31, side = "both"), + a2 = stringr::str_pad(x, width = 31, side = "both") + ) %>% collect(), df ) @@ -1066,7 +1017,10 @@ test_that("substr", { compare_dplyr_binding( .input %>% - mutate(y = substr(x, -5, -1)) %>% + mutate( + y = substr(x, -5, -1), + y2 = base::substr(x, -5, -1) + ) %>% collect(), df ) @@ -1080,14 +1034,6 @@ test_that("substr", { call_binding("substr", "Apache Arrow", 1, c(2, 3)), "`stop` must be length 1 - other lengths are not supported in Arrow" ) - - # with namespacing - compare_dplyr_binding( - .input %>% - mutate(y = base::substr(x, -5, -1)) %>% - collect(), - df - ) }) test_that("substring", { @@ -1097,15 +1043,10 @@ test_that("substring", { compare_dplyr_binding( .input %>% - mutate(y = substring(x, 1, 6)) %>% - collect(), - df - ) - - # with namespacing - compare_dplyr_binding( - .input %>% - mutate(y = base::substring(x, 1, 6)) %>% + mutate( + y = substring(x, 1, 6), + y2 = base::substring(x, 1, 6) + ) %>% collect(), df ) @@ -1179,15 +1120,10 @@ test_that("str_sub", { compare_dplyr_binding( .input %>% - mutate(y = str_sub(x, -5, -1)) %>% - collect(), - df - ) - - # with namespacing - compare_dplyr_binding( - .input %>% - mutate(y = stringr::str_sub(x, -5, -1)) %>% + mutate( + y = str_sub(x, -5, -1), + y2 = stringr::str_sub(x, -5, -1) + ) %>% collect(), df ) @@ -1238,6 +1174,7 @@ test_that("str_starts, str_ends, startsWith, endsWith", { .input %>% transmute( a = str_starts(x, "b.*"), + a2 = stringr::str_starts(x, "b.*"), b = str_starts(x, "b.*", negate = TRUE), c = str_starts(x, fixed("b")), d = str_starts(x, fixed("b"), negate = TRUE) @@ -1246,19 +1183,6 @@ test_that("str_starts, str_ends, startsWith, endsWith", { df ) - # with namespacing - compare_dplyr_binding( - .input %>% - transmute( - a = stringr::str_starts(x, "b.*"), - b = stringr::str_starts(x, "b.*", negate = TRUE), - c = stringr::str_starts(x, fixed("b")), - d = stringr::str_starts(x, fixed("b"), negate = TRUE) - ) %>% - collect(), - df - ) - compare_dplyr_binding( .input %>% filter(str_ends(x, "r")) %>% @@ -1291,6 +1215,7 @@ test_that("str_starts, str_ends, startsWith, endsWith", { .input %>% transmute( a = str_ends(x, "r"), + a2 = stringr::str_ends(x, "r"), b = str_ends(x, "r", negate = TRUE), c = str_ends(x, fixed("r")), d = str_ends(x, fixed("r"), negate = TRUE) @@ -1299,19 +1224,6 @@ test_that("str_starts, str_ends, startsWith, endsWith", { df ) - # with namespacing - compare_dplyr_binding( - .input %>% - transmute( - a = stringr::str_ends(x, "r"), - b = stringr::str_ends(x, "r", negate = TRUE), - c = stringr::str_ends(x, fixed("r")), - d = stringr::str_ends(x, fixed("r"), negate = TRUE) - ) %>% - collect(), - df - ) - compare_dplyr_binding( .input %>% filter(startsWith(x, "b")) %>% @@ -1344,18 +1256,9 @@ test_that("str_starts, str_ends, startsWith, endsWith", { .input %>% transmute( a = startsWith(x, "b"), - b = endsWith(x, "r") - ) %>% - collect(), - df - ) - - # with namespacing - compare_dplyr_binding( - .input %>% - transmute( - a = base::startsWith(x, "b"), - b = base::endsWith(x, "r") + b = endsWith(x, "r"), + a2 = base::startsWith(x, "b"), + b2 = base::endsWith(x, "r") ) %>% collect(), df @@ -1370,15 +1273,10 @@ test_that("str_count", { compare_dplyr_binding( .input %>% - mutate(a_count = str_count(cities, pattern = "a")) %>% - collect(), - df - ) - - # with namespacing - compare_dplyr_binding( - .input %>% - mutate(a_count = stringr::str_count(cities, pattern = "a")) %>% + mutate( + a_count = str_count(cities, pattern = "a"), + a_count_nmspc = stringr::str_count(cities, pattern = "a") + ) %>% collect(), df ) @@ -1493,25 +1391,16 @@ test_that("nchar with namespacing", { ) }) -test_that("str_trim", { - compare_dplyr_binding( - .input %>% - mutate( - left_trimmed_padded_string = str_trim(padded_strings, "left"), - right_trimmed_padded_string = str_trim(padded_strings, "right"), - trimmed_padded_string = str_trim(padded_strings, "both") - ) %>% - collect(), - tbl - ) - - # with namespacing +test_that("str_trim()", { compare_dplyr_binding( .input %>% mutate( - left_trimmed_padded_string = stringr::str_trim(padded_strings, "left"), - right_trimmed_padded_string = stringr::str_trim(padded_strings, "right"), - trimmed_padded_string = stringr::str_trim(padded_strings, "both") + left_trim_padded_string = str_trim(padded_strings, "left"), + right_trim_padded_string = str_trim(padded_strings, "right"), + both_trim_padded_string = str_trim(padded_strings, "both"), + left_trim_padded_string_nmspc = stringr::str_trim(padded_strings, "left"), + right_trim_padded_string_nmspc = stringr::str_trim(padded_strings, "right"), + both_trim_padded_string_nmspc = stringr::str_trim(padded_strings, "both") ) %>% collect(), tbl From 0c7d59935b90fbdd71917644164a90f52a0d9436 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Fri, 15 Jul 2022 08:53:55 +0100 Subject: [PATCH 114/129] shrink math unit tests --- r/tests/testthat/test-dplyr-funcs-math.R | 98 ++++++------------------ 1 file changed, 24 insertions(+), 74 deletions(-) diff --git a/r/tests/testthat/test-dplyr-funcs-math.R b/r/tests/testthat/test-dplyr-funcs-math.R index ae3ca5e10b5..5f7da452395 100644 --- a/r/tests/testthat/test-dplyr-funcs-math.R +++ b/r/tests/testthat/test-dplyr-funcs-math.R @@ -25,15 +25,9 @@ test_that("abs()", { compare_dplyr_binding( .input %>% - transmute(abs = abs(x)) %>% - collect(), - df - ) - - # with namespacing - compare_dplyr_binding( - .input %>% - transmute(abs = base::abs(x)) %>% + transmute( + abs = abs(x), + abs2 = base::abs(x)) %>% collect(), df ) @@ -44,15 +38,10 @@ test_that("sign()", { compare_dplyr_binding( .input %>% - transmute(sign = sign(x)) %>% - collect(), - df - ) - - # with namespacing - compare_dplyr_binding( - .input %>% - transmute(sign = base::sign(x)) %>% + transmute( + sign = sign(x), + sign2 = base::sign(x) + ) %>% collect(), df ) @@ -67,20 +56,11 @@ test_that("ceiling(), floor(), trunc(), round()", { c = ceiling(x), f = floor(x), t = trunc(x), - r = round(x) - ) %>% - collect(), - df - ) - - # with namespacing - compare_dplyr_binding( - .input %>% - mutate( - c = base::ceiling(x), - f = base::floor(x), - t = base::trunc(x), - r = base::round(x) + r = round(x), + c2 = base::ceiling(x), + f2 = base::floor(x), + t2 = base::trunc(x), + r2 = base::round(x) ) %>% collect(), df @@ -170,15 +150,10 @@ test_that("log functions", { compare_dplyr_binding( .input %>% - mutate(y = log(x)) %>% - collect(), - df - ) - - # with namespacing - compare_dplyr_binding( - .input %>% - mutate(y = base::log(x)) %>% + mutate( + y = log(x), + y2 = base::log(x) + ) %>% collect(), df ) @@ -265,14 +240,6 @@ test_that("log functions", { df ) - # with namespacing - compare_dplyr_binding( - .input %>% - mutate(y = base::logb(x)) %>% - collect(), - df - ) - compare_dplyr_binding( .input %>% mutate(y = log1p(x)) %>% @@ -295,13 +262,6 @@ test_that("log functions", { ) # with namespacing - compare_dplyr_binding( - .input %>% - mutate(a = base::log(x, base = y)) %>% - collect(), - tibble(x = 10, y = 1) - ) - compare_dplyr_binding( .input %>% mutate( @@ -415,15 +375,10 @@ test_that("exp()", { compare_dplyr_binding( .input %>% - mutate(y = exp(x)) %>% - collect(), - df - ) - - # with namespacing - compare_dplyr_binding( - .input %>% - mutate(y = base::exp(x)) %>% + mutate( + y = exp(x), + y2 = base::exp(x) + ) %>% collect(), df ) @@ -434,15 +389,10 @@ test_that("sqrt()", { compare_dplyr_binding( .input %>% - mutate(y = sqrt(x)) %>% - collect(), - df - ) - - # with namespacing - compare_dplyr_binding( - .input %>% - mutate(y = base::sqrt(x)) %>% + mutate( + y = sqrt(x), + y2 = base::sqrt(x) + ) %>% collect(), df ) From f6ba7b42dd1d209106d7ae73c9097d760b7c0fb8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Fri, 15 Jul 2022 09:00:51 +0100 Subject: [PATCH 115/129] removed empty line --- r/tests/testthat/test-dplyr-group-by.R | 1 - 1 file changed, 1 deletion(-) diff --git a/r/tests/testthat/test-dplyr-group-by.R b/r/tests/testthat/test-dplyr-group-by.R index 3484e45ebc2..08d6a77d3d1 100644 --- a/r/tests/testthat/test-dplyr-group-by.R +++ b/r/tests/testthat/test-dplyr-group-by.R @@ -167,5 +167,4 @@ test_that("group_by() with namespaced functions", { collect(), tbl ) - }) From e8fcc5b438c658e1b1dddcfbc2824131627824fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Fri, 15 Jul 2022 09:01:04 +0100 Subject: [PATCH 116/129] trim string test file --- r/tests/testthat/test-dplyr-funcs-string.R | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/r/tests/testthat/test-dplyr-funcs-string.R b/r/tests/testthat/test-dplyr-funcs-string.R index 27720fd3c3b..423fe1ccd8e 100644 --- a/r/tests/testthat/test-dplyr-funcs-string.R +++ b/r/tests/testthat/test-dplyr-funcs-string.R @@ -1333,7 +1333,9 @@ test_that("base::tolower and base::toupper", { .input %>% mutate( verse_to_upper = toupper(verses), - verse_to_lower = tolower(verses) + verse_to_lower = tolower(verses), + verse_to_upper_nmspc = base::toupper(verses), + verse_to_lower_nmspc = base::tolower(verses) ) %>% collect(), tbl @@ -1352,17 +1354,6 @@ test_that("namespaced unary and binary string functions", { tbl ) - # base::tolower and base::toupper - compare_dplyr_binding( - .input %>% - mutate( - verse_to_upper = base::toupper(verses), - verse_to_lower = base::tolower(verses) - ) %>% - collect(), - tbl - ) - # stringr::str_dup and base::strrep df <- tibble(x = c("foo1", " \tB a R\n", "!apACHe aRroW!")) for (times in 0:8) { From 4413eab699148c08ebc6316a7b283f773fe08810 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Fri, 15 Jul 2022 09:01:19 +0100 Subject: [PATCH 117/129] trim conditional unit tests file --- .../testthat/test-dplyr-funcs-conditional.R | 26 +++---------------- 1 file changed, 4 insertions(+), 22 deletions(-) diff --git a/r/tests/testthat/test-dplyr-funcs-conditional.R b/r/tests/testthat/test-dplyr-funcs-conditional.R index ce7543b8050..4898d1e9e3e 100644 --- a/r/tests/testthat/test-dplyr-funcs-conditional.R +++ b/r/tests/testthat/test-dplyr-funcs-conditional.R @@ -29,7 +29,8 @@ test_that("if_else and ifelse", { compare_dplyr_binding( .input %>% mutate( - y = if_else(int > 5, 1, 0) + y = if_else(int > 5, 1, 0), + y2 = dplyr::if_else(int > 6, 1, 0) ) %>% collect(), tbl @@ -44,16 +45,6 @@ test_that("if_else and ifelse", { tbl ) - # with namespacing - compare_dplyr_binding( - .input %>% - mutate( - y = dplyr::if_else(int > 5, 1, 0) - ) %>% - collect(), - tbl - ) - expect_error( Table$create(tbl) %>% mutate( @@ -75,17 +66,8 @@ test_that("if_else and ifelse", { compare_dplyr_binding( .input %>% mutate( - y = ifelse(int > 5, 1, 0) - ) %>% - collect(), - tbl - ) - - # with namespacing - compare_dplyr_binding( - .input %>% - mutate( - y = base::ifelse(int > 5, 1, 0) + y = ifelse(int > 5, 1, 0), + y2 = base::ifelse(int > 6, 1, 0) ) %>% collect(), tbl From 9b93ff51b21c53542741abb2168daec981622256 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Fri, 15 Jul 2022 09:16:19 +0100 Subject: [PATCH 118/129] improved comments --- r/R/dplyr-funcs.R | 5 +++-- r/R/dplyr-summarize.R | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/r/R/dplyr-funcs.R b/r/R/dplyr-funcs.R index f7610d82686..f0549a0af09 100644 --- a/r/R/dplyr-funcs.R +++ b/r/R/dplyr-funcs.R @@ -132,8 +132,9 @@ nse_funcs <- new.env(parent = emptyenv()) agg_funcs <- new.env(parent = emptyenv()) .cache <- new.env(parent = emptyenv()) -# we register 2 versions of the "::" binding - one for use with nse_funcs (below) -# and another one for use with agg_funcs (in dplyr-summarize.R) +# we register 2 versions of the "::" binding - one for use with nse_funcs +# (registered below) and another one for use with agg_funcs (registered in +# dplyr-summarize.R) nse_funcs[["::"]] <- function(lhs, rhs) { lhs_name <- as.character(substitute(lhs)) rhs_name <- as.character(substitute(rhs)) diff --git a/r/R/dplyr-summarize.R b/r/R/dplyr-summarize.R index 9c8be1beacf..cd7154f8d94 100644 --- a/r/R/dplyr-summarize.R +++ b/r/R/dplyr-summarize.R @@ -161,8 +161,9 @@ register_bindings_aggregate <- function() { }) } -# we register 2 version of the "::" binding - one for use with nse_funcs -# and another one for use with agg_funcs (below) +# we register 2 versions of the "::" binding - one for use with agg_funcs +# (registered below) and another one for use with nse_funcs +# (registered in dplyr-funcs.R) agg_funcs[["::"]] <- function(lhs, rhs) { lhs_name <- as.character(substitute(lhs)) rhs_name <- as.character(substitute(rhs)) From 0b54a281183dc6590b3e6bfaec76747d72cbdf8e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Fri, 15 Jul 2022 09:41:09 +0100 Subject: [PATCH 119/129] remove the ability to remove a binding + typo --- r/R/dplyr-funcs.R | 11 ++++------- r/tests/testthat/test-dplyr-funcs.R | 3 +-- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/r/R/dplyr-funcs.R b/r/R/dplyr-funcs.R index f0549a0af09..5c3399e8fae 100644 --- a/r/R/dplyr-funcs.R +++ b/r/R/dplyr-funcs.R @@ -63,21 +63,18 @@ register_binding <- function(fun_name, fun, registry = nse_funcs) { previous_fun <- registry[[unqualified_name]] - # if the unqualified name exists in the register, warn + # if the unqualified name exists in the registry, warn if (!is.null(fun) && !is.null(previous_fun)) { warn( paste0( "A \"", unqualified_name, - "\" binding already exists in the register and will be overwritten.") + "\" binding already exists in the registry and will be overwritten.") ) } - # if fun is NULL remove entries from the function registry - if (is.null(fun) && !is.null(previous_fun)) { - rm(list = c(unqualified_name, qualified_name), envir = registry, inherits = FALSE) - # register both as `pkg::fun` and as `fun` if `qualified_name` is prefixed - } else if (grepl("::", qualified_name) && qualified_name != "::") { + # register both as `pkg::fun` and as `fun` if `qualified_name` is prefixed + if (grepl("::", qualified_name) && qualified_name != "::") { registry[[unqualified_name]] <- fun registry[[qualified_name]] <- fun } else { diff --git a/r/tests/testthat/test-dplyr-funcs.R b/r/tests/testthat/test-dplyr-funcs.R index 95b2e87b9ca..2156ad9af06 100644 --- a/r/tests/testthat/test-dplyr-funcs.R +++ b/r/tests/testthat/test-dplyr-funcs.R @@ -25,7 +25,6 @@ test_that("register_binding() works", { expect_identical(fake_registry$`some.pkg::some_fun`, fun1) expect_identical(register_binding("some.pkg::some_fun", NULL, fake_registry), fun1) - expect_false("some_fun" %in% names(fake_registry)) expect_silent(expect_null(register_binding("some.pkg::some_fun", NULL, fake_registry))) expect_null(register_binding("somePkg::some_fun", fun1, fake_registry)) @@ -33,7 +32,7 @@ test_that("register_binding() works", { expect_warning( register_binding("some.pkg2::some_fun", fun2, fake_registry), - "A \"some_fun\" binding already exists in the register and will be overwritten." + "A \"some_fun\" binding already exists in the registry and will be overwritten." ) }) From 379589fd0591bd23290093bc65fb762d13dc7064 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Fri, 15 Jul 2022 10:28:26 +0100 Subject: [PATCH 120/129] removed duplicate test --- r/tests/testthat/test-dplyr-filter.R | 8 -------- 1 file changed, 8 deletions(-) diff --git a/r/tests/testthat/test-dplyr-filter.R b/r/tests/testthat/test-dplyr-filter.R index ca21ca4ca3a..aed46d801ce 100644 --- a/r/tests/testthat/test-dplyr-filter.R +++ b/r/tests/testthat/test-dplyr-filter.R @@ -239,14 +239,6 @@ test_that("filter() with between()", { filter(between(chr, 1, 2)) %>% collect() ) - - # with namespacing - compare_dplyr_binding( - .input %>% - filter(dplyr::between(dbl, 1, 2)) %>% - collect(), - tbl - ) }) test_that("filter() with string ops", { From aba65644bee09c6016c33518af4443308d4ce541 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Fri, 15 Jul 2022 10:34:57 +0100 Subject: [PATCH 121/129] some more trimming --- r/tests/testthat/test-dplyr-funcs-datetime.R | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R index edb393d9b56..f0543736404 100644 --- a/r/tests/testthat/test-dplyr-funcs-datetime.R +++ b/r/tests/testthat/test-dplyr-funcs-datetime.R @@ -294,16 +294,8 @@ test_that("format_ISO8601", { compare_dplyr_binding( .input %>% mutate( - x = format_ISO8601(x, precision = "ymd", usetz = FALSE) - ) %>% - collect(), - times - ) - - compare_dplyr_binding( - .input %>% - mutate( - x = lubridate::format_ISO8601(x, precision = "ymd", usetz = FALSE) + a = format_ISO8601(x, precision = "ymd", usetz = FALSE), + a2 = lubridate::format_ISO8601(x, precision = "ymd", usetz = FALSE) ) %>% collect(), times @@ -367,8 +359,7 @@ test_that("is.* functions from lubridate", { mutate( x = is.POSIXct(datetime), y = is.POSIXct(integer), - x2 = lubridate::is.POSIXct(datetime), - y2 = lubridate::is.POSIXct(integer) + x2 = lubridate::is.POSIXct(datetime) ) %>% collect(), test_df @@ -379,8 +370,7 @@ test_that("is.* functions from lubridate", { mutate( x = is.Date(date), y = is.Date(integer), - x2 = lubridate::is.Date(date), - y2 = lubridate::is.Date(integer) + x2 = lubridate::is.Date(date) ) %>% collect(), test_df @@ -508,7 +498,7 @@ test_that("extract week from timestamp", { .input %>% mutate( x = week(datetime), - x = lubridate::week(datetime) + x2 = lubridate::week(datetime) ) %>% collect(), test_df From 642c23ee865140bdbdee7e416558aecc580f8500 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Fri, 15 Jul 2022 11:47:02 +0100 Subject: [PATCH 122/129] simplify `wrap_hash_quantile()` --- r/R/dplyr-summarize.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/R/dplyr-summarize.R b/r/R/dplyr-summarize.R index cd7154f8d94..92587f6c685 100644 --- a/r/R/dplyr-summarize.R +++ b/r/R/dplyr-summarize.R @@ -480,7 +480,7 @@ wrap_hash_quantile <- function(expr) { if (length(expr) == 1) { return(expr) } else { - if (is.call(expr) && (expr[[1]] == quote(quantile) || expr[[1]] == quote(stats::quantile))) { + if (is.call(expr) && any(c(quote(quantile), quote(stats::quantile)) == expr[[1]])) { return(str2lang(paste0("arrow_list_element(", deparse1(expr), ", 0L)"))) } else { return(as.call(lapply(expr, wrap_hash_quantile))) From bce4c698a6c76378e0375cc41d9079c8184b8c0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Fri, 15 Jul 2022 13:51:23 +0100 Subject: [PATCH 123/129] remove duplicate helper data --- r/tests/testthat/test-dplyr-funcs-string.R | 8 -------- 1 file changed, 8 deletions(-) diff --git a/r/tests/testthat/test-dplyr-funcs-string.R b/r/tests/testthat/test-dplyr-funcs-string.R index 423fe1ccd8e..7260b3946d1 100644 --- a/r/tests/testthat/test-dplyr-funcs-string.R +++ b/r/tests/testthat/test-dplyr-funcs-string.R @@ -23,14 +23,6 @@ library(lubridate) library(stringr) library(stringi) -tbl <- example_data -# Add some better string data -tbl$verses <- verses[[1]] -# c(" a ", " b ", " c ", ...) increasing padding -# nchar = 3 5 7 9 11 13 15 17 19 21 -tbl$padded_strings <- stringr::str_pad(letters[1:10], width = 2 * (1:10) + 1, side = "both") -tbl$some_grouping <- rep(c(1, 2), 5) - test_that("paste, paste0, and str_c", { df <- tibble( v = c("A", "B", "C"), From fb7002e18ae1fff21cb3e29286a81f3a998b2c6e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Fri, 15 Jul 2022 13:51:45 +0100 Subject: [PATCH 124/129] trimmed condition for binding registration --- r/R/dplyr-funcs.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/R/dplyr-funcs.R b/r/R/dplyr-funcs.R index 5c3399e8fae..313116606fc 100644 --- a/r/R/dplyr-funcs.R +++ b/r/R/dplyr-funcs.R @@ -74,7 +74,7 @@ register_binding <- function(fun_name, fun, registry = nse_funcs) { } # register both as `pkg::fun` and as `fun` if `qualified_name` is prefixed - if (grepl("::", qualified_name) && qualified_name != "::") { + if (grepl("::", qualified_name)) { registry[[unqualified_name]] <- fun registry[[qualified_name]] <- fun } else { From cbf6401b5aca66d3a1d01e035df4004c935df44c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Fri, 15 Jul 2022 14:11:02 +0100 Subject: [PATCH 125/129] simplify `register_binding()` further --- r/R/dplyr-funcs.R | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/r/R/dplyr-funcs.R b/r/R/dplyr-funcs.R index 313116606fc..8914900d80d 100644 --- a/r/R/dplyr-funcs.R +++ b/r/R/dplyr-funcs.R @@ -58,8 +58,7 @@ NULL #' @keywords internal #' register_binding <- function(fun_name, fun, registry = nse_funcs) { - qualified_name <- fun_name - unqualified_name <- gsub("^.*?::", "", qualified_name) + unqualified_name <- sub("^.*?::", "", fun_name) previous_fun <- registry[[unqualified_name]] @@ -74,9 +73,9 @@ register_binding <- function(fun_name, fun, registry = nse_funcs) { } # register both as `pkg::fun` and as `fun` if `qualified_name` is prefixed - if (grepl("::", qualified_name)) { + if (grepl("::", fun_name)) { registry[[unqualified_name]] <- fun - registry[[qualified_name]] <- fun + registry[[fun_name]] <- fun } else { registry[[unqualified_name]] <- fun } From 5f468f61b451090016731c65a52d9b610765344e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Fri, 15 Jul 2022 14:38:42 +0100 Subject: [PATCH 126/129] use `sub()` with `{+}` --- r/R/dplyr-funcs.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/R/dplyr-funcs.R b/r/R/dplyr-funcs.R index 8914900d80d..7c4ed99e2ed 100644 --- a/r/R/dplyr-funcs.R +++ b/r/R/dplyr-funcs.R @@ -58,7 +58,7 @@ NULL #' @keywords internal #' register_binding <- function(fun_name, fun, registry = nse_funcs) { - unqualified_name <- sub("^.*?::", "", fun_name) + unqualified_name <- sub("^.*?:{+}", "", fun_name) previous_fun <- registry[[unqualified_name]] From d6b760154f7bd399e483a878745cb24d4bc39877 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Fri, 15 Jul 2022 15:08:31 +0100 Subject: [PATCH 127/129] test `transmute()` defusing the dots with `stringr::str_squish()` --- r/tests/testthat/test-dplyr-mutate.R | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/r/tests/testthat/test-dplyr-mutate.R b/r/tests/testthat/test-dplyr-mutate.R index 2a79fc0c454..66e3b4edf0d 100644 --- a/r/tests/testthat/test-dplyr-mutate.R +++ b/r/tests/testthat/test-dplyr-mutate.R @@ -140,6 +140,20 @@ test_that("transmute() with unsupported arguments", { ) }) +test_that("transmute() defuses dots arguments (ARROW-13262)", { + expect_warning( + tbl %>% + Table$create() %>% + transmute( + a = stringr::str_c(padded_strings, padded_strings), + b = stringr::str_squish(a) + ) %>% + collect(), + "Expression stringr::str_squish(a) not supported in Arrow; pulling data into R", + fixed = TRUE + ) +}) + test_that("mutate and refer to previous mutants", { compare_dplyr_binding( .input %>% From 334c81095bfed3328632482b0d1dc8de61987c21 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Fri, 15 Jul 2022 15:18:29 +0100 Subject: [PATCH 128/129] lint --- r/tests/testthat/test-dplyr-glimpse.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/tests/testthat/test-dplyr-glimpse.R b/r/tests/testthat/test-dplyr-glimpse.R index 9deb9087b17..c93273bdeef 100644 --- a/r/tests/testthat/test-dplyr-glimpse.R +++ b/r/tests/testthat/test-dplyr-glimpse.R @@ -17,7 +17,7 @@ # The glimpse output for tests with `example_data` is different on R < 3.6 # because the `lgl` column is generated with `sample()` and the RNG -# algorithm is different in older R versions. +# algorithm is different in older R versions. skip_on_r_older_than("3.6") library(dplyr, warn.conflicts = FALSE) From ac316ee0d748a73114ff9166edd3c045d1c210da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Fri, 15 Jul 2022 15:52:45 +0100 Subject: [PATCH 129/129] add `tbl` back in --- r/tests/testthat/test-dplyr-funcs-string.R | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/r/tests/testthat/test-dplyr-funcs-string.R b/r/tests/testthat/test-dplyr-funcs-string.R index 7260b3946d1..423fe1ccd8e 100644 --- a/r/tests/testthat/test-dplyr-funcs-string.R +++ b/r/tests/testthat/test-dplyr-funcs-string.R @@ -23,6 +23,14 @@ library(lubridate) library(stringr) library(stringi) +tbl <- example_data +# Add some better string data +tbl$verses <- verses[[1]] +# c(" a ", " b ", " c ", ...) increasing padding +# nchar = 3 5 7 9 11 13 15 17 19 21 +tbl$padded_strings <- stringr::str_pad(letters[1:10], width = 2 * (1:10) + 1, side = "both") +tbl$some_grouping <- rep(c(1, 2), 5) + test_that("paste, paste0, and str_c", { df <- tibble( v = c("A", "B", "C"),