From 2722e0f007db879ea8b50574221fed712193f59d Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Thu, 29 Sep 2022 22:33:39 +0100 Subject: [PATCH 01/15] Add comment, rebuild NAMESPACE file --- r/NAMESPACE | 3 +++ 1 file changed, 3 insertions(+) diff --git a/r/NAMESPACE b/r/NAMESPACE index 8b08b940b36..7395b64594c 100644 --- a/r/NAMESPACE +++ b/r/NAMESPACE @@ -443,6 +443,9 @@ importFrom(rlang,list2) importFrom(rlang,new_data_mask) importFrom(rlang,new_environment) importFrom(rlang,new_quosure) +importFrom(rlang,node_car) +importFrom(rlang,node_cdr) +importFrom(rlang,node_poke_car) importFrom(rlang,parse_expr) importFrom(rlang,quo) importFrom(rlang,quo_get_env) From efa908f2ddb7d7ab8e606c4a2d1e85918b251071 Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Wed, 21 Sep 2022 09:30:10 +0100 Subject: [PATCH 02/15] Add tests for filter and across --- r/tests/testthat/test-dplyr-filter.R | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/r/tests/testthat/test-dplyr-filter.R b/r/tests/testthat/test-dplyr-filter.R index f94450a0257..140e8f11a2f 100644 --- a/r/tests/testthat/test-dplyr-filter.R +++ b/r/tests/testthat/test-dplyr-filter.R @@ -417,3 +417,29 @@ test_that("filter() with namespaced functions", { tbl ) }) + +test_that("filter() with across()", { + + # ARROW-17366: purrr-style lambda functions not yet supported + compare_dplyr_binding( + .input %>% + filter(across(everything(), ~ !is.na(.))) %>% + collect(), + tbl + ) + + compare_dplyr_binding( + .input %>% + filter(if_any(ends_with("l"), ~ is.na(.))) %>% + collect(), + tbl + ) + + compare_dplyr_binding( + .input %>% + filter(if_all(ends_with("l"), ~ !is.na(.))) %>% + collect(), + tbl + ) + +}) From 43523caf3c90ec6447245a701b9ecf95fe9f57ac Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Fri, 30 Sep 2022 09:34:05 +0100 Subject: [PATCH 03/15] Update across() to work with if_any() and if_all() and add tests --- r/R/dplyr-across.R | 32 +++++++++++++++++++++++++++- r/R/dplyr-filter.R | 2 +- r/tests/testthat/test-dplyr-filter.R | 3 +-- 3 files changed, 33 insertions(+), 4 deletions(-) diff --git a/r/R/dplyr-across.R b/r/R/dplyr-across.R index 6550978d6f6..fd181104c62 100644 --- a/r/R/dplyr-across.R +++ b/r/R/dplyr-across.R @@ -23,7 +23,7 @@ expand_across <- function(.data, quos_in) { quo_expr <- quo_get_expr(quo_in[[1]]) quo_env <- quo_get_env(quo_in[[1]]) - if (is_call(quo_expr, "across")) { + if (is_call(quo_expr, "across") | is_call(quo_expr, "if_any") | is_call(quo_expr, "if_all")) { new_quos <- list() across_call <- match.call( @@ -60,9 +60,39 @@ expand_across <- function(.data, quos_in) { } } + if (is_call(quo_expr, "if_any")) { + quos_out <- list(reduce(quos_out, combine_if, op = "|")) + } + + if (is_call(quo_expr, "if_all")) { + quos_out <- list(reduce(quos_out, combine_if, op = "&")) + } + quos_out } +# takes multiple expressions and combines them with & or | +combine_if <- function(lhs, rhs, op){ + + expr_text <- paste( + expr_text(quo_get_expr(lhs)), + expr_text(quo_get_expr(rhs)), + sep = paste0(" ", op, " ") + ) + + expr <- parse_expr(expr_text) + quo(!!expr) + +} + +if_any <- function(.cols = everything(), .fns = NULL, ..., .names = NULL) { + +} + +if_all <- function(.cols = everything(), .fns = NULL, ..., .names = NULL) { + +} + # given a named list of functions and column names, create a list of new quosures quosures_from_setup <- function(setup, quo_env) { if (!is.null(setup$fns)) { diff --git a/r/R/dplyr-filter.R b/r/R/dplyr-filter.R index 7db68b43e93..1ef2b6d7e58 100644 --- a/r/R/dplyr-filter.R +++ b/r/R/dplyr-filter.R @@ -20,7 +20,7 @@ filter.arrow_dplyr_query <- function(.data, ..., .preserve = FALSE) { # TODO something with the .preserve argument - filts <- quos(...) + filts <- expand_across(.data, quos(...)) if (length(filts) == 0) { # Nothing to do return(.data) diff --git a/r/tests/testthat/test-dplyr-filter.R b/r/tests/testthat/test-dplyr-filter.R index 140e8f11a2f..b44173498c9 100644 --- a/r/tests/testthat/test-dplyr-filter.R +++ b/r/tests/testthat/test-dplyr-filter.R @@ -420,7 +420,6 @@ test_that("filter() with namespaced functions", { test_that("filter() with across()", { - # ARROW-17366: purrr-style lambda functions not yet supported compare_dplyr_binding( .input %>% filter(across(everything(), ~ !is.na(.))) %>% @@ -437,7 +436,7 @@ test_that("filter() with across()", { compare_dplyr_binding( .input %>% - filter(if_all(ends_with("l"), ~ !is.na(.))) %>% + filter(if_all(everything(), ~ !is.na(.))) %>% collect(), tbl ) From e3f9b44fc9d6fc27fa9a0fbfd55cf94aea8b6da5 Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Fri, 30 Sep 2022 09:38:52 +0100 Subject: [PATCH 04/15] Vectorise call to is_call --- r/R/dplyr-across.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/R/dplyr-across.R b/r/R/dplyr-across.R index fd181104c62..adcb0140eae 100644 --- a/r/R/dplyr-across.R +++ b/r/R/dplyr-across.R @@ -23,7 +23,7 @@ expand_across <- function(.data, quos_in) { quo_expr <- quo_get_expr(quo_in[[1]]) quo_env <- quo_get_env(quo_in[[1]]) - if (is_call(quo_expr, "across") | is_call(quo_expr, "if_any") | is_call(quo_expr, "if_all")) { + if (is_call(quo_expr, c("across", "if_any", "if_all"))) { new_quos <- list() across_call <- match.call( From fdfc27c1a5f7141fae670d51652f5f875f12948f Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Fri, 30 Sep 2022 11:14:24 +0100 Subject: [PATCH 05/15] Add tests for if_all and if_any, and update tests to make sure we're comparing quosures with quosures --- r/tests/testthat/helper-expectation.R | 2 +- r/tests/testthat/test-dplyr-across.R | 22 ++++++++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/r/tests/testthat/helper-expectation.R b/r/tests/testthat/helper-expectation.R index 17e57dd4194..2bb0bf205e9 100644 --- a/r/tests/testthat/helper-expectation.R +++ b/r/tests/testthat/helper-expectation.R @@ -323,5 +323,5 @@ split_vector_as_list <- function(vec) { } expect_across_equal <- function(across_expr, expected, tbl) { - expect_identical(expand_across(as_adq(tbl), across_expr), as.list(expected)) + expect_identical(expand_across(as_adq(tbl), across_expr), new_quosures(expected)) } diff --git a/r/tests/testthat/test-dplyr-across.R b/r/tests/testthat/test-dplyr-across.R index d622351a28c..d83c85d3cce 100644 --- a/r/tests/testthat/test-dplyr-across.R +++ b/r/tests/testthat/test-dplyr-across.R @@ -278,3 +278,25 @@ test_that("ARROW-14071 - function(x)-style lambda functions are not supported", regexp = "Anonymous functions are not yet supported in Arrow" ) }) + +test_that("if_all() and if_any() are supported", { + + expect_across_equal( + quos(if_any(everything(), ~is.na(.x))), + quos(is.na(int) | is.na(dbl) | is.na(dbl2) | is.na(lgl) | is.na(false) | is.na(chr) | is.na(fct)), + example_data + ) + + expect_across_equal( + quos(if_all(everything(), ~is.na(.x))), + quos(is.na(int) & is.na(dbl) & is.na(dbl2) & is.na(lgl) & is.na(false) & is.na(chr) & is.na(fct)), + example_data + ) + + expect_across_equal( + quos(if_all(everything(), ~is.na(.x))), + quos(is.na(int) & is.na(dbl) & is.na(dbl2) & is.na(lgl) & is.na(false) & is.na(chr) & is.na(fct)), + example_data + ) + +}) From f4bf802d4fb617cc678b72f1b7a84a6ec49e1e42 Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Fri, 30 Sep 2022 11:14:45 +0100 Subject: [PATCH 06/15] Add more complex test and delete test of deprecated behaviour --- r/tests/testthat/test-dplyr-filter.R | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/r/tests/testthat/test-dplyr-filter.R b/r/tests/testthat/test-dplyr-filter.R index b44173498c9..81a9ba3f6e5 100644 --- a/r/tests/testthat/test-dplyr-filter.R +++ b/r/tests/testthat/test-dplyr-filter.R @@ -420,13 +420,6 @@ test_that("filter() with namespaced functions", { test_that("filter() with across()", { - compare_dplyr_binding( - .input %>% - filter(across(everything(), ~ !is.na(.))) %>% - collect(), - tbl - ) - compare_dplyr_binding( .input %>% filter(if_any(ends_with("l"), ~ is.na(.))) %>% @@ -436,7 +429,11 @@ test_that("filter() with across()", { compare_dplyr_binding( .input %>% - filter(if_all(everything(), ~ !is.na(.))) %>% + filter( + false == FALSE, + if_all(everything(), ~ !is.na(.)), + int > 2 + ) %>% collect(), tbl ) From 998b397aa591da62b7cd42be9b6d2a9f2da39922 Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Fri, 30 Sep 2022 11:15:22 +0100 Subject: [PATCH 07/15] Add environment to combined expressions --- r/R/dplyr-across.R | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/r/R/dplyr-across.R b/r/R/dplyr-across.R index adcb0140eae..cd11cf09c15 100644 --- a/r/R/dplyr-across.R +++ b/r/R/dplyr-across.R @@ -61,18 +61,18 @@ expand_across <- function(.data, quos_in) { } if (is_call(quo_expr, "if_any")) { - quos_out <- list(reduce(quos_out, combine_if, op = "|")) + quos_out <- append(list(), reduce(quos_out, combine_if, op = "|", envir = quo_get_env(quos_out[[1]]))) } if (is_call(quo_expr, "if_all")) { - quos_out <- list(reduce(quos_out, combine_if, op = "&")) + quos_out <- append(list(), reduce(quos_out, combine_if, op = "&", envir = quo_get_env(quos_out[[1]]))) } - quos_out + new_quosures(quos_out) } # takes multiple expressions and combines them with & or | -combine_if <- function(lhs, rhs, op){ +combine_if <- function(lhs, rhs, op, envir){ expr_text <- paste( expr_text(quo_get_expr(lhs)), @@ -81,7 +81,8 @@ combine_if <- function(lhs, rhs, op){ ) expr <- parse_expr(expr_text) - quo(!!expr) + + new_quosure(expr, envir) } From e9aae118f26750eb7119bba01a0b4de929fd4a9b Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Fri, 30 Sep 2022 13:11:21 +0100 Subject: [PATCH 08/15] Update docs --- r/NAMESPACE | 1 + r/R/arrow-package.R | 1 + r/data-raw/docgen.R | 10 +++++++--- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/r/NAMESPACE b/r/NAMESPACE index 7395b64594c..cb76aa37f66 100644 --- a/r/NAMESPACE +++ b/r/NAMESPACE @@ -443,6 +443,7 @@ importFrom(rlang,list2) importFrom(rlang,new_data_mask) importFrom(rlang,new_environment) importFrom(rlang,new_quosure) +importFrom(rlang,new_quosures) importFrom(rlang,node_car) importFrom(rlang,node_cdr) importFrom(rlang,node_poke_car) diff --git a/r/R/arrow-package.R b/r/R/arrow-package.R index 298aa94f52b..13d217cec2f 100644 --- a/r/R/arrow-package.R +++ b/r/R/arrow-package.R @@ -25,6 +25,7 @@ #' @importFrom rlang expr caller_env is_character quo_name is_quosure enexpr enexprs as_quosure #' @importFrom rlang is_list call2 is_empty as_function as_label arg_match is_symbol is_call call_args #' @importFrom rlang quo_set_env quo_get_env is_formula quo_is_call f_rhs parse_expr f_env new_quosure +#' @importFrom rlang new_quosures #' @importFrom tidyselect vars_pull vars_rename vars_select eval_select #' @importFrom glue glue #' @useDynLib arrow, .registration = TRUE diff --git a/r/data-raw/docgen.R b/r/data-raw/docgen.R index 2e2581c5788..e2c7f94eafc 100644 --- a/r/data-raw/docgen.R +++ b/r/data-raw/docgen.R @@ -128,11 +128,15 @@ docs <- arrow:::.cache$docs # across() is handled by manipulating the quosures, not by nse_funcs docs[["dplyr::across"]] <- c( - # TODO(ARROW-17387): do filter - "not yet supported inside `filter()`;", # TODO(ARROW-17384): implement where - "and use of `where()` selection helper not yet supported" + "Use of `where()` selection helper not yet supported" ) + +# if_any() and if_all() are used instead of across() in filter() +# they are both handled by manipulating the quosures, not by nse_funcs +docs[["dplyr::if_any"]] <- character(0) +docs[["dplyr::if_all"]] <- character(0) + # desc() is a special helper handled inside of arrange() docs[["dplyr::desc"]] <- character(0) From 0ab3d81bad5beeb94188b1b41f46fc514b022e95 Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Fri, 30 Sep 2022 13:39:15 +0100 Subject: [PATCH 09/15] Namespace reduce call --- r/R/dplyr-across.R | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/r/R/dplyr-across.R b/r/R/dplyr-across.R index cd11cf09c15..44bd15b5dd8 100644 --- a/r/R/dplyr-across.R +++ b/r/R/dplyr-across.R @@ -58,22 +58,21 @@ expand_across <- function(.data, quos_in) { } else { quos_out <- append(quos_out, quo_in) } - } - if (is_call(quo_expr, "if_any")) { - quos_out <- append(list(), reduce(quos_out, combine_if, op = "|", envir = quo_get_env(quos_out[[1]]))) - } + if (is_call(quo_expr, "if_any")) { + quos_out <- append(list(), purrr::reduce(quos_out, combine_if, op = "|", envir = quo_get_env(quos_out[[1]]))) + } - if (is_call(quo_expr, "if_all")) { - quos_out <- append(list(), reduce(quos_out, combine_if, op = "&", envir = quo_get_env(quos_out[[1]]))) + if (is_call(quo_expr, "if_all")) { + quos_out <- append(list(), purrr::reduce(quos_out, combine_if, op = "&", envir = quo_get_env(quos_out[[1]]))) + } } new_quosures(quos_out) } # takes multiple expressions and combines them with & or | -combine_if <- function(lhs, rhs, op, envir){ - +combine_if <- function(lhs, rhs, op, envir) { expr_text <- paste( expr_text(quo_get_expr(lhs)), expr_text(quo_get_expr(rhs)), @@ -83,7 +82,6 @@ combine_if <- function(lhs, rhs, op, envir){ expr <- parse_expr(expr_text) new_quosure(expr, envir) - } if_any <- function(.cols = everything(), .fns = NULL, ..., .names = NULL) { From 9bf2d12547d328b1776eebd42330e82e0a81f861 Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Fri, 30 Sep 2022 13:39:55 +0100 Subject: [PATCH 10/15] Import expr_text --- r/NAMESPACE | 2 ++ r/R/arrow-package.R | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/r/NAMESPACE b/r/NAMESPACE index cb76aa37f66..6f0ec37e095 100644 --- a/r/NAMESPACE +++ b/r/NAMESPACE @@ -405,6 +405,7 @@ importFrom(purrr,map_dbl) importFrom(purrr,map_dfr) importFrom(purrr,map_int) importFrom(purrr,map_lgl) +importFrom(purrr,reduce) importFrom(rlang,"%||%") importFrom(rlang,":=") importFrom(rlang,.data) @@ -426,6 +427,7 @@ importFrom(rlang,env_bind) importFrom(rlang,eval_tidy) importFrom(rlang,exec) importFrom(rlang,expr) +importFrom(rlang,expr_text) importFrom(rlang,f_env) importFrom(rlang,f_rhs) importFrom(rlang,is_bare_character) diff --git a/r/R/arrow-package.R b/r/R/arrow-package.R index 13d217cec2f..bb03dd8e78d 100644 --- a/r/R/arrow-package.R +++ b/r/R/arrow-package.R @@ -17,7 +17,8 @@ #' @importFrom stats quantile median na.omit na.exclude na.pass na.fail #' @importFrom R6 R6Class -#' @importFrom purrr as_mapper map map2 map_chr map2_chr map_dbl map_dfr map_int map_lgl keep imap imap_chr flatten +#' @importFrom purrr as_mapper map map2 map_chr map2_chr map_dbl map_dfr map_int map_lgl keep imap imap_chr +#' @importFrom purrr flatten reduce #' @importFrom assertthat assert_that is.string #' @importFrom rlang list2 %||% is_false abort dots_n warn enquo quo_is_null enquos is_integerish quos quo #' @importFrom rlang eval_tidy new_data_mask syms env new_environment env_bind set_names exec From ae5aced0092515af6d040c3f451df2b1c4efa8a4 Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Mon, 3 Oct 2022 11:47:55 +0100 Subject: [PATCH 11/15] Styler doesn't mind but lintr does --- r/R/dplyr-across.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/R/dplyr-across.R b/r/R/dplyr-across.R index 44bd15b5dd8..4a7e41b75bc 100644 --- a/r/R/dplyr-across.R +++ b/r/R/dplyr-across.R @@ -186,7 +186,7 @@ across_glue_mask <- function(.col, .fn, .caller_env) { env(.caller_env, .col = .col, .fn = .fn, col = .col, fn = .fn) } -# Substitutes instances of `.` and `.x` with the variable in question +# Substitutes instances of "." and ".x" with `var` as_across_fn_call <- function(fn, var, quo_env) { if (is_formula(fn, lhs = FALSE)) { expr <- f_rhs(fn) From 0e6ab68bb67a28d15b8b86dbff4fc5b0f0678ac1 Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Sat, 8 Oct 2022 08:22:04 +0100 Subject: [PATCH 12/15] Run devtools::document --- r/NAMESPACE | 4 ---- 1 file changed, 4 deletions(-) diff --git a/r/NAMESPACE b/r/NAMESPACE index 6f0ec37e095..759c039dfd5 100644 --- a/r/NAMESPACE +++ b/r/NAMESPACE @@ -427,7 +427,6 @@ importFrom(rlang,env_bind) importFrom(rlang,eval_tidy) importFrom(rlang,exec) importFrom(rlang,expr) -importFrom(rlang,expr_text) importFrom(rlang,f_env) importFrom(rlang,f_rhs) importFrom(rlang,is_bare_character) @@ -446,9 +445,6 @@ importFrom(rlang,new_data_mask) importFrom(rlang,new_environment) importFrom(rlang,new_quosure) importFrom(rlang,new_quosures) -importFrom(rlang,node_car) -importFrom(rlang,node_cdr) -importFrom(rlang,node_poke_car) importFrom(rlang,parse_expr) importFrom(rlang,quo) importFrom(rlang,quo_get_env) From 7f8b8cf655bd2540f291b743044cc1c1bc659d3b Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Sat, 8 Oct 2022 10:01:39 +0100 Subject: [PATCH 13/15] Import expr_text --- r/NAMESPACE | 1 + r/R/arrow-package.R | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/r/NAMESPACE b/r/NAMESPACE index 759c039dfd5..86e65087cba 100644 --- a/r/NAMESPACE +++ b/r/NAMESPACE @@ -427,6 +427,7 @@ importFrom(rlang,env_bind) importFrom(rlang,eval_tidy) importFrom(rlang,exec) importFrom(rlang,expr) +importFrom(rlang,expr_text) importFrom(rlang,f_env) importFrom(rlang,f_rhs) importFrom(rlang,is_bare_character) diff --git a/r/R/arrow-package.R b/r/R/arrow-package.R index bb03dd8e78d..143f4c191bd 100644 --- a/r/R/arrow-package.R +++ b/r/R/arrow-package.R @@ -26,7 +26,7 @@ #' @importFrom rlang expr caller_env is_character quo_name is_quosure enexpr enexprs as_quosure #' @importFrom rlang is_list call2 is_empty as_function as_label arg_match is_symbol is_call call_args #' @importFrom rlang quo_set_env quo_get_env is_formula quo_is_call f_rhs parse_expr f_env new_quosure -#' @importFrom rlang new_quosures +#' @importFrom rlang new_quosures expr_text #' @importFrom tidyselect vars_pull vars_rename vars_select eval_select #' @importFrom glue glue #' @useDynLib arrow, .registration = TRUE From 6459ad0bba965892c3fd587f1e36e66881a91227 Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Mon, 10 Oct 2022 16:41:34 +0100 Subject: [PATCH 14/15] Update r/R/dplyr-across.R Co-authored-by: Neal Richardson --- r/R/dplyr-across.R | 8 -------- 1 file changed, 8 deletions(-) diff --git a/r/R/dplyr-across.R b/r/R/dplyr-across.R index 4a7e41b75bc..d23525ddfb5 100644 --- a/r/R/dplyr-across.R +++ b/r/R/dplyr-across.R @@ -84,14 +84,6 @@ combine_if <- function(lhs, rhs, op, envir) { new_quosure(expr, envir) } -if_any <- function(.cols = everything(), .fns = NULL, ..., .names = NULL) { - -} - -if_all <- function(.cols = everything(), .fns = NULL, ..., .names = NULL) { - -} - # given a named list of functions and column names, create a list of new quosures quosures_from_setup <- function(setup, quo_env) { if (!is.null(setup$fns)) { From d4d3a0077a8bab03d7c492d6395159ae559d1db4 Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Mon, 10 Oct 2022 16:44:15 +0100 Subject: [PATCH 15/15] Remove duplicated test --- r/tests/testthat/test-dplyr-across.R | 6 ------ 1 file changed, 6 deletions(-) diff --git a/r/tests/testthat/test-dplyr-across.R b/r/tests/testthat/test-dplyr-across.R index d83c85d3cce..5ded2038c4f 100644 --- a/r/tests/testthat/test-dplyr-across.R +++ b/r/tests/testthat/test-dplyr-across.R @@ -293,10 +293,4 @@ test_that("if_all() and if_any() are supported", { example_data ) - expect_across_equal( - quos(if_all(everything(), ~is.na(.x))), - quos(is.na(int) & is.na(dbl) & is.na(dbl2) & is.na(lgl) & is.na(false) & is.na(chr) & is.na(fct)), - example_data - ) - })