From 215d2fa59a6aecfcf35a6edd397fdefaf8dbe576 Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Thu, 1 Sep 2022 17:02:49 +0100 Subject: [PATCH 1/4] Call summarize on exprs not ... --- r/R/dplyr-summarize.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/r/R/dplyr-summarize.R b/r/R/dplyr-summarize.R index 92587f6c685..4f290c130e4 100644 --- a/r/R/dplyr-summarize.R +++ b/r/R/dplyr-summarize.R @@ -182,7 +182,7 @@ agg_funcs[["::"]] <- function(lhs, rhs) { summarise.arrow_dplyr_query <- function(.data, ...) { call <- match.call() .data <- as_adq(.data) - exprs <- quos(...) + exprs <- expand_across(.data, quos(...)) # Only retain the columns we need to do our aggregations vars_to_keep <- unique(c( unlist(lapply(exprs, all.vars)), # vars referenced in summarise @@ -198,7 +198,7 @@ summarise.arrow_dplyr_query <- function(.data, ...) { .data <- dplyr::select(.data, intersect(vars_to_keep, names(.data))) # Try stuff, if successful return() - out <- try(do_arrow_summarize(.data, ...), silent = TRUE) + out <- try(do_arrow_summarize(.data, !!!exprs), silent = TRUE) if (inherits(out, "try-error")) { return(abandon_ship(call, .data, format(out))) } else { From 56d1044f709b3bc17d89c94fe36a30fc8252e3d2 Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Mon, 5 Sep 2022 08:11:50 +0100 Subject: [PATCH 2/4] Add .groups to summarise signature --- r/R/dplyr-summarize.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/r/R/dplyr-summarize.R b/r/R/dplyr-summarize.R index 4f290c130e4..3181cee1378 100644 --- a/r/R/dplyr-summarize.R +++ b/r/R/dplyr-summarize.R @@ -179,7 +179,7 @@ agg_funcs[["::"]] <- function(lhs, rhs) { # The following S3 methods are registered on load if dplyr is present -summarise.arrow_dplyr_query <- function(.data, ...) { +summarise.arrow_dplyr_query <- function(.data, ..., .groups = NULL) { call <- match.call() .data <- as_adq(.data) exprs <- expand_across(.data, quos(...)) @@ -198,7 +198,7 @@ summarise.arrow_dplyr_query <- function(.data, ...) { .data <- dplyr::select(.data, intersect(vars_to_keep, names(.data))) # Try stuff, if successful return() - out <- try(do_arrow_summarize(.data, !!!exprs), silent = TRUE) + out <- try(do_arrow_summarize(.data, !!!exprs, .groups = .groups), silent = TRUE) if (inherits(out, "try-error")) { return(abandon_ship(call, .data, format(out))) } else { From e48ce92b14a1c1417020b6787aa893baf36c779d Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Mon, 5 Sep 2022 08:36:16 +0100 Subject: [PATCH 3/4] Add tests for across() --- r/tests/testthat/test-dplyr-summarize.R | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/r/tests/testthat/test-dplyr-summarize.R b/r/tests/testthat/test-dplyr-summarize.R index 0ee0c5739db..0f3cbe51ed5 100644 --- a/r/tests/testthat/test-dplyr-summarize.R +++ b/r/tests/testthat/test-dplyr-summarize.R @@ -1126,3 +1126,27 @@ test_that("We don't add unnecessary ProjectNodes when aggregating", { 2 ) }) + +test_that("Can use across() within summarise()", { + + compare_dplyr_binding( + .input %>% + group_by(lgl) %>% + summarise(across(starts_with("dbl"), sum, .names = "sum_{.col}")) %>% + arrange(lgl) %>% + collect(), + example_data + ) + + # across() doesn't work in summarise when input expressions evaluate to bare field references + expect_warning( + example_data %>% + arrow_table() %>% + group_by(lgl) %>% + summarise(across(everything())) %>% + collect(), + regexp = "Expression int is not an aggregate expression or is not supported in Arrow; pulling data into R" + ) + +}) + From 75a856c720226f65db5952dfdb49b3aa6f58cc06 Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Mon, 5 Sep 2022 12:09:38 +0100 Subject: [PATCH 4/4] Run styler --- r/tests/testthat/test-dplyr-summarize.R | 3 --- 1 file changed, 3 deletions(-) diff --git a/r/tests/testthat/test-dplyr-summarize.R b/r/tests/testthat/test-dplyr-summarize.R index 0f3cbe51ed5..29c4619db9c 100644 --- a/r/tests/testthat/test-dplyr-summarize.R +++ b/r/tests/testthat/test-dplyr-summarize.R @@ -1128,7 +1128,6 @@ test_that("We don't add unnecessary ProjectNodes when aggregating", { }) test_that("Can use across() within summarise()", { - compare_dplyr_binding( .input %>% group_by(lgl) %>% @@ -1147,6 +1146,4 @@ test_that("Can use across() within summarise()", { collect(), regexp = "Expression int is not an aggregate expression or is not supported in Arrow; pulling data into R" ) - }) -