# Patch summary: across() support in summarise() for arrow_dplyr_query.
#
# r/R/dplyr-summarize.R
#   * summarise.arrow_dplyr_query() gains an explicit `.groups = NULL` argument
#     after `...`.
#   * The quosures captured from `...` are passed through
#     expand_across(.data, quos(...)) before being stored in `exprs`; `exprs`
#     is then used to work out which columns to keep.
#   * do_arrow_summarize() is now called with the spliced `!!!exprs` and
#     `.groups = .groups` instead of forwarding the raw `...`.
#   * expand_across() is not defined in this patch; presumably it rewrites
#     across() calls into one expression per selected column -- TODO confirm
#     against its definition.
#   * Presumably `.groups` became a named argument so that it is no longer
#     captured by quos(...) -- TODO confirm.
#
# r/tests/testthat/test-dplyr-summarize.R
#   * Adds the test "Can use across() within summarise()":
#       - compare_dplyr_binding() on a grouped
#         summarise(across(starts_with("dbl"), sum, .names = "sum_{.col}"));
#       - expect_warning() that summarise(across(everything())) on an
#         arrow_table() warns with the "pulling data into R" message.
#
# NOTE(review): the hunks below appear to be flattened onto a single line.
# Unified diffs are line-oriented, so the original line breaks would need to
# be restored before this can be applied -- confirm against the source commit.
diff --git a/r/R/dplyr-summarize.R b/r/R/dplyr-summarize.R index 92587f6c685..3181cee1378 100644 --- a/r/R/dplyr-summarize.R +++ b/r/R/dplyr-summarize.R @@ -179,10 +179,10 @@ agg_funcs[["::"]] <- function(lhs, rhs) { # The following S3 methods are registered on load if dplyr is present -summarise.arrow_dplyr_query <- function(.data, ...) { +summarise.arrow_dplyr_query <- function(.data, ..., .groups = NULL) { call <- match.call() .data <- as_adq(.data) - exprs <- quos(...) + exprs <- expand_across(.data, quos(...)) # Only retain the columns we need to do our aggregations vars_to_keep <- unique(c( unlist(lapply(exprs, all.vars)), # vars referenced in summarise @@ -198,7 +198,7 @@ summarise.arrow_dplyr_query <- function(.data, ...) { .data <- dplyr::select(.data, intersect(vars_to_keep, names(.data))) # Try stuff, if successful return() - out <- try(do_arrow_summarize(.data, ...), silent = TRUE) + out <- try(do_arrow_summarize(.data, !!!exprs, .groups = .groups), silent = TRUE) if (inherits(out, "try-error")) { return(abandon_ship(call, .data, format(out))) } else { diff --git a/r/tests/testthat/test-dplyr-summarize.R b/r/tests/testthat/test-dplyr-summarize.R index 0ee0c5739db..29c4619db9c 100644 --- a/r/tests/testthat/test-dplyr-summarize.R +++ b/r/tests/testthat/test-dplyr-summarize.R @@ -1126,3 +1126,24 @@ test_that("We don't add unnecessary ProjectNodes when aggregating", { 2 ) }) + +test_that("Can use across() within summarise()", { + compare_dplyr_binding( + .input %>% + group_by(lgl) %>% + summarise(across(starts_with("dbl"), sum, .names = "sum_{.col}")) %>% + arrange(lgl) %>% + collect(), + example_data + ) + + # across() doesn't work in summarise when input expressions evaluate to bare field references + expect_warning( + example_data %>% + arrow_table() %>% + group_by(lgl) %>% + summarise(across(everything())) %>% + collect(), + regexp = "Expression int is not an aggregate expression or is not supported in Arrow; pulling data into R" + ) +})