Add grouped mean / sd / var aggregations to the Arrow R dplyr bindings.

* r/R/dplyr-functions.R: register agg_funcs$mean ("mean", forwarding na.rm
  with na.min_count = 0L), agg_funcs$sd ("stddev") and agg_funcs$var
  ("variance"). sd and var accept na.rm but forward only ddof (ARROW-13691).
* r/src/compute.cpp: build VarianceOptions for "hash_variance" and
  "hash_stddev" as well as "variance" and "stddev".
* r/tests/testthat/test-dplyr-aggregate.R: add grouped mean / sd / var tests;
  the na.rm = FALSE cases for sd and var are skipped pending ARROW-13691.

NOTE(review): indentation of context lines was reconstructed from a
whitespace-collapsed copy of this patch; confirm against the target files
(or apply with --ignore-whitespace).

diff --git a/r/R/dplyr-functions.R b/r/R/dplyr-functions.R
index efba9f287f9..843b71244ba 100644
--- a/r/R/dplyr-functions.R
+++ b/r/R/dplyr-functions.R
@@ -808,3 +808,27 @@ agg_funcs$all <- function(x, na.rm = FALSE) {
     options = list(na.rm = na.rm, na.min_count = 0L)
   )
 }
+
+agg_funcs$mean <- function(x, na.rm = FALSE) {
+  list(
+    fun = "mean",
+    data = x,
+    options = list(na.rm = na.rm, na.min_count = 0L)
+  )
+}
+# na.rm not currently passed in due to ARROW-13691
+agg_funcs$sd <- function(x, na.rm = FALSE, ddof = 1) {
+  list(
+    fun = "stddev",
+    data = x,
+    options = list(ddof = ddof)
+  )
+}
+# na.rm not currently passed in due to ARROW-13691
+agg_funcs$var <- function(x, na.rm = FALSE, ddof = 1) {
+  list(
+    fun = "variance",
+    data = x,
+    options = list(ddof = ddof)
+  )
+}
diff --git a/r/src/compute.cpp b/r/src/compute.cpp
index b697ecd96a0..dec8b2adfaa 100644
--- a/r/src/compute.cpp
+++ b/r/src/compute.cpp
@@ -350,7 +350,8 @@ std::shared_ptr<arrow::compute::FunctionOptions> make_compute_options(
                                      step);
   }
 
-  if (func_name == "variance" || func_name == "stddev") {
+  if (func_name == "variance" || func_name == "stddev" || func_name == "hash_variance" ||
+      func_name == "hash_stddev") {
     using Options = arrow::compute::VarianceOptions;
     return std::make_shared<Options>(cpp11::as_cpp<int64_t>(options["ddof"]));
   }
diff --git a/r/tests/testthat/test-dplyr-aggregate.R b/r/tests/testthat/test-dplyr-aggregate.R
index 25cd0ccabfb..b19b3891dfe 100644
--- a/r/tests/testthat/test-dplyr-aggregate.R
+++ b/r/tests/testthat/test-dplyr-aggregate.R
@@ -88,10 +88,73 @@ test_that("Group by sum on dataset", {
       summarize(total = sum(int)) %>%
       arrange(some_grouping) %>%
       collect(),
+    tbl
+  )
+})
+
+test_that("Group by mean on dataset", {
+  expect_dplyr_equal(
+    input %>%
+      group_by(some_grouping) %>%
+      summarize(mean = mean(int, na.rm = TRUE)) %>%
+      arrange(some_grouping) %>%
+      collect(),
+    tbl
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      group_by(some_grouping) %>%
+      summarize(mean = mean(int, na.rm = FALSE)) %>%
+      arrange(some_grouping) %>%
+      collect(),
     tbl
   )
 })
 
+test_that("Group by sd on dataset", {
+  expect_dplyr_equal(
+    input %>%
+      group_by(some_grouping) %>%
+      summarize(sd = sd(int, na.rm = TRUE)) %>%
+      arrange(some_grouping) %>%
+      collect(),
+    tbl
+  )
+
+  skip("ARROW-13691 - na.rm not yet implemented for VarianceOptions")
+  expect_dplyr_equal(
+    input %>%
+      group_by(some_grouping) %>%
+      summarize(sd = sd(int, na.rm = FALSE)) %>%
+      arrange(some_grouping) %>%
+      collect(),
+    tbl
+  )
+})
+
+test_that("Group by var on dataset", {
+  expect_dplyr_equal(
+    input %>%
+      group_by(some_grouping) %>%
+      summarize(var = var(int, na.rm = TRUE)) %>%
+      arrange(some_grouping) %>%
+      collect(),
+    tbl
+  )
+
+  skip("ARROW-13691 - na.rm not yet implemented for VarianceOptions")
+  expect_dplyr_equal(
+    input %>%
+      group_by(some_grouping) %>%
+      summarize(var = var(int, na.rm = FALSE)) %>%
+      arrange(some_grouping) %>%
+      collect(),
+    tbl
+  )
+})
+
+
 test_that("Group by any/all", {
   withr::local_options(list(arrow.debug = TRUE))
 