diff --git a/r/R/compute.R b/r/R/compute.R index 39940eedc8c..2d00bcf10e3 100644 --- a/r/R/compute.R +++ b/r/R/compute.R @@ -120,7 +120,7 @@ max.ArrowDatum <- function(..., na.rm = FALSE) { scalar_aggregate("min_max", ..., na.rm = na.rm)$GetFieldByName("max") } -scalar_aggregate <- function(FUN, ..., na.rm = FALSE, na.min_count = 0) { +scalar_aggregate <- function(FUN, ..., na.rm = FALSE, min_count = 0L) { a <- collect_arrays_from_dots(list(...)) if (FUN == "min_max" && na.rm && a$null_count == length(a)) { Array$create(data.frame(min = Inf, max = -Inf)) @@ -128,7 +128,7 @@ scalar_aggregate <- function(FUN, ..., na.rm = FALSE, na.min_count = 0) { # Inf/-Inf, which are type double. Since Arrow is type-stable # and does not do that, we handle this special case here. } else { - call_function(FUN, a, options = list(na.rm = na.rm, na.min_count = na.min_count)) + call_function(FUN, a, options = list(skip_nulls = na.rm, min_count = min_count)) } } diff --git a/r/R/dplyr-functions.R b/r/R/dplyr-functions.R index 72731216f50..d2f7892aee8 100644 --- a/r/R/dplyr-functions.R +++ b/r/R/dplyr-functions.R @@ -784,44 +784,42 @@ agg_funcs$sum <- function(x, na.rm = FALSE) { list( fun = "sum", data = x, - options = list(na.rm = na.rm, na.min_count = 0L) + options = list(skip_nulls = na.rm, min_count = 0L) ) } agg_funcs$any <- function(x, na.rm = FALSE) { list( fun = "any", data = x, - options = list(na.rm = na.rm, na.min_count = 0L) + options = list(skip_nulls = na.rm, min_count = 0L) ) } agg_funcs$all <- function(x, na.rm = FALSE) { list( fun = "all", data = x, - options = list(na.rm = na.rm, na.min_count = 0L) + options = list(skip_nulls = na.rm, min_count = 0L) ) } agg_funcs$mean <- function(x, na.rm = FALSE) { list( fun = "mean", data = x, - options = list(na.rm = na.rm, na.min_count = 0L) + options = list(skip_nulls = na.rm, min_count = 0L) ) } -# na.rm not currently passed in due to ARROW-13691 agg_funcs$sd <- function(x, na.rm = FALSE, ddof = 1) { list( fun = "stddev", data = x, - options = list(ddof = ddof) + options = list(skip_nulls = na.rm, min_count = 0L, ddof = ddof) ) } -# na.rm not currently passed in due to ARROW-13691 agg_funcs$var <- function(x, na.rm = FALSE, ddof = 1) { list( fun = "variance", data = x, - options = list(ddof = ddof) + options = list(skip_nulls = na.rm, min_count = 0L, ddof = ddof) ) } diff --git a/r/src/compute.cpp b/r/src/compute.cpp index 446e011f548..0f08b41e85d 100644 --- a/r/src/compute.cpp +++ b/r/src/compute.cpp @@ -177,11 +177,11 @@ std::shared_ptr make_compute_options( func_name == "hash_all") { using Options = arrow::compute::ScalarAggregateOptions; auto out = std::make_shared(Options::Defaults()); - if (!Rf_isNull(options["na.min_count"])) { - out->min_count = cpp11::as_cpp(options["na.min_count"]); + if (!Rf_isNull(options["min_count"])) { + out->min_count = cpp11::as_cpp(options["min_count"]); } - if (!Rf_isNull(options["na.rm"])) { - out->skip_nulls = cpp11::as_cpp(options["na.rm"]); + if (!Rf_isNull(options["skip_nulls"])) { + out->skip_nulls = cpp11::as_cpp(options["skip_nulls"]); } return out; } @@ -225,11 +225,11 @@ std::shared_ptr make_compute_options( cpp11::as_cpp( interpolation); } - if (!Rf_isNull(options["na.min_count"])) { - out->min_count = cpp11::as_cpp(options["na.min_count"]); + if (!Rf_isNull(options["min_count"])) { + out->min_count = cpp11::as_cpp(options["min_count"]); } - if (!Rf_isNull(options["na.rm"])) { - out->skip_nulls = cpp11::as_cpp(options["na.rm"]); + if (!Rf_isNull(options["skip_nulls"])) { + out->skip_nulls = cpp11::as_cpp(options["skip_nulls"]); } return out; } @@ -392,8 +392,8 @@ std::shared_ptr make_compute_options( if (!Rf_isNull(options["min_count"])) { out->min_count = cpp11::as_cpp(options["min_count"]); } - if (!Rf_isNull(options["na.rm"])) { - out->skip_nulls = cpp11::as_cpp(options["na.rm"]); + if (!Rf_isNull(options["skip_nulls"])) { + out->skip_nulls = cpp11::as_cpp(options["skip_nulls"]); } return out; } diff --git a/r/tests/testthat/test-dplyr-summarize.R b/r/tests/testthat/test-dplyr-summarize.R index 78d36630e56..7235c27dacf 100644 --- a/r/tests/testthat/test-dplyr-summarize.R +++ b/r/tests/testthat/test-dplyr-summarize.R @@ -134,7 +134,6 @@ test_that("Group by sd on dataset", { tbl ) - skip("ARROW-13691 - na.rm not yet implemented for VarianceOptions") expect_dplyr_equal( input %>% group_by(some_grouping) %>% @@ -153,7 +152,6 @@ test_that("Group by var on dataset", { tbl ) - skip("ARROW-13691 - na.rm not yet implemented for VarianceOptions") expect_dplyr_equal( input %>% group_by(some_grouping) %>%