From f2bdeb1403482a1863348f1683635e6b83382849 Mon Sep 17 00:00:00 2001 From: Ian Cook Date: Fri, 17 Sep 2021 13:12:45 -0400 Subject: [PATCH 01/13] Add min() and max() bindings --- r/R/dplyr-functions.R | 24 ++++++++++++++++++++++-- r/src/compute.cpp | 9 +++++---- 2 files changed, 27 insertions(+), 6 deletions(-) diff --git a/r/R/dplyr-functions.R b/r/R/dplyr-functions.R index 808956efe15..0c28317a1bc 100644 --- a/r/R/dplyr-functions.R +++ b/r/R/dplyr-functions.R @@ -839,7 +839,6 @@ agg_funcs$var <- function(x, na.rm = FALSE, ddof = 1) { options = list(skip_nulls = na.rm, min_count = 0L, ddof = ddof) ) } - agg_funcs$n_distinct <- function(x, na.rm = FALSE) { list( fun = "count_distinct", @@ -847,7 +846,6 @@ agg_funcs$n_distinct <- function(x, na.rm = FALSE) { options = list(na.rm = na.rm) ) } - agg_funcs$n <- function() { list( fun = "sum", @@ -855,6 +853,28 @@ agg_funcs$n <- function() { options = list() ) } +agg_funcs$min <- function(..., na.rm = FALSE) { + args <- list2(...) + if (length(args) > 1) { + abort("Multiple arguments to min() not supported in Arrow") + } + list( + fun = "min", + data = args[[1]], + options = list(skip_nulls = na.rm, min_count = 0L) + ) +} +agg_funcs$max <- function(..., na.rm = FALSE) { + args <- list2(...) + if (length(args) > 1) { + abort("Multiple arguments to max() not supported in Arrow") + } + list( + fun = "max", + data = args[[1]], + options = list(skip_nulls = na.rm, min_count = 0L) + ) +} output_type <- function(fun, input_type) { # These are quick and dirty heuristics. diff --git a/r/src/compute.cpp b/r/src/compute.cpp index c6ba0a28046..a01c35be4ef 100644 --- a/r/src/compute.cpp +++ b/r/src/compute.cpp @@ -171,10 +171,11 @@ std::shared_ptr make_compute_options( return out; } - if (func_name == "min_max" || func_name == "sum" || func_name == "mean" || - func_name == "any" || func_name == "all" || func_name == "hash_min_max" || - func_name == "hash_sum" || func_name == "hash_mean" || func_name == "hash_any" || - func_name == "hash_all") { + if (func_name == "all" || func_name == "hash_all" || func_name == "any" || + func_name == "hash_any" || func_name == "mean" || func_name == "hash_mean" || + func_name == "min_max" || func_name == "hash_min_max" || func_name == "min" || + func_name == "hash_min" || func_name == "max" || func_name == "hash_max" || + func_name == "sum" || func_name == "hash_sum") { using Options = arrow::compute::ScalarAggregateOptions; auto out = std::make_shared(Options::Defaults()); if (!Rf_isNull(options["min_count"])) { From 88b5db436eb4909aa8dcc74b5a99013ab446ba8b Mon Sep 17 00:00:00 2001 From: Ian Cook Date: Fri, 17 Sep 2021 13:14:19 -0400 Subject: [PATCH 02/13] Add tests --- r/tests/testthat/test-dplyr-summarize.R | 96 ++++++++++++++++++++++--- 1 file changed, 88 insertions(+), 8 deletions(-) diff --git a/r/tests/testthat/test-dplyr-summarize.R b/r/tests/testthat/test-dplyr-summarize.R index 12bb50fb3d5..923c9441929 100644 --- a/r/tests/testthat/test-dplyr-summarize.R +++ b/r/tests/testthat/test-dplyr-summarize.R @@ -30,25 +30,105 @@ tbl$verses <- verses[[1]] tbl$padded_strings <- stringr::str_pad(letters[1:10], width = 2 * (1:10) + 1, side = "both") tbl$some_grouping <- rep(c(1, 2), 5) -test_that("summarize", { +test_that("summarize() with min() and max()", { expect_dplyr_equal( input %>% select(int, chr) %>% - filter(int > 5) %>% - summarize(min_int = min(int)) %>% + filter(int > 5) %>% # this filters out the NAs in `int` + summarize(min_int = min(int), max_int = max(int)) %>% collect(), tbl, - warning = TRUE ) - expect_dplyr_equal( input %>% select(int, chr) %>% - filter(int > 5) %>% - summarize(min_int = min(int) / 2) %>% + filter(int > 5) %>% # this filters out the NAs in `int` + summarize( + min_int = min(int + 4) / 2, + max_int = 3 / max(42 - int) + ) %>% + collect(), + tbl, + ) + expect_dplyr_equal( + input %>% + select(int, chr) %>% + summarize(min_int = min(int), max_int = max(int)) %>% + collect(), + tbl, + ) + expect_dplyr_equal( + input %>% + select(int) %>% + summarize( + min_int = min(int, na.rm = TRUE), + max_int = max(int, na.rm = TRUE) + ) %>% + collect(), + tbl, + ) + expect_dplyr_equal( + input %>% + select(dbl, int) %>% + summarize( + min_int = -min(log(ceiling(dbl)), na.rm = TRUE), + max_int = log(max(as.double(int), na.rm = TRUE)) + ) %>% + collect(), + tbl, + ) + + # multiple dots arguments to min(), max() not supported + expect_dplyr_equal( + input %>% + summarize(min_mult = min(dbl, int)) %>% + collect(), + tbl, + warning = "Multiple arguments to min\\(\\) not supported in Arrow" + ) + expect_dplyr_equal( + input %>% + select(int, dbl, dbl2) %>% + summarize(max_mult = max(int, dbl, dbl2)) %>% + collect(), + tbl, + warning = "Multiple arguments to max\\(\\) not supported in Arrow" + ) + + # min(logical) or max(logical) yields integer in R + # min(Boolean) or max(Boolean) yields Boolean in Arrow + expect_dplyr_equal( + input %>% + select(lgl) %>% + summarize( + max_lgl = as.logical(max(lgl, na.rm = TRUE)), + min_lgl = as.logical(min(lgl, na.rm = TRUE)) + ) %>% + collect(), + tbl, + ) +}) + +test_that("min() and max() on character strings", { + expect_dplyr_equal( + input %>% + summarize( + min_chr = min(chr, na.rm = TRUE), + max_chr = max(chr, na.rm = TRUE) + ) %>% + collect(), + tbl, + ) + skip("Strings not supported by hash_min_max (ARROW-13988)") + expect_dplyr_equal( + input %>% + group_by(fct) %>% + summarize( + min_chr = min(chr, na.rm = TRUE), + max_chr = max(chr, na.rm = TRUE) + ) %>% collect(), tbl, - warning = TRUE ) }) From 6a1261ad50991d5d83efb6f3208aa249dbc614a3 Mon Sep 17 00:00:00 2001 From: Ian Cook Date: Fri, 17 Sep 2021 13:39:43 -0400 Subject: [PATCH 03/13] Fix failing tests --- r/tests/testthat/test-dataset.R | 15 ++++++--------- r/tests/testthat/test-dplyr-group-by.R | 8 ++++---- 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/r/tests/testthat/test-dataset.R b/r/tests/testthat/test-dataset.R index 14157545d61..5bcf83ffaf8 100644 --- a/r/tests/testthat/test-dataset.R +++ b/r/tests/testthat/test-dataset.R @@ -623,15 +623,12 @@ test_that("Creating UnionDataset", { test_that("map_batches", { skip_if_not_available("parquet") ds <- open_dataset(dataset_dir, partitioning = "part") - expect_warning( - expect_equivalent( - ds %>% - filter(int > 5) %>% - select(int, lgl) %>% - map_batches(~ summarize(., min_int = min(int))), - tibble(min_int = c(6L, 101L)) - ), - "pulling data into R" # ARROW-13502 + expect_equivalent( + ds %>% + filter(int > 5) %>% + select(int, lgl) %>% + map_batches(~ summarize(., min_int = min(int))), + tibble(min_int = c(6L, 101L)) ) }) diff --git a/r/tests/testthat/test-dplyr-group-by.R b/r/tests/testthat/test-dplyr-group-by.R index 18be2a9304a..3f20debe6cf 100644 --- a/r/tests/testthat/test-dplyr-group-by.R +++ b/r/tests/testthat/test-dplyr-group-by.R @@ -28,9 +28,9 @@ test_that("group_by groupings are recorded", { group_by(chr) %>% select(int, chr) %>% filter(int > 5) %>% + collect() %>% summarize(min_int = min(int)), - tbl, - warning = TRUE + tbl ) }) @@ -62,9 +62,9 @@ test_that("ungroup", { select(int, chr) %>% ungroup() %>% filter(int > 5) %>% + collect() %>% summarize(min_int = min(int)), - tbl, - warning = TRUE + tbl ) }) From bf0c70599b964965a90655e4d0af253044fd292f Mon Sep 17 00:00:00 2001 From: Ian Cook Date: Fri, 17 Sep 2021 16:18:35 -0400 Subject: [PATCH 04/13] Skip failing map_batches() test (ARROW-14029) --- r/tests/testthat/test-dataset.R | 1 + 1 file changed, 1 insertion(+) diff --git a/r/tests/testthat/test-dataset.R b/r/tests/testthat/test-dataset.R index 5bcf83ffaf8..837bf8048c5 100644 --- a/r/tests/testthat/test-dataset.R +++ b/r/tests/testthat/test-dataset.R @@ -621,6 +621,7 @@ test_that("Creating UnionDataset", { }) test_that("map_batches", { + skip("map_batches() is broken (ARROW-14029)") skip_if_not_available("parquet") ds <- open_dataset(dataset_dir, partitioning = "part") expect_equivalent( From 6ee92006c1bd6be6f124f58319b86970403d8eb8 Mon Sep 17 00:00:00 2001 From: Ian Cook Date: Sat, 18 Sep 2021 13:50:40 -0400 Subject: [PATCH 05/13] Move test --- r/tests/testthat/test-dplyr-summarize.R | 204 ++++++++++++------------ 1 file changed, 102 insertions(+), 102 deletions(-) diff --git a/r/tests/testthat/test-dplyr-summarize.R b/r/tests/testthat/test-dplyr-summarize.R index 923c9441929..3fb4179d978 100644 --- a/r/tests/testthat/test-dplyr-summarize.R +++ b/r/tests/testthat/test-dplyr-summarize.R @@ -30,108 +30,6 @@ tbl$verses <- verses[[1]] tbl$padded_strings <- stringr::str_pad(letters[1:10], width = 2 * (1:10) + 1, side = "both") tbl$some_grouping <- rep(c(1, 2), 5) -test_that("summarize() with min() and max()", { - expect_dplyr_equal( - input %>% - select(int, chr) %>% - filter(int > 5) %>% # this filters out the NAs in `int` - summarize(min_int = min(int), max_int = max(int)) %>% - collect(), - tbl, - ) - expect_dplyr_equal( - input %>% - select(int, chr) %>% - filter(int > 5) %>% # this filters out the NAs in `int` - summarize( - min_int = min(int + 4) / 2, - max_int = 3 / max(42 - int) - ) %>% - collect(), - tbl, - ) - expect_dplyr_equal( - input %>% - select(int, chr) %>% - summarize(min_int = min(int), max_int = max(int)) %>% - collect(), - tbl, - ) - expect_dplyr_equal( - input %>% - select(int) %>% - summarize( - min_int = min(int, na.rm = TRUE), - max_int = max(int, na.rm = TRUE) - ) %>% - collect(), - tbl, - ) - expect_dplyr_equal( - input %>% - select(dbl, int) %>% - summarize( - min_int = -min(log(ceiling(dbl)), na.rm = TRUE), - max_int = log(max(as.double(int), na.rm = TRUE)) - ) %>% - collect(), - tbl, - ) - - # multiple dots arguments to min(), max() not supported - expect_dplyr_equal( - input %>% - summarize(min_mult = min(dbl, int)) %>% - collect(), - tbl, - warning = "Multiple arguments to min\\(\\) not supported in Arrow" - ) - expect_dplyr_equal( - input %>% - select(int, dbl, dbl2) %>% - summarize(max_mult = max(int, dbl, dbl2)) %>% - collect(), - tbl, - warning = "Multiple arguments to max\\(\\) not supported in Arrow" - ) - - # min(logical) or max(logical) yields integer in R - # min(Boolean) or max(Boolean) yields Boolean in Arrow - expect_dplyr_equal( - input %>% - select(lgl) %>% - summarize( - max_lgl = as.logical(max(lgl, na.rm = TRUE)), - min_lgl = as.logical(min(lgl, na.rm = TRUE)) - ) %>% - collect(), - tbl, - ) -}) - -test_that("min() and max() on character strings", { - expect_dplyr_equal( - input %>% - summarize( - min_chr = min(chr, na.rm = TRUE), - max_chr = max(chr, na.rm = TRUE) - ) %>% - collect(), - tbl, - ) - skip("Strings not supported by hash_min_max (ARROW-13988)") - expect_dplyr_equal( - input %>% - group_by(fct) %>% - summarize( - min_chr = min(chr, na.rm = TRUE), - max_chr = max(chr, na.rm = TRUE) - ) %>% - collect(), - tbl, - ) -}) - test_that("summarize() doesn't evaluate eagerly", { expect_s3_class( Table$create(tbl) %>% @@ -331,6 +229,108 @@ test_that("Group by n_distinct() on dataset", { ) }) +test_that("summarize() with min() and max()", { + expect_dplyr_equal( + input %>% + select(int, chr) %>% + filter(int > 5) %>% # this filters out the NAs in `int` + summarize(min_int = min(int), max_int = max(int)) %>% + collect(), + tbl, + ) + expect_dplyr_equal( + input %>% + select(int, chr) %>% + filter(int > 5) %>% # this filters out the NAs in `int` + summarize( + min_int = min(int + 4) / 2, + max_int = 3 / max(42 - int) + ) %>% + collect(), + tbl, + ) + expect_dplyr_equal( + input %>% + select(int, chr) %>% + summarize(min_int = min(int), max_int = max(int)) %>% + collect(), + tbl, + ) + expect_dplyr_equal( + input %>% + select(int) %>% + summarize( + min_int = min(int, na.rm = TRUE), + max_int = max(int, na.rm = TRUE) + ) %>% + collect(), + tbl, + ) + expect_dplyr_equal( + input %>% + select(dbl, int) %>% + summarize( + min_int = -min(log(ceiling(dbl)), na.rm = TRUE), + max_int = log(max(as.double(int), na.rm = TRUE)) + ) %>% + collect(), + tbl, + ) + + # multiple dots arguments to min(), max() not supported + expect_dplyr_equal( + input %>% + summarize(min_mult = min(dbl, int)) %>% + collect(), + tbl, + warning = "Multiple arguments to min\\(\\) not supported in Arrow" + ) + expect_dplyr_equal( + input %>% + select(int, dbl, dbl2) %>% + summarize(max_mult = max(int, dbl, dbl2)) %>% + collect(), + tbl, + warning = "Multiple arguments to max\\(\\) not supported in Arrow" + ) + + # min(logical) or max(logical) yields integer in R + # min(Boolean) or max(Boolean) yields Boolean in Arrow + expect_dplyr_equal( + input %>% + select(lgl) %>% + summarize( + max_lgl = as.logical(max(lgl, na.rm = TRUE)), + min_lgl = as.logical(min(lgl, na.rm = TRUE)) + ) %>% + collect(), + tbl, + ) +}) + +test_that("min() and max() on character strings", { + expect_dplyr_equal( + input %>% + summarize( + min_chr = min(chr, na.rm = TRUE), + max_chr = max(chr, na.rm = TRUE) + ) %>% + collect(), + tbl, + ) + skip("Strings not supported by hash_min_max (ARROW-13988)") + expect_dplyr_equal( + input %>% + group_by(fct) %>% + summarize( + min_chr = min(chr, na.rm = TRUE), + max_chr = max(chr, na.rm = TRUE) + ) %>% + collect(), + tbl, + ) +}) + test_that("Filter and aggregate", { expect_dplyr_equal( input %>% From c7ed91d7c5d6dacbc4ad391ce61a784b124c9b61 Mon Sep 17 00:00:00 2001 From: Ian Cook Date: Sat, 18 Sep 2021 13:52:06 -0400 Subject: [PATCH 06/13] Add test of summarise(fun(!!sym(string_column_name))) --- r/tests/testthat/test-dplyr-summarize.R | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/r/tests/testthat/test-dplyr-summarize.R b/r/tests/testthat/test-dplyr-summarize.R index 3fb4179d978..9c3047d1fa7 100644 --- a/r/tests/testthat/test-dplyr-summarize.R +++ b/r/tests/testthat/test-dplyr-summarize.R @@ -331,6 +331,29 @@ test_that("min() and max() on character strings", { ) }) +test_that("summarise() with !!sym()", { + test_chr_col <- "int" + test_dbl_col <- "dbl" + test_lgl_col <- "lgl" + expect_dplyr_equal( + input %>% + group_by(false) %>% + summarise( + sum = sum(!!sym(test_dbl_col)), + any = any(!!sym(test_lgl_col)), + all = all(!!sym(test_lgl_col)), + mean = mean(!!sym(test_dbl_col)), + sd = sd(!!sym(test_dbl_col)), + var = var(!!sym(test_dbl_col)), + n_distinct = n_distinct(!!sym(test_chr_col)), + min = min(!!sym(test_dbl_col)), + max = max(!!sym(test_dbl_col)) + ) %>% + collect(), + tbl + ) +}) + test_that("Filter and aggregate", { expect_dplyr_equal( input %>% From 1a57209b5c01240f70311f3871fb285cf5373131 Mon Sep 17 00:00:00 2001 From: Ian Cook Date: Mon, 20 Sep 2021 10:25:36 -0400 Subject: [PATCH 07/13] Use arrow_not_supported() instead of abort() Co-authored-by: Nic --- r/R/dplyr-functions.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/R/dplyr-functions.R b/r/R/dplyr-functions.R index 0c28317a1bc..c4a1a28b099 100644 --- a/r/R/dplyr-functions.R +++ b/r/R/dplyr-functions.R @@ -856,7 +856,7 @@ agg_funcs$n <- function() { agg_funcs$min <- function(..., na.rm = FALSE) { args <- list2(...) if (length(args) > 1) { - abort("Multiple arguments to min() not supported in Arrow") + arrow_not_supported("Multiple arguments to min()") } list( fun = "min", From 1d3f49a8a4f8d28560b277e629ae05852371f7ff Mon Sep 17 00:00:00 2001 From: Ian Cook Date: Mon, 20 Sep 2021 10:25:42 -0400 Subject: [PATCH 08/13] Use arrow_not_supported() instead of abort() Co-authored-by: Nic --- r/tests/testthat/test-dplyr-group-by.R | 1 + 1 file changed, 1 insertion(+) diff --git a/r/tests/testthat/test-dplyr-group-by.R b/r/tests/testthat/test-dplyr-group-by.R index 3f20debe6cf..e66d43c509f 100644 --- a/r/tests/testthat/test-dplyr-group-by.R +++ b/r/tests/testthat/test-dplyr-group-by.R @@ -62,6 +62,7 @@ test_that("ungroup", { select(int, chr) %>% ungroup() %>% filter(int > 5) %>% + # TODO: ARROW-13550 - remove summarize() from here (expect_dplyr_equal will check attributes) collect() %>% summarize(min_int = min(int)), tbl From 4cf81f9236747510696ce7dfed97bd9f7ea32fec Mon Sep 17 00:00:00 2001 From: Ian Cook Date: Mon, 20 Sep 2021 10:25:49 -0400 Subject: [PATCH 09/13] Use arrow_not_supported() instead of abort() Co-authored-by: Nic --- r/R/dplyr-functions.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/R/dplyr-functions.R b/r/R/dplyr-functions.R index c4a1a28b099..e9d0d17f730 100644 --- a/r/R/dplyr-functions.R +++ b/r/R/dplyr-functions.R @@ -867,7 +867,7 @@ agg_funcs$min <- function(..., na.rm = FALSE) { agg_funcs$max <- function(..., na.rm = FALSE) { args <- list2(...) if (length(args) > 1) { - abort("Multiple arguments to max() not supported in Arrow") + arrow_not_supported("Multiple arguments to max()") } list( fun = "max", From 3da00f52de8bff5057ef2e78caefb01d6b0a6848 Mon Sep 17 00:00:00 2001 From: Ian Cook Date: Mon, 20 Sep 2021 16:35:09 -0400 Subject: [PATCH 10/13] Modify test for ARROW-13550 --- r/tests/testthat/test-dplyr-group-by.R | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/r/tests/testthat/test-dplyr-group-by.R b/r/tests/testthat/test-dplyr-group-by.R index e66d43c509f..66559c95ef2 100644 --- a/r/tests/testthat/test-dplyr-group-by.R +++ b/r/tests/testthat/test-dplyr-group-by.R @@ -62,11 +62,24 @@ test_that("ungroup", { select(int, chr) %>% ungroup() %>% filter(int > 5) %>% - # TODO: ARROW-13550 - remove summarize() from here (expect_dplyr_equal will check attributes) - collect() %>% - summarize(min_int = min(int)), + collect(), tbl ) + + # to confirm that the above expectation is actually testing what we think it's + # testing, verify that expect_dplyr_equal() distinguishes between grouped and + # ungrouped tibbles + expect_error( + expect_dplyr_equal( + input %>% + group_by(chr) %>% + select(int, chr) %>% + (function(x) if (inherits(x, "tbl_df")) ungroup(x) else x) %>% + filter(int > 5) %>% + collect(), + tbl + ) + ) }) test_that("group_by then rename", { From f041b826b66909d08dc2640262499d59920c1c60 Mon Sep 17 00:00:00 2001 From: Ian Cook Date: Mon, 20 Sep 2021 16:57:44 -0400 Subject: [PATCH 11/13] Fix expectation --- r/tests/testthat/test-dplyr-summarize.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/tests/testthat/test-dplyr-summarize.R b/r/tests/testthat/test-dplyr-summarize.R index 9c3047d1fa7..c5e072447bc 100644 --- a/r/tests/testthat/test-dplyr-summarize.R +++ b/r/tests/testthat/test-dplyr-summarize.R @@ -283,7 +283,7 @@ test_that("summarize() with min() and max()", { summarize(min_mult = min(dbl, int)) %>% collect(), tbl, - warning = "Multiple arguments to min\\(\\) not supported in Arrow" + warning = "Multiple arguments to min\\(\\) not supported by Arrow" ) expect_dplyr_equal( input %>% From 72c77b87d0d4db3097c50b9b449b8af00d69ec59 Mon Sep 17 00:00:00 2001 From: Ian Cook Date: Mon, 20 Sep 2021 16:59:05 -0400 Subject: [PATCH 12/13] Fix expectation --- r/tests/testthat/test-dplyr-summarize.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/tests/testthat/test-dplyr-summarize.R b/r/tests/testthat/test-dplyr-summarize.R index c5e072447bc..c74ed6aa938 100644 --- a/r/tests/testthat/test-dplyr-summarize.R +++ b/r/tests/testthat/test-dplyr-summarize.R @@ -291,7 +291,7 @@ test_that("summarize() with min() and max()", { summarize(max_mult = max(int, dbl, dbl2)) %>% collect(), tbl, - warning = "Multiple arguments to max\\(\\) not supported in Arrow" + warning = "Multiple arguments to max\\(\\) not supported by Arrow" ) # min(logical) or max(logical) yields integer in R From 98e168ce9cf3c31128fb9aa71a1741870e1fa497 Mon Sep 17 00:00:00 2001 From: Ian Cook Date: Mon, 20 Sep 2021 20:02:39 -0400 Subject: [PATCH 13/13] Modify test for ARROW-13550 Co-authored-by: Nic --- r/tests/testthat/test-dplyr-group-by.R | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/r/tests/testthat/test-dplyr-group-by.R b/r/tests/testthat/test-dplyr-group-by.R index 66559c95ef2..d6abb20c01c 100644 --- a/r/tests/testthat/test-dplyr-group-by.R +++ b/r/tests/testthat/test-dplyr-group-by.R @@ -28,8 +28,7 @@ test_that("group_by groupings are recorded", { group_by(chr) %>% select(int, chr) %>% filter(int > 5) %>% - collect() %>% - summarize(min_int = min(int)), + collect(), tbl ) })