From 661cdc9bc699719d847fbf7ea334030311ba5e88 Mon Sep 17 00:00:00 2001 From: SHIMA Tatsuya Date: Sat, 1 Oct 2022 05:08:28 +0000 Subject: [PATCH 01/11] fix to return a Table Signed-off-by: SHIMA Tatsuya --- r/R/dplyr-collect.R | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/r/R/dplyr-collect.R b/r/R/dplyr-collect.R index 8049e46eb5d..f4e101744fb 100644 --- a/r/R/dplyr-collect.R +++ b/r/R/dplyr-collect.R @@ -70,9 +70,8 @@ restore_dplyr_features <- function(df, query) { ) } else { # This is a Table, via compute() or collect(as_data_frame = FALSE) - df <- as_adq(df) - df$group_by_vars <- query$group_by_vars - df$drop_empty_groups <- query$drop_empty_groups + df <- as_arrow_table(df) + df$metadata$r$attributes$.group_vars <- query$group_by_vars } } df From 4481819976b9026deec738fba863d93355093d12 Mon Sep 17 00:00:00 2001 From: SHIMA Tatsuya Date: Sat, 1 Oct 2022 05:08:28 +0000 Subject: [PATCH 02/11] collapse should return arrow dplyr query Signed-off-by: SHIMA Tatsuya --- r/R/dplyr-collect.R | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/r/R/dplyr-collect.R b/r/R/dplyr-collect.R index f4e101744fb..ab18787883c 100644 --- a/r/R/dplyr-collect.R +++ b/r/R/dplyr-collect.R @@ -81,7 +81,10 @@ collapse.arrow_dplyr_query <- function(x, ...) { # Figure out what schema will result from the query x$schema <- implicit_schema(x) # Nest inside a new arrow_dplyr_query (and keep groups) - restore_dplyr_features(arrow_dplyr_query(x), x) + out <- arrow_dplyr_query(x) + out$group_by_vars <- x$group_by_vars + out$drop_empty_groups <- x$drop_empty_groups + out } collapse.Dataset <- collapse.ArrowTabular <- collapse.RecordBatchReader <- function(x, ...) { arrow_dplyr_query(x) From 4b5f0c1a70abacca059d21674cb9b22a6a021896 Mon Sep 17 00:00:00 2001 From: SHIMA Tatsuya Date: Sat, 1 Oct 2022 05:08:28 +0000 Subject: [PATCH 03/11] add tests for compute Signed-off-by: SHIMA Tatsuya --- r/tests/testthat/test-dplyr-collapse.R | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/r/tests/testthat/test-dplyr-collapse.R b/r/tests/testthat/test-dplyr-collapse.R index 1809cb6e388..424d382a079 100644 --- a/r/tests/testthat/test-dplyr-collapse.R +++ b/r/tests/testthat/test-dplyr-collapse.R @@ -276,3 +276,17 @@ test_that("collapse doesn't unnecessarily add ProjectNodes", { ) expect_length(grep("ProjectNode", plan), 1) }) + +test_that("compute", { + tab1 <- tbl %>% + arrow_table() %>% + group_by(int) %>% + compute() + expect_true(inherits(tab1, "Table")) + expect_equal( + as.data.frame(tab1), + tbl %>% + group_by(int) %>% + collect() + ) +}) From 93f2f31fb9b5ccedf39a2e6b91b4f3f6badaac58 Mon Sep 17 00:00:00 2001 From: SHIMA Tatsuya Date: Sat, 1 Oct 2022 05:08:28 +0000 Subject: [PATCH 04/11] rename file to match name of file to be tested Signed-off-by: SHIMA Tatsuya --- r/tests/testthat/{test-dplyr-collapse.R => test-dplyr-collect.R} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename r/tests/testthat/{test-dplyr-collapse.R => test-dplyr-collect.R} (100%) diff --git a/r/tests/testthat/test-dplyr-collapse.R b/r/tests/testthat/test-dplyr-collect.R similarity index 100% rename from r/tests/testthat/test-dplyr-collapse.R rename to r/tests/testthat/test-dplyr-collect.R From dc0ef4e6126e37e22e00d9aed5799b945801238d Mon Sep 17 00:00:00 2001 From: SHIMA Tatsuya Date: Sat, 1 Oct 2022 05:08:29 +0000 Subject: [PATCH 05/11] fix tests Signed-off-by: SHIMA Tatsuya --- r/tests/testthat/test-dataset-dplyr.R | 10 +++------- r/tests/testthat/test-dplyr-collect.R | 2 +- r/tests/testthat/test-dplyr-query.R | 4 ++-- 3 files changed, 6 insertions(+), 10 deletions(-) diff --git a/r/tests/testthat/test-dataset-dplyr.R b/r/tests/testthat/test-dataset-dplyr.R index f3ec858c6ce..04b1a460dc4 100644 --- a/r/tests/testthat/test-dataset-dplyr.R +++ b/r/tests/testthat/test-dataset-dplyr.R @@ -284,13 +284,9 @@ test_that("compute()/collect(as_data_frame=FALSE)", { group_by(fct) %>% compute() - # the group_by() prevents compute() from returning a Table... - expect_s3_class(tab5, "arrow_dplyr_query") - - # ... but $.data is a Table... - expect_r6_class(tab5$.data, "Table") - # ... and the mutate() was evaluated - expect_true("negint" %in% names(tab5$.data)) + expect_r6_class(tab5, "Table") + # mutate() was evaluated + expect_true("negint" %in% names(tab5)) }) test_that("head/tail on query on dataset", { diff --git a/r/tests/testthat/test-dplyr-collect.R b/r/tests/testthat/test-dplyr-collect.R index 424d382a079..78d542a8ea1 100644 --- a/r/tests/testthat/test-dplyr-collect.R +++ b/r/tests/testthat/test-dplyr-collect.R @@ -282,7 +282,7 @@ test_that("compute", { arrow_table() %>% group_by(int) %>% compute() - expect_true(inherits(tab1, "Table")) + expect_r6_class(tab1, "Table") expect_equal( as.data.frame(tab1), tbl %>% diff --git a/r/tests/testthat/test-dplyr-query.R b/r/tests/testthat/test-dplyr-query.R index b08016f2170..25d01ea3d50 100644 --- a/r/tests/testthat/test-dplyr-query.R +++ b/r/tests/testthat/test-dplyr-query.R @@ -119,7 +119,7 @@ test_that("collect(as_data_frame=FALSE)", { filter(int > 5) %>% group_by(int) %>% collect(as_data_frame = FALSE) - expect_s3_class(b4, "arrow_dplyr_query") + expect_r6_class(b4, "Table") expect_equal( as.data.frame(b4), expected %>% @@ -156,7 +156,7 @@ test_that("compute()", { filter(int > 5) %>% group_by(int) %>% compute() - expect_s3_class(b4, "arrow_dplyr_query") + expect_r6_class(b4, "Table") expect_equal( as.data.frame(b4), expected %>% From b2fbb1559b1eeafe0cdefe7100aa7fc680600118 Mon Sep 17 00:00:00 2001 From: SHIMA Tatsuya Date: Sat, 1 Oct 2022 05:08:29 +0000 Subject: [PATCH 06/11] remove unused line Signed-off-by: SHIMA Tatsuya --- r/tests/testthat/test-dplyr-collect.R | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/r/tests/testthat/test-dplyr-collect.R b/r/tests/testthat/test-dplyr-collect.R index 78d542a8ea1..63d15970df6 100644 --- a/r/tests/testthat/test-dplyr-collect.R +++ b/r/tests/testthat/test-dplyr-collect.R @@ -286,7 +286,6 @@ test_that("compute", { expect_equal( as.data.frame(tab1), tbl %>% - group_by(int) %>% - collect() + group_by(int) ) }) From 6231c174d4d16f1269afaea565c15c1452b5e005 Mon Sep 17 00:00:00 2001 From: SHIMA Tatsuya Date: Sat, 1 Oct 2022 05:08:29 +0000 Subject: [PATCH 07/11] remove unused line Signed-off-by: SHIMA Tatsuya --- r/R/dplyr-collect.R | 1 - 1 file changed, 1 deletion(-) diff --git a/r/R/dplyr-collect.R b/r/R/dplyr-collect.R index ab18787883c..22d17099cad 100644 --- a/r/R/dplyr-collect.R +++ b/r/R/dplyr-collect.R @@ -70,7 +70,6 @@ restore_dplyr_features <- function(df, query) { ) } else { # This is a Table, via compute() or collect(as_data_frame = FALSE) - df <- as_arrow_table(df) df$metadata$r$attributes$.group_vars <- query$group_by_vars } } From 784f4832907f62a5eb17d7dab311f9d489e1bb95 Mon Sep 17 00:00:00 2001 From: SHIMA Tatsuya Date: Sat, 1 Oct 2022 05:08:29 +0000 Subject: [PATCH 08/11] Revert "rename file to match name of file to be tested" This reverts commit 83eafbeec3b3879df5a0280bf37ba3500944f07f. --- r/tests/testthat/{test-dplyr-collect.R => test-dplyr-collapse.R} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename r/tests/testthat/{test-dplyr-collect.R => test-dplyr-collapse.R} (100%) diff --git a/r/tests/testthat/test-dplyr-collect.R b/r/tests/testthat/test-dplyr-collapse.R similarity index 100% rename from r/tests/testthat/test-dplyr-collect.R rename to r/tests/testthat/test-dplyr-collapse.R From 42c14987d962e8722d9d6c41b285c7615342bd76 Mon Sep 17 00:00:00 2001 From: SHIMA Tatsuya Date: Sat, 1 Oct 2022 05:08:29 +0000 Subject: [PATCH 09/11] move test to the other file and rename the test case Signed-off-by: SHIMA Tatsuya --- r/tests/testthat/test-dplyr-collapse.R | 13 ------------- r/tests/testthat/test-dplyr-query.R | 13 +++++++++++++ 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/r/tests/testthat/test-dplyr-collapse.R b/r/tests/testthat/test-dplyr-collapse.R index 63d15970df6..1809cb6e388 100644 --- a/r/tests/testthat/test-dplyr-collapse.R +++ b/r/tests/testthat/test-dplyr-collapse.R @@ -276,16 +276,3 @@ test_that("collapse doesn't unnecessarily add ProjectNodes", { ) expect_length(grep("ProjectNode", plan), 1) }) - -test_that("compute", { - tab1 <- tbl %>% - arrow_table() %>% - group_by(int) %>% - compute() - expect_r6_class(tab1, "Table") - expect_equal( - as.data.frame(tab1), - tbl %>% - group_by(int) - ) -}) diff --git a/r/tests/testthat/test-dplyr-query.R b/r/tests/testthat/test-dplyr-query.R index 25d01ea3d50..c15ac5199a2 100644 --- a/r/tests/testthat/test-dplyr-query.R +++ b/r/tests/testthat/test-dplyr-query.R @@ -579,3 +579,16 @@ test_that("needs_projection unit tests", { tab %>% relocate(lgl) )) }) + +test_that("compute() on a grouped query returns a Table with groups in metadata", { + tab1 <- tbl %>% + arrow_table() %>% + group_by(int) %>% + compute() + expect_r6_class(tab1, "Table") + expect_equal( + as.data.frame(tab1), + tbl %>% + group_by(int) + ) +}) From 3ae0ca0a732f1057faacd2cdc55296e568eb9da5 Mon Sep 17 00:00:00 2001 From: SHIMA Tatsuya Date: Thu, 6 Oct 2022 10:48:55 +0000 Subject: [PATCH 10/11] add tests for compute and collect Signed-off-by: SHIMA Tatsuya --- r/tests/testthat/test-dplyr-query.R | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/r/tests/testthat/test-dplyr-query.R b/r/tests/testthat/test-dplyr-query.R index c15ac5199a2..0d44613f847 100644 --- a/r/tests/testthat/test-dplyr-query.R +++ b/r/tests/testthat/test-dplyr-query.R @@ -592,3 +592,25 @@ test_that("compute() on a grouped query returns a Table with groups in metadata" group_by(int) ) }) + +test_that("collect() is identical to compute() %>% collect()", { + tab1 <- tbl %>% + arrow_table() + adq1 <- tab1 %>% + group_by(int) + + expect_equal( + tab1 %>% + compute() %>% + collect(), + tab1 %>% + collect() + ) + expect_equal( + adq1 %>% + compute() %>% + collect(), + adq1 %>% + collect() + ) +}) From 47ec68dbf03c8d78580c83ca4eb6d4f6db9ac395 Mon Sep 17 00:00:00 2001 From: SHIMA Tatsuya Date: Thu, 6 Oct 2022 10:56:02 +0000 Subject: [PATCH 11/11] more test Signed-off-by: SHIMA Tatsuya --- r/tests/testthat/test-dplyr-query.R | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/r/tests/testthat/test-dplyr-query.R b/r/tests/testthat/test-dplyr-query.R index 0d44613f847..c40815df69d 100644 --- a/r/tests/testthat/test-dplyr-query.R +++ b/r/tests/testthat/test-dplyr-query.R @@ -591,6 +591,11 @@ test_that("compute() on a grouped query returns a Table with groups in metadata" tbl %>% group_by(int) ) + expect_equal( + collect(tab1), + tbl %>% + group_by(int) + ) }) test_that("collect() is identical to compute() %>% collect()", {