From 66590805c91259bf4abc955d274b3239a1f1ba9d Mon Sep 17 00:00:00 2001
From: SHIMA Tatsuya <ts1s1andn@gmail.com>
Date: Sat, 1 Oct 2022 05:08:28 +0000
Subject: [PATCH 01/22] add tests for compute

Signed-off-by: SHIMA Tatsuya <ts1s1andn@gmail.com>
---
 r/tests/testthat/test-dplyr-collapse.R | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/r/tests/testthat/test-dplyr-collapse.R b/r/tests/testthat/test-dplyr-collapse.R
index 1809cb6e388..424d382a079 100644
--- a/r/tests/testthat/test-dplyr-collapse.R
+++ b/r/tests/testthat/test-dplyr-collapse.R
@@ -276,3 +276,17 @@ test_that("collapse doesn't unnecessarily add ProjectNodes", {
   )
   expect_length(grep("ProjectNode", plan), 1)
 })
+
+test_that("compute", {
+  tab1 <- tbl %>%
+      arrow_table() %>%
+      group_by(int) %>%
+      compute()
+  expect_true(inherits(tab1, "Table"))
+  expect_equal(
+    as.data.frame(tab1),
+    tbl %>%
+      group_by(int) %>%
+      collect()
+  )
+})

From 55d803d121e604a4dda37bdf0a91fc0910f30246 Mon Sep 17 00:00:00 2001
From: SHIMA Tatsuya <ts1s1andn@gmail.com>
Date: Sat, 1 Oct 2022 05:08:28 +0000
Subject: [PATCH 02/22] rename file to match name of file to be tested

Signed-off-by: SHIMA Tatsuya <ts1s1andn@gmail.com>
---
 r/tests/testthat/{test-dplyr-collapse.R => test-dplyr-collect.R} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename r/tests/testthat/{test-dplyr-collapse.R => test-dplyr-collect.R} (100%)

diff --git a/r/tests/testthat/test-dplyr-collapse.R b/r/tests/testthat/test-dplyr-collect.R
similarity index 100%
rename from r/tests/testthat/test-dplyr-collapse.R
rename to r/tests/testthat/test-dplyr-collect.R

From cd8fa1ddf3bf0881bd97fbbc6b7eb1253fb076fe Mon Sep 17 00:00:00 2001
From: SHIMA Tatsuya <ts1s1andn@gmail.com>
Date: Sat, 1 Oct 2022 05:08:29 +0000
Subject: [PATCH 03/22] fix tests

Signed-off-by: SHIMA Tatsuya <ts1s1andn@gmail.com>
---
 r/tests/testthat/test-dplyr-collect.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/r/tests/testthat/test-dplyr-collect.R b/r/tests/testthat/test-dplyr-collect.R
index 424d382a079..78d542a8ea1 100644
--- a/r/tests/testthat/test-dplyr-collect.R
+++ b/r/tests/testthat/test-dplyr-collect.R
@@ -282,7 +282,7 @@ test_that("compute", {
       arrow_table() %>%
       group_by(int) %>%
       compute()
-  expect_true(inherits(tab1, "Table"))
+  expect_r6_class(tab1, "Table")
   expect_equal(
     as.data.frame(tab1),
     tbl %>%

From 9e1c73b65fb25e131eb8cfac4d93e919ba1430ab Mon Sep 17 00:00:00 2001
From: SHIMA Tatsuya <ts1s1andn@gmail.com>
Date: Sat, 1 Oct 2022 05:08:29 +0000
Subject: [PATCH 04/22] remove unused line

Signed-off-by: SHIMA Tatsuya <ts1s1andn@gmail.com>
---
 r/tests/testthat/test-dplyr-collect.R | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/r/tests/testthat/test-dplyr-collect.R b/r/tests/testthat/test-dplyr-collect.R
index 78d542a8ea1..63d15970df6 100644
--- a/r/tests/testthat/test-dplyr-collect.R
+++ b/r/tests/testthat/test-dplyr-collect.R
@@ -286,7 +286,6 @@ test_that("compute", {
   expect_equal(
     as.data.frame(tab1),
     tbl %>%
-      group_by(int) %>%
-      collect()
+      group_by(int)
   )
 })

From 71c170f1bf198c37e3eeda4c29a15190c4d5c3ef Mon Sep 17 00:00:00 2001
From: SHIMA Tatsuya <ts1s1andn@gmail.com>
Date: Sat, 1 Oct 2022 05:08:29 +0000
Subject: [PATCH 05/22] Revert "rename file to match name of file to be tested"

This reverts commit 83eafbeec3b3879df5a0280bf37ba3500944f07f.
---
 r/tests/testthat/{test-dplyr-collect.R => test-dplyr-collapse.R} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename r/tests/testthat/{test-dplyr-collect.R => test-dplyr-collapse.R} (100%)

diff --git a/r/tests/testthat/test-dplyr-collect.R b/r/tests/testthat/test-dplyr-collapse.R
similarity index 100%
rename from r/tests/testthat/test-dplyr-collect.R
rename to r/tests/testthat/test-dplyr-collapse.R

From fd4b15e06e8024459f9159260d8b4e36b3a16120 Mon Sep 17 00:00:00 2001
From: SHIMA Tatsuya <ts1s1andn@gmail.com>
Date: Sat, 1 Oct 2022 05:08:29 +0000
Subject: [PATCH 06/22] move test to the other file and rename the test case

Signed-off-by: SHIMA Tatsuya <ts1s1andn@gmail.com>
---
 r/tests/testthat/test-dplyr-collapse.R | 13 -------------
 1 file changed, 13 deletions(-)

diff --git a/r/tests/testthat/test-dplyr-collapse.R b/r/tests/testthat/test-dplyr-collapse.R
index 63d15970df6..1809cb6e388 100644
--- a/r/tests/testthat/test-dplyr-collapse.R
+++ b/r/tests/testthat/test-dplyr-collapse.R
@@ -276,16 +276,3 @@ test_that("collapse doesn't unnecessarily add ProjectNodes", {
   )
   expect_length(grep("ProjectNode", plan), 1)
 })
-
-test_that("compute", {
-  tab1 <- tbl %>%
-      arrow_table() %>%
-      group_by(int) %>%
-      compute()
-  expect_r6_class(tab1, "Table")
-  expect_equal(
-    as.data.frame(tab1),
-    tbl %>%
-      group_by(int)
-  )
-})

From 2fab6349af1795d8a82e255773bc168aa7624265 Mon Sep 17 00:00:00 2001
From: SHIMA Tatsuya <ts1s1andn@gmail.com>
Date: Thu, 6 Oct 2022 10:48:55 +0000
Subject: [PATCH 07/22] add tests for compute and collect

Signed-off-by: SHIMA Tatsuya <ts1s1andn@gmail.com>
---
 r/tests/testthat/test-dplyr-query.R | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/r/tests/testthat/test-dplyr-query.R b/r/tests/testthat/test-dplyr-query.R
index db9a3bb30d0..bd261874375 100644
--- a/r/tests/testthat/test-dplyr-query.R
+++ b/r/tests/testthat/test-dplyr-query.R
@@ -620,3 +620,25 @@ test_that("collect() is identical to compute() %>% collect()", {
       collect()
   )
 })
+
+test_that("collect() is identical to compute() %>% collect()", {
+  tab1 <- tbl %>%
+    arrow_table()
+  adq1 <- tab1 %>%
+    group_by(int)
+
+  expect_equal(
+    tab1 %>%
+      compute() %>%
+      collect(),
+    tab1 %>%
+      collect()
+  )
+  expect_equal(
+    adq1 %>%
+      compute() %>%
+      collect(),
+    adq1 %>%
+      collect()
+  )
+})

From 7830d2cfb2822d460ddd2117e382915c0fccfc20 Mon Sep 17 00:00:00 2001
From: SHIMA Tatsuya <ts1s1andn@gmail.com>
Date: Thu, 6 Oct 2022 10:56:02 +0000
Subject: [PATCH 08/22] more test

Signed-off-by: SHIMA Tatsuya <ts1s1andn@gmail.com>
---
 r/tests/testthat/test-dplyr-query.R | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/r/tests/testthat/test-dplyr-query.R b/r/tests/testthat/test-dplyr-query.R
index bd261874375..6a8cd851ca6 100644
--- a/r/tests/testthat/test-dplyr-query.R
+++ b/r/tests/testthat/test-dplyr-query.R
@@ -597,6 +597,11 @@ test_that("compute() on a grouped query returns a Table with groups in metadata"
     tbl %>%
       group_by(int)
   )
+  expect_equal(
+    collect(tab1),
+    tbl %>%
+      group_by(int)
+  )
 })
 
 test_that("collect() is identical to compute() %>% collect()", {

From dc19476a9a5ea639435aabf98cc6c1bce31f6a59 Mon Sep 17 00:00:00 2001
From: SHIMA Tatsuya <ts1s1andn@gmail.com>
Date: Fri, 7 Oct 2022 22:01:07 +0000
Subject: [PATCH 09/22] use NULL for empty group vars, and remove group vars
 metadata from .data

Signed-off-by: SHIMA Tatsuya <ts1s1andn@gmail.com>
---
 r/R/dplyr-group-by.R | 2 +-
 r/R/dplyr.R          | 6 +++++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/r/R/dplyr-group-by.R b/r/R/dplyr-group-by.R
index 57cf417c9ad..9524331cfbc 100644
--- a/r/R/dplyr-group-by.R
+++ b/r/R/dplyr-group-by.R
@@ -54,7 +54,7 @@ group_by.Dataset <- group_by.ArrowTabular <- group_by.RecordBatchReader <- group
 groups.arrow_dplyr_query <- function(x) syms(dplyr::group_vars(x))
 groups.Dataset <- groups.ArrowTabular <- groups.RecordBatchReader <- groups.arrow_dplyr_query
 
-group_vars.arrow_dplyr_query <- function(x) x$group_by_vars
+group_vars.arrow_dplyr_query <- function(x) x$group_by_vars %||% character()
 group_vars.Dataset <- function(x) character()
 group_vars.RecordBatchReader <- function(x) character()
 group_vars.ArrowTabular <- function(x) {
diff --git a/r/R/dplyr.R b/r/R/dplyr.R
index 1f529066dd3..c85b2714025 100644
--- a/r/R/dplyr.R
+++ b/r/R/dplyr.R
@@ -43,12 +43,16 @@ arrow_dplyr_query <- function(.data) {
     # If dplyr is not available, or if the input doesn't have a group_vars
     # method, assume no group vars
     dplyr::group_vars(.data),
-    error = function(e) character()
+    error = function(e) NULL
   )
 
   if (inherits(.data, "data.frame")) {
     .data <- Table$create(.data)
   }
+
+  # Remove group vars metadata from the Table
+  .data$metadata$r$attributes$.group_vars <- NULL
+
   # Evaluating expressions on a dataset with duplicated fieldnames will error
   dupes <- duplicated(names(.data))
   if (any(dupes)) {

From cae1f26bea4fe4d048de03eec9cf5b5fef5d502c Mon Sep 17 00:00:00 2001
From: SHIMA Tatsuya <ts1s1andn@gmail.com>
Date: Fri, 7 Oct 2022 22:01:07 +0000
Subject: [PATCH 10/22] restore group vars from arrow dplyr query to Table

Signed-off-by: SHIMA Tatsuya <ts1s1andn@gmail.com>
---
 r/R/dplyr-collect.R | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/r/R/dplyr-collect.R b/r/R/dplyr-collect.R
index deb26859e8b..6244b4f02b2 100644
--- a/r/R/dplyr-collect.R
+++ b/r/R/dplyr-collect.R
@@ -62,18 +62,18 @@ restore_dplyr_features <- function(df, query) {
   # An arrow_dplyr_query holds some attributes that Arrow doesn't know about
   # After calling collect(), make sure these features are carried over
 
-  if (length(query$group_by_vars) > 0) {
+  if (is.data.frame(df)) {
     # Preserve groupings, if present
-    if (is.data.frame(df)) {
+    if (length(query$group_by_vars) > 0) {
       df <- dplyr::grouped_df(
         df,
         dplyr::group_vars(query),
         drop = dplyr::group_by_drop_default(query)
       )
-    } else {
-      # This is a Table, via compute() or collect(as_data_frame = FALSE)
-      df$metadata$r$attributes$.group_vars <- query$group_by_vars
     }
+  } else {
+    # This is a Table, via compute() or collect(as_data_frame = FALSE)
+    df$metadata$r$attributes$.group_vars <- query$group_by_vars
   }
   df
 }

From 73edf1238370529dae1817dc5fe55017898b8e16 Mon Sep 17 00:00:00 2001
From: SHIMA Tatsuya <ts1s1andn@gmail.com>
Date: Fri, 7 Oct 2022 22:01:07 +0000
Subject: [PATCH 11/22] arrow_dplyr_query$group_by_vars should character, and
 metadata$r$attributes$.group_vars should not character(0)

Signed-off-by: SHIMA Tatsuya <ts1s1andn@gmail.com>
---
 r/R/dplyr-collect.R  | 7 ++++++-
 r/R/dplyr-group-by.R | 2 +-
 r/R/dplyr.R          | 2 +-
 3 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/r/R/dplyr-collect.R b/r/R/dplyr-collect.R
index 6244b4f02b2..1b17a297a87 100644
--- a/r/R/dplyr-collect.R
+++ b/r/R/dplyr-collect.R
@@ -73,7 +73,12 @@ restore_dplyr_features <- function(df, query) {
     }
   } else {
     # This is a Table, via compute() or collect(as_data_frame = FALSE)
-    df$metadata$r$attributes$.group_vars <- query$group_by_vars
+    if (length(query$group_by_vars) > 0) {
+      gv <- query$group_by_vars
+    } else {
+      gv <- NULL
+    }
+    df$metadata$r$attributes$.group_vars <- gv
   }
   df
 }
diff --git a/r/R/dplyr-group-by.R b/r/R/dplyr-group-by.R
index 9524331cfbc..57cf417c9ad 100644
--- a/r/R/dplyr-group-by.R
+++ b/r/R/dplyr-group-by.R
@@ -54,7 +54,7 @@ group_by.Dataset <- group_by.ArrowTabular <- group_by.RecordBatchReader <- group
 groups.arrow_dplyr_query <- function(x) syms(dplyr::group_vars(x))
 groups.Dataset <- groups.ArrowTabular <- groups.RecordBatchReader <- groups.arrow_dplyr_query
 
-group_vars.arrow_dplyr_query <- function(x) x$group_by_vars %||% character()
+group_vars.arrow_dplyr_query <- function(x) x$group_by_vars
 group_vars.Dataset <- function(x) character()
 group_vars.RecordBatchReader <- function(x) character()
 group_vars.ArrowTabular <- function(x) {
diff --git a/r/R/dplyr.R b/r/R/dplyr.R
index c85b2714025..0d516022736 100644
--- a/r/R/dplyr.R
+++ b/r/R/dplyr.R
@@ -43,7 +43,7 @@ arrow_dplyr_query <- function(.data) {
     # If dplyr is not available, or if the input doesn't have a group_vars
     # method, assume no group vars
     dplyr::group_vars(.data),
-    error = function(e) NULL
+    error = function(e) character()
   )
 
   if (inherits(.data, "data.frame")) {

From f6d2a0d4d1490621d3aa7330d376b3dc67611155 Mon Sep 17 00:00:00 2001
From: SHIMA Tatsuya <ts1s1andn@gmail.com>
Date: Fri, 7 Oct 2022 22:01:07 +0000
Subject: [PATCH 12/22] fix

Signed-off-by: SHIMA Tatsuya <ts1s1andn@gmail.com>
---
 r/R/dplyr.R | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/r/R/dplyr.R b/r/R/dplyr.R
index 0d516022736..2db3c89d3ce 100644
--- a/r/R/dplyr.R
+++ b/r/R/dplyr.R
@@ -50,9 +50,6 @@ arrow_dplyr_query <- function(.data) {
     .data <- Table$create(.data)
   }
 
-  # Remove group vars metadata from the Table
-  .data$metadata$r$attributes$.group_vars <- NULL
-
   # Evaluating expressions on a dataset with duplicated fieldnames will error
   dupes <- duplicated(names(.data))
   if (any(dupes)) {

From da80d7657cfa92b4ec4dae29e87724c454e64382 Mon Sep 17 00:00:00 2001
From: SHIMA Tatsuya <ts1s1andn@gmail.com>
Date: Fri, 7 Oct 2022 22:01:07 +0000
Subject: [PATCH 13/22] use as_arrow_table instead of collect because of Table
 with 0 columns handling

Signed-off-by: SHIMA Tatsuya <ts1s1andn@gmail.com>
---
 r/R/dplyr.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/r/R/dplyr.R b/r/R/dplyr.R
index 2db3c89d3ce..1cb0c8a91bd 100644
--- a/r/R/dplyr.R
+++ b/r/R/dplyr.R
@@ -183,7 +183,7 @@ dim.arrow_dplyr_query <- function(x) {
     # Query on in-memory Table, so evaluate the filter
     # Don't need any columns
     x <- select.arrow_dplyr_query(x, NULL)
-    rows <- nrow(compute.arrow_dplyr_query(x))
+    rows <- nrow(as_arrow_table(x))
   }
   c(rows, cols)
 }

From 3012a54430f699967eb9fc4f93aafd0cbea2b698 Mon Sep 17 00:00:00 2001
From: SHIMA Tatsuya <ts1s1andn@gmail.com>
Date: Fri, 7 Oct 2022 22:01:07 +0000
Subject: [PATCH 14/22] fix typo

Signed-off-by: SHIMA Tatsuya <ts1s1andn@gmail.com>
---
 r/R/dplyr.R | 1 -
 1 file changed, 1 deletion(-)

diff --git a/r/R/dplyr.R b/r/R/dplyr.R
index 1cb0c8a91bd..0351afe0d45 100644
--- a/r/R/dplyr.R
+++ b/r/R/dplyr.R
@@ -49,7 +49,6 @@ arrow_dplyr_query <- function(.data) {
   if (inherits(.data, "data.frame")) {
     .data <- Table$create(.data)
   }
-
   # Evaluating expressions on a dataset with duplicated fieldnames will error
   dupes <- duplicated(names(.data))
   if (any(dupes)) {

From d0c545fa1944c2715f02849f8bf67781ec77f861 Mon Sep 17 00:00:00 2001
From: SHIMA Tatsuya <ts1s1andn@gmail.com>
Date: Fri, 7 Oct 2022 22:01:07 +0000
Subject: [PATCH 15/22] add tests

Signed-off-by: SHIMA Tatsuya <ts1s1andn@gmail.com>
---
 r/tests/testthat/test-dplyr-group-by.R | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/r/tests/testthat/test-dplyr-group-by.R b/r/tests/testthat/test-dplyr-group-by.R
index 9bb6aa9600d..601db1911d7 100644
--- a/r/tests/testthat/test-dplyr-group-by.R
+++ b/r/tests/testthat/test-dplyr-group-by.R
@@ -196,6 +196,20 @@ test_that("group_by() with .add", {
       collect(),
     tbl
   )
+  compare_dplyr_binding(
+    .input %>%
+      group_by(.add = FALSE) %>%
+      collect(),
+    tbl %>%
+      group_by(dbl2)
+  )
+  compare_dplyr_binding(
+    .input %>%
+      group_by(.add = TRUE) %>%
+      collect(),
+    tbl %>%
+      group_by(dbl2)
+  )
   compare_dplyr_binding(
     .input %>%
       group_by(chr, .add = FALSE) %>%

From ce5d581611ec4a7b873f6346393ae69d25f90049 Mon Sep 17 00:00:00 2001
From: SHIMA Tatsuya <ts1s1andn@gmail.com>
Date: Fri, 7 Oct 2022 22:01:07 +0000
Subject: [PATCH 16/22] add tests

Signed-off-by: SHIMA Tatsuya <ts1s1andn@gmail.com>
---
 r/tests/testthat/test-dplyr-group-by.R | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/r/tests/testthat/test-dplyr-group-by.R b/r/tests/testthat/test-dplyr-group-by.R
index 601db1911d7..82401801c16 100644
--- a/r/tests/testthat/test-dplyr-group-by.R
+++ b/r/tests/testthat/test-dplyr-group-by.R
@@ -62,6 +62,22 @@ test_that("ungroup", {
       collect(),
     tbl
   )
+  compare_dplyr_binding(
+    .input %>%
+      group_by(chr, .add = FALSE) %>%
+      ungroup() %>%
+      collect(),
+    tbl %>%
+      group_by(int)
+  )
+  compare_dplyr_binding(
+    .input %>%
+      group_by(chr, .add = TRUE) %>%
+      ungroup() %>%
+      collect(),
+    tbl %>%
+      group_by(int)
+  )
 
   # to confirm that the above expectation is actually testing what we think it's
   # testing, verify that compare_dplyr_binding() distinguishes between grouped and

From e117e8ca2e2fa527af8982415c24505f6e05715b Mon Sep 17 00:00:00 2001
From: SHIMA Tatsuya <ts1s1andn@gmail.com>
Date: Fri, 7 Oct 2022 22:01:07 +0000
Subject: [PATCH 17/22] ensure not to convert to a grouped_df

Signed-off-by: SHIMA Tatsuya <ts1s1andn@gmail.com>
---
 r/R/dplyr-collect.R | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/r/R/dplyr-collect.R b/r/R/dplyr-collect.R
index 1b17a297a87..8098c99ccd4 100644
--- a/r/R/dplyr-collect.R
+++ b/r/R/dplyr-collect.R
@@ -27,6 +27,8 @@ collect.arrow_dplyr_query <- function(x, as_data_frame = TRUE, ...) {
       augment_io_error_msg(e, call, schema = x$.data$schema)
     }
   )
+  # Remove grouping metadata from the Table before converting to a data frame
+  out$metadata$r$attributes$.group_vars <- NULL
 
   if (as_data_frame) {
     out <- as.data.frame(out)

From 36674d1f5563ef8c82ccda4261719c458f3d4449 Mon Sep 17 00:00:00 2001
From: SHIMA Tatsuya <ts1s1andn@gmail.com>
Date: Fri, 7 Oct 2022 22:01:07 +0000
Subject: [PATCH 18/22] change to manipulate groups only when necessary

Signed-off-by: SHIMA Tatsuya <ts1s1andn@gmail.com>
---
 r/R/dplyr-collect.R | 31 ++++++++++++++++---------------
 1 file changed, 16 insertions(+), 15 deletions(-)

diff --git a/r/R/dplyr-collect.R b/r/R/dplyr-collect.R
index 8098c99ccd4..a97428a19b4 100644
--- a/r/R/dplyr-collect.R
+++ b/r/R/dplyr-collect.R
@@ -27,8 +27,10 @@ collect.arrow_dplyr_query <- function(x, as_data_frame = TRUE, ...) {
       augment_io_error_msg(e, call, schema = x$.data$schema)
     }
   )
-  # Remove grouping metadata from the Table before converting to a data frame
-  out$metadata$r$attributes$.group_vars <- NULL
+  # Ungroup the Table before converting this to a data frame
+  if (length(group_vars.ArrowTabular(out))) {
+    out <- ungroup.ArrowTabular(out)
+  }
 
   if (as_data_frame) {
     out <- as.data.frame(out)
@@ -64,23 +66,22 @@ restore_dplyr_features <- function(df, query) {
   # An arrow_dplyr_query holds some attributes that Arrow doesn't know about
   # After calling collect(), make sure these features are carried over
 
-  if (is.data.frame(df)) {
-    # Preserve groupings, if present
-    if (length(query$group_by_vars) > 0) {
-      df <- dplyr::grouped_df(
+  if (length(dplyr::group_vars(df))) {
+    stop("df must be ungrouped", call. = FALSE)
+  }
+
+  if (length(dplyr::group_vars(query))) {
+    if (is.data.frame(df)) {
+      # Preserve groupings, if present
+      df <- dplyr::group_by(
         df,
-        dplyr::group_vars(query),
-        drop = dplyr::group_by_drop_default(query)
+        !!!syms(dplyr::group_vars(query)),
+        .drop = dplyr::group_by_drop_default(query)
       )
-    }
-  } else {
-    # This is a Table, via compute() or collect(as_data_frame = FALSE)
-    if (length(query$group_by_vars) > 0) {
-      gv <- query$group_by_vars
     } else {
-      gv <- NULL
+      # This is a Table, via compute() or collect(as_data_frame = FALSE)
+      df$metadata$r$attributes$.group_vars <- dplyr::group_vars(query)
     }
-    df$metadata$r$attributes$.group_vars <- gv
   }
   df
 }

From 8b5ef460d9eea4ff9f392c4e118c56e57d3f9bff Mon Sep 17 00:00:00 2001
From: SHIMA Tatsuya <ts1s1andn@gmail.com>
Date: Fri, 7 Oct 2022 22:01:07 +0000
Subject: [PATCH 19/22] separate the test case

Signed-off-by: SHIMA Tatsuya <ts1s1andn@gmail.com>
---
 r/tests/testthat/test-dplyr-group-by.R | 33 ++++++++++++++------------
 1 file changed, 18 insertions(+), 15 deletions(-)

diff --git a/r/tests/testthat/test-dplyr-group-by.R b/r/tests/testthat/test-dplyr-group-by.R
index 82401801c16..319eec25ed7 100644
--- a/r/tests/testthat/test-dplyr-group-by.R
+++ b/r/tests/testthat/test-dplyr-group-by.R
@@ -62,6 +62,24 @@ test_that("ungroup", {
       collect(),
     tbl
   )
+
+  # to confirm that the above expectation is actually testing what we think it's
+  # testing, verify that compare_dplyr_binding() distinguishes between grouped and
+  # ungrouped tibbles
+  expect_error(
+    compare_dplyr_binding(
+      .input %>%
+        group_by(chr) %>%
+        select(int, chr) %>%
+        (function(x) if (inherits(x, "tbl_df")) ungroup(x) else x) %>%
+        filter(int > 5) %>%
+        collect(),
+      tbl
+    )
+  )
+})
+
+test_that("Groups before conversion to a Table must not be restored after collect() (ARROW-17737)", {
   compare_dplyr_binding(
     .input %>%
       group_by(chr, .add = FALSE) %>%
@@ -78,21 +96,6 @@ test_that("ungroup", {
     tbl %>%
       group_by(int)
   )
-
-  # to confirm that the above expectation is actually testing what we think it's
-  # testing, verify that compare_dplyr_binding() distinguishes between grouped and
-  # ungrouped tibbles
-  expect_error(
-    compare_dplyr_binding(
-      .input %>%
-        group_by(chr) %>%
-        select(int, chr) %>%
-        (function(x) if (inherits(x, "tbl_df")) ungroup(x) else x) %>%
-        filter(int > 5) %>%
-        collect(),
-      tbl
-    )
-  )
 })
 
 test_that("group_by then rename", {

From 8a1f7cdb7996fa5ca49b474138ca6f87e67f88df Mon Sep 17 00:00:00 2001
From: SHIMA Tatsuya <ts1s1andn@gmail.com>
Date: Fri, 7 Oct 2022 22:22:42 +0000
Subject: [PATCH 20/22] fix rebasing

Signed-off-by: SHIMA Tatsuya <ts1s1andn@gmail.com>
---
 r/tests/testthat/test-dplyr-query.R | 27 ---------------------------
 1 file changed, 27 deletions(-)

diff --git a/r/tests/testthat/test-dplyr-query.R b/r/tests/testthat/test-dplyr-query.R
index 6a8cd851ca6..db9a3bb30d0 100644
--- a/r/tests/testthat/test-dplyr-query.R
+++ b/r/tests/testthat/test-dplyr-query.R
@@ -597,33 +597,6 @@ test_that("compute() on a grouped query returns a Table with groups in metadata"
     tbl %>%
       group_by(int)
   )
-  expect_equal(
-    collect(tab1),
-    tbl %>%
-      group_by(int)
-  )
-})
-
-test_that("collect() is identical to compute() %>% collect()", {
-  tab1 <- tbl %>%
-    arrow_table()
-  adq1 <- tab1 %>%
-    group_by(int)
-
-  expect_equal(
-    tab1 %>%
-      compute() %>%
-      collect(),
-    tab1 %>%
-      collect()
-  )
-  expect_equal(
-    adq1 %>%
-      compute() %>%
-      collect(),
-    adq1 %>%
-      collect()
-  )
 })
 
 test_that("collect() is identical to compute() %>% collect()", {

From 72a5ab6cd10ed4975e4db0f1db3b0ff64e9904b4 Mon Sep 17 00:00:00 2001
From: SHIMA Tatsuya <ts1s1andn@gmail.com>
Date: Tue, 11 Oct 2022 12:43:19 +0000
Subject: [PATCH 21/22] ensure ungroup .data if it is a Table

Signed-off-by: SHIMA Tatsuya <ts1s1andn@gmail.com>
---
 r/R/dplyr-collect.R | 4 ----
 r/R/dplyr.R         | 5 +++++
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/r/R/dplyr-collect.R b/r/R/dplyr-collect.R
index a97428a19b4..8a08f7f0448 100644
--- a/r/R/dplyr-collect.R
+++ b/r/R/dplyr-collect.R
@@ -27,10 +27,6 @@ collect.arrow_dplyr_query <- function(x, as_data_frame = TRUE, ...) {
       augment_io_error_msg(e, call, schema = x$.data$schema)
     }
   )
-  # Ungroup the Table before converting this to a data frame
-  if (length(group_vars.ArrowTabular(out))) {
-    out <- ungroup.ArrowTabular(out)
-  }
 
   if (as_data_frame) {
     out <- as.data.frame(out)
diff --git a/r/R/dplyr.R b/r/R/dplyr.R
index 0351afe0d45..010cb7c5ce1 100644
--- a/r/R/dplyr.R
+++ b/r/R/dplyr.R
@@ -49,6 +49,11 @@ arrow_dplyr_query <- function(.data) {
   if (inherits(.data, "data.frame")) {
     .data <- Table$create(.data)
   }
+  # If .data is a Table, it must be ungrouped (ARROW-17737)
+  if (length(group_vars.ArrowTabular(.data))) {
+    .data <- ungroup.ArrowTabular(.data)
+  }
+
   # Evaluating expressions on a dataset with duplicated fieldnames will error
   dupes <- duplicated(names(.data))
   if (any(dupes)) {

From a7062684aa827387c0237bae6e14c159c0ece92b Mon Sep 17 00:00:00 2001
From: SHIMA Tatsuya <ts1s1andn@gmail.com>
Date: Thu, 13 Oct 2022 10:45:32 +0000
Subject: [PATCH 22/22] more simplify

Signed-off-by: SHIMA Tatsuya <ts1s1andn@gmail.com>
---
 r/R/dplyr-collect.R | 7 ++-----
 r/R/dplyr.R         | 5 +++--
 2 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/r/R/dplyr-collect.R b/r/R/dplyr-collect.R
index 8a08f7f0448..5640565b153 100644
--- a/r/R/dplyr-collect.R
+++ b/r/R/dplyr-collect.R
@@ -62,17 +62,14 @@ restore_dplyr_features <- function(df, query) {
   # An arrow_dplyr_query holds some attributes that Arrow doesn't know about
   # After calling collect(), make sure these features are carried over
 
-  if (length(dplyr::group_vars(df))) {
-    stop("df must be ungrouped", call. = FALSE)
-  }
-
   if (length(dplyr::group_vars(query))) {
     if (is.data.frame(df)) {
       # Preserve groupings, if present
       df <- dplyr::group_by(
         df,
         !!!syms(dplyr::group_vars(query)),
-        .drop = dplyr::group_by_drop_default(query)
+        .drop = dplyr::group_by_drop_default(query),
+        .add = FALSE
       )
     } else {
       # This is a Table, via compute() or collect(as_data_frame = FALSE)
diff --git a/r/R/dplyr.R b/r/R/dplyr.R
index 010cb7c5ce1..c7b58bc0275 100644
--- a/r/R/dplyr.R
+++ b/r/R/dplyr.R
@@ -49,8 +49,9 @@ arrow_dplyr_query <- function(.data) {
   if (inherits(.data, "data.frame")) {
     .data <- Table$create(.data)
   }
-  # If .data is a Table, it must be ungrouped (ARROW-17737)
-  if (length(group_vars.ArrowTabular(.data))) {
+  # ARROW-17737: If .data is a Table, remove groups from metadata
+  # (we've already grabbed the groups above)
+  if (inherits(.data, "ArrowTabular")) {
     .data <- ungroup.ArrowTabular(.data)
   }