From a4205e499c23ec09256c51ec760daf4260f1f97a Mon Sep 17 00:00:00 2001
From: jangorecki <j.gorecki@wit.edu.pl>
Date: Thu, 1 Aug 2019 10:21:57 +0200
Subject: [PATCH 1/5] fix autonaaming issue in groupings sets for special
 symbols, closes #3653

---
 NEWS.md               |  2 ++
 R/groupingsets.R      | 13 ++++++++++---
 inst/tests/tests.Rraw | 10 ++++++++++
 3 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index 411a589857..1ff2688fb6 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -198,6 +198,8 @@
 
 24. `column not found` could incorrectly occur in rare non-equi-join cases, [#3635](https://github.com/Rdatatable/data.table/issues/3635). Thanks to @UweBlock for the report.
 
+25. `groupingsets` functions will now properly handle alone special symbols when using an empty set to group by, [#3653](https://github.com/Rdatatable/data.table/issues/3653). Thanks to @Henrik-P for the report.
+
 #### NOTES
 
 1. `rbindlist`'s `use.names="check"` now emits its message for automatic column names (`"V[0-9]+"`) too, [#3484](https://github.com/Rdatatable/data.table/pull/3484). See news item 5 of v1.12.2 below.
diff --git a/R/groupingsets.R b/R/groupingsets.R
index e1dda42654..954d007034 100644
--- a/R/groupingsets.R
+++ b/R/groupingsets.R
@@ -67,6 +67,7 @@ groupingsets.data.table = function(x, j, by, sets, .SDcols, id = FALSE, jj, ...)
   # input arguments handling
   jj = if (!missing(jj)) jj else substitute(j)
   av = all.vars(jj, TRUE)
+  jsym = if (length(av)==1L) {if (".N"==av) "N" else if (".I"==av) "I" else if (".GRP"==av) "GRP"} # workaround for autonamed columns in grand total #3653
   if (":=" %chin% av)
     stop("Expression passed to grouping sets function must not update by reference. Use ':=' on results of your grouping function.")
   if (missing(.SDcols))
@@ -97,13 +98,19 @@ groupingsets.data.table = function(x, j, by, sets, .SDcols, id = FALSE, jj, ...)
     stop("Using integer64 class columns require to have 'bit64' package installed.") # nocov
   int64.by.cols = intersect(int64.cols, by)
   # aggregate function called for each grouping set
-  aggregate.set = function(by.set) {
+  aggregate.set = function(by.set, jsym) {
     if (length(by.set)) {
       r = if (length(.SDcols)) x[, eval(jj), by.set, .SDcols=.SDcols] else x[, eval(jj), by.set]
     } else {
+      ## workaround for grand total single var as data.table too, change to drop=FALSE after #648 solved
       r = if (length(.SDcols)) x[, eval(jj), .SDcols=.SDcols] else x[, eval(jj)]
-      # workaround for grand total single var as data.table too, change to drop=FALSE after #648 solved
       if (!is.data.table(r)) r = setDT(list(r))
+      if (!is.null(jsym)) {
+        if (!"V1" %in% names(r))
+          stop("internal error in groupingsets, V1 is not present, please report") # nocov
+        else
+          setnames(r, "V1", jsym)
+      }
     }
     if (id) {
       # integer bit mask of aggregation levels: http://www.postgresql.org/docs/9.5/static/functions-aggregate.html#FUNCTIONS-GROUPING-TABLE
@@ -121,6 +128,6 @@ groupingsets.data.table = function(x, j, by, sets, .SDcols, id = FALSE, jj, ...)
   # actually processing everything here
   rbindlist(c(
     list(empty), # 0 rows template for colorder and type
-    lapply(sets, aggregate.set) # all aggregations
+    lapply(sets, aggregate.set, jsym) # all aggregations
   ), use.names=TRUE, fill=TRUE)
 }
diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index dbfb740e69..f396f29635 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -15428,6 +15428,16 @@ test(2071.10, dcast(data.table(a=1, b=1, l=list(list(1))), a ~ b, value.var='l')
 test(2071.11, dcast(data.table(a = 1, b = 2, c = 3), a ~ b, value.var = 'c', fill = '2'),
      data.table(a=1, `2`=3, key='a'))
 
+# groupingsets j=.N by character(0) set #3653
+d = data.table(x = c("a", "a", "b"))
+test(2073.01, groupingsets(d, j = .N, by = "x", sets = list("x", character())), data.table(x=c("a","b",NA_character_), N=c(2L,1L,3L)))
+test(2073.02, groupingsets(d, j = .N, by = "x", sets = list(character())), data.table(x=NA_character_, N=3L))
+test(2073.03, groupingsets(d, j = .GRP, by = "x", sets = list("x", character())), data.table(x=c("a","b",NA_character_), GRP=c(1L,2L,1L)))
+test(2073.04, groupingsets(d, j = .GRP, by = "x", sets = list(character())), data.table(x=NA_character_, GRP=1L))
+test(2073.05, groupingsets(d, j = .I, by = "x", sets = list("x", character())), data.table(x=c("a","a","b",rep(NA_character_,3L)), I=c(1:3,1:3)))
+test(2073.06, groupingsets(d, j = .I, by = "x", sets = list(character())), data.table(x=rep(NA_character_,3L), I=1:3))
+
+
 ###################################
 #  Add new tests above this line  #
 ###################################

From eb07b988ee70d0363c2dcff2a89e98c59a6ae850 Mon Sep 17 00:00:00 2001
From: jangorecki <j.gorecki@wit.edu.pl>
Date: Mon, 5 Aug 2019 23:26:21 +0200
Subject: [PATCH 2/5] remove workarounds thanks to #3270

---
 R/groupingsets.R | 27 +++------------------------
 1 file changed, 3 insertions(+), 24 deletions(-)

diff --git a/R/groupingsets.R b/R/groupingsets.R
index 954d007034..3cc66bac8c 100644
--- a/R/groupingsets.R
+++ b/R/groupingsets.R
@@ -67,21 +67,12 @@ groupingsets.data.table = function(x, j, by, sets, .SDcols, id = FALSE, jj, ...)
   # input arguments handling
   jj = if (!missing(jj)) jj else substitute(j)
   av = all.vars(jj, TRUE)
-  jsym = if (length(av)==1L) {if (".N"==av) "N" else if (".I"==av) "I" else if (".GRP"==av) "GRP"} # workaround for autonamed columns in grand total #3653
   if (":=" %chin% av)
     stop("Expression passed to grouping sets function must not update by reference. Use ':=' on results of your grouping function.")
   if (missing(.SDcols))
     .SDcols = if (".SD" %chin% av) setdiff(names(x), by) else NULL
   # 0 rows template data.table to keep colorder and type
-  if (length(by)) {
-    empty = if (length(.SDcols)) x[0L, eval(jj), by, .SDcols=.SDcols] else x[0L, eval(jj), by]
-  } else {
-    empty = if (length(.SDcols)) x[0L, eval(jj), .SDcols=.SDcols] else x[0L, eval(jj)]
-    if (!is.data.table(empty)) {
-      if (length(empty)>0) empty = empty[0L] # fix for #3173 when no grouping and j constant
-      empty = setDT(list(empty)) # improve after #648, see comment in aggregate.set
-    }
-  }
+  empty = if (length(.SDcols)) x[0L, eval(jj), by, .SDcols=.SDcols] else x[0L, eval(jj), by]
   if (id && "grouping" %chin% names(empty)) # `j` could have been evaluated to `grouping` field
     stop("When using `id=TRUE` the 'j' expression must not evaluate to column named 'grouping'.")
   if (anyDuplicated(names(empty)) > 0L)
@@ -99,19 +90,7 @@ groupingsets.data.table = function(x, j, by, sets, .SDcols, id = FALSE, jj, ...)
   int64.by.cols = intersect(int64.cols, by)
   # aggregate function called for each grouping set
   aggregate.set = function(by.set, jsym) {
-    if (length(by.set)) {
-      r = if (length(.SDcols)) x[, eval(jj), by.set, .SDcols=.SDcols] else x[, eval(jj), by.set]
-    } else {
-      ## workaround for grand total single var as data.table too, change to drop=FALSE after #648 solved
-      r = if (length(.SDcols)) x[, eval(jj), .SDcols=.SDcols] else x[, eval(jj)]
-      if (!is.data.table(r)) r = setDT(list(r))
-      if (!is.null(jsym)) {
-        if (!"V1" %in% names(r))
-          stop("internal error in groupingsets, V1 is not present, please report") # nocov
-        else
-          setnames(r, "V1", jsym)
-      }
-    }
+    r = if (length(.SDcols)) x[, eval(jj), by.set, .SDcols=.SDcols] else x[, eval(jj), by.set]
     if (id) {
       # integer bit mask of aggregation levels: http://www.postgresql.org/docs/9.5/static/functions-aggregate.html#FUNCTIONS-GROUPING-TABLE
       # 3267: strtoi("", base = 2L) output apparently unstable across platforms
@@ -128,6 +107,6 @@ groupingsets.data.table = function(x, j, by, sets, .SDcols, id = FALSE, jj, ...)
   # actually processing everything here
   rbindlist(c(
     list(empty), # 0 rows template for colorder and type
-    lapply(sets, aggregate.set, jsym) # all aggregations
+    lapply(sets, aggregate.set)
   ), use.names=TRUE, fill=TRUE)
 }

From 34be1179008bcbd10f32bb9571c8c29716b6f904 Mon Sep 17 00:00:00 2001
From: jangorecki <j.gorecki@wit.edu.pl>
Date: Mon, 5 Aug 2019 23:44:11 +0200
Subject: [PATCH 3/5] rm leftover

---
 R/groupingsets.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/groupingsets.R b/R/groupingsets.R
index 3cc66bac8c..87d7ec015b 100644
--- a/R/groupingsets.R
+++ b/R/groupingsets.R
@@ -89,7 +89,7 @@ groupingsets.data.table = function(x, j, by, sets, .SDcols, id = FALSE, jj, ...)
     stop("Using integer64 class columns require to have 'bit64' package installed.") # nocov
   int64.by.cols = intersect(int64.cols, by)
   # aggregate function called for each grouping set
-  aggregate.set = function(by.set, jsym) {
+  aggregate.set = function(by.set) {
     r = if (length(.SDcols)) x[, eval(jj), by.set, .SDcols=.SDcols] else x[, eval(jj), by.set]
     if (id) {
       # integer bit mask of aggregation levels: http://www.postgresql.org/docs/9.5/static/functions-aggregate.html#FUNCTIONS-GROUPING-TABLE

From 857c6aa06eb93ee17cf76846757f30a7f81109e3 Mon Sep 17 00:00:00 2001
From: jangorecki <j.gorecki@wit.edu.pl>
Date: Mon, 5 Aug 2019 23:44:40 +0200
Subject: [PATCH 4/5] keepp comment

---
 R/groupingsets.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/groupingsets.R b/R/groupingsets.R
index 87d7ec015b..9f2a6b63fb 100644
--- a/R/groupingsets.R
+++ b/R/groupingsets.R
@@ -107,6 +107,6 @@ groupingsets.data.table = function(x, j, by, sets, .SDcols, id = FALSE, jj, ...)
   # actually processing everything here
   rbindlist(c(
     list(empty), # 0 rows template for colorder and type
-    lapply(sets, aggregate.set)
+    lapply(sets, aggregate.set) # all aggregations
   ), use.names=TRUE, fill=TRUE)
 }

From b8ad229e37569ba426bb96158d87d0da1bf2a0df Mon Sep 17 00:00:00 2001
From: Matt Dowle <mattjdowle@gmail.com>
Date: Fri, 23 Aug 2019 20:23:52 -0700
Subject: [PATCH 5/5] merge test number increasing

---
 inst/tests/tests.Rraw | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 43acdaf4bd..af01a1addd 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -15738,15 +15738,14 @@ if (test_bit64) {
   test(2083.2, rbind(data.table(a=1:2, b=as.integer64(c(1,NA))), data.table(a=3L), fill=TRUE)$b, as.integer64(c(1, NA, NA)))
 }
 
-
 # groupingsets j=.N by character(0) set #3653
 d = data.table(x = c("a", "a", "b"))
-test(2073.01, groupingsets(d, j = .N, by = "x", sets = list("x", character())), data.table(x=c("a","b",NA_character_), N=c(2L,1L,3L)))
-test(2073.02, groupingsets(d, j = .N, by = "x", sets = list(character())), data.table(x=NA_character_, N=3L))
-test(2073.03, groupingsets(d, j = .GRP, by = "x", sets = list("x", character())), data.table(x=c("a","b",NA_character_), GRP=c(1L,2L,1L)))
-test(2073.04, groupingsets(d, j = .GRP, by = "x", sets = list(character())), data.table(x=NA_character_, GRP=1L))
-test(2073.05, groupingsets(d, j = .I, by = "x", sets = list("x", character())), data.table(x=c("a","a","b",rep(NA_character_,3L)), I=c(1:3,1:3)))
-test(2073.06, groupingsets(d, j = .I, by = "x", sets = list(character())), data.table(x=rep(NA_character_,3L), I=1:3))
+test(2084.01, groupingsets(d, j = .N, by = "x", sets = list("x", character())), data.table(x=c("a","b",NA_character_), N=c(2L,1L,3L)))
+test(2084.02, groupingsets(d, j = .N, by = "x", sets = list(character())), data.table(x=NA_character_, N=3L))
+test(2084.03, groupingsets(d, j = .GRP, by = "x", sets = list("x", character())), data.table(x=c("a","b",NA_character_), GRP=c(1L,2L,1L)))
+test(2084.04, groupingsets(d, j = .GRP, by = "x", sets = list(character())), data.table(x=NA_character_, GRP=1L))
+test(2084.05, groupingsets(d, j = .I, by = "x", sets = list("x", character())), data.table(x=c("a","a","b",rep(NA_character_,3L)), I=c(1:3,1:3)))
+test(2084.06, groupingsets(d, j = .I, by = "x", sets = list(character())), data.table(x=rep(NA_character_,3L), I=1:3))
 
 
 ###################################