From 0020358169dfd459c7a59dcf687632e6ac05363c Mon Sep 17 00:00:00 2001
From: jangorecki <j.gorecki@wit.edu.pl>
Date: Tue, 14 May 2019 16:02:13 +0530
Subject: [PATCH] group by NULL and zero rows DT now consistent, closes #3530

---
 NEWS.md               |  2 ++
 R/data.table.R        | 20 +++++++++++---------
 inst/tests/tests.Rraw |  6 ++++++
 3 files changed, 19 insertions(+), 9 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index f06fde9693..99329253f0 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -89,6 +89,8 @@
 
 6. Printing could occur unexpectedly when code is run with `source`, [#2369](https://github.com/Rdatatable/data.table/issues/2369). Thanks to @jan-glx for the report and reproducible example.
 
+7. Grouping by `NULL` on zero rows data.table now behaves consistently to non-zero rows data.table, [#3530](https://github.com/Rdatatable/data.table/issues/3530). Thanks to @SymbolixAU for the report and reproducible example.
+
 #### NOTES
 
 1. `rbindlist`'s `use.names="check"` now emits its message for automatic column names (`"V[0-9]+"`) too, [#3484](https://github.com/Rdatatable/data.table/pull/3484). See news item 5 of v1.12.2 below.
diff --git a/R/data.table.R b/R/data.table.R
index 13d9b8fb73..209c23fc34 100644
--- a/R/data.table.R
+++ b/R/data.table.R
@@ -246,6 +246,7 @@ replace_dot_alias <- function(e) {
     by=bysub= if (missing(by)) NULL else substitute(by)
     keyby=FALSE
   }
+  bynull = !missingby && is.null(by) #3530
   byjoin = !is.null(by) && is.symbol(bysub) && bysub==".EACHI"
   if (missing(i) && missing(j)) {
     tt_isub = substitute(i)
@@ -709,7 +710,6 @@ replace_dot_alias <- function(e) {
   } else {  # missing(i)
     i = NULL
   }
-
   byval = NULL
   xnrow = nrow(x)
   xcols = xcolsAns = icols = icolsAns = integer()
@@ -882,12 +882,12 @@ replace_dot_alias <- function(e) {
           }
         }
 
-        if (is.null(irows))
+        if (is.null(irows)) {
           if (is.call(bysub) && length(bysub) == 3L && bysub[[1L]] == ":" && is.name(bysub[[2L]]) && is.name(bysub[[3L]])) {
             byval = eval(bysub, setattr(as.list(seq_along(x)), 'names', names(x)), parent.frame())
             byval = as.list(x)[byval]
           } else byval = eval(bysub, x, parent.frame())
-        else {
+        } else {
           # length 0 when i returns no rows
           if (!is.integer(irows)) stop("Internal error: irows isn't integer") # nocov
           # Passing irows as i to x[] below has been troublesome in a rare edge case.
@@ -935,13 +935,14 @@ replace_dot_alias <- function(e) {
           }
         }
         if (!is.list(byval)) stop("'by' or 'keyby' must evaluate to a vector or a list of vectors (where 'list' includes data.table and data.frame which are lists, too)")
-        for (jj in seq_len(length(byval))) {
+        if (length(byval)==1L && is.null(byval[[1L]])) bynull=TRUE #3530 when by=(function()NULL)()
+        if (!bynull) for (jj in seq_len(length(byval))) {
           if (!typeof(byval[[jj]]) %chin% c("integer","logical","character","double")) stop("column or expression ",jj," of 'by' or 'keyby' is type ",typeof(byval[[jj]]),". Do not quote column names. Usage: DT[,sum(colC),by=list(colA,month(colB))]")
         }
         tt = vapply_1i(byval,length)
         if (any(tt!=xnrow)) stop("The items in the 'by' or 'keyby' list are length (",paste(tt,collapse=","),"). Each must be length ", xnrow, "; the same length as there are rows in x (after subsetting if i is provided).")
         if (is.null(bynames)) bynames = rep.int("",length(byval))
-        if (any(bynames=="")) {
+        if (any(bynames=="") && !bynull) {
           for (jj in seq_along(bynames)) {
             if (bynames[jj]=="") {
               # Best guess. Use "month" in the case of by=month(date), use "a" in the case of by=a%%2
@@ -1272,9 +1273,8 @@ replace_dot_alias <- function(e) {
     }
     assign(sym, get(getName, parent.frame()), SDenv)
   }
-
   # hash=TRUE (the default) does seem better as expected using e.g. test 645.  TO DO experiment with 'size' argument
-  if (missingby || (!byjoin && !length(byval))) {
+  if (missingby || bynull || (!byjoin && !length(byval))) {
     # No grouping: 'by' = missing | NULL | character() | "" | list()
     # Considered passing a one-group to dogroups but it doesn't do the recycling of i within group, that's done here
     if (length(ansvars)) {
@@ -1399,7 +1399,10 @@ replace_dot_alias <- function(e) {
 
       # Fix for #813 and #758. Ex: DT[c(FALSE, FALSE), list(integer(0L), y)]
       # where DT = data.table(x=1:2, y=3:4) should return an empty data.table!!
-      if (!is.null(irows) && (identical(irows, integer(0L)) || (!anyNA(irows) && all(irows==0L)))) ## anyNA() because all() returns NA (not FALSE) when irows is all-NA. TODO: any way to not check all 'irows' values?
+      if (!is.null(irows) && `||`(
+          identical(irows, integer(0L)) && !bynull,
+          length(irows) && !anyNA(irows) && all(irows==0L) ## anyNA() because all() returns NA (not FALSE) when irows is all-NA. TODO: any way to not check all 'irows' values?
+          ))
         if (is.atomic(jval)) jval = jval[0L] else jval = lapply(jval, `[`, 0L)
       if (is.atomic(jval)) {
         setattr(jval,"names",NULL)
@@ -1844,7 +1847,6 @@ replace_dot_alias <- function(e) {
   # Grouping by i: icols the joins columns (might not need), isdcols (the non join i and used by j), all __ are length x
   # Grouping by by: i is by val, icols NULL, o__ may be subset of x, f__ points to o__ (or x if !length o__)
   # TO DO: setkey could mark the key whether it is unique or not.
-
   if (!is.null(lhs)) {
     if (any(names(x)[cols] %chin% key(x)))
       setkey(x,NULL)
diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 3521b397fd..9d4685dbf9 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -14568,6 +14568,12 @@ if (test_bit64) {
   options(old)
 }
 
+# zero rows table group by NULL #3530
+dt = data.table(x = c("a","b","a","b"), y = c(1,2,3,4))
+test(2040.1, dt[0, .N, by = NULL], data.table(N=0L))
+f = function(...) NULL
+test(2040.2, dt[0, .N, by = f()], data.table(N=0L))
+
 
 ###################################
 #  Add new tests above this line  #