From 8947a0a50321909effef9f5dc111a9e65cb54daf Mon Sep 17 00:00:00 2001
From: Michael Chirico <michael.chirico@grabtaxi.com>
Date: Tue, 3 Sep 2019 10:20:09 +0800
Subject: [PATCH 1/3] Correctly deal with {}-wrapped by

---
 NEWS.md               |  2 ++
 R/data.table.R        | 44 +++++++++++++++++++++----------------------
 inst/tests/tests.Rraw |  4 ++++
 3 files changed, 28 insertions(+), 22 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index 65dfb8f3a8..ea8ce30e58 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -259,6 +259,8 @@
 
 38. `rbindlist` now returns correct idcols for lists with different length vectors, [#3785](https://github.com/Rdatatable/data.table/issues/3785), [#3786](https://github.com/Rdatatable/data.table/pull/3786). Thanks to @shrektan for the report and fix.
 
+39. `by` correctly handles extended expressions in `{`, [#3156](https://github.com/Rdatatable/data.table/issues/3156). Thanks to @tdhock for the report.
+
 #### NOTES
 
 1. `rbindlist`'s `use.names="check"` now emits its message for automatic column names (`"V[0-9]+"`) too, [#3484](https://github.com/Rdatatable/data.table/pull/3484). See news item 5 of v1.12.2 below.
diff --git a/R/data.table.R b/R/data.table.R
index eb9bc40379..ab02def4b2 100644
--- a/R/data.table.R
+++ b/R/data.table.R
@@ -842,29 +842,29 @@ replace_order = function(isub, verbose, env) {
         }
         tt = vapply_1i(byval,length)
         if (any(tt!=xnrow)) stop("The items in the 'by' or 'keyby' list are length (",paste(tt,collapse=","),"). Each must be length ", xnrow, "; the same length as there are rows in x (after subsetting if i is provided).")
-        if (is.null(bynames)) bynames = rep.int("",length(byval))
-        if (any(bynames=="") && !bynull) {
-          for (jj in seq_along(bynames)) {
-            if (bynames[jj]=="") {
-              # Best guess. Use "month" in the case of by=month(date), use "a" in the case of by=a%%2
-              byvars = all.vars(bysubl[[jj+1L]], functions = TRUE)
-              if (length(byvars) == 1L) tt = byvars
-              else {
-                # take the first variable that is (1) not eval (#3758) and (2) starts with a character that can't start a variable name
-                tt = grep("^eval$|^[^[:alpha:]. ]", byvars, invert=TRUE, value=TRUE)
-                # byvars but exclude functions or `0`+`1` becomes `+`
-                tt = if (length(tt)) tt[1L] else all.vars(bysubl[[jj+1L]])[1L]
-              }
-              # fix for #497
-              if (length(byvars) > 1L && tt %chin% all.vars(jsub, FALSE)) {
-                bynames[jj] = deparse(bysubl[[jj+1L]])
-                if (verbose)
-                  cat("by-expression '", bynames[jj], "' is not named, and the auto-generated name '", tt,
-                      "' clashed with variable(s) in j. Therefore assigning the entire by-expression as name.\n", sep="")
-              }
-              else bynames[jj] = tt
-              # if user doesn't like this inferred name, user has to use by=list() to name the column
+        if (is.null(bynames)) bynames = rep.int("", length(byval))
+        if (length(idx <- which(!nzchar(bynames))) && !bynull) {
+          # TODO: improve this and unify auto-naming of jsub and bysub
+          if (is.name(bysubl[[1L]]) && bysubl[[1L]] == '{') bysubl = bysubl[[length(bysubl)]] # fix for #3156
+          for (jj in idx) {
+            # Best guess. Use "month" in the case of by=month(date), use "a" in the case of by=a%%2
+            byvars = all.vars(bysubl[[jj+1L]], functions = TRUE)
+            if (length(byvars) == 1L) tt = byvars
+            else {
+              # take the first variable that is (1) not eval (#3758) and (2) starts with a character that can't start a variable name
+              tt = grep("^eval$|^[^[:alpha:]. ]", byvars, invert=TRUE, value=TRUE)
+              # byvars but exclude functions or `0`+`1` becomes `+`
+              tt = if (length(tt)) tt[1L] else all.vars(bysubl[[jj+1L]])[1L]
+            }
+            # fix for #497
+            if (length(byvars) > 1L && tt %chin% all.vars(jsub, FALSE)) {
+              bynames[jj] = deparse(bysubl[[jj+1L]])
+              if (verbose)
+                cat("by-expression '", bynames[jj], "' is not named, and the auto-generated name '", tt,
+                    "' clashed with variable(s) in j. Therefore assigning the entire by-expression as name.\n", sep="")
             }
+            else bynames[jj] = tt
+            # if user doesn't like this inferred name, user has to use by=list() to name the column
           }
           # Fix for #1334
           if (any(duplicated(bynames))) {
diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 78514cae0f..cd680cd6f6 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -15870,6 +15870,10 @@ test(2097.1, DT[.(NA_character_)], data.table(fac=NA_character_, v=2L, key="fac"
 test(2097.2, DT[J(NA)],          error="Factor columns must join to factor or character columns")
 test(2097.3, DT[J(NA_integer_)], error="Factor columns must join to factor or character columns")
 
+# bug in autonaming by #3156
+library(data.table)
+DT = data.table(State=c("ERROR", "COMPLETED", "ERROR"), ExitCode=c(1, 0, 2))
+test(2098, DT[ , list(count=.N), by={list(State, ExitCode)}], DT[ , count := 1L])
 
 ###################################
 #  Add new tests above this line  #

From 1730b2d1fc0f904d698b66920e3a5542804cc6ff Mon Sep 17 00:00:00 2001
From: mattdowle <mattjdowle@gmail.com>
Date: Wed, 11 Sep 2019 18:53:15 -0700
Subject: [PATCH 2/3] news item tweak, and dummy comment to help visual diff
 show indent-only changes

---
 NEWS.md        | 2 +-
 R/data.table.R | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index 491c514f24..7a559dc5f8 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -302,7 +302,7 @@
 
 41. Using `get`/`mget` in `j` could cause `.SDcols` to be ignored or reordered, [#1744](https://github.com/Rdatatable/data.table/issues/1744), [#1965](https://github.com/Rdatatable/data.table/issues/1965), [#2036](https://github.com/Rdatatable/data.table/issues/2036), and [#2946](https://github.com/Rdatatable/data.table/issues/2946). Thanks @franknarf1, @MichaelChirico, @TonyBonen, and Steffen J. (StackOverflow) for the reports.
 
-39. `by` correctly handles extended expressions in `{`, [#3156](https://github.com/Rdatatable/data.table/issues/3156). Thanks to @tdhock for the report.
+42. `DT[...,by={...}]` now handles expressions in `{`, [#3156](https://github.com/Rdatatable/data.table/issues/3156). Thanks to @tdhock for the report.
 
 #### NOTES
 
diff --git a/R/data.table.R b/R/data.table.R
index 033602ae6f..7c87fefb4f 100644
--- a/R/data.table.R
+++ b/R/data.table.R
@@ -817,7 +817,7 @@ replace_dot_alias = function(e) {
         }
         tt = vapply_1i(byval,length)
         if (any(tt!=xnrow)) stop("The items in the 'by' or 'keyby' list are length (",paste(tt,collapse=","),"). Each must be length ", xnrow, "; the same length as there are rows in x (after subsetting if i is provided).")
-        if (is.null(bynames)) bynames = rep.int("", length(byval))
+        if (is.null(bynames)) bynames = rep.int("",length(byval))
         if (length(idx <- which(!nzchar(bynames))) && !bynull) {
           # TODO: improve this and unify auto-naming of jsub and bysub
           if (is.name(bysubl[[1L]]) && bysubl[[1L]] == '{') bysubl = bysubl[[length(bysubl)]] # fix for #3156
@@ -830,14 +830,14 @@ replace_dot_alias = function(e) {
               tt = grep("^eval$|^[^[:alpha:]. ]", byvars, invert=TRUE, value=TRUE)
               # byvars but exclude functions or `0`+`1` becomes `+`
               tt = if (length(tt)) tt[1L] else all.vars(bysubl[[jj+1L]])[1L]
-            }
+            } # dummy comment to help visual diff show just indent changes; can delete this dummy comment
             # fix for #497
             if (length(byvars) > 1L && tt %chin% all.vars(jsub, FALSE)) {
               bynames[jj] = deparse(bysubl[[jj+1L]])
               if (verbose)
                 cat("by-expression '", bynames[jj], "' is not named, and the auto-generated name '", tt,
                     "' clashed with variable(s) in j. Therefore assigning the entire by-expression as name.\n", sep="")
-            }
+            } # dummy comment to help visual diff show just indent changes; can delete this dummy comment
             else bynames[jj] = tt
             # if user doesn't like this inferred name, user has to use by=list() to name the column
           }

From 22651b27dae86c09fcfe9f063dd2fd71010cc667 Mon Sep 17 00:00:00 2001
From: mattdowle <mattjdowle@gmail.com>
Date: Wed, 11 Sep 2019 19:00:48 -0700
Subject: [PATCH 3/3] remove dummy comments

---
 R/data.table.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/data.table.R b/R/data.table.R
index 7c87fefb4f..a8498899f5 100644
--- a/R/data.table.R
+++ b/R/data.table.R
@@ -830,14 +830,14 @@ replace_dot_alias = function(e) {
               tt = grep("^eval$|^[^[:alpha:]. ]", byvars, invert=TRUE, value=TRUE)
               # byvars but exclude functions or `0`+`1` becomes `+`
               tt = if (length(tt)) tt[1L] else all.vars(bysubl[[jj+1L]])[1L]
-            } # dummy comment to help visual diff show just indent changes; can delete this dummy comment
+            }
             # fix for #497
             if (length(byvars) > 1L && tt %chin% all.vars(jsub, FALSE)) {
               bynames[jj] = deparse(bysubl[[jj+1L]])
               if (verbose)
                 cat("by-expression '", bynames[jj], "' is not named, and the auto-generated name '", tt,
                     "' clashed with variable(s) in j. Therefore assigning the entire by-expression as name.\n", sep="")
-            } # dummy comment to help visual diff show just indent changes; can delete this dummy comment
+            }
             else bynames[jj] = tt
             # if user doesn't like this inferred name, user has to use by=list() to name the column
           }