Rdatatable · mattdowle · Sep 12, 2019 · Sep 3, 2019 · Sep 12, 2019 · Sep 12, 2019
@@ -302,6 +302,8 @@
 
 41. Using `get`/`mget` in `j` could cause `.SDcols` to be ignored or reordered, [#1744](https://github.com/Rdatatable/data.table/issues/1744), [#1965](https://github.com/Rdatatable/data.table/issues/1965), [#2036](https://github.com/Rdatatable/data.table/issues/2036), and [#2946](https://github.com/Rdatatable/data.table/issues/2946). Thanks @franknarf1, @MichaelChirico, @TonyBonen, and Steffen J. (StackOverflow) for the reports.
 
+42. `DT[...,by={...}]` now handles expressions in `{`, [#3156](https://github.com/Rdatatable/data.table/issues/3156). Thanks to @tdhock for the report.
+
 #### NOTES
 
 1. `rbindlist`'s `use.names="check"` now emits its message for automatic column names (`"V[0-9]+"`) too, [#3484](https://github.com/Rdatatable/data.table/pull/3484). See news item 5 of v1.12.2 below.

@@ -818,28 +818,28 @@ replace_dot_alias = function(e) {
         tt = vapply_1i(byval,length)
         if (any(tt!=xnrow)) stop("The items in the 'by' or 'keyby' list are length (",paste(tt,collapse=","),"). Each must be length ", xnrow, "; the same length as there are rows in x (after subsetting if i is provided).")
         if (is.null(bynames)) bynames = rep.int("",length(byval))
-        if (any(bynames=="") && !bynull) {
-          for (jj in seq_along(bynames)) {
-            if (bynames[jj]=="") {
-              # Best guess. Use "month" in the case of by=month(date), use "a" in the case of by=a%%2
-              byvars = all.vars(bysubl[[jj+1L]], functions = TRUE)
-              if (length(byvars) == 1L) tt = byvars
-              else {
-                # take the first variable that is (1) not eval (#3758) and (2) starts with a character that can't start a variable name
-                tt = grep("^eval$|^[^[:alpha:]. ]", byvars, invert=TRUE, value=TRUE)
-                # byvars but exclude functions or `0`+`1` becomes `+`
-                tt = if (length(tt)) tt[1L] else all.vars(bysubl[[jj+1L]])[1L]
-              }
-              # fix for #497
-              if (length(byvars) > 1L && tt %chin% all.vars(jsub, FALSE)) {
-                bynames[jj] = deparse(bysubl[[jj+1L]])
-                if (verbose)
-                  cat("by-expression '", bynames[jj], "' is not named, and the auto-generated name '", tt,
-                      "' clashed with variable(s) in j. Therefore assigning the entire by-expression as name.\n", sep="")
-              }
-              else bynames[jj] = tt
-              # if user doesn't like this inferred name, user has to use by=list() to name the column
+        if (length(idx <- which(!nzchar(bynames))) && !bynull) {
+          # TODO: improve this and unify auto-naming of jsub and bysub
+          if (is.name(bysubl[[1L]]) && bysubl[[1L]] == '{') bysubl = bysubl[[length(bysubl)]] # fix for #3156
+          for (jj in idx) {
+            # Best guess. Use "month" in the case of by=month(date), use "a" in the case of by=a%%2
+            byvars = all.vars(bysubl[[jj+1L]], functions = TRUE)
+            if (length(byvars) == 1L) tt = byvars
+            else {
+              # take the first variable that is (1) not eval (#3758) and (2) starts with a character that can't start a variable name
+              tt = grep("^eval$|^[^[:alpha:]. ]", byvars, invert=TRUE, value=TRUE)
+              # byvars but exclude functions or `0`+`1` becomes `+`
+              tt = if (length(tt)) tt[1L] else all.vars(bysubl[[jj+1L]])[1L]
+            }
+            # fix for #497
+            if (length(byvars) > 1L && tt %chin% all.vars(jsub, FALSE)) {
+              bynames[jj] = deparse(bysubl[[jj+1L]])
+              if (verbose)
+                cat("by-expression '", bynames[jj], "' is not named, and the auto-generated name '", tt,
+                    "' clashed with variable(s) in j. Therefore assigning the entire by-expression as name.\n", sep="")
             }
+            else bynames[jj] = tt
+            # if user doesn't like this inferred name, user has to use by=list() to name the column
           }
           # Fix for #1334
           if (any(duplicated(bynames))) {

@@ -16060,6 +16060,10 @@ test(2108.11, first(df), df, notOutput="xts")
 test(2108.12, tail(df), df, notOutput="xts")
 options(old)
 
+# error in autonaming by={...}, #3156
+DT = data.table(State=c("ERROR", "COMPLETED", "ERROR"), ExitCode=c(1, 0, 2))
+test(2109, DT[ , list(count=.N), by={list(State, ExitCode)}], DT[ , count := 1L])
+
 
 ###################################
 #  Add new tests above this line  #