From 0b2e058e0f05e4d7fb51223b8e912266a2d95260 Mon Sep 17 00:00:00 2001
From: Michael Chirico <michael.chirico@grabtaxi.com>
Date: Fri, 30 Aug 2019 02:55:43 +0800
Subject: [PATCH 01/13] clean up use of vapply

---
 R/data.table.R       | 21 +++++++++++----------
 R/fread.R            |  4 ++--
 R/groupingsets.R     |  2 +-
 R/print.data.table.R |  2 +-
 R/setops.R           |  5 ++---
 5 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/R/data.table.R b/R/data.table.R
index 43541e9afb..9e8868028e 100644
--- a/R/data.table.R
+++ b/R/data.table.R
@@ -76,7 +76,7 @@ data.table = function(..., keep.rownames=FALSE, check.names=FALSE, key=NULL, str
     }
   }
   if (isTRUE(stringsAsFactors)) {
-    for (j in which(vapply(ans, is.character, TRUE))) set(ans, NULL, j, as_factor(.subset2(ans, j)))
+    for (j in which(vapply_1b(ans, is.character))) set(ans, NULL, j, as_factor(.subset2(ans, j)))
     # as_factor is internal function in fread.R currently
   }
   alloc.col(ans)  # returns a NAMED==0 object, unlike data.frame()
@@ -881,7 +881,7 @@ replace_order = function(isub, verbose, env) {
           jvnames = gsub("^[.](N|I|GRP|BY)$","\\1",as.character(jsub))
           # jsub is list()ed after it's eval'd inside dogroups.
         }
-      } else if (is.call(jsub) && as.character(jsub[[1L]])[[1L]] %chin% c("list",".")) {
+      } else if (is.call(jsub) && as.character(jsub[[1L]])[1L] %chin% c("list", ".")) {
         jsub[[1L]] = quote(list)
         jsubl = as.list.default(jsub)  # TO DO: names(jsub) and names(jsub)="" seem to work so make use of that
         if (length(jsubl)>1L) {
@@ -1177,6 +1177,7 @@ replace_order = function(isub, verbose, env) {
     assign(sym, get(getName, parent.frame()), SDenv)
   }
   # hash=TRUE (the default) does seem better as expected using e.g. test 645.  TO DO experiment with 'size' argument
+  #browser()
   if (missingby || bynull || (!byjoin && !length(byval))) {
     # No grouping: 'by' = missing | NULL | character() | "" | list()
     # Considered passing a one-group to dogroups but it doesn't do the recycling of i within group, that's done here
@@ -1310,8 +1311,8 @@ replace_order = function(isub, verbose, env) {
         jval = data.table(jval) # TO DO: should this be setDT(list(jval)) instead?
       } else {
         if (is.null(jvnames)) jvnames=names(jval)
-        lenjval = vapply(jval, length, 0L)
-        nulljval = vapply(jval, is.null, FALSE)
+        lenjval = vapply_1i(jval, length)
+        nulljval = vapply_1b(jval, is.null)
         if (lenjval[1L]==0L || any(lenjval != lenjval[1L])) {
           jval = as.data.table.list(jval)   # does the vector expansion to create equal length vectors, and drops any NULL items
           jvnames = jvnames[!nulljval] # fix for #1477
@@ -1462,7 +1463,7 @@ replace_order = function(isub, verbose, env) {
   lockBinding(".iSD",SDenv)
 
   GForce = FALSE
-  if ( getOption("datatable.optimize")>=1 && (is.call(jsub) || (is.name(jsub) && as.character(jsub)[[1L]] %chin% c(".SD",".N"))) ) {  # Ability to turn off if problems or to benchmark the benefit
+  if ( getOption("datatable.optimize")>=1 && (is.call(jsub) || (is.name(jsub) && as.character(jsub)[1L] %chin% c(".SD", ".N"))) ) {  # Ability to turn off if problems or to benchmark the benefit
     # Optimization to reduce overhead of calling lapply over and over for each group
     oldjsub = jsub
     funi = 1L # Fix for #985
@@ -1621,7 +1622,7 @@ replace_order = function(isub, verbose, env) {
     if (getOption("datatable.optimize")>=2 && !is.data.table(i) && !byjoin && length(f__) && !length(lhs)) {
       if (!length(ansvars) && !use.I) {
         GForce = FALSE
-        if ( (is.name(jsub) && jsub == ".N") || (is.call(jsub) && length(jsub)==2L && length(as.character(jsub[[1L]])) && as.character(jsub[[1L]])[1L] == "list" && length(as.character(jsub[[2L]])) && as.character(jsub[[2L]])[1L] == ".N") ) {
+        if ( (is.name(jsub) && jsub == ".N") || (is.call(jsub) && length(jsub)==2L && jsub[[1L]]== "list" && jsub[[2L]] == ".N") ) {
           GForce = TRUE
           if (verbose) cat("GForce optimized j to '",deparse(jsub,width.cutoff=200L),"'\n",sep="")
         }
@@ -2315,7 +2316,7 @@ copy = function(x) {
   if (!is.data.table(x)) {
     # fix for #1476. TODO: find if a cleaner fix is possible..
     if (is.list(x)) {
-      anydt = vapply(x, is.data.table, TRUE, USE.NAMES=FALSE)
+      anydt = vapply_1b(x, is.data.table, use.names=FALSE)
       if (sum(anydt)) {
         newx[anydt] = lapply(newx[anydt], function(x) {
           .Call(C_unlock, x)
@@ -2618,7 +2619,7 @@ setDF = function(x, rownames=NULL) {
     }
     x
   } else {
-    n = vapply(x, length, 0L)
+    n = vapply_1i(x, length)
     mn = max(n)
     if (any(n<mn))
       stop("All elements in argument 'x' to 'setDF' must be of same length")
@@ -2694,7 +2695,7 @@ setDT = function(x, keep.rownames=FALSE, key=NULL, check.names=FALSE) {
                                   # fail for NULL columns will give helpful error at that point, #3480 and #3471
       if (inherits(x[[i]], "POSIXlt")) stop("Column ", i, " is of POSIXlt type. Please convert it to POSIXct using as.POSIXct and run setDT again. We do not recommend use of POSIXlt at all because it uses 40 bytes to store one date.")
     }
-    n = vapply(x, length, 0L)
+    n = vapply_1i(x, length)
     n_range = range(n)
     if (n_range[1L] != n_range[2L]) {
       tbl = sort(table(n))
@@ -2909,7 +2910,7 @@ isReallyReal = function(x) {
   }
   if (length(i) == 0L) stop("Internal error in .isFastSubsettable. Please report to data.table developers") # nocov
   ## convert i to data.table with all combinations in rows.
-  if(length(i) > 1L && prod(vapply(i, length, integer(1L))) > 1e4){
+  if(length(i) > 1L && prod(vapply_1i(i, length)) > 1e4){
     ## CJ would result in more than 1e4 rows. This would be inefficient, especially memory-wise #2635
     if (verbose) {cat("Subsetting optimization disabled because the cross-product of RHS values exceeds 1e4, causing memory problems.\n");flush.console()}
     return(NULL)
diff --git a/R/fread.R b/R/fread.R
index 46fa812e7c..707697bfb6 100644
--- a/R/fread.R
+++ b/R/fread.R
@@ -312,9 +312,9 @@ yaml=FALSE, autostart=NA, tmpdir=tempdir())
   if (stringsAsFactors) {
     if (is.double(stringsAsFactors)) { #2025
       should_be_factor = function(v) is.character(v) && uniqueN(v) < nr * stringsAsFactors
-      cols_to_factor = which(vapply(ans, should_be_factor, logical(1L)))
+      cols_to_factor = which(vapply_1b(ans, should_be_factor))
     } else {
-      cols_to_factor = which(vapply(ans, is.character, logical(1L)))
+      cols_to_factor = which(vapply_1b(ans, is.character))
     }
     if (verbose) cat("stringsAsFactors=", stringsAsFactors, " converted ", length(cols_to_factor), " column(s): ", brackify(names(ans)[cols_to_factor]), "\n", sep="")
     for (j in cols_to_factor) set(ans, j=j, value=as_factor(.subset2(ans, j)))
diff --git a/R/groupingsets.R b/R/groupingsets.R
index dadcbb0fe8..c39c6b2096 100644
--- a/R/groupingsets.R
+++ b/R/groupingsets.R
@@ -83,7 +83,7 @@ groupingsets.data.table = function(x, j, by, sets, .SDcols, id = FALSE, jj, ...)
     setcolorder(empty, c("grouping", by, setdiff(names(empty), c("grouping", by))))
   }
   # workaround for rbindlist fill=TRUE on integer64 #1459
-  int64.cols = vapply(empty, inherits, logical(1L), "integer64")
+  int64.cols = vapply_1b(empty, inherits, "integer64")
   int64.cols = names(int64.cols)[int64.cols]
   if (length(int64.cols) && !requireNamespace("bit64", quietly=TRUE))
     stop("Using integer64 class columns require to have 'bit64' package installed.") # nocov
diff --git a/R/print.data.table.R b/R/print.data.table.R
index ac82dcfafa..37c43f5e95 100644
--- a/R/print.data.table.R
+++ b/R/print.data.table.R
@@ -81,7 +81,7 @@ print.data.table = function(x, topn=getOption("datatable.print.topn"),
       factor = "<fctr>", POSIXct = "<POSc>", logical = "<lgcl>",
       IDate = "<IDat>", integer64 = "<i64>", raw = "<raw>",
       expression = "<expr>", ordered = "<ord>")
-    classes = vapply(x, function(col) class(col)[1L], "", USE.NAMES=FALSE)
+    classes = vapply_1c(x, function(col) class(col)[1L], use.names=FALSE)
     abbs = unname(class_abb[classes])
     if ( length(idx <- which(is.na(abbs))) ) abbs[idx] = paste0("<", classes[idx], ">")
     toprint = rbind(abbs, toprint)
diff --git a/R/setops.R b/R/setops.R
index 1dce93702a..0c11032589 100644
--- a/R/setops.R
+++ b/R/setops.R
@@ -41,8 +41,7 @@ funique = function(x) {
   if (!identical(sort(names(x)), sort(names(y)))) stop("x and y must have the same column names")
   if (!identical(names(x), names(y))) stop("x and y must have the same column order")
   bad_types = c("raw", "complex", if (block_list) "list")
-  found = bad_types %chin% c(vapply(x, typeof, FUN.VALUE = ""),
-                             vapply(y, typeof, FUN.VALUE = ""))
+  found = bad_types %chin% c(vapply_1c(x, typeof), vapply_1c(y, typeof))
   if (any(found)) stop("unsupported column type", if (sum(found) > 1L) "s" else "",
                        " found in x or y: ", brackify(bad_types[found]))
   if (!identical(lapply(x, class), lapply(y, class))) stop("x and y must have the same column classes")
@@ -168,7 +167,7 @@ all.equal.data.table = function(target, current, trim.levels=TRUE, check.attribu
   if (ignore.row.order) {
     if (".seqn" %chin% names(target))
       stop("None of the datasets to compare should contain a column named '.seqn'")
-    bad.type = setNames(c("raw","complex","list") %chin% c(vapply(current, typeof, FUN.VALUE = ""), vapply(target, typeof, FUN.VALUE = "")), c("raw","complex","list"))
+    bad.type = setNames(c("raw","complex","list") %chin% c(vapply_1c(current, typeof), vapply_1c(target, typeof)), c("raw","complex","list"))
     if (any(bad.type))
       stop("Datasets to compare with 'ignore.row.order' must not have unsupported column types: ", brackify(names(bad.type)[bad.type]))
     if (between(tolerance, 0, sqrt(.Machine$double.eps), incbounds=FALSE)) {

From b5b014fd24cbc8357b0e149d044a1b12c588aecc Mon Sep 17 00:00:00 2001
From: Michael Chirico <michael.chirico@grabtaxi.com>
Date: Fri, 30 Aug 2019 04:05:48 +0800
Subject: [PATCH 02/13] Automatic naming of j output is more powerful

---
 NEWS.md               |  2 ++
 R/data.table.R        | 54 +++++++++++++++++++++++++++++--------------
 inst/tests/tests.Rraw | 11 +++++++--
 3 files changed, 48 insertions(+), 19 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index 0b762ca3db..a278b82948 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -164,6 +164,8 @@
 
 24. More efficient optimization of many columns in `j` (e.g. from `.SD`), [#1470](https://github.com/Rdatatable/data.table/issues/1470). Thanks @Jorges1000 for the report.
 
+25. Auto-naming in `j` now handles more complicated expressions (e.g. in `{` or `if`) and is hence more consistent, [#2478](https://github.com/Rdatatable/data.table/issues/2478).
+
 #### BUG FIXES
 
 1. `first`, `last`, `head` and `tail` by group no longer error in some cases, [#2030](https://github.com/Rdatatable/data.table/issues/2030) [#3462](https://github.com/Rdatatable/data.table/issues/3462). Thanks to @franknarf1 for reporting.
diff --git a/R/data.table.R b/R/data.table.R
index 9e8868028e..e1679cf83c 100644
--- a/R/data.table.R
+++ b/R/data.table.R
@@ -874,30 +874,16 @@ replace_order = function(isub, verbose, env) {
         setattr(byval, "names", bynames)  # byval is just a list not a data.table hence setattr not setnames
       }
 
+      #browser()
       jvnames = NULL
+      env = environment()
       if (is.name(jsub)) {
         # j is a single unquoted column name
         if (jsub!=".SD") {
           jvnames = gsub("^[.](N|I|GRP|BY)$","\\1",as.character(jsub))
           # jsub is list()ed after it's eval'd inside dogroups.
         }
-      } else if (is.call(jsub) && as.character(jsub[[1L]])[1L] %chin% c("list", ".")) {
-        jsub[[1L]] = quote(list)
-        jsubl = as.list.default(jsub)  # TO DO: names(jsub) and names(jsub)="" seem to work so make use of that
-        if (length(jsubl)>1L) {
-          jvnames = names(jsubl)[-1L]   # check list(a=sum(v),v)
-          if (is.null(jvnames)) jvnames = rep.int("", length(jsubl)-1L)
-          for (jj in seq.int(2L,length(jsubl))) {
-            if (jvnames[jj-1L] == "" && mode(jsubl[[jj]])=="name") {
-              if (jsubl[[jj]]=="") stop("Item ", jj-1L, " of the .() or list() passed to j is missing") #3507
-              jvnames[jj-1L] = gsub("^[.](N|I|GRP|BY)$", "\\1", deparse(jsubl[[jj]]))
-            }
-            # TO DO: if call to a[1] for example, then call it 'a' too
-          }
-          setattr(jsubl, "names", NULL)  # drops the names from the list so it's faster to eval the j for each group. We'll put them back afterwards on the result.
-          jsub = as.call(jsubl)
-        } # else empty list is needed for test 468: adding an empty list column
-      } # else maybe a call to transform or something which returns a list.
+      } else jsub = do_j_names(jsub, env) # else maybe a call to transform or something which returns a list.
       av = all.vars(jsub,TRUE)  # TRUE fixes bug #1294 which didn't see b in j=fns[[b]](c)
       use.I = ".I" %chin% av
       if (any(c(".SD","eval","get","mget") %chin% av)) {
@@ -3071,3 +3057,37 @@ isReallyReal = function(x) {
   names(on) = xCols
   return(list(on = on, ops = idx_op))
 }
+
+# function to handle auto-naming of j for potentially complicated expressions, #2478
+do_j_names = function(q, env) {
+  if (!is.call(q) || !is.name(q[[1L]])) return(q)
+  if (as.character(q[[1L]]) %chin% c('list', '.')) {
+    q[[1L]] = quote(list)
+    qlen = length(q)
+    if (qlen>1L) {
+      nm = names(q[-1L])   # check list(a=sum(v),v)
+      if (is.null(nm)) nm = rep.int("", qlen-1L)
+      for (jj in seq.int(2L, qlen)) {
+        if (nm[jj-1L] == "" && is.name(q[[jj]])) {
+          if (q[[jj]] == "") stop("Item ", jj-1L, " of the .() or list() passed to j is missing") #3507
+          nm[jj-1L] = gsub("^[.](N|I|GRP|BY)$", "\\1", deparse(q[[jj]]))
+        }
+        # TO DO: if call to a[1] for example, then call it 'a' too
+      }
+      assign('jvnames', nm, envir=env) # TODO: handle if() list(a, b) else list(b, a) better
+      setattr(q, "names", NULL)  # drops the names from the list so it's faster to eval the j for each group. We'll put them back afterwards on the result.
+    }
+    return(q) # else empty list is needed for test 468: adding an empty list column
+  }
+  if (q[[1L]] == '{') {
+    q[[length(q)]] = do_j_names(q[[length(q)]], env)
+    return(q)
+  }
+  if (q[[1L]] == 'if') {
+    #explicit NULL would return NULL, assigning NULL would delete that from the expression
+    if (!is.null(q[[3L]])) q[[3L]] = do_j_names(q[[3L]], env)
+    if (length(q) == 4L && !is.null(q[[4L]])) q[[4L]] = do_j_names(q[[4L]], env)
+    return(q)
+  }
+  return(q)
+}
diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 8a308c19c8..e5e0c870a8 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -10218,11 +10218,11 @@ test(1725, DT, data.table(a=1:3, add0=NA_real_, add1=c(NA,NA,1.1), add2=c(NA,NA,
 DT = data.table(grp=rep(3:1,each=3), val=1:9)
 lastGrp = 0L
 test(1726.1, DT[, {ans=mean(val)+lastGrp; lastGrp<<-min(val); .(ans, .GRP)}, keyby=grp],
-           data.table(grp=1:3, V1=c(8,12,6), V2=1:3, key="grp") )
+           data.table(grp=1:3, ans=c(8,12,6), GRP=1:3, key="grp") )
 test(1726.2, lastGrp, 1L)
 lastGrp = -1L
 test(1726.3, DT[, {ans=mean(val)+lastGrp; lastGrp<<-min(val); .(ans, .GRP)}, by=grp],
-           data.table(grp=3:1, V1=c(1,6,12), V2=1:3) )
+           data.table(grp=3:1, ans=c(1,6,12), GRP=1:3) )
 test(1726.4, lastGrp, 7L)
 rm(lastGrp)
 
@@ -15828,6 +15828,13 @@ df2 = data.table(c=c(1, 1, 2, 2, 3), d=c(3, 4, 3, 5, 4))
 test(2092.3, copy(df2)[ , s := df1[.SD, on=.(a >= c, a <= d), sum(b), by=.EACHI]$V1],
      df2[ , s := c(1, 1, 1, 3, 1)])
 
+# #2478 auto-name j even in complicated expressions
+DT = data.table(a = c(1, 1, 2), b = 4:6)
+test(2093.1, DT[ , {b = b; .(a, b = b + 1)}], DT[ , .(a, b=b+1)])
+test(2093.2, DT[ , {{{b = b; .(a, b = b + 1)}}}], DT[ , .(a, b=b+1)])
+test(2093.3, DT[ , if (.N > 1L) .(b), by=a], DT[1:2])
+
+
 ###################################
 #  Add new tests above this line  #
 ###################################

From 247cbcf522ba4ccfc7689f7197c7a748c569fea3 Mon Sep 17 00:00:00 2001
From: Michael Chirico <michael.chirico@grabtaxi.com>
Date: Fri, 30 Aug 2019 04:06:56 +0800
Subject: [PATCH 03/13] remove browser()

---
 R/data.table.R | 1 -
 1 file changed, 1 deletion(-)

diff --git a/R/data.table.R b/R/data.table.R
index e1679cf83c..0c530effb1 100644
--- a/R/data.table.R
+++ b/R/data.table.R
@@ -874,7 +874,6 @@ replace_order = function(isub, verbose, env) {
         setattr(byval, "names", bynames)  # byval is just a list not a data.table hence setattr not setnames
       }
 
-      #browser()
       jvnames = NULL
       env = environment()
       if (is.name(jsub)) {

From b5c736afcf65d279177be5d770f643019715bd95 Mon Sep 17 00:00:00 2001
From: Michael Chirico <michael.chirico@grabtaxi.com>
Date: Fri, 30 Aug 2019 04:26:29 +0800
Subject: [PATCH 04/13] merge master & increase test #s

---
 inst/tests/tests.Rraw | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 8d1373ca65..e61663143d 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -15827,7 +15827,7 @@ df1 = data.table(a=1:5, b=c(0, 0, 1, 0, 2))
 df2 = data.table(c=c(1, 1, 2, 2, 3), d=c(3, 4, 3, 5, 4))
 test(2092.3, copy(df2)[ , s := df1[.SD, on=.(a >= c, a <= d), sum(b), by=.EACHI]$V1],
      df2[ , s := c(1, 1, 1, 3, 1)])
-     
+
 # POSIXct overflow to NA before 1901 and after 2038, #3780
 date=as.POSIXct("1900-01-01", tz="UTC")
 test(2093.1, as.IDate(date), as.IDate(-25567L))
@@ -15835,12 +15835,11 @@ test(2093.2, hour(date), 0L)
 test(2093.3, minute(date), 0L)
 test(2093.4, second(date), 0L)
 
-
 # #2478 auto-name j even in complicated expressions
 DT = data.table(a = c(1, 1, 2), b = 4:6)
-test(2093.1, DT[ , {b = b; .(a, b = b + 1)}], DT[ , .(a, b=b+1)])
-test(2093.2, DT[ , {{{b = b; .(a, b = b + 1)}}}], DT[ , .(a, b=b+1)])
-test(2093.3, DT[ , if (.N > 1L) .(b), by=a], DT[1:2])
+test(2094.1, DT[ , {b = b; .(a, b = b + 1)}], DT[ , .(a, b=b+1)])
+test(2094.2, DT[ , {{{b = b; .(a, b = b + 1)}}}], DT[ , .(a, b=b+1)])
+test(2094.3, DT[ , if (.N > 1L) .(b), by=a], DT[1:2])
 
 
 ###################################

From 3e29d6158b878d446e79d0d2d0de9e05448a4741 Mon Sep 17 00:00:00 2001
From: Michael Chirico <michael.chirico@grabtaxi.com>
Date: Sat, 31 Aug 2019 02:37:48 +0800
Subject: [PATCH 05/13] another refactor for readability&efficieny, also handle
 if/else mismatch

---
 R/data.table.R        | 86 +++++++++++++++++++++++--------------------
 inst/tests/tests.Rraw |  2 +
 2 files changed, 48 insertions(+), 40 deletions(-)

diff --git a/R/data.table.R b/R/data.table.R
index 0c530effb1..4fba57da8b 100644
--- a/R/data.table.R
+++ b/R/data.table.R
@@ -875,14 +875,54 @@ replace_order = function(isub, verbose, env) {
       }
 
       jvnames = NULL
-      env = environment()
+      DOT_REGEX = "^[.](N|I|GRP|BY)$"
+      # function to handle auto-naming of j for potentially complicated expressions, #2478
+      #   defined here to have [.data.table as a parent
+      #   j-ending (ends of if/else or {}) for expression like
+      #     list(a=sum(v), v)
+      #   should create columns named a, v
+      do_j_names = function(q) {
+        if (!is.call(q) || !is.name(q[[1L]])) return(q)
+        if (as.character(q[[1L]]) %chin% c('list', '.')) {
+          q[[1L]] = quote(list)
+          qlen = length(q)
+          if (qlen>1L) {
+            nm = names(q[-1L])   # check list(a=sum(v),v)
+            if (is.null(nm)) nm = rep.int("", qlen-1L)
+            # attempt to auto-name unnamed columns
+            idx = which(!nzchar(nm))
+            for (jj in idx) {
+              thisq = q[[jj + 1L]]
+              if (missing(thisq)) stop("Item ", jj, " of the .() or list() passed to j is missing") #3507
+              if (is.name(thisq)) nm[jj] = deparse(thisq, width.cutoff = 50L, backtick = FALSE, nlines = 1L)
+              # TO DO: if call to a[1] for example, then call it 'a' too
+            }
+            nm[idx] = gsub(DOT_REGEX, "\\1", nm[idx])
+            if (!is.null(jvnames) && any(idx <- nm != jvnames))
+              warning("Different branches of j expression produced different auto-named columns: ", brackify(sprintf('%s!=%s', nm[idx], jvnames[idx])), '; using the most "last" names', call. = FALSE)
+            jvnames <<- nm # TODO: handle if() list(a, b) else list(b, a) better
+            setattr(q, "names", NULL)  # drops the names from the list so it's faster to eval the j for each group. We'll put them back afterwards on the result.
+          }
+          return(q) # else empty list is needed for test 468: adding an empty list column
+        }
+        if (q[[1L]] == '{') {
+          q[[length(q)]] = do_j_names(q[[length(q)]])
+          return(q)
+        }
+        if (q[[1L]] == 'if') {
+          #explicit NULL would return NULL, assigning NULL would delete that from the expression
+          if (!is.null(q[[3L]])) q[[3L]] = do_j_names(q[[3L]])
+          if (length(q) == 4L && !is.null(q[[4L]])) q[[4L]] = do_j_names(q[[4L]])
+          return(q)
+        }
+        return(q)
+      }
+
       if (is.name(jsub)) {
         # j is a single unquoted column name
-        if (jsub!=".SD") {
-          jvnames = gsub("^[.](N|I|GRP|BY)$","\\1",as.character(jsub))
-          # jsub is list()ed after it's eval'd inside dogroups.
-        }
-      } else jsub = do_j_names(jsub, env) # else maybe a call to transform or something which returns a list.
+        if (jsub!=".SD") jvnames = gsub(DOT_REGEX, "\\1", jsub)
+        # jsub is list()ed after it's eval'd inside dogroups.
+      } else jsub = do_j_names(jsub) # else maybe a call to transform or something which returns a list.
       av = all.vars(jsub,TRUE)  # TRUE fixes bug #1294 which didn't see b in j=fns[[b]](c)
       use.I = ".I" %chin% av
       if (any(c(".SD","eval","get","mget") %chin% av)) {
@@ -3056,37 +3096,3 @@ isReallyReal = function(x) {
   names(on) = xCols
   return(list(on = on, ops = idx_op))
 }
-
-# function to handle auto-naming of j for potentially complicated expressions, #2478
-do_j_names = function(q, env) {
-  if (!is.call(q) || !is.name(q[[1L]])) return(q)
-  if (as.character(q[[1L]]) %chin% c('list', '.')) {
-    q[[1L]] = quote(list)
-    qlen = length(q)
-    if (qlen>1L) {
-      nm = names(q[-1L])   # check list(a=sum(v),v)
-      if (is.null(nm)) nm = rep.int("", qlen-1L)
-      for (jj in seq.int(2L, qlen)) {
-        if (nm[jj-1L] == "" && is.name(q[[jj]])) {
-          if (q[[jj]] == "") stop("Item ", jj-1L, " of the .() or list() passed to j is missing") #3507
-          nm[jj-1L] = gsub("^[.](N|I|GRP|BY)$", "\\1", deparse(q[[jj]]))
-        }
-        # TO DO: if call to a[1] for example, then call it 'a' too
-      }
-      assign('jvnames', nm, envir=env) # TODO: handle if() list(a, b) else list(b, a) better
-      setattr(q, "names", NULL)  # drops the names from the list so it's faster to eval the j for each group. We'll put them back afterwards on the result.
-    }
-    return(q) # else empty list is needed for test 468: adding an empty list column
-  }
-  if (q[[1L]] == '{') {
-    q[[length(q)]] = do_j_names(q[[length(q)]], env)
-    return(q)
-  }
-  if (q[[1L]] == 'if') {
-    #explicit NULL would return NULL, assigning NULL would delete that from the expression
-    if (!is.null(q[[3L]])) q[[3L]] = do_j_names(q[[3L]], env)
-    if (length(q) == 4L && !is.null(q[[4L]])) q[[4L]] = do_j_names(q[[4L]], env)
-    return(q)
-  }
-  return(q)
-}
diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index e61663143d..24af96ddfe 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -15840,6 +15840,8 @@ DT = data.table(a = c(1, 1, 2), b = 4:6)
 test(2094.1, DT[ , {b = b; .(a, b = b + 1)}], DT[ , .(a, b=b+1)])
 test(2094.2, DT[ , {{{b = b; .(a, b = b + 1)}}}], DT[ , .(a, b=b+1)])
 test(2094.3, DT[ , if (.N > 1L) .(b), by=a], DT[1:2])
+test(2094.4, DT[ , if (.N > 1L) .(b) else .(c=b), by=a], DT[ , .(a, c=b)],
+     warning="Different branches of j expression produced different auto-named columns")
 
 
 ###################################

From 80adb9a89ea5f84a82ed7725a4e068d3da23a319 Mon Sep 17 00:00:00 2001
From: Michael Chirico <michael.chirico@grabtaxi.com>
Date: Sat, 31 Aug 2019 06:11:25 +0800
Subject: [PATCH 06/13] order->forder replacement suffered the same issue with
 needing env, may as well solve here

---
 R/data.table.R | 47 ++++++++++++++++++++++-------------------------
 1 file changed, 22 insertions(+), 25 deletions(-)

diff --git a/R/data.table.R b/R/data.table.R
index 4fba57da8b..1c2dec953d 100644
--- a/R/data.table.R
+++ b/R/data.table.R
@@ -120,26 +120,6 @@ replace_dot_alias = function(e) {
   }
 }
 
-# replace order -> forder wherever it appears in i
-replace_order = function(isub, verbose, env) {
-  if (length(isub) == 1L) return(isub)
-  for (ii in seq_along(isub)) {
-    isub_el = isub[[ii]]
-    if (missing(isub_el)) break
-    if (is.name(isub_el)) {
-      # stop base::order from becoming forder(x, base, order)
-      if (isub_el == '::') break
-      if (isub_el == 'order') {
-        if (verbose) cat("order optimisation is on, changed 'order(...)' in i to 'forder(x, ...)'.\n")
-        env$eval_forder = TRUE
-        return(as.call(c(list(quote(forder), quote(x)), as.list(isub)[-1L])))
-      }
-    }
-    if (is.call(isub_el)) isub[[ii]] = replace_order(isub_el, verbose, env)
-  }
-  return(isub)
-}
-
 "[.data.table" = function (x, i, j, by, keyby, with=TRUE, nomatch=getOption("datatable.nomatch", NA), mult="all", roll=FALSE, rollends=if (roll=="nearest") c(TRUE,TRUE) else if (roll>=0) c(FALSE,TRUE) else c(TRUE,FALSE), which=FALSE, .SDcols, verbose=getOption("datatable.verbose"), allow.cartesian=getOption("datatable.allow.cartesian"), drop=NULL, on=NULL)
 {
   # ..selfcount <<- ..selfcount+1  # in dev, we check no self calls, each of which doubles overhead, or could
@@ -351,12 +331,29 @@ replace_order = function(isub, verbose, env) {
     if (is.null(isub)) return( null.data.table() )
 
     # optimize here so that we can switch it off if needed
-    check_eval_env = environment()
-    check_eval_env$eval_forder = FALSE
-    if (getOption("datatable.optimize") >= 1) {
-      isub = replace_order(isub, verbose, check_eval_env)
+    # replace order -> forder wherever it appears in i
+    #   replace_order defined here to have right env inheritance
+    replace_order = function(isub, verbose) {
+      if (length(isub) == 1L) return(isub)
+      for (ii in seq_along(isub)) {
+        isub_el = isub[[ii]]
+        if (missing(isub_el)) break
+        if (is.name(isub_el)) {
+          # stop base::order from becoming forder(x, base, order)
+          if (isub_el == '::') break
+          if (isub_el == 'order') {
+            if (verbose) cat("order optimisation is on, changed 'order(...)' in i to 'forder(x, ...)'.\n")
+            eval_forder = TRUE
+            return(as.call(c(list(quote(forder), quote(x)), as.list(isub)[-1L])))
+          }
+        }
+        if (is.call(isub_el)) isub[[ii]] = replace_order(isub_el, verbose)
+      }
+      return(isub)
     }
-    if (check_eval_env$eval_forder) {
+    eval_forder = FALSE
+    if (getOption("datatable.optimize") >= 1) isub = replace_order(isub, verbose)
+    if (eval_forder) {
       order_env = new.env(parent=parent.frame())            # until 'forder' is exported
       assign("forder", forder, order_env)
       assign("x", x, order_env)

From a7e1c4bd5bbb2385ef9b86acba8248d495efa461 Mon Sep 17 00:00:00 2001
From: Michael Chirico <michael.chirico@grabtaxi.com>
Date: Sat, 31 Aug 2019 22:53:06 +0800
Subject: [PATCH 07/13] remove <<<

---
 R/data.table.R | 1 -
 1 file changed, 1 deletion(-)

diff --git a/R/data.table.R b/R/data.table.R
index 78194253ad..7f1145c247 100644
--- a/R/data.table.R
+++ b/R/data.table.R
@@ -331,7 +331,6 @@ replace_dot_alias = function(e) {
     if (is.null(isub)) return( null.data.table() )
 
     # optimize here so that we can switch it off if needed
-<<<<<<< HEAD
     # replace order -> forder wherever it appears in i
     #   replace_order defined here to have right env inheritance
     replace_order = function(isub, verbose) {

From ed77e1c4ec5dde152101a7f0c40f5547b87693cd Mon Sep 17 00:00:00 2001
From: Michael Chirico <michael.chirico@grabtaxi.com>
Date: Sat, 31 Aug 2019 22:53:46 +0800
Subject: [PATCH 08/13] remove browser()

---
 R/data.table.R | 1 -
 1 file changed, 1 deletion(-)

diff --git a/R/data.table.R b/R/data.table.R
index 7f1145c247..b88c3891a9 100644
--- a/R/data.table.R
+++ b/R/data.table.R
@@ -1199,7 +1199,6 @@ replace_dot_alias = function(e) {
     assign(sym, get(getName, parent.frame()), SDenv)
   }
   # hash=TRUE (the default) does seem better as expected using e.g. test 645.  TO DO experiment with 'size' argument
-  #browser()
   if (missingby || bynull || (!byjoin && !length(byval))) {
     # No grouping: 'by' = missing | NULL | character() | "" | list()
     # Considered passing a one-group to dogroups but it doesn't do the recycling of i within group, that's done here

From 1609991b7a1b2f4555a015ff15f9d0cd968b52bd Mon Sep 17 00:00:00 2001
From: Michael Chirico <michael.chirico@grabtaxi.com>
Date: Sat, 31 Aug 2019 23:04:33 +0800
Subject: [PATCH 09/13] need <<- not =

---
 R/data.table.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/data.table.R b/R/data.table.R
index b88c3891a9..291c2d368f 100644
--- a/R/data.table.R
+++ b/R/data.table.R
@@ -343,7 +343,7 @@ replace_dot_alias = function(e) {
           if (isub_el == '::') break
           if (isub_el == 'order') {
             if (verbose) cat("order optimisation is on, changed 'order(...)' in i to 'forder(x, ...)'.\n")
-            eval_forder = TRUE
+            eval_forder <<- TRUE
             return(as.call(c(list(quote(forder), quote(x)), as.list(isub)[-1L])))
           }
         }

From b4b8d3add10d07ff377272cbad1b09994167c096 Mon Sep 17 00:00:00 2001
From: mattdowle <mattjdowle@gmail.com>
Date: Thu, 7 Nov 2019 17:48:19 -0800
Subject: [PATCH 10/13] further merge resolution

---
 R/data.table.R | 14 ++------------
 1 file changed, 2 insertions(+), 12 deletions(-)

diff --git a/R/data.table.R b/R/data.table.R
index 26d1110d2f..68e8bb76ef 100644
--- a/R/data.table.R
+++ b/R/data.table.R
@@ -338,6 +338,7 @@ replace_dot_alias = function(e) {
     }
 
     if (is.null(isub)) return( null.data.table() )
+
     if (length(o <- .prepareFastSubset(isub = isub, x = x,
                                               enclos =  parent.frame(),
                                               notjoin = notjoin, verbose = verbose))){
@@ -1309,19 +1310,8 @@ replace_dot_alias = function(e) {
           ))
         jval = lapply(jval, `[`, 0L)
       if (is.atomic(jval)) {
-        setattr(jval,"names",NULL) # discard names of named vectors otherwise each cell in the column would have a name
+        setattr(jval,"names",NULL)  # discard names of named vectors otherwise each cell in the column would have a name
         jval = list(jval)
-      } else {
-        if (is.null(jvnames)) jvnames=names(jval)
-        lenjval = vapply_1i(jval, length)
-        nulljval = vapply_1b(jval, is.null)
-        if (lenjval[1L]==0L || any(lenjval != lenjval[1L])) {
-          jval = as.data.table.list(jval)   # does the vector expansion to create equal length vectors, and drops any NULL items
-          jvnames = jvnames[!nulljval] # fix for #1477
-        } else {
-          # all columns same length and at least 1 row; avoid copy. TODO: remove when as.data.table.list is ported to C
-          # setDT(jval)
-        }
       }
       if (!is.null(jvnames) && !all(jvnames=="")) setattr(jval, 'names', jvnames)  # e.g. jvnames=="N" for DT[,.N,]
       jval = as.data.table.list(jval, .named=NULL)

From e825d28e875158c533f70111c42f4e6745f53f36 Mon Sep 17 00:00:00 2001
From: mattdowle <mattjdowle@gmail.com>
Date: Thu, 7 Nov 2019 17:58:13 -0800
Subject: [PATCH 11/13] issue number at the end

---
 inst/tests/tests.Rraw | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 21e51bb76e..62eecec926 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -16370,7 +16370,7 @@ test(2120.07, iDT[(i_id), order(e_date, e_time)], c(3L,4L,1L,2L))  # wrapping wi
 test(2120.08, tmp[iDT[(i_id), order(e_date, e_time)]],             # different result with the NA
               data.table(i_id=c("A",NA,"B","C"), N=c(5L,NA,5L,5L)))
 
-# #2478 auto-name j even in complicated expressions
+# auto-name j even in complicated expressions, #2478
 DT = data.table(a = c(1, 1, 2), b = 4:6)
 test(2121.1, DT[ , {b = b; .(a, b = b + 1)}], DT[ , .(a, b=b+1)])
 test(2121.2, DT[ , {{{b = b; .(a, b = b + 1)}}}], DT[ , .(a, b=b+1)])

From fbe3f0a78c07c44adda7a0df40b529b77361a0d3 Mon Sep 17 00:00:00 2001
From: mattdowle <mattjdowle@gmail.com>
Date: Fri, 8 Nov 2019 14:00:46 -0800
Subject: [PATCH 12/13] news item refined

---
 NEWS.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/NEWS.md b/NEWS.md
index dbb45df4cf..54359e2e0e 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -6,7 +6,7 @@
 
 ## NEW FEATURES
 
-1. Auto-naming in `j` now handles more complicated expressions (e.g. in `{` or `if`), [#2478](https://github.com/Rdatatable/data.table/issues/2478). For example, both `DT[ , .(x, y)]` and `DT[ , {x = 4+x; y=4+y; .(x, y)}]` now return columns named `x` and `y`.
+1. `DT[, {...; .(A,B)}]` (when `.()` is the final item of a multi-statement `{...}`) now auto-names the columns `A` and `B` (just like `DT[, .(A,B)]`) rather than `V1` and `V2`, [#2478](https://github.com/Rdatatable/data.table/issues/2478) [#609](https://github.com/Rdatatable/data.table/issues/609). Similarly, `DT[, if (.N>1) .(B), by=A]` now auto-names the column `B` rather than `V1`. Explicit names are unaffected; e.g. `DT[, {... y= ...; .(A=C+y)}, by=...]` named the result column `A` before, and still does.
 
 ## BUG FIXES
 

From 86c444aa7ad6a0fa76e219b1d851b7189d22fa42 Mon Sep 17 00:00:00 2001
From: mattdowle <mattjdowle@gmail.com>
Date: Wed, 13 Nov 2019 18:39:53 -0800
Subject: [PATCH 13/13] avoid gsub and deparse for a tiny bit of efficiency

---
 NEWS.md               |  2 +-
 R/data.table.R        | 20 ++++++++++----------
 inst/tests/tests.Rraw |  2 +-
 3 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index 54359e2e0e..52b5d76768 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -6,7 +6,7 @@
 
 ## NEW FEATURES
 
-1. `DT[, {...; .(A,B)}]` (when `.()` is the final item of a multi-statement `{...}`) now auto-names the columns `A` and `B` (just like `DT[, .(A,B)]`) rather than `V1` and `V2`, [#2478](https://github.com/Rdatatable/data.table/issues/2478) [#609](https://github.com/Rdatatable/data.table/issues/609). Similarly, `DT[, if (.N>1) .(B), by=A]` now auto-names the column `B` rather than `V1`. Explicit names are unaffected; e.g. `DT[, {... y= ...; .(A=C+y)}, by=...]` named the result column `A` before, and still does.
+1. `DT[, {...; .(A,B)}]` (when `.()` is the final item of a multi-statement `{...}`) now auto-names the columns `A` and `B` (just like `DT[, .(A,B)]`) rather than `V1` and `V2`, [#2478](https://github.com/Rdatatable/data.table/issues/2478) [#609](https://github.com/Rdatatable/data.table/issues/609). Similarly, `DT[, if (.N>1) .(B), by=A]` now auto-names the column `B` rather than `V1`. Explicit names are unaffected; e.g. `DT[, {... y= ...; .(A=C+y)}, by=...]` named the column `A` before, and still does.
 
 ## BUG FIXES
 
diff --git a/R/data.table.R b/R/data.table.R
index 68e8bb76ef..08ef6872b1 100644
--- a/R/data.table.R
+++ b/R/data.table.R
@@ -854,12 +854,13 @@ replace_dot_alias = function(e) {
       }
 
       jvnames = NULL
-      DOT_REGEX = "^[.](N|I|GRP|BY)$"
-      # function to handle auto-naming of j for potentially complicated expressions, #2478
-      #   defined here to have [.data.table as a parent
-      #   j-ending (ends of if/else or {}) for expression like
-      #     list(a=sum(v), v)
-      #   should create columns named a, v
+      drop_dot = function(x) {
+        tt = x %chin% c(".N",".I",".GRP",".BY")
+        if (any(tt)) x[tt] = substring(x[tt], 2L)
+        x
+      }
+      # handle auto-naming of last item of j (e.g. within {} or if/else, #2478)
+      #   e.g. DT[, .(a=sum(v), v, .N), by=] should create columns named a, v, N
       do_j_names = function(q) {
         if (!is.call(q) || !is.name(q[[1L]])) return(q)
         if (as.character(q[[1L]]) %chin% c('list', '.')) {
@@ -873,10 +874,10 @@ replace_dot_alias = function(e) {
             for (jj in idx) {
               thisq = q[[jj + 1L]]
               if (missing(thisq)) stop("Item ", jj, " of the .() or list() passed to j is missing") #3507
-              if (is.name(thisq)) nm[jj] = deparse(thisq, width.cutoff = 50L, backtick = FALSE, nlines = 1L)
+              if (is.name(thisq)) nm[jj] = as.character(thisq)
               # TO DO: if call to a[1] for example, then call it 'a' too
             }
-            nm[idx] = gsub(DOT_REGEX, "\\1", nm[idx])
+            nm[idx] = drop_dot(nm[idx])
             if (!is.null(jvnames) && any(idx <- nm != jvnames))
               warning("Different branches of j expression produced different auto-named columns: ", brackify(sprintf('%s!=%s', nm[idx], jvnames[idx])), '; using the most "last" names', call. = FALSE)
             jvnames <<- nm # TODO: handle if() list(a, b) else list(b, a) better
@@ -896,10 +897,9 @@ replace_dot_alias = function(e) {
         }
         return(q)
       }
-
       if (is.name(jsub)) {
         # j is a single unquoted column name
-        if (jsub!=".SD") jvnames = gsub(DOT_REGEX, "\\1", jsub)
+        if (jsub!=".SD") jvnames = drop_dot(as.character(jsub))
         # jsub is list()ed after it's eval'd inside dogroups.
       } else jsub = do_j_names(jsub) # else maybe a call to transform or something which returns a list.
       av = all.vars(jsub,TRUE)  # TRUE fixes bug #1294 which didn't see b in j=fns[[b]](c)
diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 62eecec926..0c6494d594 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -16370,7 +16370,7 @@ test(2120.07, iDT[(i_id), order(e_date, e_time)], c(3L,4L,1L,2L))  # wrapping wi
 test(2120.08, tmp[iDT[(i_id), order(e_date, e_time)]],             # different result with the NA
               data.table(i_id=c("A",NA,"B","C"), N=c(5L,NA,5L,5L)))
 
-# auto-name j even in complicated expressions, #2478
+# auto-name .() when it's the last item of {...} or wrapped with if(), #2478 #609
 DT = data.table(a = c(1, 1, 2), b = 4:6)
 test(2121.1, DT[ , {b = b; .(a, b = b + 1)}], DT[ , .(a, b=b+1)])
 test(2121.2, DT[ , {{{b = b; .(a, b = b + 1)}}}], DT[ , .(a, b=b+1)])